From b10fb9e5c9342d709385836fead2d48a7487fe7f Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Mon, 14 Nov 2022 23:31:07 +0100 Subject: [PATCH 01/13] Rebase branch into most recent version of master --- datafusion/common/src/scalar.rs | 157 +- .../core/src/physical_plan/joins/hash_join.rs | 30 +- datafusion/core/tests/sql/aggregates.rs | 86 + datafusion/core/tests/sql/group_by.rs | 180 ++ datafusion/core/tests/sql/mod.rs | 93 + datafusion/core/tests/sql/select.rs | 176 ++ datafusion/core/tests/sql/timestamp.rs | 2 +- datafusion/expr/src/type_coercion/binary.rs | 58 +- .../physical-expr/src/aggregate/min_max.rs | 127 +- .../physical-expr/src/datetime_expressions.rs | 2 +- .../physical-expr/src/expressions/binary.rs | 16 + datafusion/proto/proto/datafusion.proto | 17 +- datafusion/proto/src/from_proto.rs | 25 +- datafusion/proto/src/generated/datafusion.rs | 1613 +++++++++++++++++ datafusion/proto/src/generated/pbjson.rs | 237 ++- datafusion/proto/src/generated/prost.rs | 38 +- datafusion/proto/src/lib.rs | 18 +- datafusion/proto/src/to_proto.rs | 44 +- 18 files changed, 2870 insertions(+), 49 deletions(-) create mode 100644 datafusion/proto/src/generated/datafusion.rs diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index d95ba2199ec8d..c66c14bcab9ce 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -89,8 +89,14 @@ pub enum ScalarValue { Date32(Option), /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01 Date64(Option), + /// Time stored as a signed 32bit int as seconds since midnight + Time32Second(Option), + /// Time stored as a signed 32bit int as milliseconds since midnight + Time32Millisecond(Option), + /// Time stored as a signed 64bit int as microseconds since midnight + Time64Microsecond(Option), /// Time stored as a signed 64bit int as nanoseconds since midnight - Time64(Option), + Time64Nanosecond(Option), /// Timestamp Second TimestampSecond(Option, Option), 
/// Timestamp Milliseconds @@ -170,8 +176,14 @@ impl PartialEq for ScalarValue { (Date32(_), _) => false, (Date64(v1), Date64(v2)) => v1.eq(v2), (Date64(_), _) => false, - (Time64(v1), Time64(v2)) => v1.eq(v2), - (Time64(_), _) => false, + (Time32Second(v1), Time32Second(v2)) => v1.eq(v2), + (Time32Second(_), _) => false, + (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.eq(v2), + (Time32Millisecond(_), _) => false, + (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.eq(v2), + (Time64Microsecond(_), _) => false, + (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.eq(v2), + (Time64Nanosecond(_), _) => false, (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2), (TimestampSecond(_, _), _) => false, (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2), @@ -263,8 +275,14 @@ impl PartialOrd for ScalarValue { (Date32(_), _) => None, (Date64(v1), Date64(v2)) => v1.partial_cmp(v2), (Date64(_), _) => None, - (Time64(v1), Time64(v2)) => v1.partial_cmp(v2), - (Time64(_), _) => None, + (Time32Second(v1), Time32Second(v2)) => v1.partial_cmp(v2), + (Time32Second(_), _) => None, + (Time32Millisecond(v1), Time32Millisecond(v2)) => v1.partial_cmp(v2), + (Time32Millisecond(_), _) => None, + (Time64Microsecond(v1), Time64Microsecond(v2)) => v1.partial_cmp(v2), + (Time64Microsecond(_), _) => None, + (Time64Nanosecond(v1), Time64Nanosecond(v2)) => v1.partial_cmp(v2), + (Time64Nanosecond(_), _) => None, (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2), (TimestampSecond(_, _), _) => None, (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => { @@ -670,7 +688,10 @@ impl std::hash::Hash for ScalarValue { } Date32(v) => v.hash(state), Date64(v) => v.hash(state), - Time64(v) => v.hash(state), + Time32Second(v) => v.hash(state), + Time32Millisecond(v) => v.hash(state), + Time64Microsecond(v) => v.hash(state), + Time64Nanosecond(v) => v.hash(state), TimestampSecond(v, _) => v.hash(state), TimestampMillisecond(v, _) => 
v.hash(state), TimestampMicrosecond(v, _) => v.hash(state), @@ -1036,7 +1057,10 @@ impl ScalarValue { ))), ScalarValue::Date32(_) => DataType::Date32, ScalarValue::Date64(_) => DataType::Date64, - ScalarValue::Time64(_) => DataType::Time64(TimeUnit::Nanosecond), + ScalarValue::Time32Second(_) => DataType::Time32(TimeUnit::Second), + ScalarValue::Time32Millisecond(_) => DataType::Time32(TimeUnit::Millisecond), + ScalarValue::Time64Microsecond(_) => DataType::Time64(TimeUnit::Microsecond), + ScalarValue::Time64Nanosecond(_) => DataType::Time64(TimeUnit::Nanosecond), ScalarValue::IntervalYearMonth(_) => { DataType::Interval(IntervalUnit::YearMonth) } @@ -1120,7 +1144,10 @@ impl ScalarValue { ScalarValue::List(v, _) => v.is_none(), ScalarValue::Date32(v) => v.is_none(), ScalarValue::Date64(v) => v.is_none(), - ScalarValue::Time64(v) => v.is_none(), + ScalarValue::Time32Second(v) => v.is_none(), + ScalarValue::Time32Millisecond(v) => v.is_none(), + ScalarValue::Time64Microsecond(v) => v.is_none(), + ScalarValue::Time64Nanosecond(v) => v.is_none(), ScalarValue::TimestampSecond(v, _) => v.is_none(), ScalarValue::TimestampMillisecond(v, _) => v.is_none(), ScalarValue::TimestampMicrosecond(v, _) => v.is_none(), @@ -1380,8 +1407,17 @@ impl ScalarValue { DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary), DataType::Date32 => build_array_primitive!(Date32Array, Date32), DataType::Date64 => build_array_primitive!(Date64Array, Date64), + DataType::Time32(TimeUnit::Second) => { + build_array_primitive!(Time32SecondArray, Time32Second) + } + DataType::Time32(TimeUnit::Millisecond) => { + build_array_primitive!(Time32MillisecondArray, Time32Millisecond) + } + DataType::Time64(TimeUnit::Microsecond) => { + build_array_primitive!(Time64MicrosecondArray, Time64Microsecond) + } DataType::Time64(TimeUnit::Nanosecond) => { - build_array_primitive!(Time64NanosecondArray, Time64) + build_array_primitive!(Time64NanosecondArray, Time64Nanosecond) } 
DataType::Timestamp(TimeUnit::Second, _) => { build_array_primitive_tz!(TimestampSecondArray, TimestampSecond) @@ -1541,10 +1577,15 @@ impl ScalarValue { Arc::new(array) } // explicitly enumerate unsupported types so newly added - // types must be aknowledged + // types must be acknowledged. Time32 and Time64 types are + // not supported if the TimeUnit is not valid (Time32 can + // only be used with Second and Millisecond, Time64 only + // with Microsecond and Nanosecond) DataType::Float16 - | DataType::Time32(_) - | DataType::Time64(_) + | DataType::Time32(TimeUnit::Microsecond) + | DataType::Time32(TimeUnit::Nanosecond) + | DataType::Time64(TimeUnit::Second) + | DataType::Time64(TimeUnit::Millisecond) | DataType::Duration(_) | DataType::FixedSizeList(_, _) | DataType::Interval(_) @@ -1809,7 +1850,34 @@ impl ScalarValue { ScalarValue::Date64(e) => { build_array_from_option!(Date64, Date64Array, e, size) } - ScalarValue::Time64(e) => { + ScalarValue::Time32Second(e) => { + build_array_from_option!( + Time32, + TimeUnit::Second, + Time32SecondArray, + e, + size + ) + } + ScalarValue::Time32Millisecond(e) => { + build_array_from_option!( + Time32, + TimeUnit::Millisecond, + Time32MillisecondArray, + e, + size + ) + } + ScalarValue::Time64Microsecond(e) => { + build_array_from_option!( + Time64, + TimeUnit::Microsecond, + Time64MicrosecondArray, + e, + size + ) + } + ScalarValue::Time64Nanosecond(e) => { build_array_from_option!( Time64, TimeUnit::Nanosecond, @@ -1953,8 +2021,17 @@ impl ScalarValue { DataType::Date64 => { typed_cast!(array, index, Date64Array, Date64) } + DataType::Time32(TimeUnit::Second) => { + typed_cast!(array, index, Time32SecondArray, Time32Second) + } + DataType::Time32(TimeUnit::Millisecond) => { + typed_cast!(array, index, Time32MillisecondArray, Time32Millisecond) + } + DataType::Time64(TimeUnit::Microsecond) => { + typed_cast!(array, index, Time64MicrosecondArray, Time64Microsecond) + } DataType::Time64(TimeUnit::Nanosecond) => { - 
typed_cast!(array, index, Time64NanosecondArray, Time64) + typed_cast!(array, index, Time64NanosecondArray, Time64Nanosecond) } DataType::Timestamp(TimeUnit::Second, tz_opt) => { typed_cast_tz!( @@ -2159,7 +2236,16 @@ impl ScalarValue { ScalarValue::Date64(val) => { eq_array_primitive!(array, index, Date64Array, val) } - ScalarValue::Time64(val) => { + ScalarValue::Time32Second(val) => { + eq_array_primitive!(array, index, Time32SecondArray, val) + } + ScalarValue::Time32Millisecond(val) => { + eq_array_primitive!(array, index, Time32MillisecondArray, val) + } + ScalarValue::Time64Microsecond(val) => { + eq_array_primitive!(array, index, Time64MicrosecondArray, val) + } + ScalarValue::Time64Nanosecond(val) => { eq_array_primitive!(array, index, Time64NanosecondArray, val) } ScalarValue::TimestampSecond(val, _) => { @@ -2291,14 +2377,16 @@ macro_rules! impl_try_from { impl_try_from!(Int8, i8); impl_try_from!(Int16, i16); -// special implementation for i32 because of Date32 +// special implementation for i32 because of Date32 and Time32 impl TryFrom for i32 { type Error = DataFusionError; fn try_from(value: ScalarValue) -> Result { match value { ScalarValue::Int32(Some(inner_value)) - | ScalarValue::Date32(Some(inner_value)) => Ok(inner_value), + | ScalarValue::Date32(Some(inner_value)) + | ScalarValue::Time32Second(Some(inner_value)) + | ScalarValue::Time32Millisecond(Some(inner_value)) => Ok(inner_value), _ => Err(DataFusionError::Internal(format!( "Cannot convert {:?} to {}", value, @@ -2308,7 +2396,7 @@ impl TryFrom for i32 { } } -// special implementation for i64 because of TimeNanosecond +// special implementation for i64 because of Date64, Time64 and Timestamp impl TryFrom for i64 { type Error = DataFusionError; @@ -2316,7 +2404,8 @@ impl TryFrom for i64 { match value { ScalarValue::Int64(Some(inner_value)) | ScalarValue::Date64(Some(inner_value)) - | ScalarValue::Time64(Some(inner_value)) + | ScalarValue::Time64Microsecond(Some(inner_value)) + | 
ScalarValue::Time64Nanosecond(Some(inner_value)) | ScalarValue::TimestampNanosecond(Some(inner_value), _) | ScalarValue::TimestampMicrosecond(Some(inner_value), _) | ScalarValue::TimestampMillisecond(Some(inner_value), _) @@ -2390,7 +2479,14 @@ impl TryFrom<&DataType> for ScalarValue { DataType::LargeBinary => ScalarValue::LargeBinary(None), DataType::Date32 => ScalarValue::Date32(None), DataType::Date64 => ScalarValue::Date64(None), - DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64(None), + DataType::Time32(TimeUnit::Second) => ScalarValue::Time32Second(None), + DataType::Time32(TimeUnit::Millisecond) => { + ScalarValue::Time32Millisecond(None) + } + DataType::Time64(TimeUnit::Microsecond) => { + ScalarValue::Time64Microsecond(None) + } + DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64Nanosecond(None), DataType::Timestamp(TimeUnit::Second, tz_opt) => { ScalarValue::TimestampSecond(None, tz_opt.clone()) } @@ -2511,7 +2607,10 @@ impl fmt::Display for ScalarValue { }, ScalarValue::Date32(e) => format_option!(f, e)?, ScalarValue::Date64(e) => format_option!(f, e)?, - ScalarValue::Time64(e) => format_option!(f, e)?, + ScalarValue::Time32Second(e) => format_option!(f, e)?, + ScalarValue::Time32Millisecond(e) => format_option!(f, e)?, + ScalarValue::Time64Microsecond(e) => format_option!(f, e)?, + ScalarValue::Time64Nanosecond(e) => format_option!(f, e)?, ScalarValue::IntervalDayTime(e) => format_option!(f, e)?, ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?, ScalarValue::IntervalMonthDayNano(e) => format_option!(f, e)?, @@ -2578,7 +2677,16 @@ impl fmt::Debug for ScalarValue { ScalarValue::List(_, _) => write!(f, "List([{}])", self), ScalarValue::Date32(_) => write!(f, "Date32(\"{}\")", self), ScalarValue::Date64(_) => write!(f, "Date64(\"{}\")", self), - ScalarValue::Time64(_) => write!(f, "Time64(\"{}\")", self), + ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{}\")", self), + ScalarValue::Time32Millisecond(_) => { + 
write!(f, "Time32Millisecond(\"{}\")", self) + } + ScalarValue::Time64Microsecond(_) => { + write!(f, "Time64Microsecond(\"{}\")", self) + } + ScalarValue::Time64Nanosecond(_) => { + write!(f, "Time64Nanosecond(\"{}\")", self) + } ScalarValue::IntervalDayTime(_) => { write!(f, "IntervalDayTime(\"{}\")", self) } @@ -3207,7 +3315,10 @@ mod tests { make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary), make_test_case!(i32_vals, Date32Array, Date32), make_test_case!(i64_vals, Date64Array, Date64), - make_test_case!(i64_vals, Time64NanosecondArray, Time64), + make_test_case!(i32_vals, Time32SecondArray, Time32Second), + make_test_case!(i32_vals, Time32MillisecondArray, Time32Millisecond), + make_test_case!(i64_vals, Time64MicrosecondArray, Time64Microsecond), + make_test_case!(i64_vals, Time64NanosecondArray, Time64Nanosecond), make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None), make_test_case!( i64_vals, diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs index 597e5e29844bc..f20219c72d7cf 100644 --- a/datafusion/core/src/physical_plan/joins/hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/hash_join.rs @@ -26,7 +26,7 @@ use arrow::{ Date32Array, Date64Array, Decimal128Array, DictionaryArray, LargeStringArray, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampSecondArray, UInt32BufferBuilder, UInt32Builder, UInt64BufferBuilder, - UInt64Builder, + UInt64Builder, Time32SecondArray, Time32MillisecondArray, Time64MicrosecondArray, Time64NanosecondArray, }, compute, datatypes::{ @@ -1211,6 +1211,34 @@ fn equal_rows( DataType::Date64 => { equal_rows_elem!(Date64Array, l, r, left, right, null_equals_null) } + DataType::Time32(time_unit) => match time_unit { + TimeUnit::Second => { + equal_rows_elem!(Time32SecondArray, l, r, left, right, null_equals_null) + } + TimeUnit::Millisecond => { + equal_rows_elem!(Time32MillisecondArray, l, r, left, 
right, null_equals_null) + } + _ => { + err = Some(Err(DataFusionError::Internal( + "Unsupported data type in hasher".to_string(), + ))); + false + } + } + DataType::Time64(time_unit) => match time_unit { + TimeUnit::Microsecond => { + equal_rows_elem!(Time64MicrosecondArray, l, r, left, right, null_equals_null) + } + TimeUnit::Nanosecond => { + equal_rows_elem!(Time64NanosecondArray, l, r, left, right, null_equals_null) + } + _ => { + err = Some(Err(DataFusionError::Internal( + "Unsupported data type in hasher".to_string(), + ))); + false + } + } DataType::Timestamp(time_unit, None) => match time_unit { TimeUnit::Second => { equal_rows_elem!( diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 4b8a158fb4ff7..f6334a33ad1fe 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -1506,6 +1506,92 @@ async fn aggregate_timestamps_max() -> Result<()> { Ok(()) } +#[tokio::test] +async fn aggregate_times_sum() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = plan_and_collect( + &ctx, + "SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t", + ) + .await + .unwrap_err(); + + assert_eq!(results.to_string(), "Error during planning: The function Sum does not support inputs of type Time64(Nanosecond)."); + + Ok(()) +} + +#[tokio::test] +async fn aggregate_times_count() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = execute_to_batches( + &ctx, + "SELECT count(nanos), count(micros), count(millis), count(secs) FROM t", + ) + .await; + + let expected = vec![ + "+----------------+-----------------+-----------------+---------------+", + "| COUNT(t.nanos) | COUNT(t.micros) | COUNT(t.millis) | COUNT(t.secs) |", + "+----------------+-----------------+-----------------+---------------+", + "| 4 | 4 | 4 | 4 |", + 
"+----------------+-----------------+-----------------+---------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn aggregate_times_min() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = execute_to_batches( + &ctx, + "SELECT min(nanos), min(micros), min(millis), min(secs) FROM t", + ) + .await; + + let expected = vec![ + "+--------------------+-----------------+---------------+-------------+", + "| MIN(t.nanos) | MIN(t.micros) | MIN(t.millis) | MIN(t.secs) |", + "+--------------------+-----------------+---------------+-------------+", + "| 18:06:30.243620451 | 18:06:30.243620 | 18:06:30.243 | 18:06:30 |", + "+--------------------+-----------------+---------------+-------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn aggregate_times_max() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = execute_to_batches( + &ctx, + "SELECT max(nanos), max(micros), max(millis), max(secs) FROM t", + ) + .await; + + let expected = vec![ + "+--------------------+-----------------+---------------+-------------+", + "| MAX(t.nanos) | MAX(t.micros) | MAX(t.millis) | MAX(t.secs) |", + "+--------------------+-----------------+---------------+-------------+", + "| 21:06:28.247821084 | 21:06:28.247821 | 21:06:28.247 | 21:06:28 |", + "+--------------------+-----------------+---------------+-------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + #[tokio::test] async fn aggregate_timestamps_avg() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/group_by.rs b/datafusion/core/tests/sql/group_by.rs index 56044862cdb92..fffcc356d4335 100644 --- a/datafusion/core/tests/sql/group_by.rs +++ b/datafusion/core/tests/sql/group_by.rs @@ -485,6 +485,186 @@ async fn 
csv_group_by_date() -> Result<()> { Ok(()) } +#[tokio::test] +async fn csv_group_by_time32second() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Second), false), + Field::new("cnt", DataType::Int32, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32SecondArray::from(vec![ + Some(5_000), + Some(5_000), + Some(5_500), + Some(5_500), + Some(5_900), + Some(5_900), + ])), + Arc::new(Int32Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn csv_group_by_time32millisecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Millisecond), false), + Field::new("cnt", DataType::Int32, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32MillisecondArray::from(vec![ + Some(5_000_000), + Some(5_000_000), + Some(5_500_000), + Some(5_500_000), + Some(5_900_000), + Some(5_900_000), + ])), + Arc::new(Int32Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] 
+async fn csv_group_by_time64microsecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Microsecond), false), + Field::new("cnt", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64MicrosecondArray::from(vec![ + Some(5_000_000_000), + Some(5_000_000_000), + Some(5_500_000_000), + Some(5_500_000_000), + Some(5_900_000_000), + Some(5_900_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn csv_group_by_time64nanosecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Nanosecond), false), + Field::new("cnt", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64NanosecondArray::from(vec![ + Some(5_000_000_000_000), + Some(5_000_000_000_000), + Some(5_500_000_000_000), + Some(5_500_000_000_000), + Some(5_900_000_000_000), + Some(5_900_000_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + 
assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn group_by_date_trunc() -> Result<()> { let tmp_dir = TempDir::new()?; diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 20d5371f1ce07..41364f764c1a3 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -1184,6 +1184,99 @@ pub fn make_timestamps() -> RecordBatch { .unwrap() } +/// Return a new table provider containing all of the supported time types +pub fn table_with_times() -> Arc { + let batch = make_times(); + let schema = batch.schema(); + let partitions = vec![vec![batch]]; + Arc::new(MemTable::try_new(schema, partitions).unwrap()) +} + +/// Return record batch with all of the supported time types +/// values +/// +/// Columns are named: +/// "nanos" --> Time64NanosecondArray +/// "micros" --> Time64MicrosecondArray +/// "millis" --> Time32MillisecondArray +/// "secs" --> Time32SecondArray +/// "name" --> StringArray +pub fn make_times() -> RecordBatch { + let ts_strings = vec![ + Some("18:06:30.243620451"), + Some("20:08:28.161121654"), + Some("19:11:04.156423842"), + Some("21:06:28.247821084"), + ]; + + let ts_nanos = ts_strings + .into_iter() + .map(|t| { + t.map(|t| { + let integer_sec = t + .parse::() + .unwrap() + .num_seconds_from_midnight() as i64; + let extra_nano = + t.parse::().unwrap().nanosecond() as i64; + // Total time in nanoseconds given by integer number of seconds multiplied by 10^9 + // plus number of nanoseconds corresponding to the extra fraction of second + integer_sec * 1_000_000_000 + extra_nano + }) + }) + .collect::>(); + + let ts_micros = ts_nanos + .iter() + .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000)) + .collect::>(); + + let ts_millis = ts_nanos + .iter() + .map(|t| t.as_ref().map(|ts_nanos| { ts_nanos / 1000000 } as i32)) + .collect::>(); + + let ts_secs = ts_nanos + .iter() + .map(|t| t.as_ref().map(|ts_nanos| { ts_nanos / 1000000000 } as i32)) + .collect::>(); 
+ + let names = ts_nanos + .iter() + .enumerate() + .map(|(i, _)| format!("Row {}", i)) + .collect::>(); + + let arr_nanos = Time64NanosecondArray::from(ts_nanos); + let arr_micros = Time64MicrosecondArray::from(ts_micros); + let arr_millis = Time32MillisecondArray::from(ts_millis); + let arr_secs = Time32SecondArray::from(ts_secs); + + let names = names.iter().map(|s| s.as_str()).collect::>(); + let arr_names = StringArray::from(names); + + let schema = Schema::new(vec![ + Field::new("nanos", arr_nanos.data_type().clone(), true), + Field::new("micros", arr_micros.data_type().clone(), true), + Field::new("millis", arr_millis.data_type().clone(), true), + Field::new("secs", arr_secs.data_type().clone(), true), + Field::new("name", arr_names.data_type().clone(), true), + ]); + let schema = Arc::new(schema); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(arr_nanos), + Arc::new(arr_micros), + Arc::new(arr_millis), + Arc::new(arr_secs), + Arc::new(arr_names), + ], + ) + .unwrap() +} + #[tokio::test] async fn nyc() -> Result<()> { // schema for nyxtaxi csv files diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index c3890fac40617..6d7014507bb09 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -909,6 +909,182 @@ async fn query_on_string_dictionary() -> Result<()> { Ok(()) } +#[tokio::test] +async fn filter_with_time32second() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Second), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32SecondArray::from(vec![ + Some(5_000), + Some(5_000), + Some(5_500), + Some(5_500), + Some(5_900), + Some(5_900), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + 
ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn filter_with_time32millisecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Millisecond), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32MillisecondArray::from(vec![ + Some(5_000_000), + Some(5_000_000), + Some(5_500_000), + Some(5_500_000), + Some(5_900_000), + Some(5_900_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn filter_with_time64microsecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Microsecond), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64MicrosecondArray::from(vec![ + Some(5_000_000_000), + Some(5_000_000_000), + Some(5_500_000_000), + Some(5_500_000_000), + Some(5_900_000_000), + Some(5_900_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + 
ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn filter_with_time64nanosecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Nanosecond), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64NanosecondArray::from(vec![ + Some(5_000_000_000_000), + Some(5_000_000_000_000), + Some(5_500_000_000_000), + Some(5_500_000_000_000), + Some(5_900_000_000_000), + Some(5_900_000_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn query_cte_with_alias() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index dec04f6532d28..7fb8e109f575c 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -890,7 +890,7 @@ async fn group_by_timestamp_millis() -> Result<()> { ), Field::new("count", DataType::Int32, false), ])); - let base_dt = Utc.with_ymd_and_hms(2018, 7, 1, 6, 0, 0).unwrap(); // 2018-Jul-01 06:00 + let base_dt = Utc.ymd(2018, 7, 1).and_hms(6, 0, 0); // 2018-Jul-01 06:00 let hour1 = 
Duration::hours(1); let timestamps = vec![ base_dt.timestamp_millis(), diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 45510cb03e0ed..63bc24d0ef16e 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -20,7 +20,9 @@ use crate::type_coercion::is_numeric; use crate::Operator; use arrow::compute::can_cast_types; -use arrow::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE}; +use arrow::datatypes::{ + DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, +}; use datafusion_common::DataFusionError; use datafusion_common::Result; @@ -513,11 +515,23 @@ fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { .or_else(|| null_coercion(lhs_type, rhs_type)) } +/// Checks if the TimeUnit associated with a Time32 or Time64 type is consistent, +/// as Time32 can only be used to Second and Millisecond accuracy, while Time64 +/// is exclusively used to Microsecond and Nanosecond accuracy +fn is_time_with_valid_unit(datatype: DataType) -> bool { + match datatype { + DataType::Time32(TimeUnit::Second) + | DataType::Time32(TimeUnit::Millisecond) + | DataType::Time64(TimeUnit::Microsecond) + | DataType::Time64(TimeUnit::Nanosecond) => true, + _ => false, + } +} + /// Coercion rules for Temporal columns: the type that both lhs and rhs can be /// casted to for the purpose of a date computation fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; - use arrow::datatypes::TimeUnit; match (lhs_type, rhs_type) { (Date64, Date32) => Some(Date64), (Date32, Date64) => Some(Date64), @@ -525,6 +539,22 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Some(Date32), (Utf8, Date64) => Some(Date64), (Date64, Utf8) => Some(Date64), + (Utf8, Time32(unit)) => match is_time_with_valid_unit(Time32(unit.clone())) { + false => None, + true => 
Some(Time32(unit.clone())), + }, + (Time32(unit), Utf8) => match is_time_with_valid_unit(Time32(unit.clone())) { + false => None, + true => Some(Time32(unit.clone())), + }, + (Utf8, Time64(unit)) => match is_time_with_valid_unit(Time64(unit.clone())) { + false => None, + true => Some(Time64(unit.clone())), + }, + (Time64(unit), Utf8) => match is_time_with_valid_unit(Time64(unit.clone())) { + false => None, + true => Some(Time64(unit.clone())), + }, (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => { let tz = match (lhs_tz, rhs_tz) { // can't cast across timezones @@ -826,6 +856,30 @@ mod tests { Operator::Lt, DataType::Date64 ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time32(TimeUnit::Second), + Operator::Eq, + DataType::Time32(TimeUnit::Second) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time32(TimeUnit::Millisecond), + Operator::Eq, + DataType::Time32(TimeUnit::Millisecond) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time64(TimeUnit::Microsecond), + Operator::Eq, + DataType::Time64(TimeUnit::Microsecond) + ); + test_coercion_binary_rule!( + DataType::Utf8, + DataType::Time64(TimeUnit::Nanosecond), + Operator::Eq, + DataType::Time64(TimeUnit::Nanosecond) + ); test_coercion_binary_rule!( DataType::Utf8, DataType::Utf8, diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs index c415e7db51644..2d6961bfcb880 100644 --- a/datafusion/physical-expr/src/aggregate/min_max.rs +++ b/datafusion/physical-expr/src/aggregate/min_max.rs @@ -28,6 +28,7 @@ use arrow::{ array::{ ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray, + Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, 
UInt8Array, @@ -251,8 +252,32 @@ macro_rules! min_max_batch { ), DataType::Date32 => typed_min_max_batch!($VALUES, Date32Array, Date32, $OP), DataType::Date64 => typed_min_max_batch!($VALUES, Date64Array, Date64, $OP), + DataType::Time32(TimeUnit::Second) => { + typed_min_max_batch!($VALUES, Time32SecondArray, Time32Second, $OP) + } + DataType::Time32(TimeUnit::Millisecond) => { + typed_min_max_batch!( + $VALUES, + Time32MillisecondArray, + Time32Millisecond, + $OP + ) + } + DataType::Time64(TimeUnit::Microsecond) => { + typed_min_max_batch!( + $VALUES, + Time64MicrosecondArray, + Time64Microsecond, + $OP + ) + } DataType::Time64(TimeUnit::Nanosecond) => { - typed_min_max_batch!($VALUES, Time64NanosecondArray, Time64, $OP) + typed_min_max_batch!( + $VALUES, + Time64NanosecondArray, + Time64Nanosecond, + $OP + ) } other => { // This should have been handled before @@ -417,10 +442,28 @@ macro_rules! min_max { typed_min_max!(lhs, rhs, Date64, $OP) } ( - ScalarValue::Time64(lhs), - ScalarValue::Time64(rhs), + ScalarValue::Time32Second(lhs), + ScalarValue::Time32Second(rhs), + ) => { + typed_min_max!(lhs, rhs, Time32Second, $OP) + } + ( + ScalarValue::Time32Millisecond(lhs), + ScalarValue::Time32Millisecond(rhs), ) => { - typed_min_max!(lhs, rhs, Time64, $OP) + typed_min_max!(lhs, rhs, Time32Millisecond, $OP) + } + ( + ScalarValue::Time64Microsecond(lhs), + ScalarValue::Time64Microsecond(rhs), + ) => { + typed_min_max!(lhs, rhs, Time64Microsecond, $OP) + } + ( + ScalarValue::Time64Nanosecond(lhs), + ScalarValue::Time64Nanosecond(rhs), + ) => { + typed_min_max!(lhs, rhs, Time64Nanosecond, $OP) } e => { return Err(DataFusionError::Internal(format!( @@ -1073,24 +1116,90 @@ mod tests { } #[test] - fn min_time64() -> Result<()> { + fn min_time32second() -> Result<()> { + let a: ArrayRef = Arc::new(Time32SecondArray::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Time32(TimeUnit::Second), + Min, + ScalarValue::Time32Second(Some(1)) + ) + } + + #[test] + fn 
max_time32second() -> Result<()> { + let a: ArrayRef = Arc::new(Time32SecondArray::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Time32(TimeUnit::Second), + Max, + ScalarValue::Time32Second(Some(5)) + ) + } + + #[test] + fn min_time32millisecond() -> Result<()> { + let a: ArrayRef = Arc::new(Time32MillisecondArray::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Time32(TimeUnit::Millisecond), + Min, + ScalarValue::Time32Millisecond(Some(1)) + ) + } + + #[test] + fn max_time32millisecond() -> Result<()> { + let a: ArrayRef = Arc::new(Time32MillisecondArray::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Time32(TimeUnit::Millisecond), + Max, + ScalarValue::Time32Millisecond(Some(5)) + ) + } + + #[test] + fn min_time64microsecond() -> Result<()> { + let a: ArrayRef = Arc::new(Time64MicrosecondArray::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Time64(TimeUnit::Microsecond), + Min, + ScalarValue::Time64Microsecond(Some(1)) + ) + } + + #[test] + fn max_time64microsecond() -> Result<()> { + let a: ArrayRef = Arc::new(Time64MicrosecondArray::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Time64(TimeUnit::Microsecond), + Max, + ScalarValue::Time64Microsecond(Some(5)) + ) + } + + #[test] + fn min_time64nanosecond() -> Result<()> { let a: ArrayRef = Arc::new(Time64NanosecondArray::from(vec![1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Time64(TimeUnit::Nanosecond), - Max, - ScalarValue::Time64(Some(5)) + Min, + ScalarValue::Time64Nanosecond(Some(1)) ) } #[test] - fn max_time64() -> Result<()> { + fn max_time64nanosecond() -> Result<()> { let a: ArrayRef = Arc::new(Time64NanosecondArray::from(vec![1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Time64(TimeUnit::Nanosecond), Max, - ScalarValue::Time64(Some(5)) + ScalarValue::Time64Nanosecond(Some(5)) ) } } diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 
0a76edde5912d..02d9b9e826cf2 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -213,7 +213,7 @@ pub fn make_current_time( now_ts: DateTime, ) -> impl Fn(&[ColumnarValue]) -> Result { let nano = Some(now_ts.timestamp_nanos() % 86400000000000); - move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64(nano))) + move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano))) } fn quarter_month(date: &NaiveDateTime) -> u32 { diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index a4904053086e1..b6531ad2452bf 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -460,6 +460,18 @@ macro_rules! binary_array_op { DataType::Date64 => { compute_op!($LEFT, $RIGHT, $OP, Date64Array) } + DataType::Time32(TimeUnit::Second) => { + compute_op!($LEFT, $RIGHT, $OP, Time32SecondArray) + } + DataType::Time32(TimeUnit::Millisecond) => { + compute_op!($LEFT, $RIGHT, $OP, Time32MillisecondArray) + } + DataType::Time64(TimeUnit::Microsecond) => { + compute_op!($LEFT, $RIGHT, $OP, Time64MicrosecondArray) + } + DataType::Time64(TimeUnit::Nanosecond) => { + compute_op!($LEFT, $RIGHT, $OP, Time64NanosecondArray) + } DataType::Boolean => compute_bool_op!($LEFT, $RIGHT, $OP, BooleanArray), other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation '{}' on dyn arrays", @@ -814,6 +826,10 @@ macro_rules! 
binary_array_op_dyn_scalar { ScalarValue::Float64(v) => compute_op_dyn_scalar!($LEFT, v, $OP, $OP_TYPE), ScalarValue::Date32(_) => compute_op_scalar!($LEFT, right, $OP, Date32Array), ScalarValue::Date64(_) => compute_op_scalar!($LEFT, right, $OP, Date64Array), + ScalarValue::Time32Second(_) => compute_op_scalar!($LEFT, right, $OP, Time32SecondArray), + ScalarValue::Time32Millisecond(_) => compute_op_scalar!($LEFT, right, $OP, Time32MillisecondArray), + ScalarValue::Time64Microsecond(_) => compute_op_scalar!($LEFT, right, $OP, Time64MicrosecondArray), + ScalarValue::Time64Nanosecond(_) => compute_op_scalar!($LEFT, right, $OP, Time64NanosecondArray), ScalarValue::TimestampSecond(..) => compute_op_scalar!($LEFT, right, $OP, TimestampSecondArray), ScalarValue::TimestampMillisecond(..) => compute_op_scalar!($LEFT, right, $OP, TimestampMillisecondArray), ScalarValue::TimestampMicrosecond(..) => compute_op_scalar!($LEFT, right, $OP, TimestampMicrosecondArray), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index de5f3749d7f86..1911c59dfd89b 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -745,6 +745,20 @@ message ScalarListValue{ repeated ScalarValue values = 2; } +message ScalarTime32Value { + oneof value { + int32 time32_second_value = 1; + int32 time32_millisecond_value = 2; + }; +} + +message ScalarTime64Value { + oneof value { + int64 time64_microsecond_value = 1; + int64 time64_nanosecond_value = 2; + }; +} + message ScalarTimestampValue { oneof value { int64 time_microsecond_value = 1; @@ -803,6 +817,7 @@ message ScalarValue{ double float64_value = 13; // Literal Date32 value always has a unit of day int32 date_32_value = 14; + ScalarTime32Value time32_value = 15; ScalarListValue list_value = 17; //WAS: ScalarType null_list_value = 18; @@ -814,7 +829,7 @@ message ScalarValue{ ScalarDictionaryValue dictionary_value = 27; bytes binary_value = 28; bytes 
large_binary_value = 29; - int64 time64_value = 30; + ScalarTime64Value time64_value = 30; IntervalMonthDayNanoValue interval_month_day_nano = 31; StructValue struct_value = 32; ScalarFixedSizeBinary fixed_size_binary_value = 34; diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 8f6377739470b..1de84ad94aa77 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -583,7 +583,30 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { ) } Value::Date64Value(v) => Self::Date64(Some(*v)), - Value::Time64Value(v) => Self::Time64(Some(*v)), + Value::Time32Value(v) => { + let time_value = + v.value.as_ref().ok_or_else(|| Error::required("value"))?; + match time_value { + protobuf::scalar_time32_value::Value::Time32SecondValue(t) => { + Self::Time32Second(Some(*t)) + } + protobuf::scalar_time32_value::Value::Time32MillisecondValue(t) => { + Self::Time32Millisecond(Some(*t)) + } + } + } + Value::Time64Value(v) => { + let time_value = + v.value.as_ref().ok_or_else(|| Error::required("value"))?; + match time_value { + protobuf::scalar_time64_value::Value::Time64MicrosecondValue(t) => { + Self::Time64Microsecond(Some(*t)) + } + protobuf::scalar_time64_value::Value::Time64NanosecondValue(t) => { + Self::Time64Nanosecond(Some(*t)) + } + } + } Value::IntervalYearmonthValue(v) => Self::IntervalYearMonth(Some(*v)), Value::IntervalDaytimeValue(v) => Self::IntervalDayTime(Some(*v)), Value::TimestampValue(v) => { diff --git a/datafusion/proto/src/generated/datafusion.rs b/datafusion/proto/src/generated/datafusion.rs new file mode 100644 index 0000000000000..aeb2daae76101 --- /dev/null +++ b/datafusion/proto/src/generated/datafusion.rs @@ -0,0 +1,1613 @@ +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ColumnRelation { + #[prost(string, tag="1")] + pub relation: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Column { + #[prost(string, tag="1")] + pub 
name: ::prost::alloc::string::String, + #[prost(message, optional, tag="2")] + pub relation: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DfField { + #[prost(message, optional, tag="1")] + pub field: ::core::option::Option, + #[prost(message, optional, tag="2")] + pub qualifier: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DfSchema { + #[prost(message, repeated, tag="1")] + pub columns: ::prost::alloc::vec::Vec, + #[prost(map="string, string", tag="2")] + pub metadata: ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, +} +/// logical plan +/// LogicalPlan is a nested type +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalPlanNode { + #[prost(oneof="logical_plan_node::LogicalPlanType", tags="1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23")] + pub logical_plan_type: ::core::option::Option, +} +/// Nested message and enum types in `LogicalPlanNode`. 
+pub mod logical_plan_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum LogicalPlanType { + #[prost(message, tag="1")] + ListingScan(super::ListingTableScanNode), + #[prost(message, tag="3")] + Projection(::prost::alloc::boxed::Box), + #[prost(message, tag="4")] + Selection(::prost::alloc::boxed::Box), + #[prost(message, tag="5")] + Limit(::prost::alloc::boxed::Box), + #[prost(message, tag="6")] + Aggregate(::prost::alloc::boxed::Box), + #[prost(message, tag="7")] + Join(::prost::alloc::boxed::Box), + #[prost(message, tag="8")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag="9")] + Repartition(::prost::alloc::boxed::Box), + #[prost(message, tag="10")] + EmptyRelation(super::EmptyRelationNode), + #[prost(message, tag="11")] + CreateExternalTable(super::CreateExternalTableNode), + #[prost(message, tag="12")] + Explain(::prost::alloc::boxed::Box), + #[prost(message, tag="13")] + Window(::prost::alloc::boxed::Box), + #[prost(message, tag="14")] + Analyze(::prost::alloc::boxed::Box), + #[prost(message, tag="15")] + CrossJoin(::prost::alloc::boxed::Box), + #[prost(message, tag="16")] + Values(super::ValuesNode), + #[prost(message, tag="17")] + Extension(super::LogicalExtensionNode), + #[prost(message, tag="18")] + CreateCatalogSchema(super::CreateCatalogSchemaNode), + #[prost(message, tag="19")] + Union(super::UnionNode), + #[prost(message, tag="20")] + CreateCatalog(super::CreateCatalogNode), + #[prost(message, tag="21")] + SubqueryAlias(::prost::alloc::boxed::Box), + #[prost(message, tag="22")] + CreateView(::prost::alloc::boxed::Box), + #[prost(message, tag="23")] + Distinct(::prost::alloc::boxed::Box), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalExtensionNode { + #[prost(bytes="vec", tag="1")] + pub node: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="2")] + pub inputs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionColumns { + #[prost(string, 
repeated, tag="1")] + pub columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CsvFormat { + #[prost(bool, tag="1")] + pub has_header: bool, + #[prost(string, tag="2")] + pub delimiter: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ParquetFormat { + #[prost(bool, tag="1")] + pub enable_pruning: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AvroFormat { +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListingTableScanNode { + #[prost(string, tag="1")] + pub table_name: ::prost::alloc::string::String, + #[prost(string, repeated, tag="2")] + pub paths: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, tag="3")] + pub file_extension: ::prost::alloc::string::String, + #[prost(message, optional, tag="4")] + pub projection: ::core::option::Option, + #[prost(message, optional, tag="5")] + pub schema: ::core::option::Option, + #[prost(message, repeated, tag="6")] + pub filters: ::prost::alloc::vec::Vec, + #[prost(string, repeated, tag="7")] + pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(bool, tag="8")] + pub collect_stat: bool, + #[prost(uint32, tag="9")] + pub target_partitions: u32, + #[prost(oneof="listing_table_scan_node::FileFormatType", tags="10, 11, 12")] + pub file_format_type: ::core::option::Option, +} +/// Nested message and enum types in `ListingTableScanNode`. 
+pub mod listing_table_scan_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum FileFormatType { + #[prost(message, tag="10")] + Csv(super::CsvFormat), + #[prost(message, tag="11")] + Parquet(super::ParquetFormat), + #[prost(message, tag="12")] + Avro(super::AvroFormat), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(oneof="projection_node::OptionalAlias", tags="3")] + pub optional_alias: ::core::option::Option, +} +/// Nested message and enum types in `ProjectionNode`. +pub mod projection_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OptionalAlias { + #[prost(string, tag="3")] + Alias(::prost::alloc::string::String), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SelectionNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SortNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub expr: ::prost::alloc::vec::Vec, + /// Maximum number of highest/lowest rows to fetch; negative means no limit + #[prost(int64, tag="3")] + pub fetch: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RepartitionNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(oneof="repartition_node::PartitionMethod", tags="2, 3")] + pub partition_method: ::core::option::Option, +} +/// Nested message and enum types in `RepartitionNode`. 
+pub mod repartition_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum PartitionMethod { + #[prost(uint64, tag="2")] + RoundRobin(u64), + #[prost(message, tag="3")] + Hash(super::HashRepartition), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HashRepartition { + #[prost(message, repeated, tag="1")] + pub hash_expr: ::prost::alloc::vec::Vec, + #[prost(uint64, tag="2")] + pub partition_count: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct EmptyRelationNode { + #[prost(bool, tag="1")] + pub produce_one_row: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateExternalTableNode { + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(string, tag="2")] + pub location: ::prost::alloc::string::String, + #[prost(string, tag="3")] + pub file_type: ::prost::alloc::string::String, + #[prost(bool, tag="4")] + pub has_header: bool, + #[prost(message, optional, tag="5")] + pub schema: ::core::option::Option, + #[prost(string, repeated, tag="6")] + pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(bool, tag="7")] + pub if_not_exists: bool, + #[prost(string, tag="8")] + pub delimiter: ::prost::alloc::string::String, + #[prost(string, tag="9")] + pub definition: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateCatalogSchemaNode { + #[prost(string, tag="1")] + pub schema_name: ::prost::alloc::string::String, + #[prost(bool, tag="2")] + pub if_not_exists: bool, + #[prost(message, optional, tag="3")] + pub schema: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateCatalogNode { + #[prost(string, tag="1")] + pub catalog_name: ::prost::alloc::string::String, + #[prost(bool, tag="2")] + pub if_not_exists: bool, + #[prost(message, optional, tag="3")] + pub schema: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct 
CreateViewNode { + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, boxed, tag="2")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="3")] + pub or_replace: bool, + #[prost(string, tag="4")] + pub definition: ::prost::alloc::string::String, +} +/// a node containing data for defining values list. unlike in SQL where it's two dimensional, here +/// the list is flattened, and with the field n_cols it can be parsed and partitioned into rows +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ValuesNode { + #[prost(uint64, tag="1")] + pub n_cols: u64, + #[prost(message, repeated, tag="2")] + pub values_list: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AnalyzeNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub verbose: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExplainNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub verbose: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub group_expr: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="3")] + pub aggr_expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub window_expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JoinNode { + #[prost(message, optional, boxed, tag="1")] + pub left: 
::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(enumeration="JoinType", tag="3")] + pub join_type: i32, + #[prost(enumeration="JoinConstraint", tag="4")] + pub join_constraint: i32, + #[prost(message, repeated, tag="5")] + pub left_join_column: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="6")] + pub right_join_column: ::prost::alloc::vec::Vec, + #[prost(bool, tag="7")] + pub null_equals_null: bool, + #[prost(message, optional, tag="8")] + pub filter: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DistinctNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UnionNode { + #[prost(message, repeated, tag="1")] + pub inputs: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CrossJoinNode { + #[prost(message, optional, boxed, tag="1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LimitNode { + #[prost(message, optional, boxed, tag="1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// The number of rows to skip before fetch; non-positive means don't skip any + #[prost(int64, tag="2")] + pub skip: i64, + /// Maximum number of rows to fetch; negative means no limit + #[prost(int64, tag="3")] + pub fetch: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SelectionExecNode { + #[prost(message, optional, tag="1")] + pub expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubqueryAliasNode { + #[prost(message, optional, boxed, tag="1")] + pub input: 
::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="2")] + pub alias: ::prost::alloc::string::String, +} +/// logical expressions +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalExprNode { + #[prost(oneof="logical_expr_node::ExprType", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33")] + pub expr_type: ::core::option::Option, +} +/// Nested message and enum types in `LogicalExprNode`. +pub mod logical_expr_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ExprType { + /// column references + #[prost(message, tag="1")] + Column(super::Column), + /// alias + #[prost(message, tag="2")] + Alias(::prost::alloc::boxed::Box), + #[prost(message, tag="3")] + Literal(super::ScalarValue), + /// binary expressions + #[prost(message, tag="4")] + BinaryExpr(::prost::alloc::boxed::Box), + /// aggregate expressions + #[prost(message, tag="5")] + AggregateExpr(::prost::alloc::boxed::Box), + /// null checks + #[prost(message, tag="6")] + IsNullExpr(::prost::alloc::boxed::Box), + #[prost(message, tag="7")] + IsNotNullExpr(::prost::alloc::boxed::Box), + #[prost(message, tag="8")] + NotExpr(::prost::alloc::boxed::Box), + #[prost(message, tag="9")] + Between(::prost::alloc::boxed::Box), + #[prost(message, tag="10")] + Case(::prost::alloc::boxed::Box), + #[prost(message, tag="11")] + Cast(::prost::alloc::boxed::Box), + #[prost(message, tag="12")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag="13")] + Negative(::prost::alloc::boxed::Box), + #[prost(message, tag="14")] + InList(::prost::alloc::boxed::Box), + #[prost(bool, tag="15")] + Wildcard(bool), + #[prost(message, tag="16")] + ScalarFunction(super::ScalarFunctionNode), + #[prost(message, tag="17")] + TryCast(::prost::alloc::boxed::Box), + /// window expressions + #[prost(message, tag="18")] + WindowExpr(::prost::alloc::boxed::Box), + /// AggregateUDF expressions + #[prost(message, 
tag="19")] + AggregateUdfExpr(::prost::alloc::boxed::Box), + /// Scalar UDF expressions + #[prost(message, tag="20")] + ScalarUdfExpr(super::ScalarUdfExprNode), + #[prost(message, tag="21")] + GetIndexedField(::prost::alloc::boxed::Box), + #[prost(message, tag="22")] + GroupingSet(super::GroupingSetNode), + #[prost(message, tag="23")] + Cube(super::CubeNode), + #[prost(message, tag="24")] + Rollup(super::RollupNode), + #[prost(message, tag="25")] + IsTrue(::prost::alloc::boxed::Box), + #[prost(message, tag="26")] + IsFalse(::prost::alloc::boxed::Box), + #[prost(message, tag="27")] + IsUnknown(::prost::alloc::boxed::Box), + #[prost(message, tag="28")] + IsNotTrue(::prost::alloc::boxed::Box), + #[prost(message, tag="29")] + IsNotFalse(::prost::alloc::boxed::Box), + #[prost(message, tag="30")] + IsNotUnknown(::prost::alloc::boxed::Box), + #[prost(message, tag="31")] + Like(::prost::alloc::boxed::Box), + #[prost(message, tag="32")] + Ilike(::prost::alloc::boxed::Box), + #[prost(message, tag="33")] + SimilarTo(::prost::alloc::boxed::Box), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LogicalExprList { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GroupingSetNode { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CubeNode { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RollupNode { + #[prost(message, repeated, tag="1")] + pub expr: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetIndexedField { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub key: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct 
IsNull { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotNull { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsTrue { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsFalse { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsUnknown { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotTrue { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotFalse { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsNotUnknown { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Not { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AliasNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="2")] + pub alias: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BinaryExprNode { + #[prost(message, optional, boxed, tag="1")] + pub l: 
::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub r: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="3")] + pub op: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NegativeNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct InListNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub list: ::prost::alloc::vec::Vec, + #[prost(bool, tag="3")] + pub negated: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarFunctionNode { + #[prost(enumeration="ScalarFunction", tag="1")] + pub fun: i32, + #[prost(message, repeated, tag="2")] + pub args: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateExprNode { + #[prost(enumeration="AggregateFunction", tag="1")] + pub aggr_function: i32, + #[prost(message, repeated, tag="2")] + pub expr: ::prost::alloc::vec::Vec, + #[prost(bool, tag="3")] + pub distinct: bool, + #[prost(message, optional, boxed, tag="4")] + pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AggregateUdfExprNode { + #[prost(string, tag="1")] + pub fun_name: ::prost::alloc::string::String, + #[prost(message, repeated, tag="2")] + pub args: ::prost::alloc::vec::Vec, + #[prost(message, optional, boxed, tag="3")] + pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarUdfExprNode { + #[prost(string, tag="1")] + pub fun_name: ::prost::alloc::string::String, + #[prost(message, repeated, tag="2")] + pub args: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct 
WindowExprNode { + #[prost(message, optional, boxed, tag="4")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="5")] + pub partition_by: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="6")] + pub order_by: ::prost::alloc::vec::Vec, + #[prost(oneof="window_expr_node::WindowFunction", tags="1, 2")] + pub window_function: ::core::option::Option, + /// repeated LogicalExprNode filter = 7; + #[prost(oneof="window_expr_node::WindowFrame", tags="8")] + pub window_frame: ::core::option::Option, +} +/// Nested message and enum types in `WindowExprNode`. +pub mod window_expr_node { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum WindowFunction { + #[prost(enumeration="super::AggregateFunction", tag="1")] + AggrFunction(i32), + /// udaf = 3 + #[prost(enumeration="super::BuiltInWindowFunction", tag="2")] + BuiltInFunction(i32), + } + /// repeated LogicalExprNode filter = 7; + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum WindowFrame { + #[prost(message, tag="8")] + Frame(super::WindowFrame), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BetweenNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub negated: bool, + #[prost(message, optional, boxed, tag="3")] + pub low: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="4")] + pub high: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LikeNode { + #[prost(bool, tag="1")] + pub negated: bool, + #[prost(message, optional, boxed, tag="2")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="3")] + pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="4")] + pub escape_char: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, 
::prost::Message)] +pub struct ILikeNode { + #[prost(bool, tag="1")] + pub negated: bool, + #[prost(message, optional, boxed, tag="2")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="3")] + pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="4")] + pub escape_char: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SimilarToNode { + #[prost(bool, tag="1")] + pub negated: bool, + #[prost(message, optional, boxed, tag="2")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="3")] + pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(string, tag="4")] + pub escape_char: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CaseNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, repeated, tag="2")] + pub when_then_expr: ::prost::alloc::vec::Vec, + #[prost(message, optional, boxed, tag="3")] + pub else_expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WhenThen { + #[prost(message, optional, tag="1")] + pub when_expr: ::core::option::Option, + #[prost(message, optional, tag="2")] + pub then_expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CastNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub arrow_type: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TryCastNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag="2")] + pub arrow_type: ::core::option::Option, +} +#[derive(Clone, PartialEq, 
::prost::Message)] +pub struct SortExprNode { + #[prost(message, optional, boxed, tag="1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="2")] + pub asc: bool, + #[prost(bool, tag="3")] + pub nulls_first: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowFrame { + #[prost(enumeration="WindowFrameUnits", tag="1")] + pub window_frame_units: i32, + #[prost(message, optional, tag="2")] + pub start_bound: ::core::option::Option, + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[prost(oneof="window_frame::EndBound", tags="3")] + pub end_bound: ::core::option::Option, +} +/// Nested message and enum types in `WindowFrame`. +pub mod window_frame { + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum EndBound { + #[prost(message, tag="3")] + Bound(super::WindowFrameBound), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WindowFrameBound { + #[prost(enumeration="WindowFrameBoundType", tag="1")] + pub window_frame_bound_type: i32, + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[prost(oneof="window_frame_bound::BoundValue", tags="2")] + pub bound_value: ::core::option::Option, +} +/// Nested message and enum types in `WindowFrameBound`. 
+pub mod window_frame_bound { + /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) + /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum BoundValue { + #[prost(uint64, tag="2")] + Value(u64), + } +} +// ///////////////////////////////////////////////////////////////////////////////////////////////// +// Arrow Data Types +// ///////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Schema { + #[prost(message, repeated, tag="1")] + pub columns: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Field { + /// name of the field + #[prost(string, tag="1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, boxed, tag="2")] + pub arrow_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag="3")] + pub nullable: bool, + /// for complex data types like structs, unions + #[prost(message, repeated, tag="4")] + pub children: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FixedSizeBinary { + #[prost(int32, tag="1")] + pub length: i32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Timestamp { + #[prost(enumeration="TimeUnit", tag="1")] + pub time_unit: i32, + #[prost(string, tag="2")] + pub timezone: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Decimal { + #[prost(uint64, tag="1")] + pub whole: u64, + #[prost(uint64, tag="2")] + pub fractional: u64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct List { + #[prost(message, optional, boxed, tag="1")] + pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FixedSizeList { + #[prost(message, optional, boxed, tag="1")] + pub field_type: 
::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(int32, tag="2")] + pub list_size: i32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Dictionary { + #[prost(message, optional, boxed, tag="1")] + pub key: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag="2")] + pub value: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Struct { + #[prost(message, repeated, tag="1")] + pub sub_field_types: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Union { + #[prost(message, repeated, tag="1")] + pub union_types: ::prost::alloc::vec::Vec, + #[prost(enumeration="UnionMode", tag="2")] + pub union_mode: i32, + #[prost(int32, repeated, tag="3")] + pub type_ids: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarListValue { + #[prost(message, optional, tag="1")] + pub field: ::core::option::Option, + #[prost(message, repeated, tag="2")] + pub values: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarTimestampValue { + #[prost(string, tag="5")] + pub timezone: ::prost::alloc::string::String, + #[prost(oneof="scalar_timestamp_value::Value", tags="1, 2, 3, 4")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarTimestampValue`. +pub mod scalar_timestamp_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(int64, tag="1")] + TimeMicrosecondValue(i64), + #[prost(int64, tag="2")] + TimeNanosecondValue(i64), + #[prost(int64, tag="3")] + TimeSecondValue(i64), + #[prost(int64, tag="4")] + TimeMillisecondValue(i64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarTime32Value { + #[prost(oneof="scalar_time32_value::Value", tags="1, 2")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarTime32Value`. 
+pub mod scalar_time32_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(int32, tag="1")] + Time32SecondValue(i32), + #[prost(int32, tag="2")] + Time32MillisecondValue(i32), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarTime64Value { + #[prost(oneof="scalar_time64_value::Value", tags="1, 2")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarTime64Value`. +pub mod scalar_time64_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(int64, tag="1")] + Time64MicrosecondValue(i64), + #[prost(int64, tag="2")] + Time64NanosecondValue(i64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarDictionaryValue { + #[prost(message, optional, tag="1")] + pub index_type: ::core::option::Option, + #[prost(message, optional, boxed, tag="2")] + pub value: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IntervalMonthDayNanoValue { + #[prost(int32, tag="1")] + pub months: i32, + #[prost(int32, tag="2")] + pub days: i32, + #[prost(int64, tag="3")] + pub nanos: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StructValue { + /// Note that a null struct value must have one or more fields, so we + /// encode a null StructValue as one with an empty field_values + /// list. + #[prost(message, repeated, tag="2")] + pub field_values: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag="3")] + pub fields: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarValue { + #[prost(oneof="scalar_value::Value", tags="19, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarValue`. 
+pub mod scalar_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + /// Null value of any type (type is encoded) + #[prost(enumeration="super::PrimitiveScalarType", tag="19")] + NullValue(i32), + #[prost(bool, tag="1")] + BoolValue(bool), + #[prost(string, tag="2")] + Utf8Value(::prost::alloc::string::String), + #[prost(string, tag="3")] + LargeUtf8Value(::prost::alloc::string::String), + #[prost(int32, tag="4")] + Int8Value(i32), + #[prost(int32, tag="5")] + Int16Value(i32), + #[prost(int32, tag="6")] + Int32Value(i32), + #[prost(int64, tag="7")] + Int64Value(i64), + #[prost(uint32, tag="8")] + Uint8Value(u32), + #[prost(uint32, tag="9")] + Uint16Value(u32), + #[prost(uint32, tag="10")] + Uint32Value(u32), + #[prost(uint64, tag="11")] + Uint64Value(u64), + #[prost(float, tag="12")] + Float32Value(f32), + #[prost(double, tag="13")] + Float64Value(f64), + /// Literal Date32 value always has a unit of day + #[prost(int32, tag="14")] + Date32Value(i32), + #[prost(message, tag="17")] + ListValue(super::ScalarListValue), + #[prost(message, tag="18")] + NullListValue(super::ScalarType), + #[prost(message, tag="20")] + Decimal128Value(super::Decimal128), + #[prost(int64, tag="21")] + Date64Value(i64), + #[prost(int32, tag="24")] + IntervalYearmonthValue(i32), + #[prost(int64, tag="25")] + IntervalDaytimeValue(i64), + #[prost(message, tag="26")] + TimestampValue(super::ScalarTimestampValue), + #[prost(message, tag="27")] + DictionaryValue(::prost::alloc::boxed::Box), + #[prost(bytes, tag="28")] + BinaryValue(::prost::alloc::vec::Vec), + #[prost(bytes, tag="29")] + LargeBinaryValue(::prost::alloc::vec::Vec), + #[prost(message, tag="30")] + Time32Value(super::ScalarTime32Value), + #[prost(message, tag="31")] + Time64Value(super::ScalarTime64Value), + #[prost(message, tag="32")] + IntervalMonthDayNano(super::IntervalMonthDayNanoValue), + #[prost(message, tag="33")] + StructValue(super::StructValue), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] 
+pub struct Decimal128 { + #[prost(bytes="vec", tag="1")] + pub value: ::prost::alloc::vec::Vec, + #[prost(int64, tag="2")] + pub p: i64, + #[prost(int64, tag="3")] + pub s: i64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarType { + #[prost(oneof="scalar_type::Datatype", tags="1, 2")] + pub datatype: ::core::option::Option, +} +/// Nested message and enum types in `ScalarType`. +pub mod scalar_type { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Datatype { + #[prost(enumeration="super::PrimitiveScalarType", tag="1")] + Scalar(i32), + #[prost(message, tag="2")] + List(super::ScalarListType), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarListType { + #[prost(string, repeated, tag="3")] + pub field_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(enumeration="PrimitiveScalarType", tag="2")] + pub deepest_type: i32, +} +/// Broken out into multiple message types so that type +/// metadata did not need to be in a separate message. +/// All types that are of the empty message type contain no additional metadata +/// about the type. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ArrowType { + #[prost(oneof="arrow_type::ArrowTypeEnum", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32, 15, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30")] + pub arrow_type_enum: ::core::option::Option, +} +/// Nested message and enum types in `ArrowType`. 
+pub mod arrow_type { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ArrowTypeEnum { + /// arrow::Type::NA + #[prost(message, tag="1")] + None(super::EmptyMessage), + /// arrow::Type::BOOL + #[prost(message, tag="2")] + Bool(super::EmptyMessage), + /// arrow::Type::UINT8 + #[prost(message, tag="3")] + Uint8(super::EmptyMessage), + /// arrow::Type::INT8 + #[prost(message, tag="4")] + Int8(super::EmptyMessage), + /// represents arrow::Type fields in src/arrow/type.h + #[prost(message, tag="5")] + Uint16(super::EmptyMessage), + #[prost(message, tag="6")] + Int16(super::EmptyMessage), + #[prost(message, tag="7")] + Uint32(super::EmptyMessage), + #[prost(message, tag="8")] + Int32(super::EmptyMessage), + #[prost(message, tag="9")] + Uint64(super::EmptyMessage), + #[prost(message, tag="10")] + Int64(super::EmptyMessage), + #[prost(message, tag="11")] + Float16(super::EmptyMessage), + #[prost(message, tag="12")] + Float32(super::EmptyMessage), + #[prost(message, tag="13")] + Float64(super::EmptyMessage), + #[prost(message, tag="14")] + Utf8(super::EmptyMessage), + #[prost(message, tag="32")] + LargeUtf8(super::EmptyMessage), + #[prost(message, tag="15")] + Binary(super::EmptyMessage), + #[prost(int32, tag="16")] + FixedSizeBinary(i32), + #[prost(message, tag="31")] + LargeBinary(super::EmptyMessage), + #[prost(message, tag="17")] + Date32(super::EmptyMessage), + #[prost(message, tag="18")] + Date64(super::EmptyMessage), + #[prost(enumeration="super::TimeUnit", tag="19")] + Duration(i32), + #[prost(message, tag="20")] + Timestamp(super::Timestamp), + #[prost(enumeration="super::TimeUnit", tag="21")] + Time32(i32), + #[prost(enumeration="super::TimeUnit", tag="22")] + Time64(i32), + #[prost(enumeration="super::IntervalUnit", tag="23")] + Interval(i32), + #[prost(message, tag="24")] + Decimal(super::Decimal), + #[prost(message, tag="25")] + List(::prost::alloc::boxed::Box), + #[prost(message, tag="26")] + LargeList(::prost::alloc::boxed::Box), + #[prost(message, 
tag="27")] + FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag="28")] + Struct(super::Struct), + #[prost(message, tag="29")] + Union(super::Union), + #[prost(message, tag="30")] + Dictionary(::prost::alloc::boxed::Box), + } +} +/// Useful for representing an empty enum variant in rust +/// E.G. enum example{One, Two(i32)} +/// maps to +/// message example{ +/// oneof{ +/// EmptyMessage One = 1; +/// i32 Two = 2; +/// } +/// } +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct EmptyMessage { +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct OptimizedLogicalPlanType { + #[prost(string, tag="1")] + pub optimizer_name: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct OptimizedPhysicalPlanType { + #[prost(string, tag="1")] + pub optimizer_name: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PlanType { + #[prost(oneof="plan_type::PlanTypeEnum", tags="1, 2, 3, 4, 5, 6")] + pub plan_type_enum: ::core::option::Option, +} +/// Nested message and enum types in `PlanType`. 
+pub mod plan_type { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum PlanTypeEnum { + #[prost(message, tag="1")] + InitialLogicalPlan(super::EmptyMessage), + #[prost(message, tag="2")] + OptimizedLogicalPlan(super::OptimizedLogicalPlanType), + #[prost(message, tag="3")] + FinalLogicalPlan(super::EmptyMessage), + #[prost(message, tag="4")] + InitialPhysicalPlan(super::EmptyMessage), + #[prost(message, tag="5")] + OptimizedPhysicalPlan(super::OptimizedPhysicalPlanType), + #[prost(message, tag="6")] + FinalPhysicalPlan(super::EmptyMessage), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StringifiedPlan { + #[prost(message, optional, tag="1")] + pub plan_type: ::core::option::Option, + #[prost(string, tag="2")] + pub plan: ::prost::alloc::string::String, +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum JoinType { + Inner = 0, + Left = 1, + Right = 2, + Full = 3, + Semi = 4, + Anti = 5, +} +impl JoinType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + JoinType::Inner => "INNER", + JoinType::Left => "LEFT", + JoinType::Right => "RIGHT", + JoinType::Full => "FULL", + JoinType::Semi => "SEMI", + JoinType::Anti => "ANTI", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum JoinConstraint { + On = 0, + Using = 1, +} +impl JoinConstraint { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + JoinConstraint::On => "ON", + JoinConstraint::Using => "USING", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum ScalarFunction { + Abs = 0, + Acos = 1, + Asin = 2, + Atan = 3, + Ascii = 4, + Ceil = 5, + Cos = 6, + Digest = 7, + Exp = 8, + Floor = 9, + Ln = 10, + Log = 11, + Log10 = 12, + Log2 = 13, + Round = 14, + Signum = 15, + Sin = 16, + Sqrt = 17, + Tan = 18, + Trunc = 19, + Array = 20, + RegexpMatch = 21, + BitLength = 22, + Btrim = 23, + CharacterLength = 24, + Chr = 25, + Concat = 26, + ConcatWithSeparator = 27, + DatePart = 28, + DateTrunc = 29, + InitCap = 30, + Left = 31, + Lpad = 32, + Lower = 33, + Ltrim = 34, + Md5 = 35, + NullIf = 36, + OctetLength = 37, + Random = 38, + RegexpReplace = 39, + Repeat = 40, + Replace = 41, + Reverse = 42, + Right = 43, + Rpad = 44, + Rtrim = 45, + Sha224 = 46, + Sha256 = 47, + Sha384 = 48, + Sha512 = 49, + SplitPart = 50, + StartsWith = 51, + Strpos = 52, + Substr = 53, + ToHex = 54, + ToTimestamp = 55, + ToTimestampMillis = 56, + ToTimestampMicros = 57, + ToTimestampSeconds = 58, + Now = 59, + Translate = 60, + Trim = 61, + Upper = 62, + Coalesce = 63, + Power = 64, + StructFun = 65, + FromUnixtime = 66, + Atan2 = 67, + DateBin = 68, + ArrowTypeof = 69, +} +impl ScalarFunction { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + ScalarFunction::Abs => "Abs", + ScalarFunction::Acos => "Acos", + ScalarFunction::Asin => "Asin", + ScalarFunction::Atan => "Atan", + ScalarFunction::Ascii => "Ascii", + ScalarFunction::Ceil => "Ceil", + ScalarFunction::Cos => "Cos", + ScalarFunction::Digest => "Digest", + ScalarFunction::Exp => "Exp", + ScalarFunction::Floor => "Floor", + ScalarFunction::Ln => "Ln", + ScalarFunction::Log => "Log", + ScalarFunction::Log10 => "Log10", + ScalarFunction::Log2 => "Log2", + ScalarFunction::Round => "Round", + ScalarFunction::Signum => "Signum", + ScalarFunction::Sin => "Sin", + ScalarFunction::Sqrt => "Sqrt", + ScalarFunction::Tan => "Tan", + ScalarFunction::Trunc => "Trunc", + ScalarFunction::Array => "Array", + ScalarFunction::RegexpMatch => "RegexpMatch", + ScalarFunction::BitLength => "BitLength", + ScalarFunction::Btrim => "Btrim", + ScalarFunction::CharacterLength => "CharacterLength", + ScalarFunction::Chr => "Chr", + ScalarFunction::Concat => "Concat", + ScalarFunction::ConcatWithSeparator => "ConcatWithSeparator", + ScalarFunction::DatePart => "DatePart", + ScalarFunction::DateTrunc => "DateTrunc", + ScalarFunction::InitCap => "InitCap", + ScalarFunction::Left => "Left", + ScalarFunction::Lpad => "Lpad", + ScalarFunction::Lower => "Lower", + ScalarFunction::Ltrim => "Ltrim", + ScalarFunction::Md5 => "MD5", + ScalarFunction::NullIf => "NullIf", + ScalarFunction::OctetLength => "OctetLength", + ScalarFunction::Random => "Random", + ScalarFunction::RegexpReplace => "RegexpReplace", + ScalarFunction::Repeat => "Repeat", + ScalarFunction::Replace => "Replace", + ScalarFunction::Reverse => "Reverse", + ScalarFunction::Right => "Right", + ScalarFunction::Rpad => "Rpad", + ScalarFunction::Rtrim => "Rtrim", + ScalarFunction::Sha224 => "SHA224", + ScalarFunction::Sha256 => "SHA256", + ScalarFunction::Sha384 => "SHA384", + ScalarFunction::Sha512 => "SHA512", + ScalarFunction::SplitPart => "SplitPart", + 
ScalarFunction::StartsWith => "StartsWith", + ScalarFunction::Strpos => "Strpos", + ScalarFunction::Substr => "Substr", + ScalarFunction::ToHex => "ToHex", + ScalarFunction::ToTimestamp => "ToTimestamp", + ScalarFunction::ToTimestampMillis => "ToTimestampMillis", + ScalarFunction::ToTimestampMicros => "ToTimestampMicros", + ScalarFunction::ToTimestampSeconds => "ToTimestampSeconds", + ScalarFunction::Now => "Now", + ScalarFunction::Translate => "Translate", + ScalarFunction::Trim => "Trim", + ScalarFunction::Upper => "Upper", + ScalarFunction::Coalesce => "Coalesce", + ScalarFunction::Power => "Power", + ScalarFunction::StructFun => "StructFun", + ScalarFunction::FromUnixtime => "FromUnixtime", + ScalarFunction::Atan2 => "Atan2", + ScalarFunction::DateBin => "DateBin", + ScalarFunction::ArrowTypeof => "ArrowTypeof", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum AggregateFunction { + Min = 0, + Max = 1, + Sum = 2, + Avg = 3, + Count = 4, + ApproxDistinct = 5, + ArrayAgg = 6, + Variance = 7, + VariancePop = 8, + Covariance = 9, + CovariancePop = 10, + Stddev = 11, + StddevPop = 12, + Correlation = 13, + ApproxPercentileCont = 14, + ApproxMedian = 15, + ApproxPercentileContWithWeight = 16, + Grouping = 17, + Median = 18, +} +impl AggregateFunction { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + AggregateFunction::Min => "MIN", + AggregateFunction::Max => "MAX", + AggregateFunction::Sum => "SUM", + AggregateFunction::Avg => "AVG", + AggregateFunction::Count => "COUNT", + AggregateFunction::ApproxDistinct => "APPROX_DISTINCT", + AggregateFunction::ArrayAgg => "ARRAY_AGG", + AggregateFunction::Variance => "VARIANCE", + AggregateFunction::VariancePop => "VARIANCE_POP", + AggregateFunction::Covariance => "COVARIANCE", + AggregateFunction::CovariancePop => "COVARIANCE_POP", + AggregateFunction::Stddev => "STDDEV", + AggregateFunction::StddevPop => "STDDEV_POP", + AggregateFunction::Correlation => "CORRELATION", + AggregateFunction::ApproxPercentileCont => "APPROX_PERCENTILE_CONT", + AggregateFunction::ApproxMedian => "APPROX_MEDIAN", + AggregateFunction::ApproxPercentileContWithWeight => "APPROX_PERCENTILE_CONT_WITH_WEIGHT", + AggregateFunction::Grouping => "GROUPING", + AggregateFunction::Median => "MEDIAN", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum BuiltInWindowFunction { + RowNumber = 0, + Rank = 1, + DenseRank = 2, + PercentRank = 3, + CumeDist = 4, + Ntile = 5, + Lag = 6, + Lead = 7, + FirstValue = 8, + LastValue = 9, + NthValue = 10, +} +impl BuiltInWindowFunction { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + BuiltInWindowFunction::RowNumber => "ROW_NUMBER", + BuiltInWindowFunction::Rank => "RANK", + BuiltInWindowFunction::DenseRank => "DENSE_RANK", + BuiltInWindowFunction::PercentRank => "PERCENT_RANK", + BuiltInWindowFunction::CumeDist => "CUME_DIST", + BuiltInWindowFunction::Ntile => "NTILE", + BuiltInWindowFunction::Lag => "LAG", + BuiltInWindowFunction::Lead => "LEAD", + BuiltInWindowFunction::FirstValue => "FIRST_VALUE", + BuiltInWindowFunction::LastValue => "LAST_VALUE", + BuiltInWindowFunction::NthValue => "NTH_VALUE", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum WindowFrameUnits { + Rows = 0, + Range = 1, + Groups = 2, +} +impl WindowFrameUnits { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + WindowFrameUnits::Rows => "ROWS", + WindowFrameUnits::Range => "RANGE", + WindowFrameUnits::Groups => "GROUPS", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum WindowFrameBoundType { + CurrentRow = 0, + Preceding = 1, + Following = 2, +} +impl WindowFrameBoundType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + WindowFrameBoundType::CurrentRow => "CURRENT_ROW", + WindowFrameBoundType::Preceding => "PRECEDING", + WindowFrameBoundType::Following => "FOLLOWING", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum DateUnit { + Day = 0, + DateMillisecond = 1, +} +impl DateUnit { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + DateUnit::Day => "Day", + DateUnit::DateMillisecond => "DateMillisecond", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum TimeUnit { + Second = 0, + Millisecond = 1, + Microsecond = 2, + Nanosecond = 3, +} +impl TimeUnit { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + TimeUnit::Second => "Second", + TimeUnit::Millisecond => "Millisecond", + TimeUnit::Microsecond => "Microsecond", + TimeUnit::Nanosecond => "Nanosecond", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum IntervalUnit { + YearMonth = 0, + DayTime = 1, + MonthDayNano = 2, +} +impl IntervalUnit { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + IntervalUnit::YearMonth => "YearMonth", + IntervalUnit::DayTime => "DayTime", + IntervalUnit::MonthDayNano => "MonthDayNano", + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum UnionMode { + Sparse = 0, + Dense = 1, +} +impl UnionMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + UnionMode::Sparse => "sparse", + UnionMode::Dense => "dense", + } + } +} +/// Contains all valid datafusion scalar types except for +/// List +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum PrimitiveScalarType { + /// arrow::Type::BOOL + Bool = 0, + /// arrow::Type::UINT8 + Uint8 = 1, + /// arrow::Type::INT8 + Int8 = 2, + /// represents arrow::Type fields in src/arrow/type.h + Uint16 = 3, + Int16 = 4, + Uint32 = 5, + Int32 = 6, + Uint64 = 7, + Int64 = 8, + Float32 = 9, + Float64 = 10, + Utf8 = 11, + LargeUtf8 = 12, + Date32 = 13, + TimestampMicrosecond = 14, + TimestampNanosecond = 15, + Null = 16, + Decimal128 = 17, + Date64 = 20, + TimestampSecond = 21, + TimestampMillisecond = 22, + IntervalYearmonth = 23, + IntervalDaytime = 24, + IntervalMonthdaynano = 31, + Binary = 25, + LargeBinary = 26, + Time32Second = 27, + Time32Millisecond = 28, + Time64Microsecond = 29, + Time64Nanosecond = 30, +} + +impl PrimitiveScalarType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + PrimitiveScalarType::Bool => "BOOL", + PrimitiveScalarType::Uint8 => "UINT8", + PrimitiveScalarType::Int8 => "INT8", + PrimitiveScalarType::Uint16 => "UINT16", + PrimitiveScalarType::Int16 => "INT16", + PrimitiveScalarType::Uint32 => "UINT32", + PrimitiveScalarType::Int32 => "INT32", + PrimitiveScalarType::Uint64 => "UINT64", + PrimitiveScalarType::Int64 => "INT64", + PrimitiveScalarType::Float32 => "FLOAT32", + PrimitiveScalarType::Float64 => "FLOAT64", + PrimitiveScalarType::Utf8 => "UTF8", + PrimitiveScalarType::LargeUtf8 => "LARGE_UTF8", + PrimitiveScalarType::Date32 => "DATE32", + PrimitiveScalarType::TimestampMicrosecond => "TIMESTAMP_MICROSECOND", + PrimitiveScalarType::TimestampNanosecond => "TIMESTAMP_NANOSECOND", + PrimitiveScalarType::Null => "NULL", + PrimitiveScalarType::Decimal128 => "DECIMAL128", + PrimitiveScalarType::Date64 => "DATE64", + PrimitiveScalarType::TimestampSecond => "TIMESTAMP_SECOND", + PrimitiveScalarType::TimestampMillisecond => "TIMESTAMP_MILLISECOND", + PrimitiveScalarType::IntervalYearmonth => "INTERVAL_YEARMONTH", + PrimitiveScalarType::IntervalDaytime => "INTERVAL_DAYTIME", + PrimitiveScalarType::IntervalMonthdaynano => "INTERVAL_MONTHDAYNANO", + PrimitiveScalarType::Binary => "BINARY", + PrimitiveScalarType::LargeBinary => "LARGE_BINARY", + PrimitiveScalarType::Time32Second => "TIME32_SECOND", + PrimitiveScalarType::Time32Millisecond => "TIME32_MILLISECOND", + PrimitiveScalarType::Time64Microsecond => "TIME64MICROSECOND", + PrimitiveScalarType::Time64Nanosecond => "TIME64NANOSECOND", + } + } +} diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index fe67b590ac66e..13057080c8bed 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -10067,6 +10067,224 @@ impl<'de> serde::Deserialize<'de> for ScalarListValue { deserializer.deserialize_struct("datafusion.ScalarListValue", 
FIELDS, GeneratedVisitor) } } +impl serde::Serialize for ScalarTime32Value { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.value.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ScalarTime32Value", len)?; + if let Some(v) = self.value.as_ref() { + match v { + scalar_time32_value::Value::Time32SecondValue(v) => { + struct_ser.serialize_field("time32SecondValue", v)?; + } + scalar_time32_value::Value::Time32MillisecondValue(v) => { + struct_ser.serialize_field("time32MillisecondValue", v)?; + } + } + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ScalarTime32Value { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "time32_second_value", + "time32SecondValue", + "time32_millisecond_value", + "time32MillisecondValue", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Time32SecondValue, + Time32MillisecondValue, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "time32SecondValue" | "time32_second_value" => Ok(GeneratedField::Time32SecondValue), + "time32MillisecondValue" | "time32_millisecond_value" => Ok(GeneratedField::Time32MillisecondValue), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + 
deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ScalarTime32Value; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ScalarTime32Value") + } + + fn visit_map(self, mut map: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut value__ = None; + while let Some(k) = map.next_key()? { + match k { + GeneratedField::Time32SecondValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("time32SecondValue")); + } + value__ = map.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| scalar_time32_value::Value::Time32SecondValue(x.0)); + } + GeneratedField::Time32MillisecondValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("time32MillisecondValue")); + } + value__ = map.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| scalar_time32_value::Value::Time32MillisecondValue(x.0)); + } + } + } + Ok(ScalarTime32Value { + value: value__, + }) + } + } + deserializer.deserialize_struct("datafusion.ScalarTime32Value", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for ScalarTime64Value { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.value.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ScalarTime64Value", len)?; + if let Some(v) = self.value.as_ref() { + match v { + scalar_time64_value::Value::Time64MicrosecondValue(v) => { + struct_ser.serialize_field("time64MicrosecondValue", ToString::to_string(&v).as_str())?; + } + scalar_time64_value::Value::Time64NanosecondValue(v) => { + struct_ser.serialize_field("time64NanosecondValue", 
ToString::to_string(&v).as_str())?; + } + } + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ScalarTime64Value { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "time64_microsecond_value", + "time64MicrosecondValue", + "time64_nanosecond_value", + "time64NanosecondValue", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Time64MicrosecondValue, + Time64NanosecondValue, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "time64MicrosecondValue" | "time64_microsecond_value" => Ok(GeneratedField::Time64MicrosecondValue), + "time64NanosecondValue" | "time64_nanosecond_value" => Ok(GeneratedField::Time64NanosecondValue), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ScalarTime64Value; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ScalarTime64Value") + } + + fn visit_map(self, mut map: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut value__ = None; + while let Some(k) = map.next_key()? 
{ + match k { + GeneratedField::Time64MicrosecondValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("time64MicrosecondValue")); + } + value__ = map.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| scalar_time64_value::Value::Time64MicrosecondValue(x.0)); + } + GeneratedField::Time64NanosecondValue => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("time64NanosecondValue")); + } + value__ = map.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| scalar_time64_value::Value::Time64NanosecondValue(x.0)); + } + } + } + Ok(ScalarTime64Value { + value: value__, + }) + } + } + deserializer.deserialize_struct("datafusion.ScalarTime64Value", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ScalarTimestampValue { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -10387,6 +10605,9 @@ impl serde::Serialize for ScalarValue { scalar_value::Value::Date32Value(v) => { struct_ser.serialize_field("date32Value", v)?; } + scalar_value::Value::Time32Value(v) => { + struct_ser.serialize_field("time32Value", v)?; + } scalar_value::Value::ListValue(v) => { struct_ser.serialize_field("listValue", v)?; } @@ -10415,7 +10636,7 @@ impl serde::Serialize for ScalarValue { struct_ser.serialize_field("largeBinaryValue", pbjson::private::base64::encode(&v).as_str())?; } scalar_value::Value::Time64Value(v) => { - struct_ser.serialize_field("time64Value", ToString::to_string(&v).as_str())?; + struct_ser.serialize_field("time64Value", v)?; } scalar_value::Value::IntervalMonthDayNano(v) => { struct_ser.serialize_field("intervalMonthDayNano", v)?; @@ -10468,6 +10689,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { "float64Value", "date_32_value", "date32Value", + "time32_value", + "time32Value", "list_value", "listValue", "decimal128_value", @@ -10513,6 +10736,7 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { 
Float32Value, Float64Value, Date32Value, + Time32Value, ListValue, Decimal128Value, Date64Value, @@ -10562,6 +10786,7 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { "float32Value" | "float32_value" => Ok(GeneratedField::Float32Value), "float64Value" | "float64_value" => Ok(GeneratedField::Float64Value), "date32Value" | "date_32_value" => Ok(GeneratedField::Date32Value), + "time32Value" | "time32_value" => Ok(GeneratedField::Time32Value), "listValue" | "list_value" => Ok(GeneratedField::ListValue), "decimal128Value" | "decimal128_value" => Ok(GeneratedField::Decimal128Value), "date64Value" | "date_64_value" => Ok(GeneratedField::Date64Value), @@ -10688,6 +10913,13 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { } value__ = map.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| scalar_value::Value::Date32Value(x.0)); } + GeneratedField::Time32Value => { + if value__.is_some() { + return Err(serde::de::Error::duplicate_field("time32Value")); + } + value__ = map.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::Time32Value) +; + } GeneratedField::ListValue => { if value__.is_some() { return Err(serde::de::Error::duplicate_field("listValue")); @@ -10750,7 +10982,8 @@ impl<'de> serde::Deserialize<'de> for ScalarValue { if value__.is_some() { return Err(serde::de::Error::duplicate_field("time64Value")); } - value__ = map.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| scalar_value::Value::Time64Value(x.0)); + value__ = map.next_value::<::std::option::Option<_>>()?.map(scalar_value::Value::Time64Value) +; } GeneratedField::IntervalMonthDayNano => { if value__.is_some() { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 8962a7a8acd60..f2af6ecd624b8 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -840,6 +840,36 @@ pub struct ScalarListValue { pub values: 
::prost::alloc::vec::Vec, } #[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarTime32Value { + #[prost(oneof="scalar_time32_value::Value", tags="1, 2")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarTime32Value`. +pub mod scalar_time32_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(int32, tag="1")] + Time32SecondValue(i32), + #[prost(int32, tag="2")] + Time32MillisecondValue(i32), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarTime64Value { + #[prost(oneof="scalar_time64_value::Value", tags="1, 2")] + pub value: ::core::option::Option, +} +/// Nested message and enum types in `ScalarTime64Value`. +pub mod scalar_time64_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Value { + #[prost(int64, tag="1")] + Time64MicrosecondValue(i64), + #[prost(int64, tag="2")] + Time64NanosecondValue(i64), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarTimestampValue { #[prost(string, tag="5")] pub timezone: ::prost::alloc::string::String, @@ -895,7 +925,7 @@ pub struct ScalarFixedSizeBinary { } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarValue { - #[prost(oneof="scalar_value::Value", tags="33, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34")] + #[prost(oneof="scalar_value::Value", tags="33, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34")] pub value: ::core::option::Option, } /// Nested message and enum types in `ScalarValue`. 
@@ -935,6 +965,8 @@ pub mod scalar_value { /// Literal Date32 value always has a unit of day #[prost(int32, tag="14")] Date32Value(i32), + #[prost(message, tag="15")] + Time32Value(super::ScalarTime32Value), /// WAS: ScalarType null_list_value = 18; #[prost(message, tag="17")] ListValue(super::ScalarListValue), @@ -954,8 +986,8 @@ pub mod scalar_value { BinaryValue(::prost::alloc::vec::Vec), #[prost(bytes, tag="29")] LargeBinaryValue(::prost::alloc::vec::Vec), - #[prost(int64, tag="30")] - Time64Value(i64), + #[prost(message, tag="30")] + Time64Value(super::ScalarTime64Value), #[prost(message, tag="31")] IntervalMonthDayNano(super::IntervalMonthDayNanoValue), #[prost(message, tag="32")] diff --git a/datafusion/proto/src/lib.rs b/datafusion/proto/src/lib.rs index 99a2365b49b69..bb7f131d8ca71 100644 --- a/datafusion/proto/src/lib.rs +++ b/datafusion/proto/src/lib.rs @@ -606,9 +606,21 @@ mod roundtrip_tests { ScalarValue::Date32(Some(0)), ScalarValue::Date32(Some(i32::MAX)), ScalarValue::Date32(None), - ScalarValue::Time64(Some(0)), - ScalarValue::Time64(Some(i64::MAX)), - ScalarValue::Time64(None), + ScalarValue::Date64(Some(0)), + ScalarValue::Date64(Some(i64::MAX)), + ScalarValue::Date64(None), + ScalarValue::Time32Second(Some(0)), + ScalarValue::Time32Second(Some(i32::MAX)), + ScalarValue::Time32Second(None), + ScalarValue::Time32Millisecond(Some(0)), + ScalarValue::Time32Millisecond(Some(i32::MAX)), + ScalarValue::Time32Millisecond(None), + ScalarValue::Time64Microsecond(Some(0)), + ScalarValue::Time64Microsecond(Some(i64::MAX)), + ScalarValue::Time64Microsecond(None), + ScalarValue::Time64Nanosecond(Some(0)), + ScalarValue::Time64Nanosecond(Some(i64::MAX)), + ScalarValue::Time64Nanosecond(None), ScalarValue::TimestampNanosecond(Some(0), None), ScalarValue::TimestampNanosecond(Some(i64::MAX), None), ScalarValue::TimestampNanosecond(Some(0), Some("UTC".to_string())), diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index 
95224bf8e9f2a..d1f5834ed43a7 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -1075,8 +1075,48 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { }) } - datafusion::scalar::ScalarValue::Time64(v) => { - create_proto_scalar(v, &data_type, |v| Value::Time64Value(*v)) + + + + + datafusion::scalar::ScalarValue::Time32Second(v) => { + create_proto_scalar(v, &data_type, |v| { + Value::Time32Value(protobuf::ScalarTime32Value { + value: Some( + protobuf::scalar_time32_value::Value::Time32SecondValue(*v), + ), + }) + }) + } + + datafusion::scalar::ScalarValue::Time32Millisecond(v) => { + create_proto_scalar(v, &data_type, |v| { + Value::Time32Value(protobuf::ScalarTime32Value { + value: Some( + protobuf::scalar_time32_value::Value::Time32MillisecondValue(*v), + ), + }) + }) + } + + datafusion::scalar::ScalarValue::Time64Microsecond(v) => { + create_proto_scalar(v, &data_type, |v| { + Value::Time64Value(protobuf::ScalarTime64Value { + value: Some( + protobuf::scalar_time64_value::Value::Time64MicrosecondValue(*v), + ), + }) + }) + } + + datafusion::scalar::ScalarValue::Time64Nanosecond(v) => { + create_proto_scalar(v, &data_type, |v| { + Value::Time64Value(protobuf::ScalarTime64Value { + value: Some( + protobuf::scalar_time64_value::Value::Time64NanosecondValue(*v), + ), + }) + }) } datafusion::scalar::ScalarValue::IntervalMonthDayNano(v) => { From e7b1c6a7f2731aac47524ad83ad17ea8ef2b105e Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Wed, 9 Nov 2022 02:16:18 +0100 Subject: [PATCH 02/13] Implement Time32 and Time64 in hash_join and hash_util --- .../core/src/physical_plan/hash_utils.rs | 830 ++++++++++++++++++ .../core/src/physical_plan/joins/hash_join.rs | 7 +- 2 files changed, 834 insertions(+), 3 deletions(-) create mode 100644 datafusion/core/src/physical_plan/hash_utils.rs diff --git a/datafusion/core/src/physical_plan/hash_utils.rs b/datafusion/core/src/physical_plan/hash_utils.rs new file mode 100644 index 
0000000000000..7de8643237470 --- /dev/null +++ b/datafusion/core/src/physical_plan/hash_utils.rs @@ -0,0 +1,830 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Functionality used both on logical and physical plans + +use crate::error::{DataFusionError, Result}; +use ahash::RandomState; +use arrow::array::{ + Array, ArrayRef, BooleanArray, Date32Array, Date64Array, Decimal128Array, + DictionaryArray, FixedSizeBinaryArray, Float32Array, Float64Array, Int16Array, + Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, +}; +use arrow::datatypes::{ + ArrowDictionaryKeyType, ArrowNativeType, DataType, Int16Type, Int32Type, Int64Type, + Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; +use std::sync::Arc; + +// Combines two hashes into one hash +#[inline] +fn combine_hashes(l: u64, r: u64) -> u64 { + let hash = (17 * 37u64).wrapping_add(l); + hash.wrapping_mul(37).wrapping_add(r) +} + +fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) { + if mul_col { + 
hashes_buffer.iter_mut().for_each(|hash| { + // stable hash for null value + *hash = combine_hashes(random_state.hash_one(&1), *hash); + }) + } else { + hashes_buffer.iter_mut().for_each(|hash| { + *hash = random_state.hash_one(&1); + }) + } +} + +fn hash_decimal128<'a>( + array: &ArrayRef, + random_state: &RandomState, + hashes_buffer: &'a mut [u64], + mul_col: bool, +) { + let array = array.as_any().downcast_ref::().unwrap(); + if array.null_count() == 0 { + if mul_col { + for (i, hash) in hashes_buffer.iter_mut().enumerate() { + *hash = combine_hashes( + random_state.hash_one(&array.value(i).as_i128()), + *hash, + ); + } + } else { + for (i, hash) in hashes_buffer.iter_mut().enumerate() { + *hash = random_state.hash_one(&array.value(i).as_i128()); + } + } + } else if mul_col { + for (i, hash) in hashes_buffer.iter_mut().enumerate() { + if !array.is_null(i) { + *hash = combine_hashes( + random_state.hash_one(&array.value(i).as_i128()), + *hash, + ); + } + } + } else { + for (i, hash) in hashes_buffer.iter_mut().enumerate() { + if !array.is_null(i) { + *hash = random_state.hash_one(&array.value(i).as_i128()); + } + } + } +} + +macro_rules! 
hash_array { + ($array_type:ident, $column: ident, $ty: ty, $hashes: ident, $random_state: ident, $multi_col: ident) => { + let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); + if array.null_count() == 0 { + if $multi_col { + for (i, hash) in $hashes.iter_mut().enumerate() { + *hash = + combine_hashes($random_state.hash_one(&array.value(i)), *hash); + } + } else { + for (i, hash) in $hashes.iter_mut().enumerate() { + *hash = $random_state.hash_one(&array.value(i)); + } + } + } else { + if $multi_col { + for (i, hash) in $hashes.iter_mut().enumerate() { + if !array.is_null(i) { + *hash = combine_hashes( + $random_state.hash_one(&array.value(i)), + *hash, + ); + } + } + } else { + for (i, hash) in $hashes.iter_mut().enumerate() { + if !array.is_null(i) { + *hash = $random_state.hash_one(&array.value(i)); + } + } + } + } + }; +} + +macro_rules! hash_array_primitive { + ($array_type:ident, $column: ident, $ty: ident, $hashes: ident, $random_state: ident, $multi_col: ident) => { + let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); + let values = array.values(); + + if array.null_count() == 0 { + if $multi_col { + for (hash, value) in $hashes.iter_mut().zip(values.iter()) { + *hash = combine_hashes($random_state.hash_one(value), *hash); + } + } else { + for (hash, value) in $hashes.iter_mut().zip(values.iter()) { + *hash = $random_state.hash_one(value) + } + } + } else { + if $multi_col { + for (i, (hash, value)) in + $hashes.iter_mut().zip(values.iter()).enumerate() + { + if !array.is_null(i) { + *hash = combine_hashes($random_state.hash_one(value), *hash); + } + } + } else { + for (i, (hash, value)) in + $hashes.iter_mut().zip(values.iter()).enumerate() + { + if !array.is_null(i) { + *hash = $random_state.hash_one(value); + } + } + } + } + }; +} + +macro_rules! 
hash_array_float { + ($array_type:ident, $column: ident, $ty: ident, $hashes: ident, $random_state: ident, $multi_col: ident) => { + let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); + let values = array.values(); + + if array.null_count() == 0 { + if $multi_col { + for (hash, value) in $hashes.iter_mut().zip(values.iter()) { + *hash = combine_hashes( + $random_state.hash_one(&$ty::from_le_bytes(value.to_le_bytes())), + *hash, + ); + } + } else { + for (hash, value) in $hashes.iter_mut().zip(values.iter()) { + *hash = + $random_state.hash_one(&$ty::from_le_bytes(value.to_le_bytes())) + } + } + } else { + if $multi_col { + for (i, (hash, value)) in + $hashes.iter_mut().zip(values.iter()).enumerate() + { + if !array.is_null(i) { + *hash = combine_hashes( + $random_state + .hash_one(&$ty::from_le_bytes(value.to_le_bytes())), + *hash, + ); + } + } + } else { + for (i, (hash, value)) in + $hashes.iter_mut().zip(values.iter()).enumerate() + { + if !array.is_null(i) { + *hash = $random_state + .hash_one(&$ty::from_le_bytes(value.to_le_bytes())); + } + } + } + } + }; +} + +/// Hash the values in a dictionary array +fn create_hashes_dictionary( + array: &ArrayRef, + random_state: &RandomState, + hashes_buffer: &mut [u64], + multi_col: bool, +) -> Result<()> { + let dict_array = array.as_any().downcast_ref::>().unwrap(); + + // Hash each dictionary value once, and then use that computed + // hash for each key value to avoid a potentially expensive + // redundant hashing for large dictionary elements (e.g. 
strings) + let dict_values = Arc::clone(dict_array.values()); + let mut dict_hashes = vec![0; dict_values.len()]; + create_hashes(&[dict_values], random_state, &mut dict_hashes)?; + + // combine hash for each index in values + if multi_col { + for (hash, key) in hashes_buffer.iter_mut().zip(dict_array.keys().iter()) { + if let Some(key) = key { + let idx = key + .to_usize() + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Can not convert key value {:?} to usize in dictionary of type {:?}", + key, dict_array.data_type() + )) + })?; + *hash = combine_hashes(dict_hashes[idx], *hash) + } // no update for Null, consistent with other hashes + } + } else { + for (hash, key) in hashes_buffer.iter_mut().zip(dict_array.keys().iter()) { + if let Some(key) = key { + let idx = key + .to_usize() + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Can not convert key value {:?} to usize in dictionary of type {:?}", + key, dict_array.data_type() + )) + })?; + *hash = dict_hashes[idx] + } // no update for Null, consistent with other hashes + } + } + Ok(()) +} + +/// Test version of `create_hashes` that produces the same value for +/// all hashes (to test collisions) +/// +/// See comments on `hashes_buffer` for more details +#[cfg(feature = "force_hash_collisions")] +pub fn create_hashes<'a>( + _arrays: &[ArrayRef], + _random_state: &RandomState, + hashes_buffer: &'a mut Vec, +) -> Result<&'a mut Vec> { + for hash in hashes_buffer.iter_mut() { + *hash = 0 + } + Ok(hashes_buffer) +} + +/// Test version of `create_row_hashes` that produces the same value for +/// all hashes (to test collisions) +/// +/// See comments on `hashes_buffer` for more details +#[cfg(feature = "force_hash_collisions")] +pub fn create_row_hashes<'a>( + _rows: &[Vec], + _random_state: &RandomState, + hashes_buffer: &'a mut Vec, +) -> Result<&'a mut Vec> { + for hash in hashes_buffer.iter_mut() { + *hash = 0 + } + Ok(hashes_buffer) +} + +/// Creates hash values for every row, based on their raw 
bytes. +#[cfg(not(feature = "force_hash_collisions"))] +pub fn create_row_hashes<'a>( + rows: &[Vec], + random_state: &RandomState, + hashes_buffer: &'a mut Vec, +) -> Result<&'a mut Vec> { + for hash in hashes_buffer.iter_mut() { + *hash = 0 + } + for (i, hash) in hashes_buffer.iter_mut().enumerate() { + *hash = random_state.hash_one(&rows[i]); + } + Ok(hashes_buffer) +} + +/// Creates hash values for every row, based on the values in the +/// columns. +/// +/// The number of rows to hash is determined by `hashes_buffer.len()`. +/// `hashes_buffer` should be pre-sized appropriately +#[cfg(not(feature = "force_hash_collisions"))] +pub fn create_hashes<'a>( + arrays: &[ArrayRef], + random_state: &RandomState, + hashes_buffer: &'a mut Vec, +) -> Result<&'a mut Vec> { + // combine hashes with `combine_hashes` if we have more than 1 column + + use arrow::array::{ + BinaryArray, LargeBinaryArray, Time32MillisecondArray, Time32SecondArray, + Time64MicrosecondArray, Time64NanosecondArray, + }; + let multi_col = arrays.len() > 1; + + for col in arrays { + match col.data_type() { + DataType::Null => { + hash_null(random_state, hashes_buffer, multi_col); + } + DataType::Decimal128(_, _) => { + hash_decimal128(col, random_state, hashes_buffer, multi_col); + } + DataType::UInt8 => { + hash_array_primitive!( + UInt8Array, + col, + u8, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::UInt16 => { + hash_array_primitive!( + UInt16Array, + col, + u16, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::UInt32 => { + hash_array_primitive!( + UInt32Array, + col, + u32, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::UInt64 => { + hash_array_primitive!( + UInt64Array, + col, + u64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Int8 => { + hash_array_primitive!( + Int8Array, + col, + i8, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Int16 => { + hash_array_primitive!( + Int16Array, + col, + i16, + 
hashes_buffer, + random_state, + multi_col + ); + } + DataType::Int32 => { + hash_array_primitive!( + Int32Array, + col, + i32, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Int64 => { + hash_array_primitive!( + Int64Array, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Float32 => { + hash_array_float!( + Float32Array, + col, + u32, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Float64 => { + hash_array_float!( + Float64Array, + col, + u64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Timestamp(TimeUnit::Second, None) => { + hash_array_primitive!( + TimestampSecondArray, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Timestamp(TimeUnit::Millisecond, None) => { + hash_array_primitive!( + TimestampMillisecondArray, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Timestamp(TimeUnit::Microsecond, None) => { + hash_array_primitive!( + TimestampMicrosecondArray, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + hash_array_primitive!( + TimestampNanosecondArray, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Time32(TimeUnit::Second) => { + hash_array_primitive!( + Time32SecondArray, + col, + i32, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Time32(TimeUnit::Millisecond) => { + hash_array_primitive!( + Time32MillisecondArray, + col, + i32, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Time64(TimeUnit::Microsecond) => { + hash_array_primitive!( + Time64MicrosecondArray, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Time64(TimeUnit::Nanosecond) => { + hash_array_primitive!( + Time64NanosecondArray, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Date32 => { + hash_array_primitive!( + Date32Array, + col, + i32, 
+ hashes_buffer, + random_state, + multi_col + ); + } + DataType::Date64 => { + hash_array_primitive!( + Date64Array, + col, + i64, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Boolean => { + hash_array!( + BooleanArray, + col, + u8, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Utf8 => { + hash_array!( + StringArray, + col, + str, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::LargeUtf8 => { + hash_array!( + LargeStringArray, + col, + str, + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Binary => { + hash_array!( + BinaryArray, + col, + &[u8], + hashes_buffer, + random_state, + multi_col + ); + } + DataType::FixedSizeBinary(_) => { + hash_array!( + FixedSizeBinaryArray, + col, + &[u8], + hashes_buffer, + random_state, + multi_col + ); + } + DataType::LargeBinary => { + hash_array!( + LargeBinaryArray, + col, + &[u8], + hashes_buffer, + random_state, + multi_col + ); + } + DataType::Dictionary(index_type, _) => match **index_type { + DataType::Int8 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::Int16 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::Int32 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::Int64 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::UInt8 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::UInt16 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::UInt32 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + DataType::UInt64 => { + create_hashes_dictionary::( + col, + random_state, + hashes_buffer, + multi_col, + )?; + } + _ => { + return 
Err(DataFusionError::Internal(format!( + "Unsupported dictionary type in hasher hashing: {}", + col.data_type(), + ))) + } + }, + _ => { + // This is internal because we should have caught this before. + return Err(DataFusionError::Internal(format!( + "Unsupported data type in hasher: {}", + col.data_type() + ))); + } + } + } + Ok(hashes_buffer) +} + +#[cfg(test)] +mod tests { + use crate::from_slice::FromSlice; + use arrow::{ + array::{BinaryArray, DictionaryArray, FixedSizeBinaryArray}, + datatypes::Int8Type, + }; + use std::sync::Arc; + + use super::*; + + #[test] + fn create_hashes_for_decimal_array() -> Result<()> { + let array = vec![1, 2, 3, 4] + .into_iter() + .map(Some) + .collect::() + .with_precision_and_scale(20, 3) + .unwrap(); + let array_ref = Arc::new(array); + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; array_ref.len()]; + let hashes = create_hashes(&[array_ref], &random_state, hashes_buff)?; + assert_eq!(hashes.len(), 4); + Ok(()) + } + + #[test] + fn create_hashes_for_float_arrays() -> Result<()> { + let f32_arr = Arc::new(Float32Array::from_slice(&[0.12, 0.5, 1f32, 444.7])); + let f64_arr = Arc::new(Float64Array::from_slice(&[0.12, 0.5, 1f64, 444.7])); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; f32_arr.len()]; + let hashes = create_hashes(&[f32_arr], &random_state, hashes_buff)?; + assert_eq!(hashes.len(), 4,); + + let hashes = create_hashes(&[f64_arr], &random_state, hashes_buff)?; + assert_eq!(hashes.len(), 4,); + + Ok(()) + } + + #[test] + fn create_hashes_binary() -> Result<()> { + let byte_array = Arc::new(BinaryArray::from_vec(vec![ + &[4, 3, 2], + &[4, 3, 2], + &[1, 2, 3], + ])); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; byte_array.len()]; + let hashes = create_hashes(&[byte_array], &random_state, hashes_buff)?; + assert_eq!(hashes.len(), 3,); + + Ok(()) + } + + #[test] + fn 
create_hashes_fixed_size_binary() -> Result<()> { + let input_arg = vec![vec![1, 2], vec![5, 6], vec![5, 6]]; + let fixed_size_binary_array = + Arc::new(FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap()); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; fixed_size_binary_array.len()]; + let hashes = + create_hashes(&[fixed_size_binary_array], &random_state, hashes_buff)?; + assert_eq!(hashes.len(), 3,); + + Ok(()) + } + + #[test] + // Tests actual values of hashes, which are different if forcing collisions + #[cfg(not(feature = "force_hash_collisions"))] + fn create_hashes_for_dict_arrays() { + let strings = vec![Some("foo"), None, Some("bar"), Some("foo"), None]; + + let string_array = Arc::new(strings.iter().cloned().collect::()); + let dict_array = Arc::new( + strings + .iter() + .cloned() + .collect::>(), + ); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + let mut string_hashes = vec![0; strings.len()]; + create_hashes(&[string_array], &random_state, &mut string_hashes).unwrap(); + + let mut dict_hashes = vec![0; strings.len()]; + create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap(); + + // Null values result in a zero hash, + for (val, hash) in strings.iter().zip(string_hashes.iter()) { + match val { + Some(_) => assert_ne!(*hash, 0), + None => assert_eq!(*hash, 0), + } + } + + // same logical values should hash to the same hash value + assert_eq!(string_hashes, dict_hashes); + + // Same values should map to same hash values + assert_eq!(strings[1], strings[4]); + assert_eq!(dict_hashes[1], dict_hashes[4]); + assert_eq!(strings[0], strings[3]); + assert_eq!(dict_hashes[0], dict_hashes[3]); + + // different strings should map to different hash values + assert_ne!(strings[0], strings[2]); + assert_ne!(dict_hashes[0], dict_hashes[2]); + } + + #[test] + // Tests actual values of hashes, which are different if forcing collisions + #[cfg(not(feature = 
"force_hash_collisions"))] + fn create_multi_column_hash_for_dict_arrays() { + let strings1 = vec![Some("foo"), None, Some("bar")]; + let strings2 = vec![Some("blarg"), Some("blah"), None]; + + let string_array = Arc::new(strings1.iter().cloned().collect::()); + let dict_array = Arc::new( + strings2 + .iter() + .cloned() + .collect::>(), + ); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + let mut one_col_hashes = vec![0; strings1.len()]; + create_hashes(&[dict_array.clone()], &random_state, &mut one_col_hashes).unwrap(); + + let mut two_col_hashes = vec![0; strings1.len()]; + create_hashes( + &[dict_array, string_array], + &random_state, + &mut two_col_hashes, + ) + .unwrap(); + + assert_eq!(one_col_hashes.len(), 3); + assert_eq!(two_col_hashes.len(), 3); + + assert_ne!(one_col_hashes, two_col_hashes); + } +} diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs index f20219c72d7cf..4475f5a859322 100644 --- a/datafusion/core/src/physical_plan/joins/hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/hash_join.rs @@ -24,9 +24,10 @@ use arrow::{ array::{ as_dictionary_array, as_string_array, ArrayData, ArrayRef, BooleanArray, Date32Array, Date64Array, Decimal128Array, DictionaryArray, LargeStringArray, - PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampSecondArray, UInt32BufferBuilder, UInt32Builder, UInt64BufferBuilder, - UInt64Builder, Time32SecondArray, Time32MillisecondArray, Time64MicrosecondArray, Time64NanosecondArray, + PrimitiveArray, Time32MillisecondArray, Time32SecondArray, + Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampSecondArray, UInt32BufferBuilder, + UInt32Builder, UInt64BufferBuilder, UInt64Builder, }, compute, datatypes::{ From fec5754ac4cd4eab96b6ead73d6c906c5025b053 Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Wed, 9 Nov 2022 12:59:52 +0100 Subject: 
[PATCH 03/13] Add review comments --- datafusion/common/src/scalar.rs | 2 +- datafusion/proto/src/to_proto.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index c66c14bcab9ce..d066280ab2295 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -2396,7 +2396,7 @@ impl TryFrom for i32 { } } -// special implementation for i64 because of Date64, Time64 and Timestamp +// special implementation for i64 because of Date64, Time64, Time64 and Timestamp impl TryFrom for i64 { type Error = DataFusionError; diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index d1f5834ed43a7..da9dc1240733c 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -1079,6 +1079,9 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { + // Since the protos only support Time64 and always interpret it to nanosecond accuracy, + // all ScalarValues of types Time32 and Time64 are adapted into a Time64Value, taking + // into account the necessary conversion into nanoseconds datafusion::scalar::ScalarValue::Time32Second(v) => { create_proto_scalar(v, &data_type, |v| { Value::Time32Value(protobuf::ScalarTime32Value { From 46e5088381ffa6590920ed2d1f7896c02ae327ea Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Thu, 10 Nov 2022 11:09:16 +0100 Subject: [PATCH 04/13] Changes in proto to provide full support for Time32 and Time64 --- datafusion/common/src/scalar.rs | 2 +- datafusion/proto/proto/datafusion.proto | 25 ++- datafusion/proto/src/from_proto.rs | 252 ++++++++++++++++++++++++ datafusion/proto/src/to_proto.rs | 3 - 4 files changed, 276 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index d066280ab2295..c66c14bcab9ce 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -2396,7 +2396,7 @@ impl TryFrom for i32 { } } -// 
special implementation for i64 because of Date64, Time64, Time64 and Timestamp +// special implementation for i64 because of Date64, Time64 and Timestamp impl TryFrom for i64 { type Error = DataFusionError; diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 1911c59dfd89b..e559b19a5f241 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -761,14 +761,28 @@ message ScalarTime64Value { message ScalarTimestampValue { oneof value { - int64 time_microsecond_value = 1; - int64 time_nanosecond_value = 2; + int64 time_microsecond_value = 1; + int64 time_nanosecond_value = 2; int64 time_second_value = 3; int64 time_millisecond_value = 4; }; string timezone = 5; } +message ScalarTime32Value { + oneof value { + int32 time32_second_value = 1; + int32 time32_millisecond_value = 2; + }; +} + +message ScalarTime64Value { + oneof value { + int64 time64_microsecond_value = 1; + int64 time64_nanosecond_value = 2; + }; +} + message ScalarDictionaryValue { ArrowType index_type = 1; ScalarValue value = 2; @@ -829,10 +843,17 @@ message ScalarValue{ ScalarDictionaryValue dictionary_value = 27; bytes binary_value = 28; bytes large_binary_value = 29; +<<<<<<< HEAD ScalarTime64Value time64_value = 30; IntervalMonthDayNanoValue interval_month_day_nano = 31; StructValue struct_value = 32; ScalarFixedSizeBinary fixed_size_binary_value = 34; +======= + ScalarTime32Value time32_value = 30; + ScalarTime64Value time64_value = 31; + IntervalMonthDayNanoValue interval_month_day_nano = 32; + StructValue struct_value = 33; +>>>>>>> Changes in proto to provide full support for Time32 and Time64 } } diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 1de84ad94aa77..d6dd52dbd520c 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -202,6 +202,68 @@ impl From for WindowFrameUnits { } } +<<<<<<< HEAD +======= +impl From for 
DataType { + fn from(scalar: protobuf::PrimitiveScalarType) -> Self { + match scalar { + protobuf::PrimitiveScalarType::Bool => DataType::Boolean, + protobuf::PrimitiveScalarType::Uint8 => DataType::UInt8, + protobuf::PrimitiveScalarType::Int8 => DataType::Int8, + protobuf::PrimitiveScalarType::Uint16 => DataType::UInt16, + protobuf::PrimitiveScalarType::Int16 => DataType::Int16, + protobuf::PrimitiveScalarType::Uint32 => DataType::UInt32, + protobuf::PrimitiveScalarType::Int32 => DataType::Int32, + protobuf::PrimitiveScalarType::Uint64 => DataType::UInt64, + protobuf::PrimitiveScalarType::Int64 => DataType::Int64, + protobuf::PrimitiveScalarType::Float32 => DataType::Float32, + protobuf::PrimitiveScalarType::Float64 => DataType::Float64, + protobuf::PrimitiveScalarType::Utf8 => DataType::Utf8, + protobuf::PrimitiveScalarType::LargeUtf8 => DataType::LargeUtf8, + protobuf::PrimitiveScalarType::Binary => DataType::Binary, + protobuf::PrimitiveScalarType::LargeBinary => DataType::LargeBinary, + protobuf::PrimitiveScalarType::Date32 => DataType::Date32, + protobuf::PrimitiveScalarType::Time32Second => { + DataType::Time32(TimeUnit::Second) + } + protobuf::PrimitiveScalarType::Time32Millisecond => { + DataType::Time32(TimeUnit::Millisecond) + } + protobuf::PrimitiveScalarType::TimestampMicrosecond => { + DataType::Timestamp(TimeUnit::Microsecond, None) + } + protobuf::PrimitiveScalarType::TimestampNanosecond => { + DataType::Timestamp(TimeUnit::Nanosecond, None) + } + protobuf::PrimitiveScalarType::Null => DataType::Null, + protobuf::PrimitiveScalarType::Decimal128 => DataType::Decimal128(0, 0), + protobuf::PrimitiveScalarType::Date64 => DataType::Date64, + protobuf::PrimitiveScalarType::TimestampSecond => { + DataType::Timestamp(TimeUnit::Second, None) + } + protobuf::PrimitiveScalarType::TimestampMillisecond => { + DataType::Timestamp(TimeUnit::Millisecond, None) + } + protobuf::PrimitiveScalarType::IntervalYearmonth => { + DataType::Interval(IntervalUnit::YearMonth) 
+ } + protobuf::PrimitiveScalarType::IntervalDaytime => { + DataType::Interval(IntervalUnit::DayTime) + } + protobuf::PrimitiveScalarType::IntervalMonthdaynano => { + DataType::Interval(IntervalUnit::MonthDayNano) + } + protobuf::PrimitiveScalarType::Time64Microsecond => { + DataType::Time64(TimeUnit::Microsecond) + } + protobuf::PrimitiveScalarType::Time64Nanosecond => { + DataType::Time64(TimeUnit::Nanosecond) + } + } + } +} + +>>>>>>> Changes in proto to provide full support for Time32 and Time64 impl TryFrom<&protobuf::ArrowType> for DataType { type Error = Error; @@ -1294,6 +1356,196 @@ fn vec_to_array(v: Vec) -> [T; N] { }) } +<<<<<<< HEAD +======= +//Does not typecheck lists +fn typechecked_scalar_value_conversion( + tested_type: &protobuf::scalar_value::Value, + required_type: protobuf::PrimitiveScalarType, +) -> Result { + use protobuf::{scalar_value::Value, PrimitiveScalarType}; + + Ok(match (tested_type, &required_type) { + (Value::BoolValue(v), PrimitiveScalarType::Bool) => { + ScalarValue::Boolean(Some(*v)) + } + (Value::Int8Value(v), PrimitiveScalarType::Int8) => { + ScalarValue::Int8(Some(*v as i8)) + } + (Value::Int16Value(v), PrimitiveScalarType::Int16) => { + ScalarValue::Int16(Some(*v as i16)) + } + (Value::Int32Value(v), PrimitiveScalarType::Int32) => { + ScalarValue::Int32(Some(*v)) + } + (Value::Int64Value(v), PrimitiveScalarType::Int64) => { + ScalarValue::Int64(Some(*v)) + } + (Value::Uint8Value(v), PrimitiveScalarType::Uint8) => { + ScalarValue::UInt8(Some(*v as u8)) + } + (Value::Uint16Value(v), PrimitiveScalarType::Uint16) => { + ScalarValue::UInt16(Some(*v as u16)) + } + (Value::Uint32Value(v), PrimitiveScalarType::Uint32) => { + ScalarValue::UInt32(Some(*v)) + } + (Value::Uint64Value(v), PrimitiveScalarType::Uint64) => { + ScalarValue::UInt64(Some(*v)) + } + (Value::Float32Value(v), PrimitiveScalarType::Float32) => { + ScalarValue::Float32(Some(*v)) + } + (Value::Float64Value(v), PrimitiveScalarType::Float64) => { + 
ScalarValue::Float64(Some(*v)) + } + (Value::Date32Value(v), PrimitiveScalarType::Date32) => { + ScalarValue::Date32(Some(*v)) + } + ( + Value::TimestampValue(protobuf::ScalarTimestampValue { + timezone, + value: + Some(protobuf::scalar_timestamp_value::Value::TimeMicrosecondValue(v)), + }), + PrimitiveScalarType::TimestampMicrosecond, + ) => ScalarValue::TimestampMicrosecond(Some(*v), unwrap_timezone(timezone)), + ( + Value::TimestampValue(protobuf::ScalarTimestampValue { + timezone, + value: + Some(protobuf::scalar_timestamp_value::Value::TimeNanosecondValue(v)), + }), + PrimitiveScalarType::TimestampNanosecond, + ) => ScalarValue::TimestampNanosecond(Some(*v), unwrap_timezone(timezone)), + ( + Value::TimestampValue(protobuf::ScalarTimestampValue { + timezone, + value: Some(protobuf::scalar_timestamp_value::Value::TimeSecondValue(v)), + }), + PrimitiveScalarType::TimestampSecond, + ) => ScalarValue::TimestampSecond(Some(*v), unwrap_timezone(timezone)), + ( + Value::TimestampValue(protobuf::ScalarTimestampValue { + timezone, + value: + Some(protobuf::scalar_timestamp_value::Value::TimeMillisecondValue(v)), + }), + PrimitiveScalarType::TimestampMillisecond, + ) => ScalarValue::TimestampMillisecond(Some(*v), unwrap_timezone(timezone)), + (Value::Utf8Value(v), PrimitiveScalarType::Utf8) => { + ScalarValue::Utf8(Some(v.to_owned())) + } + (Value::LargeUtf8Value(v), PrimitiveScalarType::LargeUtf8) => { + ScalarValue::LargeUtf8(Some(v.to_owned())) + } + + (Value::NullValue(i32_enum), required_scalar_type) => { + if *i32_enum == *required_scalar_type as i32 { + let pb_scalar_type = PrimitiveScalarType::try_from(i32_enum)?; + let scalar_value: ScalarValue = match pb_scalar_type { + PrimitiveScalarType::Bool => ScalarValue::Boolean(None), + PrimitiveScalarType::Uint8 => ScalarValue::UInt8(None), + PrimitiveScalarType::Int8 => ScalarValue::Int8(None), + PrimitiveScalarType::Uint16 => ScalarValue::UInt16(None), + PrimitiveScalarType::Int16 => ScalarValue::Int16(None), + 
PrimitiveScalarType::Uint32 => ScalarValue::UInt32(None), + PrimitiveScalarType::Int32 => ScalarValue::Int32(None), + PrimitiveScalarType::Uint64 => ScalarValue::UInt64(None), + PrimitiveScalarType::Int64 => ScalarValue::Int64(None), + PrimitiveScalarType::Float32 => ScalarValue::Float32(None), + PrimitiveScalarType::Float64 => ScalarValue::Float64(None), + PrimitiveScalarType::Utf8 => ScalarValue::Utf8(None), + PrimitiveScalarType::LargeUtf8 => ScalarValue::LargeUtf8(None), + PrimitiveScalarType::Date32 => ScalarValue::Date32(None), + PrimitiveScalarType::Time32Second => ScalarValue::Time32Second(None), + PrimitiveScalarType::Time32Millisecond => { + ScalarValue::Time32Millisecond(None) + } + PrimitiveScalarType::Time64Microsecond => { + ScalarValue::Time64Microsecond(None) + } + PrimitiveScalarType::Time64Nanosecond => { + ScalarValue::Time64Nanosecond(None) + } + PrimitiveScalarType::TimestampMicrosecond => { + ScalarValue::TimestampMicrosecond(None, None) + } + PrimitiveScalarType::TimestampNanosecond => { + ScalarValue::TimestampNanosecond(None, None) + } + PrimitiveScalarType::Null => { + return Err(proto_error( + "Untyped scalar null is not a valid scalar value", + )); + } + PrimitiveScalarType::Decimal128 => { + ScalarValue::Decimal128(None, 0, 0) + } + PrimitiveScalarType::Date64 => ScalarValue::Date64(None), + PrimitiveScalarType::TimestampSecond => { + ScalarValue::TimestampSecond(None, None) + } + PrimitiveScalarType::TimestampMillisecond => { + ScalarValue::TimestampMillisecond(None, None) + } + PrimitiveScalarType::IntervalYearmonth => { + ScalarValue::IntervalYearMonth(None) + } + PrimitiveScalarType::IntervalDaytime => { + ScalarValue::IntervalDayTime(None) + } + PrimitiveScalarType::IntervalMonthdaynano => { + ScalarValue::IntervalMonthDayNano(None) + } + PrimitiveScalarType::Binary => ScalarValue::Binary(None), + PrimitiveScalarType::LargeBinary => ScalarValue::LargeBinary(None), + }; + scalar_value + } else { + return Err(proto_error("Could not 
convert to the proper type")); + } + } + (Value::Decimal128Value(val), PrimitiveScalarType::Decimal128) => { + let array = vec_to_array(val.value.clone()); + ScalarValue::Decimal128( + Some(i128::from_be_bytes(array)), + val.p as u8, + val.s as u8, + ) + } + (Value::Date64Value(v), PrimitiveScalarType::Date64) => { + ScalarValue::Date64(Some(*v)) + } + (Value::IntervalYearmonthValue(v), PrimitiveScalarType::IntervalYearmonth) => { + ScalarValue::IntervalYearMonth(Some(*v)) + } + (Value::IntervalDaytimeValue(v), PrimitiveScalarType::IntervalDaytime) => { + ScalarValue::IntervalDayTime(Some(*v)) + } + (Value::IntervalMonthDayNano(v), PrimitiveScalarType::IntervalMonthdaynano) => { + let protobuf::IntervalMonthDayNanoValue { + months, + days, + nanos, + } = v; + ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + *months, *days, *nanos, + ))) + } + _ => return Err(proto_error("Could not convert to the proper type")), + }) +} + +fn unwrap_timezone(proto_value: &str) -> Option { + if proto_value.is_empty() { + None + } else { + Some(proto_value.to_string()) + } +} + +>>>>>>> Changes in proto to provide full support for Time32 and Time64 pub fn from_proto_binary_op(op: &str) -> Result { match op { "And" => Ok(Operator::And), diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index da9dc1240733c..d1f5834ed43a7 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -1079,9 +1079,6 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { - // Since the protos only support Time64 and always interpret it to nanosecond accuracy, - // all ScalarValues of types Time32 and Time64 are adapted into a Time64Value, taking - // into account the necessary conversion into nanoseconds datafusion::scalar::ScalarValue::Time32Second(v) => { create_proto_scalar(v, &data_type, |v| { Value::Time32Value(protobuf::ScalarTime32Value { From 60e069eeb120e638367f9fb202fd462936391e67 Mon Sep 17 00:00:00 2001 
From: andre-cc-natzka Date: Mon, 14 Nov 2022 14:57:07 +0100 Subject: [PATCH 05/13] Add test to ensure Time32 and Time64 are fully supported --- datafusion/core/tests/sql/aggregates.rs | 86 +++++++++++ datafusion/core/tests/sql/group_by.rs | 180 ++++++++++++++++++++++++ datafusion/core/tests/sql/mod.rs | 93 ++++++++++++ datafusion/core/tests/sql/select.rs | 178 +++++++++++++++++++++++ datafusion/core/tests/sql/timestamp.rs | 2 +- 5 files changed, 538 insertions(+), 1 deletion(-) diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index f6334a33ad1fe..fd796d24d9814 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -1592,6 +1592,92 @@ async fn aggregate_times_max() -> Result<()> { Ok(()) } +#[tokio::test] +async fn aggregate_times_sum() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = plan_and_collect( + &ctx, + "SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t", + ) + .await + .unwrap_err(); + + assert_eq!(results.to_string(), "Error during planning: The function Sum does not support inputs of type Time64(Nanosecond)."); + + Ok(()) +} + +#[tokio::test] +async fn aggregate_times_count() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = execute_to_batches( + &ctx, + "SELECT count(nanos), count(micros), count(millis), count(secs) FROM t", + ) + .await; + + let expected = vec![ + "+----------------+-----------------+-----------------+---------------+", + "| COUNT(t.nanos) | COUNT(t.micros) | COUNT(t.millis) | COUNT(t.secs) |", + "+----------------+-----------------+-----------------+---------------+", + "| 4 | 4 | 4 | 4 |", + "+----------------+-----------------+-----------------+---------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn aggregate_times_min() -> 
Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = execute_to_batches( + &ctx, + "SELECT min(nanos), min(micros), min(millis), min(secs) FROM t", + ) + .await; + + let expected = vec![ + "+--------------------+-----------------+---------------+-------------+", + "| MIN(t.nanos) | MIN(t.micros) | MIN(t.millis) | MIN(t.secs) |", + "+--------------------+-----------------+---------------+-------------+", + "| 18:06:30.243620451 | 18:06:30.243620 | 18:06:30.243 | 18:06:30 |", + "+--------------------+-----------------+---------------+-------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn aggregate_times_max() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_table("t", table_with_times()).unwrap(); + + let results = execute_to_batches( + &ctx, + "SELECT max(nanos), max(micros), max(millis), max(secs) FROM t", + ) + .await; + + let expected = vec![ + "+--------------------+-----------------+---------------+-------------+", + "| MAX(t.nanos) | MAX(t.micros) | MAX(t.millis) | MAX(t.secs) |", + "+--------------------+-----------------+---------------+-------------+", + "| 21:06:28.247821084 | 21:06:28.247821 | 21:06:28.247 | 21:06:28 |", + "+--------------------+-----------------+---------------+-------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + #[tokio::test] async fn aggregate_timestamps_avg() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/group_by.rs b/datafusion/core/tests/sql/group_by.rs index fffcc356d4335..94e2b9a601683 100644 --- a/datafusion/core/tests/sql/group_by.rs +++ b/datafusion/core/tests/sql/group_by.rs @@ -665,6 +665,186 @@ async fn csv_group_by_time64nanosecond() -> Result<()> { Ok(()) } +#[tokio::test] +async fn csv_group_by_time32second() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + 
Field::new("time", DataType::Time32(TimeUnit::Second), false), + Field::new("cnt", DataType::Int32, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32SecondArray::from(vec![ + Some(5_000), + Some(5_000), + Some(5_500), + Some(5_500), + Some(5_900), + Some(5_900), + ])), + Arc::new(Int32Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn csv_group_by_time32millisecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Millisecond), false), + Field::new("cnt", DataType::Int32, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32MillisecondArray::from(vec![ + Some(5_000_000), + Some(5_000_000), + Some(5_500_000), + Some(5_500_000), + Some(5_900_000), + Some(5_900_000), + ])), + Arc::new(Int32Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn csv_group_by_time64microsecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Microsecond), 
false), + Field::new("cnt", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64MicrosecondArray::from(vec![ + Some(5_000_000_000), + Some(5_000_000_000), + Some(5_500_000_000), + Some(5_500_000_000), + Some(5_900_000_000), + Some(5_900_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn csv_group_by_time64nanosecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Nanosecond), false), + Field::new("cnt", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64NanosecondArray::from(vec![ + Some(5_000_000_000_000), + Some(5_000_000_000_000), + Some(5_500_000_000_000), + Some(5_500_000_000_000), + Some(5_900_000_000_000), + Some(5_900_000_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(1), + Some(1), + Some(1), + Some(2), + Some(1), + Some(3), + ])), + ], + )?; + + ctx.register_batch("times", data)?; + let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------+", + "| SUM(times.cnt) |", + "+----------------+", + "| 2 |", + "| 3 |", + "| 4 |", + "+----------------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn group_by_date_trunc() -> Result<()> { let tmp_dir = TempDir::new()?; diff --git a/datafusion/core/tests/sql/mod.rs 
b/datafusion/core/tests/sql/mod.rs index 41364f764c1a3..1bd8fb5f79fb2 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -1277,6 +1277,99 @@ pub fn make_times() -> RecordBatch { .unwrap() } +/// Return a new table provider containing all of the supported timestamp types +pub fn table_with_times() -> Arc { + let batch = make_times(); + let schema = batch.schema(); + let partitions = vec![vec![batch]]; + Arc::new(MemTable::try_new(schema, partitions).unwrap()) +} + +/// Return record batch with all of the supported time types +/// values +/// +/// Columns are named: +/// "nanos" --> Time64NanosecondArray +/// "micros" --> Time64MicrosecondArray +/// "millis" --> Time32MillisecondArray +/// "secs" --> Time32SecondArray +/// "names" --> StringArray +pub fn make_times() -> RecordBatch { + let ts_strings = vec![ + Some("18:06:30.243620451"), + Some("20:08:28.161121654"), + Some("19:11:04.156423842"), + Some("21:06:28.247821084"), + ]; + + let ts_nanos = ts_strings + .into_iter() + .map(|t| { + t.map(|t| { + let integer_sec = t + .parse::() + .unwrap() + .num_seconds_from_midnight() as i64; + let extra_nano = + t.parse::().unwrap().nanosecond() as i64; + // Total time in nanoseconds given by integer number of seconds multiplied by 10^9 + // plus number of nanoseconds corresponding to the extra fraction of second + integer_sec * 1_000_000_000 + extra_nano + }) + }) + .collect::>(); + + let ts_micros = ts_nanos + .iter() + .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000)) + .collect::>(); + + let ts_millis = ts_nanos + .iter() + .map(|t| t.as_ref().map(|ts_nanos| { ts_nanos / 1000000 } as i32)) + .collect::>(); + + let ts_secs = ts_nanos + .iter() + .map(|t| t.as_ref().map(|ts_nanos| { ts_nanos / 1000000000 } as i32)) + .collect::>(); + + let names = ts_nanos + .iter() + .enumerate() + .map(|(i, _)| format!("Row {}", i)) + .collect::>(); + + let arr_nanos = Time64NanosecondArray::from(ts_nanos); + let arr_micros = 
Time64MicrosecondArray::from(ts_micros); + let arr_millis = Time32MillisecondArray::from(ts_millis); + let arr_secs = Time32SecondArray::from(ts_secs); + + let names = names.iter().map(|s| s.as_str()).collect::>(); + let arr_names = StringArray::from(names); + + let schema = Schema::new(vec![ + Field::new("nanos", arr_nanos.data_type().clone(), true), + Field::new("micros", arr_micros.data_type().clone(), true), + Field::new("millis", arr_millis.data_type().clone(), true), + Field::new("secs", arr_secs.data_type().clone(), true), + Field::new("name", arr_names.data_type().clone(), true), + ]); + let schema = Arc::new(schema); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(arr_nanos), + Arc::new(arr_micros), + Arc::new(arr_millis), + Arc::new(arr_secs), + Arc::new(arr_names), + ], + ) + .unwrap() +} + #[tokio::test] async fn nyc() -> Result<()> { // schema for nyxtaxi csv files diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 6d7014507bb09..5efa1175b5b38 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -814,6 +814,8 @@ async fn query_on_string_dictionary() -> Result<()> { ]; assert_batches_eq!(expected, &actual); + // filtering with Time32 and Time64 types + // Expression evaluation let sql = "SELECT concat(d1, '-foo') FROM test"; let actual = execute_to_batches(&ctx, sql).await; @@ -1085,6 +1087,182 @@ async fn filter_with_time64nanosecond() -> Result<()> { Ok(()) } +#[tokio::test] +async fn filter_with_time32second() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Second), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32SecondArray::from(vec![ + Some(5_000), + Some(5_000), + Some(5_500), + Some(5_500), + Some(5_900), + Some(5_900), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + 
Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn filter_with_time32millisecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time32(TimeUnit::Millisecond), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time32MillisecondArray::from(vec![ + Some(5_000_000), + Some(5_000_000), + Some(5_500_000), + Some(5_500_000), + Some(5_900_000), + Some(5_900_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn filter_with_time64microsecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Microsecond), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64MicrosecondArray::from(vec![ + Some(5_000_000_000), + Some(5_000_000_000), + Some(5_500_000_000), + Some(5_500_000_000), + Some(5_900_000_000), + Some(5_900_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + 
Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn filter_with_time64nanosecond() -> Result<()> { + let ctx = SessionContext::new(); + let schema = Arc::new(Schema::new(vec![ + Field::new("time", DataType::Time64(TimeUnit::Nanosecond), false), + Field::new("value", DataType::Int64, false), + ])); + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Time64NanosecondArray::from(vec![ + Some(5_000_000_000_000), + Some(5_000_000_000_000), + Some(5_500_000_000_000), + Some(5_500_000_000_000), + Some(5_900_000_000_000), + Some(5_900_000_000_000), + ])), + Arc::new(Int64Array::from(vec![ + Some(2505), + Some(2436), + Some(2384), + Some(1815), + Some(2330), + Some(2065), + ])), + ], + )?; + + ctx.register_batch("temporal", data)?; + let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+", + "| value |", + "+-------+", + "| 2436 |", + "| 2505 |", + "+-------+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn query_cte_with_alias() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 7fb8e109f575c..f139faab5e2cd 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1555,7 +1555,7 @@ async fn cast_timestamp_to_timestamptz() -> Result<()> { #[tokio::test] async fn test_cast_to_time() -> Result<()> { let ctx = SessionContext::new(); - let sql = "SELECT 0::TIME"; + let sql = "SELECT 0::TIME64"; let actual 
= execute_to_batches(&ctx, sql).await; let expected = vec![ From 0f2c58cf966e7c4297a1a08ae666eee55e0bb621 Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Mon, 14 Nov 2022 23:49:22 +0100 Subject: [PATCH 06/13] Changes in protos to allow full support of Time32 and Time64 --- datafusion/proto/proto/datafusion.proto | 7 ------- 1 file changed, 7 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index e559b19a5f241..eeaf919066105 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -843,17 +843,10 @@ message ScalarValue{ ScalarDictionaryValue dictionary_value = 27; bytes binary_value = 28; bytes large_binary_value = 29; -<<<<<<< HEAD ScalarTime64Value time64_value = 30; IntervalMonthDayNanoValue interval_month_day_nano = 31; StructValue struct_value = 32; ScalarFixedSizeBinary fixed_size_binary_value = 34; -======= - ScalarTime32Value time32_value = 30; - ScalarTime64Value time64_value = 31; - IntervalMonthDayNanoValue interval_month_day_nano = 32; - StructValue struct_value = 33; ->>>>>>> Changes in proto to provide full support for Time32 and Time64 } } From cd08e711977db43eb6c9092e90eda9ed8de52be8 Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Mon, 14 Nov 2022 23:52:03 +0100 Subject: [PATCH 07/13] Correct changes in protos to allow full support of Time32 and Time64 --- datafusion/proto/proto/datafusion.proto | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index eeaf919066105..74a2fe1408187 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -769,20 +769,6 @@ message ScalarTimestampValue { string timezone = 5; } -message ScalarTime32Value { - oneof value { - int32 time32_second_value = 1; - int32 time32_millisecond_value = 2; - }; -} - -message ScalarTime64Value { - oneof value { - int64 
time64_microsecond_value = 1; - int64 time64_nanosecond_value = 2; - }; -} - message ScalarDictionaryValue { ArrowType index_type = 1; ScalarValue value = 2; From 5d147d9b9c58f286103b256aabf78c031bcd0754 Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Tue, 15 Nov 2022 00:00:41 +0100 Subject: [PATCH 08/13] Fix tests --- datafusion/core/tests/sql/aggregates.rs | 86 -------- datafusion/core/tests/sql/group_by.rs | 180 ----------------- datafusion/core/tests/sql/mod.rs | 93 --------- datafusion/core/tests/sql/select.rs | 176 ----------------- datafusion/proto/src/from_proto.rs | 252 ------------------------ 5 files changed, 787 deletions(-) diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index fd796d24d9814..f6334a33ad1fe 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -1592,92 +1592,6 @@ async fn aggregate_times_max() -> Result<()> { Ok(()) } -#[tokio::test] -async fn aggregate_times_sum() -> Result<()> { - let ctx = SessionContext::new(); - ctx.register_table("t", table_with_times()).unwrap(); - - let results = plan_and_collect( - &ctx, - "SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t", - ) - .await - .unwrap_err(); - - assert_eq!(results.to_string(), "Error during planning: The function Sum does not support inputs of type Time64(Nanosecond)."); - - Ok(()) -} - -#[tokio::test] -async fn aggregate_times_count() -> Result<()> { - let ctx = SessionContext::new(); - ctx.register_table("t", table_with_times()).unwrap(); - - let results = execute_to_batches( - &ctx, - "SELECT count(nanos), count(micros), count(millis), count(secs) FROM t", - ) - .await; - - let expected = vec![ - "+----------------+-----------------+-----------------+---------------+", - "| COUNT(t.nanos) | COUNT(t.micros) | COUNT(t.millis) | COUNT(t.secs) |", - "+----------------+-----------------+-----------------+---------------+", - "| 4 | 4 | 4 | 4 |", - 
"+----------------+-----------------+-----------------+---------------+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn aggregate_times_min() -> Result<()> { - let ctx = SessionContext::new(); - ctx.register_table("t", table_with_times()).unwrap(); - - let results = execute_to_batches( - &ctx, - "SELECT min(nanos), min(micros), min(millis), min(secs) FROM t", - ) - .await; - - let expected = vec![ - "+--------------------+-----------------+---------------+-------------+", - "| MIN(t.nanos) | MIN(t.micros) | MIN(t.millis) | MIN(t.secs) |", - "+--------------------+-----------------+---------------+-------------+", - "| 18:06:30.243620451 | 18:06:30.243620 | 18:06:30.243 | 18:06:30 |", - "+--------------------+-----------------+---------------+-------------+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn aggregate_times_max() -> Result<()> { - let ctx = SessionContext::new(); - ctx.register_table("t", table_with_times()).unwrap(); - - let results = execute_to_batches( - &ctx, - "SELECT max(nanos), max(micros), max(millis), max(secs) FROM t", - ) - .await; - - let expected = vec![ - "+--------------------+-----------------+---------------+-------------+", - "| MAX(t.nanos) | MAX(t.micros) | MAX(t.millis) | MAX(t.secs) |", - "+--------------------+-----------------+---------------+-------------+", - "| 21:06:28.247821084 | 21:06:28.247821 | 21:06:28.247 | 21:06:28 |", - "+--------------------+-----------------+---------------+-------------+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - #[tokio::test] async fn aggregate_timestamps_avg() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/group_by.rs b/datafusion/core/tests/sql/group_by.rs index 94e2b9a601683..fffcc356d4335 100644 --- a/datafusion/core/tests/sql/group_by.rs +++ b/datafusion/core/tests/sql/group_by.rs @@ -665,186 +665,6 @@ async fn 
csv_group_by_time64nanosecond() -> Result<()> { Ok(()) } -#[tokio::test] -async fn csv_group_by_time32second() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time32(TimeUnit::Second), false), - Field::new("cnt", DataType::Int32, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time32SecondArray::from(vec![ - Some(5_000), - Some(5_000), - Some(5_500), - Some(5_500), - Some(5_900), - Some(5_900), - ])), - Arc::new(Int32Array::from(vec![ - Some(1), - Some(1), - Some(1), - Some(2), - Some(1), - Some(3), - ])), - ], - )?; - - ctx.register_batch("times", data)?; - let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----------------+", - "| SUM(times.cnt) |", - "+----------------+", - "| 2 |", - "| 3 |", - "| 4 |", - "+----------------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn csv_group_by_time32millisecond() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time32(TimeUnit::Millisecond), false), - Field::new("cnt", DataType::Int32, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time32MillisecondArray::from(vec![ - Some(5_000_000), - Some(5_000_000), - Some(5_500_000), - Some(5_500_000), - Some(5_900_000), - Some(5_900_000), - ])), - Arc::new(Int32Array::from(vec![ - Some(1), - Some(1), - Some(1), - Some(2), - Some(1), - Some(3), - ])), - ], - )?; - - ctx.register_batch("times", data)?; - let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----------------+", - "| SUM(times.cnt) |", - "+----------------+", - "| 2 |", - "| 3 |", - "| 4 |", - "+----------------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - 
-#[tokio::test] -async fn csv_group_by_time64microsecond() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time64(TimeUnit::Microsecond), false), - Field::new("cnt", DataType::Int64, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time64MicrosecondArray::from(vec![ - Some(5_000_000_000), - Some(5_000_000_000), - Some(5_500_000_000), - Some(5_500_000_000), - Some(5_900_000_000), - Some(5_900_000_000), - ])), - Arc::new(Int64Array::from(vec![ - Some(1), - Some(1), - Some(1), - Some(2), - Some(1), - Some(3), - ])), - ], - )?; - - ctx.register_batch("times", data)?; - let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----------------+", - "| SUM(times.cnt) |", - "+----------------+", - "| 2 |", - "| 3 |", - "| 4 |", - "+----------------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn csv_group_by_time64nanosecond() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time64(TimeUnit::Nanosecond), false), - Field::new("cnt", DataType::Int64, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time64NanosecondArray::from(vec![ - Some(5_000_000_000_000), - Some(5_000_000_000_000), - Some(5_500_000_000_000), - Some(5_500_000_000_000), - Some(5_900_000_000_000), - Some(5_900_000_000_000), - ])), - Arc::new(Int64Array::from(vec![ - Some(1), - Some(1), - Some(1), - Some(2), - Some(1), - Some(3), - ])), - ], - )?; - - ctx.register_batch("times", data)?; - let sql = "SELECT SUM(cnt) FROM times GROUP BY time"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----------------+", - "| SUM(times.cnt) |", - "+----------------+", - "| 2 |", - "| 3 |", - "| 4 |", - "+----------------+", - ]; - 
assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - #[tokio::test] async fn group_by_date_trunc() -> Result<()> { let tmp_dir = TempDir::new()?; diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 1bd8fb5f79fb2..41364f764c1a3 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -1277,99 +1277,6 @@ pub fn make_times() -> RecordBatch { .unwrap() } -/// Return a new table provider containing all of the supported timestamp types -pub fn table_with_times() -> Arc { - let batch = make_times(); - let schema = batch.schema(); - let partitions = vec![vec![batch]]; - Arc::new(MemTable::try_new(schema, partitions).unwrap()) -} - -/// Return record batch with all of the supported time types -/// values -/// -/// Columns are named: -/// "nanos" --> Time64NanosecondArray -/// "micros" --> Time64MicrosecondArray -/// "millis" --> Time32MillisecondArray -/// "secs" --> Time32SecondArray -/// "names" --> StringArray -pub fn make_times() -> RecordBatch { - let ts_strings = vec![ - Some("18:06:30.243620451"), - Some("20:08:28.161121654"), - Some("19:11:04.156423842"), - Some("21:06:28.247821084"), - ]; - - let ts_nanos = ts_strings - .into_iter() - .map(|t| { - t.map(|t| { - let integer_sec = t - .parse::() - .unwrap() - .num_seconds_from_midnight() as i64; - let extra_nano = - t.parse::().unwrap().nanosecond() as i64; - // Total time in nanoseconds given by integer number of seconds multiplied by 10^9 - // plus number of nanoseconds corresponding to the extra fraction of second - integer_sec * 1_000_000_000 + extra_nano - }) - }) - .collect::>(); - - let ts_micros = ts_nanos - .iter() - .map(|t| t.as_ref().map(|ts_nanos| ts_nanos / 1000)) - .collect::>(); - - let ts_millis = ts_nanos - .iter() - .map(|t| t.as_ref().map(|ts_nanos| { ts_nanos / 1000000 } as i32)) - .collect::>(); - - let ts_secs = ts_nanos - .iter() - .map(|t| t.as_ref().map(|ts_nanos| { ts_nanos / 1000000000 } as i32)) - .collect::>(); - - 
let names = ts_nanos - .iter() - .enumerate() - .map(|(i, _)| format!("Row {}", i)) - .collect::>(); - - let arr_nanos = Time64NanosecondArray::from(ts_nanos); - let arr_micros = Time64MicrosecondArray::from(ts_micros); - let arr_millis = Time32MillisecondArray::from(ts_millis); - let arr_secs = Time32SecondArray::from(ts_secs); - - let names = names.iter().map(|s| s.as_str()).collect::>(); - let arr_names = StringArray::from(names); - - let schema = Schema::new(vec![ - Field::new("nanos", arr_nanos.data_type().clone(), true), - Field::new("micros", arr_micros.data_type().clone(), true), - Field::new("millis", arr_millis.data_type().clone(), true), - Field::new("secs", arr_secs.data_type().clone(), true), - Field::new("name", arr_names.data_type().clone(), true), - ]); - let schema = Arc::new(schema); - - RecordBatch::try_new( - schema, - vec![ - Arc::new(arr_nanos), - Arc::new(arr_micros), - Arc::new(arr_millis), - Arc::new(arr_secs), - Arc::new(arr_names), - ], - ) - .unwrap() -} - #[tokio::test] async fn nyc() -> Result<()> { // schema for nyxtaxi csv files diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 5efa1175b5b38..a2106ee9719db 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -1087,182 +1087,6 @@ async fn filter_with_time64nanosecond() -> Result<()> { Ok(()) } -#[tokio::test] -async fn filter_with_time32second() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time32(TimeUnit::Second), false), - Field::new("value", DataType::Int64, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time32SecondArray::from(vec![ - Some(5_000), - Some(5_000), - Some(5_500), - Some(5_500), - Some(5_900), - Some(5_900), - ])), - Arc::new(Int64Array::from(vec![ - Some(2505), - Some(2436), - Some(2384), - Some(1815), - Some(2330), - Some(2065), - ])), - ], - )?; - - 
ctx.register_batch("temporal", data)?; - let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-------+", - "| value |", - "+-------+", - "| 2436 |", - "| 2505 |", - "+-------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn filter_with_time32millisecond() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time32(TimeUnit::Millisecond), false), - Field::new("value", DataType::Int64, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time32MillisecondArray::from(vec![ - Some(5_000_000), - Some(5_000_000), - Some(5_500_000), - Some(5_500_000), - Some(5_900_000), - Some(5_900_000), - ])), - Arc::new(Int64Array::from(vec![ - Some(2505), - Some(2436), - Some(2384), - Some(1815), - Some(2330), - Some(2065), - ])), - ], - )?; - - ctx.register_batch("temporal", data)?; - let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-------+", - "| value |", - "+-------+", - "| 2436 |", - "| 2505 |", - "+-------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn filter_with_time64microsecond() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time64(TimeUnit::Microsecond), false), - Field::new("value", DataType::Int64, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time64MicrosecondArray::from(vec![ - Some(5_000_000_000), - Some(5_000_000_000), - Some(5_500_000_000), - Some(5_500_000_000), - Some(5_900_000_000), - Some(5_900_000_000), - ])), - Arc::new(Int64Array::from(vec![ - Some(2505), - Some(2436), - Some(2384), - Some(1815), - Some(2330), - Some(2065), - ])), - ], - )?; - - 
ctx.register_batch("temporal", data)?; - let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-------+", - "| value |", - "+-------+", - "| 2436 |", - "| 2505 |", - "+-------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn filter_with_time64nanosecond() -> Result<()> { - let ctx = SessionContext::new(); - let schema = Arc::new(Schema::new(vec![ - Field::new("time", DataType::Time64(TimeUnit::Nanosecond), false), - Field::new("value", DataType::Int64, false), - ])); - let data = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(Time64NanosecondArray::from(vec![ - Some(5_000_000_000_000), - Some(5_000_000_000_000), - Some(5_500_000_000_000), - Some(5_500_000_000_000), - Some(5_900_000_000_000), - Some(5_900_000_000_000), - ])), - Arc::new(Int64Array::from(vec![ - Some(2505), - Some(2436), - Some(2384), - Some(1815), - Some(2330), - Some(2065), - ])), - ], - )?; - - ctx.register_batch("temporal", data)?; - let sql = "SELECT value FROM temporal WHERE time = '01:23:20'"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-------+", - "| value |", - "+-------+", - "| 2436 |", - "| 2505 |", - "+-------+", - ]; - assert_batches_sorted_eq!(expected, &actual); - Ok(()) -} - #[tokio::test] async fn query_cte_with_alias() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index d6dd52dbd520c..1de84ad94aa77 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -202,68 +202,6 @@ impl From for WindowFrameUnits { } } -<<<<<<< HEAD -======= -impl From for DataType { - fn from(scalar: protobuf::PrimitiveScalarType) -> Self { - match scalar { - protobuf::PrimitiveScalarType::Bool => DataType::Boolean, - protobuf::PrimitiveScalarType::Uint8 => DataType::UInt8, - 
protobuf::PrimitiveScalarType::Int8 => DataType::Int8, - protobuf::PrimitiveScalarType::Uint16 => DataType::UInt16, - protobuf::PrimitiveScalarType::Int16 => DataType::Int16, - protobuf::PrimitiveScalarType::Uint32 => DataType::UInt32, - protobuf::PrimitiveScalarType::Int32 => DataType::Int32, - protobuf::PrimitiveScalarType::Uint64 => DataType::UInt64, - protobuf::PrimitiveScalarType::Int64 => DataType::Int64, - protobuf::PrimitiveScalarType::Float32 => DataType::Float32, - protobuf::PrimitiveScalarType::Float64 => DataType::Float64, - protobuf::PrimitiveScalarType::Utf8 => DataType::Utf8, - protobuf::PrimitiveScalarType::LargeUtf8 => DataType::LargeUtf8, - protobuf::PrimitiveScalarType::Binary => DataType::Binary, - protobuf::PrimitiveScalarType::LargeBinary => DataType::LargeBinary, - protobuf::PrimitiveScalarType::Date32 => DataType::Date32, - protobuf::PrimitiveScalarType::Time32Second => { - DataType::Time32(TimeUnit::Second) - } - protobuf::PrimitiveScalarType::Time32Millisecond => { - DataType::Time32(TimeUnit::Millisecond) - } - protobuf::PrimitiveScalarType::TimestampMicrosecond => { - DataType::Timestamp(TimeUnit::Microsecond, None) - } - protobuf::PrimitiveScalarType::TimestampNanosecond => { - DataType::Timestamp(TimeUnit::Nanosecond, None) - } - protobuf::PrimitiveScalarType::Null => DataType::Null, - protobuf::PrimitiveScalarType::Decimal128 => DataType::Decimal128(0, 0), - protobuf::PrimitiveScalarType::Date64 => DataType::Date64, - protobuf::PrimitiveScalarType::TimestampSecond => { - DataType::Timestamp(TimeUnit::Second, None) - } - protobuf::PrimitiveScalarType::TimestampMillisecond => { - DataType::Timestamp(TimeUnit::Millisecond, None) - } - protobuf::PrimitiveScalarType::IntervalYearmonth => { - DataType::Interval(IntervalUnit::YearMonth) - } - protobuf::PrimitiveScalarType::IntervalDaytime => { - DataType::Interval(IntervalUnit::DayTime) - } - protobuf::PrimitiveScalarType::IntervalMonthdaynano => { - 
DataType::Interval(IntervalUnit::MonthDayNano) - } - protobuf::PrimitiveScalarType::Time64Microsecond => { - DataType::Time64(TimeUnit::Microsecond) - } - protobuf::PrimitiveScalarType::Time64Nanosecond => { - DataType::Time64(TimeUnit::Nanosecond) - } - } - } -} - ->>>>>>> Changes in proto to provide full support for Time32 and Time64 impl TryFrom<&protobuf::ArrowType> for DataType { type Error = Error; @@ -1356,196 +1294,6 @@ fn vec_to_array(v: Vec) -> [T; N] { }) } -<<<<<<< HEAD -======= -//Does not typecheck lists -fn typechecked_scalar_value_conversion( - tested_type: &protobuf::scalar_value::Value, - required_type: protobuf::PrimitiveScalarType, -) -> Result { - use protobuf::{scalar_value::Value, PrimitiveScalarType}; - - Ok(match (tested_type, &required_type) { - (Value::BoolValue(v), PrimitiveScalarType::Bool) => { - ScalarValue::Boolean(Some(*v)) - } - (Value::Int8Value(v), PrimitiveScalarType::Int8) => { - ScalarValue::Int8(Some(*v as i8)) - } - (Value::Int16Value(v), PrimitiveScalarType::Int16) => { - ScalarValue::Int16(Some(*v as i16)) - } - (Value::Int32Value(v), PrimitiveScalarType::Int32) => { - ScalarValue::Int32(Some(*v)) - } - (Value::Int64Value(v), PrimitiveScalarType::Int64) => { - ScalarValue::Int64(Some(*v)) - } - (Value::Uint8Value(v), PrimitiveScalarType::Uint8) => { - ScalarValue::UInt8(Some(*v as u8)) - } - (Value::Uint16Value(v), PrimitiveScalarType::Uint16) => { - ScalarValue::UInt16(Some(*v as u16)) - } - (Value::Uint32Value(v), PrimitiveScalarType::Uint32) => { - ScalarValue::UInt32(Some(*v)) - } - (Value::Uint64Value(v), PrimitiveScalarType::Uint64) => { - ScalarValue::UInt64(Some(*v)) - } - (Value::Float32Value(v), PrimitiveScalarType::Float32) => { - ScalarValue::Float32(Some(*v)) - } - (Value::Float64Value(v), PrimitiveScalarType::Float64) => { - ScalarValue::Float64(Some(*v)) - } - (Value::Date32Value(v), PrimitiveScalarType::Date32) => { - ScalarValue::Date32(Some(*v)) - } - ( - 
Value::TimestampValue(protobuf::ScalarTimestampValue { - timezone, - value: - Some(protobuf::scalar_timestamp_value::Value::TimeMicrosecondValue(v)), - }), - PrimitiveScalarType::TimestampMicrosecond, - ) => ScalarValue::TimestampMicrosecond(Some(*v), unwrap_timezone(timezone)), - ( - Value::TimestampValue(protobuf::ScalarTimestampValue { - timezone, - value: - Some(protobuf::scalar_timestamp_value::Value::TimeNanosecondValue(v)), - }), - PrimitiveScalarType::TimestampNanosecond, - ) => ScalarValue::TimestampNanosecond(Some(*v), unwrap_timezone(timezone)), - ( - Value::TimestampValue(protobuf::ScalarTimestampValue { - timezone, - value: Some(protobuf::scalar_timestamp_value::Value::TimeSecondValue(v)), - }), - PrimitiveScalarType::TimestampSecond, - ) => ScalarValue::TimestampSecond(Some(*v), unwrap_timezone(timezone)), - ( - Value::TimestampValue(protobuf::ScalarTimestampValue { - timezone, - value: - Some(protobuf::scalar_timestamp_value::Value::TimeMillisecondValue(v)), - }), - PrimitiveScalarType::TimestampMillisecond, - ) => ScalarValue::TimestampMillisecond(Some(*v), unwrap_timezone(timezone)), - (Value::Utf8Value(v), PrimitiveScalarType::Utf8) => { - ScalarValue::Utf8(Some(v.to_owned())) - } - (Value::LargeUtf8Value(v), PrimitiveScalarType::LargeUtf8) => { - ScalarValue::LargeUtf8(Some(v.to_owned())) - } - - (Value::NullValue(i32_enum), required_scalar_type) => { - if *i32_enum == *required_scalar_type as i32 { - let pb_scalar_type = PrimitiveScalarType::try_from(i32_enum)?; - let scalar_value: ScalarValue = match pb_scalar_type { - PrimitiveScalarType::Bool => ScalarValue::Boolean(None), - PrimitiveScalarType::Uint8 => ScalarValue::UInt8(None), - PrimitiveScalarType::Int8 => ScalarValue::Int8(None), - PrimitiveScalarType::Uint16 => ScalarValue::UInt16(None), - PrimitiveScalarType::Int16 => ScalarValue::Int16(None), - PrimitiveScalarType::Uint32 => ScalarValue::UInt32(None), - PrimitiveScalarType::Int32 => ScalarValue::Int32(None), - 
PrimitiveScalarType::Uint64 => ScalarValue::UInt64(None), - PrimitiveScalarType::Int64 => ScalarValue::Int64(None), - PrimitiveScalarType::Float32 => ScalarValue::Float32(None), - PrimitiveScalarType::Float64 => ScalarValue::Float64(None), - PrimitiveScalarType::Utf8 => ScalarValue::Utf8(None), - PrimitiveScalarType::LargeUtf8 => ScalarValue::LargeUtf8(None), - PrimitiveScalarType::Date32 => ScalarValue::Date32(None), - PrimitiveScalarType::Time32Second => ScalarValue::Time32Second(None), - PrimitiveScalarType::Time32Millisecond => { - ScalarValue::Time32Millisecond(None) - } - PrimitiveScalarType::Time64Microsecond => { - ScalarValue::Time64Microsecond(None) - } - PrimitiveScalarType::Time64Nanosecond => { - ScalarValue::Time64Nanosecond(None) - } - PrimitiveScalarType::TimestampMicrosecond => { - ScalarValue::TimestampMicrosecond(None, None) - } - PrimitiveScalarType::TimestampNanosecond => { - ScalarValue::TimestampNanosecond(None, None) - } - PrimitiveScalarType::Null => { - return Err(proto_error( - "Untyped scalar null is not a valid scalar value", - )); - } - PrimitiveScalarType::Decimal128 => { - ScalarValue::Decimal128(None, 0, 0) - } - PrimitiveScalarType::Date64 => ScalarValue::Date64(None), - PrimitiveScalarType::TimestampSecond => { - ScalarValue::TimestampSecond(None, None) - } - PrimitiveScalarType::TimestampMillisecond => { - ScalarValue::TimestampMillisecond(None, None) - } - PrimitiveScalarType::IntervalYearmonth => { - ScalarValue::IntervalYearMonth(None) - } - PrimitiveScalarType::IntervalDaytime => { - ScalarValue::IntervalDayTime(None) - } - PrimitiveScalarType::IntervalMonthdaynano => { - ScalarValue::IntervalMonthDayNano(None) - } - PrimitiveScalarType::Binary => ScalarValue::Binary(None), - PrimitiveScalarType::LargeBinary => ScalarValue::LargeBinary(None), - }; - scalar_value - } else { - return Err(proto_error("Could not convert to the proper type")); - } - } - (Value::Decimal128Value(val), PrimitiveScalarType::Decimal128) => { - let 
array = vec_to_array(val.value.clone()); - ScalarValue::Decimal128( - Some(i128::from_be_bytes(array)), - val.p as u8, - val.s as u8, - ) - } - (Value::Date64Value(v), PrimitiveScalarType::Date64) => { - ScalarValue::Date64(Some(*v)) - } - (Value::IntervalYearmonthValue(v), PrimitiveScalarType::IntervalYearmonth) => { - ScalarValue::IntervalYearMonth(Some(*v)) - } - (Value::IntervalDaytimeValue(v), PrimitiveScalarType::IntervalDaytime) => { - ScalarValue::IntervalDayTime(Some(*v)) - } - (Value::IntervalMonthDayNano(v), PrimitiveScalarType::IntervalMonthdaynano) => { - let protobuf::IntervalMonthDayNanoValue { - months, - days, - nanos, - } = v; - ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( - *months, *days, *nanos, - ))) - } - _ => return Err(proto_error("Could not convert to the proper type")), - }) -} - -fn unwrap_timezone(proto_value: &str) -> Option { - if proto_value.is_empty() { - None - } else { - Some(proto_value.to_string()) - } -} - ->>>>>>> Changes in proto to provide full support for Time32 and Time64 pub fn from_proto_binary_op(op: &str) -> Result { match op { "And" => Ok(Operator::And), From b3f98d58d5be82bc8fd4c3cda120f38f10f69b52 Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Tue, 15 Nov 2022 16:52:56 +0100 Subject: [PATCH 09/13] Remove core/src/physical_plan/hash_utils.rs and useless comment --- .../core/src/physical_plan/hash_utils.rs | 830 ------------------ datafusion/core/tests/sql/select.rs | 2 - 2 files changed, 832 deletions(-) delete mode 100644 datafusion/core/src/physical_plan/hash_utils.rs diff --git a/datafusion/core/src/physical_plan/hash_utils.rs b/datafusion/core/src/physical_plan/hash_utils.rs deleted file mode 100644 index 7de8643237470..0000000000000 --- a/datafusion/core/src/physical_plan/hash_utils.rs +++ /dev/null @@ -1,830 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Functionality used both on logical and physical plans - -use crate::error::{DataFusionError, Result}; -use ahash::RandomState; -use arrow::array::{ - Array, ArrayRef, BooleanArray, Date32Array, Date64Array, Decimal128Array, - DictionaryArray, FixedSizeBinaryArray, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray, - TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, -}; -use arrow::datatypes::{ - ArrowDictionaryKeyType, ArrowNativeType, DataType, Int16Type, Int32Type, Int64Type, - Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, -}; -use std::sync::Arc; - -// Combines two hashes into one hash -#[inline] -fn combine_hashes(l: u64, r: u64) -> u64 { - let hash = (17 * 37u64).wrapping_add(l); - hash.wrapping_mul(37).wrapping_add(r) -} - -fn hash_null(random_state: &RandomState, hashes_buffer: &'_ mut [u64], mul_col: bool) { - if mul_col { - hashes_buffer.iter_mut().for_each(|hash| { - // stable hash for null value - *hash = combine_hashes(random_state.hash_one(&1), *hash); - }) - } else { - hashes_buffer.iter_mut().for_each(|hash| { - *hash = random_state.hash_one(&1); - }) - } -} - -fn 
hash_decimal128<'a>( - array: &ArrayRef, - random_state: &RandomState, - hashes_buffer: &'a mut [u64], - mul_col: bool, -) { - let array = array.as_any().downcast_ref::().unwrap(); - if array.null_count() == 0 { - if mul_col { - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - *hash = combine_hashes( - random_state.hash_one(&array.value(i).as_i128()), - *hash, - ); - } - } else { - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - *hash = random_state.hash_one(&array.value(i).as_i128()); - } - } - } else if mul_col { - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - if !array.is_null(i) { - *hash = combine_hashes( - random_state.hash_one(&array.value(i).as_i128()), - *hash, - ); - } - } - } else { - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - if !array.is_null(i) { - *hash = random_state.hash_one(&array.value(i).as_i128()); - } - } - } -} - -macro_rules! hash_array { - ($array_type:ident, $column: ident, $ty: ty, $hashes: ident, $random_state: ident, $multi_col: ident) => { - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); - if array.null_count() == 0 { - if $multi_col { - for (i, hash) in $hashes.iter_mut().enumerate() { - *hash = - combine_hashes($random_state.hash_one(&array.value(i)), *hash); - } - } else { - for (i, hash) in $hashes.iter_mut().enumerate() { - *hash = $random_state.hash_one(&array.value(i)); - } - } - } else { - if $multi_col { - for (i, hash) in $hashes.iter_mut().enumerate() { - if !array.is_null(i) { - *hash = combine_hashes( - $random_state.hash_one(&array.value(i)), - *hash, - ); - } - } - } else { - for (i, hash) in $hashes.iter_mut().enumerate() { - if !array.is_null(i) { - *hash = $random_state.hash_one(&array.value(i)); - } - } - } - } - }; -} - -macro_rules! 
hash_array_primitive { - ($array_type:ident, $column: ident, $ty: ident, $hashes: ident, $random_state: ident, $multi_col: ident) => { - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); - let values = array.values(); - - if array.null_count() == 0 { - if $multi_col { - for (hash, value) in $hashes.iter_mut().zip(values.iter()) { - *hash = combine_hashes($random_state.hash_one(value), *hash); - } - } else { - for (hash, value) in $hashes.iter_mut().zip(values.iter()) { - *hash = $random_state.hash_one(value) - } - } - } else { - if $multi_col { - for (i, (hash, value)) in - $hashes.iter_mut().zip(values.iter()).enumerate() - { - if !array.is_null(i) { - *hash = combine_hashes($random_state.hash_one(value), *hash); - } - } - } else { - for (i, (hash, value)) in - $hashes.iter_mut().zip(values.iter()).enumerate() - { - if !array.is_null(i) { - *hash = $random_state.hash_one(value); - } - } - } - } - }; -} - -macro_rules! hash_array_float { - ($array_type:ident, $column: ident, $ty: ident, $hashes: ident, $random_state: ident, $multi_col: ident) => { - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); - let values = array.values(); - - if array.null_count() == 0 { - if $multi_col { - for (hash, value) in $hashes.iter_mut().zip(values.iter()) { - *hash = combine_hashes( - $random_state.hash_one(&$ty::from_le_bytes(value.to_le_bytes())), - *hash, - ); - } - } else { - for (hash, value) in $hashes.iter_mut().zip(values.iter()) { - *hash = - $random_state.hash_one(&$ty::from_le_bytes(value.to_le_bytes())) - } - } - } else { - if $multi_col { - for (i, (hash, value)) in - $hashes.iter_mut().zip(values.iter()).enumerate() - { - if !array.is_null(i) { - *hash = combine_hashes( - $random_state - .hash_one(&$ty::from_le_bytes(value.to_le_bytes())), - *hash, - ); - } - } - } else { - for (i, (hash, value)) in - $hashes.iter_mut().zip(values.iter()).enumerate() - { - if !array.is_null(i) { - *hash = $random_state - 
.hash_one(&$ty::from_le_bytes(value.to_le_bytes())); - } - } - } - } - }; -} - -/// Hash the values in a dictionary array -fn create_hashes_dictionary( - array: &ArrayRef, - random_state: &RandomState, - hashes_buffer: &mut [u64], - multi_col: bool, -) -> Result<()> { - let dict_array = array.as_any().downcast_ref::>().unwrap(); - - // Hash each dictionary value once, and then use that computed - // hash for each key value to avoid a potentially expensive - // redundant hashing for large dictionary elements (e.g. strings) - let dict_values = Arc::clone(dict_array.values()); - let mut dict_hashes = vec![0; dict_values.len()]; - create_hashes(&[dict_values], random_state, &mut dict_hashes)?; - - // combine hash for each index in values - if multi_col { - for (hash, key) in hashes_buffer.iter_mut().zip(dict_array.keys().iter()) { - if let Some(key) = key { - let idx = key - .to_usize() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "Can not convert key value {:?} to usize in dictionary of type {:?}", - key, dict_array.data_type() - )) - })?; - *hash = combine_hashes(dict_hashes[idx], *hash) - } // no update for Null, consistent with other hashes - } - } else { - for (hash, key) in hashes_buffer.iter_mut().zip(dict_array.keys().iter()) { - if let Some(key) = key { - let idx = key - .to_usize() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "Can not convert key value {:?} to usize in dictionary of type {:?}", - key, dict_array.data_type() - )) - })?; - *hash = dict_hashes[idx] - } // no update for Null, consistent with other hashes - } - } - Ok(()) -} - -/// Test version of `create_hashes` that produces the same value for -/// all hashes (to test collisions) -/// -/// See comments on `hashes_buffer` for more details -#[cfg(feature = "force_hash_collisions")] -pub fn create_hashes<'a>( - _arrays: &[ArrayRef], - _random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 
- } - Ok(hashes_buffer) -} - -/// Test version of `create_row_hashes` that produces the same value for -/// all hashes (to test collisions) -/// -/// See comments on `hashes_buffer` for more details -#[cfg(feature = "force_hash_collisions")] -pub fn create_row_hashes<'a>( - _rows: &[Vec], - _random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 - } - Ok(hashes_buffer) -} - -/// Creates hash values for every row, based on their raw bytes. -#[cfg(not(feature = "force_hash_collisions"))] -pub fn create_row_hashes<'a>( - rows: &[Vec], - random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for hash in hashes_buffer.iter_mut() { - *hash = 0 - } - for (i, hash) in hashes_buffer.iter_mut().enumerate() { - *hash = random_state.hash_one(&rows[i]); - } - Ok(hashes_buffer) -} - -/// Creates hash values for every row, based on the values in the -/// columns. -/// -/// The number of rows to hash is determined by `hashes_buffer.len()`. 
-/// `hashes_buffer` should be pre-sized appropriately -#[cfg(not(feature = "force_hash_collisions"))] -pub fn create_hashes<'a>( - arrays: &[ArrayRef], - random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - // combine hashes with `combine_hashes` if we have more than 1 column - - use arrow::array::{ - BinaryArray, LargeBinaryArray, Time32MillisecondArray, Time32SecondArray, - Time64MicrosecondArray, Time64NanosecondArray, - }; - let multi_col = arrays.len() > 1; - - for col in arrays { - match col.data_type() { - DataType::Null => { - hash_null(random_state, hashes_buffer, multi_col); - } - DataType::Decimal128(_, _) => { - hash_decimal128(col, random_state, hashes_buffer, multi_col); - } - DataType::UInt8 => { - hash_array_primitive!( - UInt8Array, - col, - u8, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::UInt16 => { - hash_array_primitive!( - UInt16Array, - col, - u16, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::UInt32 => { - hash_array_primitive!( - UInt32Array, - col, - u32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::UInt64 => { - hash_array_primitive!( - UInt64Array, - col, - u64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Int8 => { - hash_array_primitive!( - Int8Array, - col, - i8, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Int16 => { - hash_array_primitive!( - Int16Array, - col, - i16, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Int32 => { - hash_array_primitive!( - Int32Array, - col, - i32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Int64 => { - hash_array_primitive!( - Int64Array, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Float32 => { - hash_array_float!( - Float32Array, - col, - u32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Float64 => { - hash_array_float!( - Float64Array, - col, - u64, - hashes_buffer, 
- random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Second, None) => { - hash_array_primitive!( - TimestampSecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Millisecond, None) => { - hash_array_primitive!( - TimestampMillisecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Microsecond, None) => { - hash_array_primitive!( - TimestampMicrosecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Nanosecond, _) => { - hash_array_primitive!( - TimestampNanosecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Time32(TimeUnit::Second) => { - hash_array_primitive!( - Time32SecondArray, - col, - i32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Time32(TimeUnit::Millisecond) => { - hash_array_primitive!( - Time32MillisecondArray, - col, - i32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Time64(TimeUnit::Microsecond) => { - hash_array_primitive!( - Time64MicrosecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Time64(TimeUnit::Nanosecond) => { - hash_array_primitive!( - Time64NanosecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Date32 => { - hash_array_primitive!( - Date32Array, - col, - i32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Date64 => { - hash_array_primitive!( - Date64Array, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Boolean => { - hash_array!( - BooleanArray, - col, - u8, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Utf8 => { - hash_array!( - StringArray, - col, - str, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::LargeUtf8 => { - hash_array!( - LargeStringArray, - col, - str, - hashes_buffer, - 
random_state, - multi_col - ); - } - DataType::Binary => { - hash_array!( - BinaryArray, - col, - &[u8], - hashes_buffer, - random_state, - multi_col - ); - } - DataType::FixedSizeBinary(_) => { - hash_array!( - FixedSizeBinaryArray, - col, - &[u8], - hashes_buffer, - random_state, - multi_col - ); - } - DataType::LargeBinary => { - hash_array!( - LargeBinaryArray, - col, - &[u8], - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Dictionary(index_type, _) => match **index_type { - DataType::Int8 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::Int16 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::Int32 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::Int64 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::UInt8 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::UInt16 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::UInt32 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - DataType::UInt64 => { - create_hashes_dictionary::( - col, - random_state, - hashes_buffer, - multi_col, - )?; - } - _ => { - return Err(DataFusionError::Internal(format!( - "Unsupported dictionary type in hasher hashing: {}", - col.data_type(), - ))) - } - }, - _ => { - // This is internal because we should have caught this before. 
- return Err(DataFusionError::Internal(format!( - "Unsupported data type in hasher: {}", - col.data_type() - ))); - } - } - } - Ok(hashes_buffer) -} - -#[cfg(test)] -mod tests { - use crate::from_slice::FromSlice; - use arrow::{ - array::{BinaryArray, DictionaryArray, FixedSizeBinaryArray}, - datatypes::Int8Type, - }; - use std::sync::Arc; - - use super::*; - - #[test] - fn create_hashes_for_decimal_array() -> Result<()> { - let array = vec![1, 2, 3, 4] - .into_iter() - .map(Some) - .collect::() - .with_precision_and_scale(20, 3) - .unwrap(); - let array_ref = Arc::new(array); - let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; array_ref.len()]; - let hashes = create_hashes(&[array_ref], &random_state, hashes_buff)?; - assert_eq!(hashes.len(), 4); - Ok(()) - } - - #[test] - fn create_hashes_for_float_arrays() -> Result<()> { - let f32_arr = Arc::new(Float32Array::from_slice(&[0.12, 0.5, 1f32, 444.7])); - let f64_arr = Arc::new(Float64Array::from_slice(&[0.12, 0.5, 1f64, 444.7])); - - let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; f32_arr.len()]; - let hashes = create_hashes(&[f32_arr], &random_state, hashes_buff)?; - assert_eq!(hashes.len(), 4,); - - let hashes = create_hashes(&[f64_arr], &random_state, hashes_buff)?; - assert_eq!(hashes.len(), 4,); - - Ok(()) - } - - #[test] - fn create_hashes_binary() -> Result<()> { - let byte_array = Arc::new(BinaryArray::from_vec(vec![ - &[4, 3, 2], - &[4, 3, 2], - &[1, 2, 3], - ])); - - let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; byte_array.len()]; - let hashes = create_hashes(&[byte_array], &random_state, hashes_buff)?; - assert_eq!(hashes.len(), 3,); - - Ok(()) - } - - #[test] - fn create_hashes_fixed_size_binary() -> Result<()> { - let input_arg = vec![vec![1, 2], vec![5, 6], vec![5, 6]]; - let fixed_size_binary_array = - Arc::new(FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap()); 
- - let random_state = RandomState::with_seeds(0, 0, 0, 0); - let hashes_buff = &mut vec![0; fixed_size_binary_array.len()]; - let hashes = - create_hashes(&[fixed_size_binary_array], &random_state, hashes_buff)?; - assert_eq!(hashes.len(), 3,); - - Ok(()) - } - - #[test] - // Tests actual values of hashes, which are different if forcing collisions - #[cfg(not(feature = "force_hash_collisions"))] - fn create_hashes_for_dict_arrays() { - let strings = vec![Some("foo"), None, Some("bar"), Some("foo"), None]; - - let string_array = Arc::new(strings.iter().cloned().collect::()); - let dict_array = Arc::new( - strings - .iter() - .cloned() - .collect::>(), - ); - - let random_state = RandomState::with_seeds(0, 0, 0, 0); - - let mut string_hashes = vec![0; strings.len()]; - create_hashes(&[string_array], &random_state, &mut string_hashes).unwrap(); - - let mut dict_hashes = vec![0; strings.len()]; - create_hashes(&[dict_array], &random_state, &mut dict_hashes).unwrap(); - - // Null values result in a zero hash, - for (val, hash) in strings.iter().zip(string_hashes.iter()) { - match val { - Some(_) => assert_ne!(*hash, 0), - None => assert_eq!(*hash, 0), - } - } - - // same logical values should hash to the same hash value - assert_eq!(string_hashes, dict_hashes); - - // Same values should map to same hash values - assert_eq!(strings[1], strings[4]); - assert_eq!(dict_hashes[1], dict_hashes[4]); - assert_eq!(strings[0], strings[3]); - assert_eq!(dict_hashes[0], dict_hashes[3]); - - // different strings should map to different hash values - assert_ne!(strings[0], strings[2]); - assert_ne!(dict_hashes[0], dict_hashes[2]); - } - - #[test] - // Tests actual values of hashes, which are different if forcing collisions - #[cfg(not(feature = "force_hash_collisions"))] - fn create_multi_column_hash_for_dict_arrays() { - let strings1 = vec![Some("foo"), None, Some("bar")]; - let strings2 = vec![Some("blarg"), Some("blah"), None]; - - let string_array = 
Arc::new(strings1.iter().cloned().collect::()); - let dict_array = Arc::new( - strings2 - .iter() - .cloned() - .collect::>(), - ); - - let random_state = RandomState::with_seeds(0, 0, 0, 0); - - let mut one_col_hashes = vec![0; strings1.len()]; - create_hashes(&[dict_array.clone()], &random_state, &mut one_col_hashes).unwrap(); - - let mut two_col_hashes = vec![0; strings1.len()]; - create_hashes( - &[dict_array, string_array], - &random_state, - &mut two_col_hashes, - ) - .unwrap(); - - assert_eq!(one_col_hashes.len(), 3); - assert_eq!(two_col_hashes.len(), 3); - - assert_ne!(one_col_hashes, two_col_hashes); - } -} diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index a2106ee9719db..6d7014507bb09 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -814,8 +814,6 @@ async fn query_on_string_dictionary() -> Result<()> { ]; assert_batches_eq!(expected, &actual); - // filtering with Time32 and Time64 types - // Expression evaluation let sql = "SELECT concat(d1, '-foo') FROM test"; let actual = execute_to_batches(&ctx, sql).await; From 5d9e1eb9e31c44429185336a7aaa8f4408b5424c Mon Sep 17 00:00:00 2001 From: andre-cc-natzka Date: Wed, 16 Nov 2022 22:46:39 +0100 Subject: [PATCH 10/13] Fix failing test --- datafusion/core/tests/sql/timestamp.rs | 2 +- datafusion/proto/src/to_proto.rs | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index f139faab5e2cd..7fb8e109f575c 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1555,7 +1555,7 @@ async fn cast_timestamp_to_timestamptz() -> Result<()> { #[tokio::test] async fn test_cast_to_time() -> Result<()> { let ctx = SessionContext::new(); - let sql = "SELECT 0::TIME64"; + let sql = "SELECT 0::TIME"; let actual = execute_to_batches(&ctx, sql).await; let expected = vec![ diff --git 
a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index d1f5834ed43a7..6e152cc0c3b85 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -1075,10 +1075,6 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { }) } - - - - datafusion::scalar::ScalarValue::Time32Second(v) => { create_proto_scalar(v, &data_type, |v| { Value::Time32Value(protobuf::ScalarTime32Value { @@ -1093,7 +1089,9 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { create_proto_scalar(v, &data_type, |v| { Value::Time32Value(protobuf::ScalarTime32Value { value: Some( - protobuf::scalar_time32_value::Value::Time32MillisecondValue(*v), + protobuf::scalar_time32_value::Value::Time32MillisecondValue( + *v, + ), ), }) }) @@ -1103,7 +1101,9 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { create_proto_scalar(v, &data_type, |v| { Value::Time64Value(protobuf::ScalarTime64Value { value: Some( - protobuf::scalar_time64_value::Value::Time64MicrosecondValue(*v), + protobuf::scalar_time64_value::Value::Time64MicrosecondValue( + *v, + ), ), }) }) @@ -1113,7 +1113,9 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { create_proto_scalar(v, &data_type, |v| { Value::Time64Value(protobuf::ScalarTime64Value { value: Some( - protobuf::scalar_time64_value::Value::Time64NanosecondValue(*v), + protobuf::scalar_time64_value::Value::Time64NanosecondValue( + *v, + ), ), }) }) From 7ea1d8fab59fdf1345cb41c2fb27e706fbd65bc4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 20 Nov 2022 07:23:28 -0500 Subject: [PATCH 11/13] Update to avoid deprecated chrono feature --- datafusion/core/tests/sql/timestamp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 7e46e4dbc081f..cb70ab2d0adc1 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -890,7 +890,7 @@ async fn group_by_timestamp_millis() -> 
Result<()> { ), Field::new("count", DataType::Int32, false), ])); - let base_dt = Utc.ymd(2018, 7, 1).and_hms(6, 0, 0); // 2018-Jul-01 06:00 + let base_dt = Utc.with_ymd_and_hms(2018, 7, 1, 6, 0, 0).unwrap(); // 2018-Jul-01 06:00 let hour1 = Duration::hours(1); let timestamps = vec![ base_dt.timestamp_millis(), From 28a933c6c605d799876cb4bcd2170cf1402dc8a9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 20 Nov 2022 07:29:06 -0500 Subject: [PATCH 12/13] Remove generated file --- datafusion/proto/src/generated/datafusion.rs | 1613 ------------------ 1 file changed, 1613 deletions(-) delete mode 100644 datafusion/proto/src/generated/datafusion.rs diff --git a/datafusion/proto/src/generated/datafusion.rs b/datafusion/proto/src/generated/datafusion.rs deleted file mode 100644 index aeb2daae76101..0000000000000 --- a/datafusion/proto/src/generated/datafusion.rs +++ /dev/null @@ -1,1613 +0,0 @@ -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ColumnRelation { - #[prost(string, tag="1")] - pub relation: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Column { - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, tag="2")] - pub relation: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DfField { - #[prost(message, optional, tag="1")] - pub field: ::core::option::Option, - #[prost(message, optional, tag="2")] - pub qualifier: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DfSchema { - #[prost(message, repeated, tag="1")] - pub columns: ::prost::alloc::vec::Vec, - #[prost(map="string, string", tag="2")] - pub metadata: ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, -} -/// logical plan -/// LogicalPlan is a nested type -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalPlanNode { - #[prost(oneof="logical_plan_node::LogicalPlanType", 
tags="1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23")] - pub logical_plan_type: ::core::option::Option, -} -/// Nested message and enum types in `LogicalPlanNode`. -pub mod logical_plan_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum LogicalPlanType { - #[prost(message, tag="1")] - ListingScan(super::ListingTableScanNode), - #[prost(message, tag="3")] - Projection(::prost::alloc::boxed::Box), - #[prost(message, tag="4")] - Selection(::prost::alloc::boxed::Box), - #[prost(message, tag="5")] - Limit(::prost::alloc::boxed::Box), - #[prost(message, tag="6")] - Aggregate(::prost::alloc::boxed::Box), - #[prost(message, tag="7")] - Join(::prost::alloc::boxed::Box), - #[prost(message, tag="8")] - Sort(::prost::alloc::boxed::Box), - #[prost(message, tag="9")] - Repartition(::prost::alloc::boxed::Box), - #[prost(message, tag="10")] - EmptyRelation(super::EmptyRelationNode), - #[prost(message, tag="11")] - CreateExternalTable(super::CreateExternalTableNode), - #[prost(message, tag="12")] - Explain(::prost::alloc::boxed::Box), - #[prost(message, tag="13")] - Window(::prost::alloc::boxed::Box), - #[prost(message, tag="14")] - Analyze(::prost::alloc::boxed::Box), - #[prost(message, tag="15")] - CrossJoin(::prost::alloc::boxed::Box), - #[prost(message, tag="16")] - Values(super::ValuesNode), - #[prost(message, tag="17")] - Extension(super::LogicalExtensionNode), - #[prost(message, tag="18")] - CreateCatalogSchema(super::CreateCatalogSchemaNode), - #[prost(message, tag="19")] - Union(super::UnionNode), - #[prost(message, tag="20")] - CreateCatalog(super::CreateCatalogNode), - #[prost(message, tag="21")] - SubqueryAlias(::prost::alloc::boxed::Box), - #[prost(message, tag="22")] - CreateView(::prost::alloc::boxed::Box), - #[prost(message, tag="23")] - Distinct(::prost::alloc::boxed::Box), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalExtensionNode { - #[prost(bytes="vec", tag="1")] - pub node: 
::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="2")] - pub inputs: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ProjectionColumns { - #[prost(string, repeated, tag="1")] - pub columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CsvFormat { - #[prost(bool, tag="1")] - pub has_header: bool, - #[prost(string, tag="2")] - pub delimiter: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ParquetFormat { - #[prost(bool, tag="1")] - pub enable_pruning: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AvroFormat { -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListingTableScanNode { - #[prost(string, tag="1")] - pub table_name: ::prost::alloc::string::String, - #[prost(string, repeated, tag="2")] - pub paths: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(string, tag="3")] - pub file_extension: ::prost::alloc::string::String, - #[prost(message, optional, tag="4")] - pub projection: ::core::option::Option, - #[prost(message, optional, tag="5")] - pub schema: ::core::option::Option, - #[prost(message, repeated, tag="6")] - pub filters: ::prost::alloc::vec::Vec, - #[prost(string, repeated, tag="7")] - pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(bool, tag="8")] - pub collect_stat: bool, - #[prost(uint32, tag="9")] - pub target_partitions: u32, - #[prost(oneof="listing_table_scan_node::FileFormatType", tags="10, 11, 12")] - pub file_format_type: ::core::option::Option, -} -/// Nested message and enum types in `ListingTableScanNode`. 
-pub mod listing_table_scan_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum FileFormatType { - #[prost(message, tag="10")] - Csv(super::CsvFormat), - #[prost(message, tag="11")] - Parquet(super::ParquetFormat), - #[prost(message, tag="12")] - Avro(super::AvroFormat), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ProjectionNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub expr: ::prost::alloc::vec::Vec, - #[prost(oneof="projection_node::OptionalAlias", tags="3")] - pub optional_alias: ::core::option::Option, -} -/// Nested message and enum types in `ProjectionNode`. -pub mod projection_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum OptionalAlias { - #[prost(string, tag="3")] - Alias(::prost::alloc::string::String), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SelectionNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub expr: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SortNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub expr: ::prost::alloc::vec::Vec, - /// Maximum number of highest/lowest rows to fetch; negative means no limit - #[prost(int64, tag="3")] - pub fetch: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct RepartitionNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(oneof="repartition_node::PartitionMethod", tags="2, 3")] - pub partition_method: ::core::option::Option, -} -/// Nested message and enum types in `RepartitionNode`. 
-pub mod repartition_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum PartitionMethod { - #[prost(uint64, tag="2")] - RoundRobin(u64), - #[prost(message, tag="3")] - Hash(super::HashRepartition), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct HashRepartition { - #[prost(message, repeated, tag="1")] - pub hash_expr: ::prost::alloc::vec::Vec, - #[prost(uint64, tag="2")] - pub partition_count: u64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct EmptyRelationNode { - #[prost(bool, tag="1")] - pub produce_one_row: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateExternalTableNode { - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(string, tag="2")] - pub location: ::prost::alloc::string::String, - #[prost(string, tag="3")] - pub file_type: ::prost::alloc::string::String, - #[prost(bool, tag="4")] - pub has_header: bool, - #[prost(message, optional, tag="5")] - pub schema: ::core::option::Option, - #[prost(string, repeated, tag="6")] - pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(bool, tag="7")] - pub if_not_exists: bool, - #[prost(string, tag="8")] - pub delimiter: ::prost::alloc::string::String, - #[prost(string, tag="9")] - pub definition: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateCatalogSchemaNode { - #[prost(string, tag="1")] - pub schema_name: ::prost::alloc::string::String, - #[prost(bool, tag="2")] - pub if_not_exists: bool, - #[prost(message, optional, tag="3")] - pub schema: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CreateCatalogNode { - #[prost(string, tag="1")] - pub catalog_name: ::prost::alloc::string::String, - #[prost(bool, tag="2")] - pub if_not_exists: bool, - #[prost(message, optional, tag="3")] - pub schema: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct 
CreateViewNode { - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, boxed, tag="2")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="3")] - pub or_replace: bool, - #[prost(string, tag="4")] - pub definition: ::prost::alloc::string::String, -} -/// a node containing data for defining values list. unlike in SQL where it's two dimensional, here -/// the list is flattened, and with the field n_cols it can be parsed and partitioned into rows -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ValuesNode { - #[prost(uint64, tag="1")] - pub n_cols: u64, - #[prost(message, repeated, tag="2")] - pub values_list: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AnalyzeNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub verbose: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ExplainNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub verbose: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub group_expr: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="3")] - pub aggr_expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub window_expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct JoinNode { - #[prost(message, optional, boxed, tag="1")] - pub left: 
::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub right: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(enumeration="JoinType", tag="3")] - pub join_type: i32, - #[prost(enumeration="JoinConstraint", tag="4")] - pub join_constraint: i32, - #[prost(message, repeated, tag="5")] - pub left_join_column: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="6")] - pub right_join_column: ::prost::alloc::vec::Vec, - #[prost(bool, tag="7")] - pub null_equals_null: bool, - #[prost(message, optional, tag="8")] - pub filter: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct DistinctNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct UnionNode { - #[prost(message, repeated, tag="1")] - pub inputs: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CrossJoinNode { - #[prost(message, optional, boxed, tag="1")] - pub left: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub right: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LimitNode { - #[prost(message, optional, boxed, tag="1")] - pub input: ::core::option::Option<::prost::alloc::boxed::Box>, - /// The number of rows to skip before fetch; non-positive means don't skip any - #[prost(int64, tag="2")] - pub skip: i64, - /// Maximum number of rows to fetch; negative means no limit - #[prost(int64, tag="3")] - pub fetch: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SelectionExecNode { - #[prost(message, optional, tag="1")] - pub expr: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SubqueryAliasNode { - #[prost(message, optional, boxed, tag="1")] - pub input: 
::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="2")] - pub alias: ::prost::alloc::string::String, -} -/// logical expressions -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalExprNode { - #[prost(oneof="logical_expr_node::ExprType", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33")] - pub expr_type: ::core::option::Option, -} -/// Nested message and enum types in `LogicalExprNode`. -pub mod logical_expr_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum ExprType { - /// column references - #[prost(message, tag="1")] - Column(super::Column), - /// alias - #[prost(message, tag="2")] - Alias(::prost::alloc::boxed::Box), - #[prost(message, tag="3")] - Literal(super::ScalarValue), - /// binary expressions - #[prost(message, tag="4")] - BinaryExpr(::prost::alloc::boxed::Box), - /// aggregate expressions - #[prost(message, tag="5")] - AggregateExpr(::prost::alloc::boxed::Box), - /// null checks - #[prost(message, tag="6")] - IsNullExpr(::prost::alloc::boxed::Box), - #[prost(message, tag="7")] - IsNotNullExpr(::prost::alloc::boxed::Box), - #[prost(message, tag="8")] - NotExpr(::prost::alloc::boxed::Box), - #[prost(message, tag="9")] - Between(::prost::alloc::boxed::Box), - #[prost(message, tag="10")] - Case(::prost::alloc::boxed::Box), - #[prost(message, tag="11")] - Cast(::prost::alloc::boxed::Box), - #[prost(message, tag="12")] - Sort(::prost::alloc::boxed::Box), - #[prost(message, tag="13")] - Negative(::prost::alloc::boxed::Box), - #[prost(message, tag="14")] - InList(::prost::alloc::boxed::Box), - #[prost(bool, tag="15")] - Wildcard(bool), - #[prost(message, tag="16")] - ScalarFunction(super::ScalarFunctionNode), - #[prost(message, tag="17")] - TryCast(::prost::alloc::boxed::Box), - /// window expressions - #[prost(message, tag="18")] - WindowExpr(::prost::alloc::boxed::Box), - /// AggregateUDF expressions - #[prost(message, 
tag="19")] - AggregateUdfExpr(::prost::alloc::boxed::Box), - /// Scalar UDF expressions - #[prost(message, tag="20")] - ScalarUdfExpr(super::ScalarUdfExprNode), - #[prost(message, tag="21")] - GetIndexedField(::prost::alloc::boxed::Box), - #[prost(message, tag="22")] - GroupingSet(super::GroupingSetNode), - #[prost(message, tag="23")] - Cube(super::CubeNode), - #[prost(message, tag="24")] - Rollup(super::RollupNode), - #[prost(message, tag="25")] - IsTrue(::prost::alloc::boxed::Box), - #[prost(message, tag="26")] - IsFalse(::prost::alloc::boxed::Box), - #[prost(message, tag="27")] - IsUnknown(::prost::alloc::boxed::Box), - #[prost(message, tag="28")] - IsNotTrue(::prost::alloc::boxed::Box), - #[prost(message, tag="29")] - IsNotFalse(::prost::alloc::boxed::Box), - #[prost(message, tag="30")] - IsNotUnknown(::prost::alloc::boxed::Box), - #[prost(message, tag="31")] - Like(::prost::alloc::boxed::Box), - #[prost(message, tag="32")] - Ilike(::prost::alloc::boxed::Box), - #[prost(message, tag="33")] - SimilarTo(::prost::alloc::boxed::Box), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LogicalExprList { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct GroupingSetNode { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CubeNode { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct RollupNode { - #[prost(message, repeated, tag="1")] - pub expr: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct GetIndexedField { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub key: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct 
IsNull { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotNull { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsTrue { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsFalse { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsUnknown { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotTrue { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotFalse { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IsNotUnknown { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Not { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AliasNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="2")] - pub alias: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct BinaryExprNode { - #[prost(message, optional, boxed, tag="1")] - pub l: 
::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub r: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="3")] - pub op: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct NegativeNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct InListNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub list: ::prost::alloc::vec::Vec, - #[prost(bool, tag="3")] - pub negated: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarFunctionNode { - #[prost(enumeration="ScalarFunction", tag="1")] - pub fun: i32, - #[prost(message, repeated, tag="2")] - pub args: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateExprNode { - #[prost(enumeration="AggregateFunction", tag="1")] - pub aggr_function: i32, - #[prost(message, repeated, tag="2")] - pub expr: ::prost::alloc::vec::Vec, - #[prost(bool, tag="3")] - pub distinct: bool, - #[prost(message, optional, boxed, tag="4")] - pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct AggregateUdfExprNode { - #[prost(string, tag="1")] - pub fun_name: ::prost::alloc::string::String, - #[prost(message, repeated, tag="2")] - pub args: ::prost::alloc::vec::Vec, - #[prost(message, optional, boxed, tag="3")] - pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarUdfExprNode { - #[prost(string, tag="1")] - pub fun_name: ::prost::alloc::string::String, - #[prost(message, repeated, tag="2")] - pub args: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct 
WindowExprNode { - #[prost(message, optional, boxed, tag="4")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="5")] - pub partition_by: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="6")] - pub order_by: ::prost::alloc::vec::Vec, - #[prost(oneof="window_expr_node::WindowFunction", tags="1, 2")] - pub window_function: ::core::option::Option, - /// repeated LogicalExprNode filter = 7; - #[prost(oneof="window_expr_node::WindowFrame", tags="8")] - pub window_frame: ::core::option::Option, -} -/// Nested message and enum types in `WindowExprNode`. -pub mod window_expr_node { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum WindowFunction { - #[prost(enumeration="super::AggregateFunction", tag="1")] - AggrFunction(i32), - /// udaf = 3 - #[prost(enumeration="super::BuiltInWindowFunction", tag="2")] - BuiltInFunction(i32), - } - /// repeated LogicalExprNode filter = 7; - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum WindowFrame { - #[prost(message, tag="8")] - Frame(super::WindowFrame), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct BetweenNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub negated: bool, - #[prost(message, optional, boxed, tag="3")] - pub low: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="4")] - pub high: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LikeNode { - #[prost(bool, tag="1")] - pub negated: bool, - #[prost(message, optional, boxed, tag="2")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="3")] - pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="4")] - pub escape_char: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, 
::prost::Message)] -pub struct ILikeNode { - #[prost(bool, tag="1")] - pub negated: bool, - #[prost(message, optional, boxed, tag="2")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="3")] - pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="4")] - pub escape_char: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SimilarToNode { - #[prost(bool, tag="1")] - pub negated: bool, - #[prost(message, optional, boxed, tag="2")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="3")] - pub pattern: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(string, tag="4")] - pub escape_char: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CaseNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, repeated, tag="2")] - pub when_then_expr: ::prost::alloc::vec::Vec, - #[prost(message, optional, boxed, tag="3")] - pub else_expr: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WhenThen { - #[prost(message, optional, tag="1")] - pub when_expr: ::core::option::Option, - #[prost(message, optional, tag="2")] - pub then_expr: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct CastNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub arrow_type: ::core::option::Option, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct TryCastNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, tag="2")] - pub arrow_type: ::core::option::Option, -} -#[derive(Clone, PartialEq, 
::prost::Message)] -pub struct SortExprNode { - #[prost(message, optional, boxed, tag="1")] - pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="2")] - pub asc: bool, - #[prost(bool, tag="3")] - pub nulls_first: bool, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowFrame { - #[prost(enumeration="WindowFrameUnits", tag="1")] - pub window_frame_units: i32, - #[prost(message, optional, tag="2")] - pub start_bound: ::core::option::Option, - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[prost(oneof="window_frame::EndBound", tags="3")] - pub end_bound: ::core::option::Option, -} -/// Nested message and enum types in `WindowFrame`. -pub mod window_frame { - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum EndBound { - #[prost(message, tag="3")] - Bound(super::WindowFrameBound), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct WindowFrameBound { - #[prost(enumeration="WindowFrameBoundType", tag="1")] - pub window_frame_bound_type: i32, - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[prost(oneof="window_frame_bound::BoundValue", tags="2")] - pub bound_value: ::core::option::Option, -} -/// Nested message and enum types in `WindowFrameBound`. 
-pub mod window_frame_bound { - /// "optional" keyword is stable in protoc 3.15 but prost is still on 3.14 (see and ) - /// this syntax is ugly but is binary compatible with the "optional" keyword (see ) - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum BoundValue { - #[prost(uint64, tag="2")] - Value(u64), - } -} -// ///////////////////////////////////////////////////////////////////////////////////////////////// -// Arrow Data Types -// ///////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Schema { - #[prost(message, repeated, tag="1")] - pub columns: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Field { - /// name of the field - #[prost(string, tag="1")] - pub name: ::prost::alloc::string::String, - #[prost(message, optional, boxed, tag="2")] - pub arrow_type: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(bool, tag="3")] - pub nullable: bool, - /// for complex data types like structs, unions - #[prost(message, repeated, tag="4")] - pub children: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct FixedSizeBinary { - #[prost(int32, tag="1")] - pub length: i32, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Timestamp { - #[prost(enumeration="TimeUnit", tag="1")] - pub time_unit: i32, - #[prost(string, tag="2")] - pub timezone: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Decimal { - #[prost(uint64, tag="1")] - pub whole: u64, - #[prost(uint64, tag="2")] - pub fractional: u64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct List { - #[prost(message, optional, boxed, tag="1")] - pub field_type: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct FixedSizeList { - #[prost(message, optional, boxed, tag="1")] - pub field_type: 
::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(int32, tag="2")] - pub list_size: i32, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Dictionary { - #[prost(message, optional, boxed, tag="1")] - pub key: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag="2")] - pub value: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Struct { - #[prost(message, repeated, tag="1")] - pub sub_field_types: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct Union { - #[prost(message, repeated, tag="1")] - pub union_types: ::prost::alloc::vec::Vec, - #[prost(enumeration="UnionMode", tag="2")] - pub union_mode: i32, - #[prost(int32, repeated, tag="3")] - pub type_ids: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarListValue { - #[prost(message, optional, tag="1")] - pub field: ::core::option::Option, - #[prost(message, repeated, tag="2")] - pub values: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarTimestampValue { - #[prost(string, tag="5")] - pub timezone: ::prost::alloc::string::String, - #[prost(oneof="scalar_timestamp_value::Value", tags="1, 2, 3, 4")] - pub value: ::core::option::Option, -} -/// Nested message and enum types in `ScalarTimestampValue`. -pub mod scalar_timestamp_value { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Value { - #[prost(int64, tag="1")] - TimeMicrosecondValue(i64), - #[prost(int64, tag="2")] - TimeNanosecondValue(i64), - #[prost(int64, tag="3")] - TimeSecondValue(i64), - #[prost(int64, tag="4")] - TimeMillisecondValue(i64), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarTime32Value { - #[prost(oneof="scalar_time32_value::Value", tags="1, 2")] - pub value: ::core::option::Option, -} -/// Nested message and enum types in `ScalarTime32Value`. 
-pub mod scalar_time32_value { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Value { - #[prost(int32, tag="1")] - Time32SecondValue(i32), - #[prost(int32, tag="2")] - Time32MillisecondValue(i32), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarTime64Value { - #[prost(oneof="scalar_time64_value::Value", tags="1, 2")] - pub value: ::core::option::Option, -} -/// Nested message and enum types in `ScalarTime64Value`. -pub mod scalar_time64_value { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Value { - #[prost(int64, tag="1")] - Time64MicrosecondValue(i64), - #[prost(int64, tag="2")] - Time64NanosecondValue(i64), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarDictionaryValue { - #[prost(message, optional, tag="1")] - pub index_type: ::core::option::Option, - #[prost(message, optional, boxed, tag="2")] - pub value: ::core::option::Option<::prost::alloc::boxed::Box>, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct IntervalMonthDayNanoValue { - #[prost(int32, tag="1")] - pub months: i32, - #[prost(int32, tag="2")] - pub days: i32, - #[prost(int64, tag="3")] - pub nanos: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct StructValue { - /// Note that a null struct value must have one or more fields, so we - /// encode a null StructValue as one witth an empty field_values - /// list. - #[prost(message, repeated, tag="2")] - pub field_values: ::prost::alloc::vec::Vec, - #[prost(message, repeated, tag="3")] - pub fields: ::prost::alloc::vec::Vec, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarValue { - #[prost(oneof="scalar_value::Value", tags="19, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33")] - pub value: ::core::option::Option, -} -/// Nested message and enum types in `ScalarValue`. 
-pub mod scalar_value { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Value { - /// Null value of any type (type is encoded) - #[prost(enumeration="super::PrimitiveScalarType", tag="19")] - NullValue(i32), - #[prost(bool, tag="1")] - BoolValue(bool), - #[prost(string, tag="2")] - Utf8Value(::prost::alloc::string::String), - #[prost(string, tag="3")] - LargeUtf8Value(::prost::alloc::string::String), - #[prost(int32, tag="4")] - Int8Value(i32), - #[prost(int32, tag="5")] - Int16Value(i32), - #[prost(int32, tag="6")] - Int32Value(i32), - #[prost(int64, tag="7")] - Int64Value(i64), - #[prost(uint32, tag="8")] - Uint8Value(u32), - #[prost(uint32, tag="9")] - Uint16Value(u32), - #[prost(uint32, tag="10")] - Uint32Value(u32), - #[prost(uint64, tag="11")] - Uint64Value(u64), - #[prost(float, tag="12")] - Float32Value(f32), - #[prost(double, tag="13")] - Float64Value(f64), - /// Literal Date32 value always has a unit of day - #[prost(int32, tag="14")] - Date32Value(i32), - #[prost(message, tag="17")] - ListValue(super::ScalarListValue), - #[prost(message, tag="18")] - NullListValue(super::ScalarType), - #[prost(message, tag="20")] - Decimal128Value(super::Decimal128), - #[prost(int64, tag="21")] - Date64Value(i64), - #[prost(int32, tag="24")] - IntervalYearmonthValue(i32), - #[prost(int64, tag="25")] - IntervalDaytimeValue(i64), - #[prost(message, tag="26")] - TimestampValue(super::ScalarTimestampValue), - #[prost(message, tag="27")] - DictionaryValue(::prost::alloc::boxed::Box), - #[prost(bytes, tag="28")] - BinaryValue(::prost::alloc::vec::Vec), - #[prost(bytes, tag="29")] - LargeBinaryValue(::prost::alloc::vec::Vec), - #[prost(message, tag="30")] - Time32Value(super::ScalarTime32Value), - #[prost(message, tag="31")] - Time64Value(super::ScalarTime64Value), - #[prost(message, tag="32")] - IntervalMonthDayNano(super::IntervalMonthDayNanoValue), - #[prost(message, tag="33")] - StructValue(super::StructValue), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] 
-pub struct Decimal128 { - #[prost(bytes="vec", tag="1")] - pub value: ::prost::alloc::vec::Vec, - #[prost(int64, tag="2")] - pub p: i64, - #[prost(int64, tag="3")] - pub s: i64, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarType { - #[prost(oneof="scalar_type::Datatype", tags="1, 2")] - pub datatype: ::core::option::Option, -} -/// Nested message and enum types in `ScalarType`. -pub mod scalar_type { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum Datatype { - #[prost(enumeration="super::PrimitiveScalarType", tag="1")] - Scalar(i32), - #[prost(message, tag="2")] - List(super::ScalarListType), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ScalarListType { - #[prost(string, repeated, tag="3")] - pub field_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, - #[prost(enumeration="PrimitiveScalarType", tag="2")] - pub deepest_type: i32, -} -/// Broke out into multiple message types so that type -/// metadata did not need to be in separate message -/// All types that are of the empty message types contain no additional metadata -/// about the type -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct ArrowType { - #[prost(oneof="arrow_type::ArrowTypeEnum", tags="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 32, 15, 16, 31, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30")] - pub arrow_type_enum: ::core::option::Option, -} -/// Nested message and enum types in `ArrowType`. 
-pub mod arrow_type { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum ArrowTypeEnum { - /// arrow::Type::NA - #[prost(message, tag="1")] - None(super::EmptyMessage), - /// arrow::Type::BOOL - #[prost(message, tag="2")] - Bool(super::EmptyMessage), - /// arrow::Type::UINT8 - #[prost(message, tag="3")] - Uint8(super::EmptyMessage), - /// arrow::Type::INT8 - #[prost(message, tag="4")] - Int8(super::EmptyMessage), - /// represents arrow::Type fields in src/arrow/type.h - #[prost(message, tag="5")] - Uint16(super::EmptyMessage), - #[prost(message, tag="6")] - Int16(super::EmptyMessage), - #[prost(message, tag="7")] - Uint32(super::EmptyMessage), - #[prost(message, tag="8")] - Int32(super::EmptyMessage), - #[prost(message, tag="9")] - Uint64(super::EmptyMessage), - #[prost(message, tag="10")] - Int64(super::EmptyMessage), - #[prost(message, tag="11")] - Float16(super::EmptyMessage), - #[prost(message, tag="12")] - Float32(super::EmptyMessage), - #[prost(message, tag="13")] - Float64(super::EmptyMessage), - #[prost(message, tag="14")] - Utf8(super::EmptyMessage), - #[prost(message, tag="32")] - LargeUtf8(super::EmptyMessage), - #[prost(message, tag="15")] - Binary(super::EmptyMessage), - #[prost(int32, tag="16")] - FixedSizeBinary(i32), - #[prost(message, tag="31")] - LargeBinary(super::EmptyMessage), - #[prost(message, tag="17")] - Date32(super::EmptyMessage), - #[prost(message, tag="18")] - Date64(super::EmptyMessage), - #[prost(enumeration="super::TimeUnit", tag="19")] - Duration(i32), - #[prost(message, tag="20")] - Timestamp(super::Timestamp), - #[prost(enumeration="super::TimeUnit", tag="21")] - Time32(i32), - #[prost(enumeration="super::TimeUnit", tag="22")] - Time64(i32), - #[prost(enumeration="super::IntervalUnit", tag="23")] - Interval(i32), - #[prost(message, tag="24")] - Decimal(super::Decimal), - #[prost(message, tag="25")] - List(::prost::alloc::boxed::Box), - #[prost(message, tag="26")] - LargeList(::prost::alloc::boxed::Box), - #[prost(message, 
tag="27")] - FixedSizeList(::prost::alloc::boxed::Box), - #[prost(message, tag="28")] - Struct(super::Struct), - #[prost(message, tag="29")] - Union(super::Union), - #[prost(message, tag="30")] - Dictionary(::prost::alloc::boxed::Box), - } -} -/// Useful for representing an empty enum variant in rust -/// E.G. enum example{One, Two(i32)} -/// maps to -/// message example{ -/// oneof{ -/// EmptyMessage One = 1; -/// i32 Two = 2; -/// } -/// } -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct EmptyMessage { -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct OptimizedLogicalPlanType { - #[prost(string, tag="1")] - pub optimizer_name: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct OptimizedPhysicalPlanType { - #[prost(string, tag="1")] - pub optimizer_name: ::prost::alloc::string::String, -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct PlanType { - #[prost(oneof="plan_type::PlanTypeEnum", tags="1, 2, 3, 4, 5, 6")] - pub plan_type_enum: ::core::option::Option, -} -/// Nested message and enum types in `PlanType`. 
-pub mod plan_type { - #[derive(Clone, PartialEq, ::prost::Oneof)] - pub enum PlanTypeEnum { - #[prost(message, tag="1")] - InitialLogicalPlan(super::EmptyMessage), - #[prost(message, tag="2")] - OptimizedLogicalPlan(super::OptimizedLogicalPlanType), - #[prost(message, tag="3")] - FinalLogicalPlan(super::EmptyMessage), - #[prost(message, tag="4")] - InitialPhysicalPlan(super::EmptyMessage), - #[prost(message, tag="5")] - OptimizedPhysicalPlan(super::OptimizedPhysicalPlanType), - #[prost(message, tag="6")] - FinalPhysicalPlan(super::EmptyMessage), - } -} -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct StringifiedPlan { - #[prost(message, optional, tag="1")] - pub plan_type: ::core::option::Option, - #[prost(string, tag="2")] - pub plan: ::prost::alloc::string::String, -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum JoinType { - Inner = 0, - Left = 1, - Right = 2, - Full = 3, - Semi = 4, - Anti = 5, -} -impl JoinType { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - JoinType::Inner => "INNER", - JoinType::Left => "LEFT", - JoinType::Right => "RIGHT", - JoinType::Full => "FULL", - JoinType::Semi => "SEMI", - JoinType::Anti => "ANTI", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum JoinConstraint { - On = 0, - Using = 1, -} -impl JoinConstraint { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - JoinConstraint::On => "ON", - JoinConstraint::Using => "USING", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum ScalarFunction { - Abs = 0, - Acos = 1, - Asin = 2, - Atan = 3, - Ascii = 4, - Ceil = 5, - Cos = 6, - Digest = 7, - Exp = 8, - Floor = 9, - Ln = 10, - Log = 11, - Log10 = 12, - Log2 = 13, - Round = 14, - Signum = 15, - Sin = 16, - Sqrt = 17, - Tan = 18, - Trunc = 19, - Array = 20, - RegexpMatch = 21, - BitLength = 22, - Btrim = 23, - CharacterLength = 24, - Chr = 25, - Concat = 26, - ConcatWithSeparator = 27, - DatePart = 28, - DateTrunc = 29, - InitCap = 30, - Left = 31, - Lpad = 32, - Lower = 33, - Ltrim = 34, - Md5 = 35, - NullIf = 36, - OctetLength = 37, - Random = 38, - RegexpReplace = 39, - Repeat = 40, - Replace = 41, - Reverse = 42, - Right = 43, - Rpad = 44, - Rtrim = 45, - Sha224 = 46, - Sha256 = 47, - Sha384 = 48, - Sha512 = 49, - SplitPart = 50, - StartsWith = 51, - Strpos = 52, - Substr = 53, - ToHex = 54, - ToTimestamp = 55, - ToTimestampMillis = 56, - ToTimestampMicros = 57, - ToTimestampSeconds = 58, - Now = 59, - Translate = 60, - Trim = 61, - Upper = 62, - Coalesce = 63, - Power = 64, - StructFun = 65, - FromUnixtime = 66, - Atan2 = 67, - DateBin = 68, - ArrowTypeof = 69, -} -impl ScalarFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - ScalarFunction::Abs => "Abs", - ScalarFunction::Acos => "Acos", - ScalarFunction::Asin => "Asin", - ScalarFunction::Atan => "Atan", - ScalarFunction::Ascii => "Ascii", - ScalarFunction::Ceil => "Ceil", - ScalarFunction::Cos => "Cos", - ScalarFunction::Digest => "Digest", - ScalarFunction::Exp => "Exp", - ScalarFunction::Floor => "Floor", - ScalarFunction::Ln => "Ln", - ScalarFunction::Log => "Log", - ScalarFunction::Log10 => "Log10", - ScalarFunction::Log2 => "Log2", - ScalarFunction::Round => "Round", - ScalarFunction::Signum => "Signum", - ScalarFunction::Sin => "Sin", - ScalarFunction::Sqrt => "Sqrt", - ScalarFunction::Tan => "Tan", - ScalarFunction::Trunc => "Trunc", - ScalarFunction::Array => "Array", - ScalarFunction::RegexpMatch => "RegexpMatch", - ScalarFunction::BitLength => "BitLength", - ScalarFunction::Btrim => "Btrim", - ScalarFunction::CharacterLength => "CharacterLength", - ScalarFunction::Chr => "Chr", - ScalarFunction::Concat => "Concat", - ScalarFunction::ConcatWithSeparator => "ConcatWithSeparator", - ScalarFunction::DatePart => "DatePart", - ScalarFunction::DateTrunc => "DateTrunc", - ScalarFunction::InitCap => "InitCap", - ScalarFunction::Left => "Left", - ScalarFunction::Lpad => "Lpad", - ScalarFunction::Lower => "Lower", - ScalarFunction::Ltrim => "Ltrim", - ScalarFunction::Md5 => "MD5", - ScalarFunction::NullIf => "NullIf", - ScalarFunction::OctetLength => "OctetLength", - ScalarFunction::Random => "Random", - ScalarFunction::RegexpReplace => "RegexpReplace", - ScalarFunction::Repeat => "Repeat", - ScalarFunction::Replace => "Replace", - ScalarFunction::Reverse => "Reverse", - ScalarFunction::Right => "Right", - ScalarFunction::Rpad => "Rpad", - ScalarFunction::Rtrim => "Rtrim", - ScalarFunction::Sha224 => "SHA224", - ScalarFunction::Sha256 => "SHA256", - ScalarFunction::Sha384 => "SHA384", - ScalarFunction::Sha512 => "SHA512", - ScalarFunction::SplitPart => "SplitPart", - 
ScalarFunction::StartsWith => "StartsWith", - ScalarFunction::Strpos => "Strpos", - ScalarFunction::Substr => "Substr", - ScalarFunction::ToHex => "ToHex", - ScalarFunction::ToTimestamp => "ToTimestamp", - ScalarFunction::ToTimestampMillis => "ToTimestampMillis", - ScalarFunction::ToTimestampMicros => "ToTimestampMicros", - ScalarFunction::ToTimestampSeconds => "ToTimestampSeconds", - ScalarFunction::Now => "Now", - ScalarFunction::Translate => "Translate", - ScalarFunction::Trim => "Trim", - ScalarFunction::Upper => "Upper", - ScalarFunction::Coalesce => "Coalesce", - ScalarFunction::Power => "Power", - ScalarFunction::StructFun => "StructFun", - ScalarFunction::FromUnixtime => "FromUnixtime", - ScalarFunction::Atan2 => "Atan2", - ScalarFunction::DateBin => "DateBin", - ScalarFunction::ArrowTypeof => "ArrowTypeof", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum AggregateFunction { - Min = 0, - Max = 1, - Sum = 2, - Avg = 3, - Count = 4, - ApproxDistinct = 5, - ArrayAgg = 6, - Variance = 7, - VariancePop = 8, - Covariance = 9, - CovariancePop = 10, - Stddev = 11, - StddevPop = 12, - Correlation = 13, - ApproxPercentileCont = 14, - ApproxMedian = 15, - ApproxPercentileContWithWeight = 16, - Grouping = 17, - Median = 18, -} -impl AggregateFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - AggregateFunction::Min => "MIN", - AggregateFunction::Max => "MAX", - AggregateFunction::Sum => "SUM", - AggregateFunction::Avg => "AVG", - AggregateFunction::Count => "COUNT", - AggregateFunction::ApproxDistinct => "APPROX_DISTINCT", - AggregateFunction::ArrayAgg => "ARRAY_AGG", - AggregateFunction::Variance => "VARIANCE", - AggregateFunction::VariancePop => "VARIANCE_POP", - AggregateFunction::Covariance => "COVARIANCE", - AggregateFunction::CovariancePop => "COVARIANCE_POP", - AggregateFunction::Stddev => "STDDEV", - AggregateFunction::StddevPop => "STDDEV_POP", - AggregateFunction::Correlation => "CORRELATION", - AggregateFunction::ApproxPercentileCont => "APPROX_PERCENTILE_CONT", - AggregateFunction::ApproxMedian => "APPROX_MEDIAN", - AggregateFunction::ApproxPercentileContWithWeight => "APPROX_PERCENTILE_CONT_WITH_WEIGHT", - AggregateFunction::Grouping => "GROUPING", - AggregateFunction::Median => "MEDIAN", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum BuiltInWindowFunction { - RowNumber = 0, - Rank = 1, - DenseRank = 2, - PercentRank = 3, - CumeDist = 4, - Ntile = 5, - Lag = 6, - Lead = 7, - FirstValue = 8, - LastValue = 9, - NthValue = 10, -} -impl BuiltInWindowFunction { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - BuiltInWindowFunction::RowNumber => "ROW_NUMBER", - BuiltInWindowFunction::Rank => "RANK", - BuiltInWindowFunction::DenseRank => "DENSE_RANK", - BuiltInWindowFunction::PercentRank => "PERCENT_RANK", - BuiltInWindowFunction::CumeDist => "CUME_DIST", - BuiltInWindowFunction::Ntile => "NTILE", - BuiltInWindowFunction::Lag => "LAG", - BuiltInWindowFunction::Lead => "LEAD", - BuiltInWindowFunction::FirstValue => "FIRST_VALUE", - BuiltInWindowFunction::LastValue => "LAST_VALUE", - BuiltInWindowFunction::NthValue => "NTH_VALUE", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum WindowFrameUnits { - Rows = 0, - Range = 1, - Groups = 2, -} -impl WindowFrameUnits { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - WindowFrameUnits::Rows => "ROWS", - WindowFrameUnits::Range => "RANGE", - WindowFrameUnits::Groups => "GROUPS", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum WindowFrameBoundType { - CurrentRow = 0, - Preceding = 1, - Following = 2, -} -impl WindowFrameBoundType { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - WindowFrameBoundType::CurrentRow => "CURRENT_ROW", - WindowFrameBoundType::Preceding => "PRECEDING", - WindowFrameBoundType::Following => "FOLLOWING", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum DateUnit { - Day = 0, - DateMillisecond = 1, -} -impl DateUnit { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - DateUnit::Day => "Day", - DateUnit::DateMillisecond => "DateMillisecond", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum TimeUnit { - Second = 0, - Millisecond = 1, - Microsecond = 2, - Nanosecond = 3, -} -impl TimeUnit { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - TimeUnit::Second => "Second", - TimeUnit::Millisecond => "Millisecond", - TimeUnit::Microsecond => "Microsecond", - TimeUnit::Nanosecond => "Nanosecond", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum IntervalUnit { - YearMonth = 0, - DayTime = 1, - MonthDayNano = 2, -} -impl IntervalUnit { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - IntervalUnit::YearMonth => "YearMonth", - IntervalUnit::DayTime => "DayTime", - IntervalUnit::MonthDayNano => "MonthDayNano", - } - } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum UnionMode { - Sparse = 0, - Dense = 1, -} -impl UnionMode { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - UnionMode::Sparse => "sparse", - UnionMode::Dense => "dense", - } - } -} -/// Contains all valid datafusion scalar type except for -/// List -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum PrimitiveScalarType { - /// arrow::Type::BOOL - Bool = 0, - /// arrow::Type::UINT8 - Uint8 = 1, - /// arrow::Type::INT8 - Int8 = 2, - /// represents arrow::Type fields in src/arrow/type.h - Uint16 = 3, - Int16 = 4, - Uint32 = 5, - Int32 = 6, - Uint64 = 7, - Int64 = 8, - Float32 = 9, - Float64 = 10, - Utf8 = 11, - LargeUtf8 = 12, - Date32 = 13, - TimestampMicrosecond = 14, - TimestampNanosecond = 15, - Null = 16, - Decimal128 = 17, - Date64 = 20, - TimestampSecond = 21, - TimestampMillisecond = 22, - IntervalYearmonth = 23, - IntervalDaytime = 24, - IntervalMonthdaynano = 31, - Binary = 25, - LargeBinary = 26, - Time32Second = 27, - Time32Millisecond = 28, - Time64Microsecond = 29, - Time64Nanosecond = 30, -} - -impl PrimitiveScalarType { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
- pub fn as_str_name(&self) -> &'static str { - match self { - PrimitiveScalarType::Bool => "BOOL", - PrimitiveScalarType::Uint8 => "UINT8", - PrimitiveScalarType::Int8 => "INT8", - PrimitiveScalarType::Uint16 => "UINT16", - PrimitiveScalarType::Int16 => "INT16", - PrimitiveScalarType::Uint32 => "UINT32", - PrimitiveScalarType::Int32 => "INT32", - PrimitiveScalarType::Uint64 => "UINT64", - PrimitiveScalarType::Int64 => "INT64", - PrimitiveScalarType::Float32 => "FLOAT32", - PrimitiveScalarType::Float64 => "FLOAT64", - PrimitiveScalarType::Utf8 => "UTF8", - PrimitiveScalarType::LargeUtf8 => "LARGE_UTF8", - PrimitiveScalarType::Date32 => "DATE32", - PrimitiveScalarType::TimestampMicrosecond => "TIMESTAMP_MICROSECOND", - PrimitiveScalarType::TimestampNanosecond => "TIMESTAMP_NANOSECOND", - PrimitiveScalarType::Null => "NULL", - PrimitiveScalarType::Decimal128 => "DECIMAL128", - PrimitiveScalarType::Date64 => "DATE64", - PrimitiveScalarType::TimestampSecond => "TIMESTAMP_SECOND", - PrimitiveScalarType::TimestampMillisecond => "TIMESTAMP_MILLISECOND", - PrimitiveScalarType::IntervalYearmonth => "INTERVAL_YEARMONTH", - PrimitiveScalarType::IntervalDaytime => "INTERVAL_DAYTIME", - PrimitiveScalarType::IntervalMonthdaynano => "INTERVAL_MONTHDAYNANO", - PrimitiveScalarType::Binary => "BINARY", - PrimitiveScalarType::LargeBinary => "LARGE_BINARY", - PrimitiveScalarType::Time32Second => "TIME32_SECOND", - PrimitiveScalarType::Time32Millisecond => "TIME32_MILLISECOND", - PrimitiveScalarType::Time64Microsecond => "TIME64MICROSECOND", - PrimitiveScalarType::Time64Nanosecond => "TIME64NANOSECOND", - } - } -} From 0397fe645f5df64147d278afe9a5f1a090063f6c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 20 Nov 2022 14:42:38 -0500 Subject: [PATCH 13/13] Clippy --- datafusion/expr/src/type_coercion/binary.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs 
index aa2b33aa0d46f..70e09f2fbdf2b 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -523,13 +523,13 @@ fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { /// as Time32 can only be used to Second and Millisecond accuracy, while Time64 /// is exclusively used to Microsecond and Nanosecond accuracy fn is_time_with_valid_unit(datatype: DataType) -> bool { - match datatype { + matches!( + datatype, DataType::Time32(TimeUnit::Second) - | DataType::Time32(TimeUnit::Millisecond) - | DataType::Time64(TimeUnit::Microsecond) - | DataType::Time64(TimeUnit::Nanosecond) => true, - _ => false, - } + | DataType::Time32(TimeUnit::Millisecond) + | DataType::Time64(TimeUnit::Microsecond) + | DataType::Time64(TimeUnit::Nanosecond) + ) } /// Coercion rules for Temporal columns: the type that both lhs and rhs can be