From 16551d92b02c33a43d97679c1308fa762de115f0 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 6 Jan 2023 16:37:34 -0800 Subject: [PATCH 1/5] delete tests duplicated between sqllogictests and aggregates / arrow_typeof --- datafusion/core/tests/sql/aggregates.rs | 90 ------------ datafusion/core/tests/sql/arrow_typeof.rs | 139 ------------------ .../sqllogictests/test_files/aggregate.slt | 36 +++-- 3 files changed, 17 insertions(+), 248 deletions(-) delete mode 100644 datafusion/core/tests/sql/arrow_typeof.rs diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 89077ae192d81..9af1d93012ae8 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -20,42 +20,6 @@ use datafusion::scalar::ScalarValue; use datafusion::test_util::scan_empty; use datafusion_common::cast::as_float64_array; -#[tokio::test] -async fn csv_query_avg_multi_batch() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - let sql = "SELECT avg(c12) FROM aggregate_test_100"; - let dataframe = ctx.sql(sql).await.unwrap(); - let results = dataframe.collect().await.unwrap(); - let batch = &results[0]; - let column = batch.column(0); - let array = as_float64_array(column)?; - let actual = array.value(0); - let expected = 0.5089725; - // Due to float number's accuracy, different batch size will lead to different - // answers. - assert!((expected - actual).abs() < 0.01); - Ok(()) -} - -#[tokio::test] -#[ignore] // https://github.com/apache/arrow-datafusion/issues/3353 -async fn csv_query_approx_count() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - let sql = "SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----------+--------------+", - "| count_c9 | count_c9_str |", - "+----------+--------------+", - "| 100 | 99 |", - "+----------+--------------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - #[tokio::test] async fn csv_query_approx_percentile_cont_with_weight() -> Result<()> { let ctx = SessionContext::new(); @@ -120,60 +84,6 @@ async fn csv_query_approx_percentile_cont_with_histogram_bins() -> Result<()> { Ok(()) } -#[tokio::test] -async fn csv_query_array_agg() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - let sql = - "SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 2) test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+------------------------------------------------------------------+", - "| ARRAYAGG(test.c13) |", - "+------------------------------------------------------------------+", - "| [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm, 0keZ5G8BffGwgF2RwQD59TFzMStxCB] |", - "+------------------------------------------------------------------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn csv_query_array_agg_empty() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - let sql = - "SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 LIMIT 0) test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+--------------------+", - "| ARRAYAGG(test.c13) |", - "+--------------------+", - "| [] |", - "+--------------------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn csv_query_array_agg_one() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - let sql = - "SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 1) test"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----------------------------------+", - "| ARRAYAGG(test.c13) |", - "+----------------------------------+", - "| [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm] |", - "+----------------------------------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - #[tokio::test] async fn csv_query_array_agg_unsupported() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/arrow_typeof.rs b/datafusion/core/tests/sql/arrow_typeof.rs deleted file mode 100644 index 4477ad53c12ba..0000000000000 --- a/datafusion/core/tests/sql/arrow_typeof.rs +++ /dev/null @@ -1,139 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use super::*; - -#[tokio::test] -async fn arrow_typeof_null() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(null)"; - let actual = execute(&ctx, sql).await; - let expected = "Null"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_boolean() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(true)"; - let actual = execute(&ctx, sql).await; - let expected = "Boolean"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_i64() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(1)"; - let actual = execute(&ctx, sql).await; - let expected = "Int64"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_i32() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(1::int)"; - let actual = execute(&ctx, sql).await; - let expected = "Int32"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_f64() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(1.0)"; - let actual = execute(&ctx, sql).await; - let expected = "Float64"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_f32() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(1.0::float)"; - let actual = execute(&ctx, sql).await; - let expected = "Float32"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_decimal() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(1::Decimal)"; - let actual = execute(&ctx, sql).await; - let expected = "Decimal128(38, 10)"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_timestamp() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(now()::timestamp)"; - let actual = execute(&ctx, sql).await; - let expected = "Timestamp(Nanosecond, None)"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_timestamp_utc() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(now())"; - let actual = execute(&ctx, sql).await; - let expected = "Timestamp(Nanosecond, Some(\"+00:00\"))"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_timestamp_date32() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof(now()::date)"; - let actual = execute(&ctx, sql).await; - let expected = "Date32"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} - -#[tokio::test] -async fn arrow_typeof_utf8() -> Result<()> { - let ctx = SessionContext::new(); - let sql = "SELECT arrow_typeof('1')"; - let actual = execute(&ctx, sql).await; - let expected = "Utf8"; - assert_eq!(expected, &actual[0][0]); - - Ok(()) -} diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index 7a1b012b84108..f3a312e352ffb 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -846,26 +846,23 @@ SELECT count(1 + 1) ---- 1 -# FIX: "CSV Writer does not support List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) data type") # csv_query_array_agg -# query T -# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 2) test -# ---- -# [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm0keZ5G8BffGwgF2RwQD59TFzMStxCB] +query T +SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 2) test +---- +[0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm, 0keZ5G8BffGwgF2RwQD59TFzMStxCB] -# FIX: see above # csv_query_array_agg_empty -# query I -# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 LIMIT 0) test -# ---- -# [] +query I +SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 LIMIT 0) test +---- +[] -# FIX: see above # csv_query_array_agg_one -# query I -# SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 1) test -# ---- -# [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm] +query I +SELECT array_agg(c13) FROM (SELECT * FROM aggregate_test_100 ORDER BY c13 LIMIT 1) test +---- +[0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm] # csv_query_array_agg_with_overflow query IIRIII @@ -976,6 +973,7 @@ select max(c1) from d_table ---- 110.009 +# FIX: doesn't check datatype # aggregate_decimal_sum query R select sum(c1) from d_table @@ -991,10 +989,10 @@ select avg(c1) from d_table # FIX: different test table # aggregate -# query I -# SELECT SUM(c1), SUM(c2) FROM test -# ---- -# 60 220 +query I +SELECT SUM(c1), SUM(c2) FROM test +---- +60 220 # TODO: aggregate_empty From 29e393d3a6ea4dde6b9d3f5e52b20ea7799b56e5 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 6 Jan 2023 16:50:22 -0800 Subject: [PATCH 2/5] recomment failing test --- .../core/tests/sqllogictests/test_files/aggregate.slt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index f3a312e352ffb..4d78c69f7062b 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -93,7 +93,7 @@ SELECT stddev_pop(c12) FROM aggregate_test_100 # csv_query_stddev_4 query R SELECT stddev(c12) FROM aggregate_test_100 ----- +----q 0.3054095399405338 # csv_query_stddev_5 @@ -989,10 +989,10 @@ select avg(c1) from d_table # FIX: different test table # aggregate -query I -SELECT SUM(c1), SUM(c2) FROM test ----- -60 220 +# query I +# SELECT SUM(c1), SUM(c2) FROM test +# ---- +# 60 220 # TODO: aggregate_empty From df4bb0a1060677c9b955cf50b08de34622a8bd20 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 6 Jan 2023 16:51:22 -0800 Subject: [PATCH 3/5] remove typo --- datafusion/core/tests/sqllogictests/test_files/aggregate.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index 4d78c69f7062b..481ff45c8789e 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -93,7 +93,7 @@ SELECT stddev_pop(c12) FROM aggregate_test_100 # csv_query_stddev_4 query R SELECT stddev(c12) FROM aggregate_test_100 -----q +---- 0.3054095399405338 # csv_query_stddev_5 From 47aaec526d7d548205b8b0758d621925cde39fa3 Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 7 Jan 2023 15:31:28 -0800 Subject: [PATCH 4/5] add back in test that's broken in sqllogictest --- datafusion/core/tests/sql/aggregates.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 9af1d93012ae8..9911df9c7ff85 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -20,6 +20,24 @@ use datafusion::scalar::ScalarValue; use datafusion::test_util::scan_empty; use datafusion_common::cast::as_float64_array; +#[tokio::test] +#[ignore] // https://github.com/apache/arrow-datafusion/issues/3353 +async fn csv_query_approx_count() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx).await?; + let sql = "SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------+--------------+", + "| count_c9 | count_c9_str |", + "+----------+--------------+", + "| 100 | 99 |", + "+----------+--------------+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn csv_query_approx_percentile_cont_with_weight() -> Result<()> { let ctx = SessionContext::new(); From de93ebf63b2812b36bb46e3eda36c4a85cbda583 Mon Sep 17 00:00:00 2001 From: Matt Date: Sun, 8 Jan 2023 08:57:54 -0800 Subject: [PATCH 5/5] remove arrow module --- datafusion/core/tests/sql/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 445950e1d085e..365a2a21cf1f8 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -104,7 +104,6 @@ pub mod union; pub mod wildcard; pub mod window; -pub mod arrow_typeof; pub mod decimal; pub mod explain; pub mod idenfifers;