From 55782c8b3b93d25aeca61b3dee19d0fde1e8cd3b Mon Sep 17 00:00:00 2001 From: jiangzhx Date: Wed, 14 Jun 2023 11:15:48 +0800 Subject: [PATCH 1/4] port register_aggregate_test_100 to sql --- .../core/tests/sqllogictests/src/setup.rs | 13 ----------- .../sqllogictests/test_files/aggregate.slt | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs b/datafusion/core/tests/sqllogictests/src/setup.rs index 26f9d2501a916..2316e488a9bef 100644 --- a/datafusion/core/tests/sqllogictests/src/setup.rs +++ b/datafusion/core/tests/sqllogictests/src/setup.rs @@ -67,7 +67,6 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) { } pub async fn register_aggregate_tables(ctx: &SessionContext) { - register_aggregate_test_100(ctx).await; register_decimal_table(ctx); register_median_test_tables(ctx); register_test_data(ctx); @@ -182,18 +181,6 @@ fn register_decimal_table(ctx: &SessionContext) { ctx.register_table("d_table", provider).unwrap(); } -async fn register_aggregate_test_100(ctx: &SessionContext) { - let test_data = datafusion::test_util::arrow_test_data(); - let schema = test_util::aggr_test_schema(); - ctx.register_csv( - "aggregate_test_100", - &format!("{test_data}/csv/aggregate_test_100.csv"), - CsvReadOptions::new().schema(&schema), - ) - .await - .unwrap(); -} - pub async fn register_scalar_tables(ctx: &SessionContext) { register_nan_table(ctx) } diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index ca1f8035f6e0b..fe16c8e8860cb 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -15,6 +15,28 @@ # specific language governing permissions and limitations # under the License. +####### +# Setup test data table +####### +statement ok +CREATE EXTERNAL TABLE aggregate_test_100 ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT, + c5 INT, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 INT UNSIGNED NOT NULL, + c10 BIGINT UNSIGNED NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL +) +STORED AS CSV +WITH HEADER ROW +LOCATION '../../testing/data/csv/aggregate_test_100.csv' ####### # Error tests From ca07cc64d6841b2c867591435da0e853024d53a3 Mon Sep 17 00:00:00 2001 From: jiangzhx Date: Wed, 14 Jun 2023 11:47:42 +0800 Subject: [PATCH 2/4] port register_decimal_table to sql --- .../core/tests/sqllogictests/src/main.rs | 1 - .../core/tests/sqllogictests/src/setup.rs | 13 +----- .../core/tests/sqllogictests/src/utils.rs | 40 ------------------- .../sqllogictests/test_files/aggregate.slt | 5 +++ 4 files changed, 6 insertions(+), 53 deletions(-) delete mode 100644 datafusion/core/tests/sqllogictests/src/utils.rs diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index adb3be9a6e8f0..18ee15cc8753f 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -33,7 +33,6 @@ use crate::engines::postgres::Postgres; mod engines; mod setup; -mod utils; const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files/"; const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_"; diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs b/datafusion/core/tests/sqllogictests/src/setup.rs index 2316e488a9bef..5e6a1d1141ae7 100644 --- a/datafusion/core/tests/sqllogictests/src/setup.rs +++ b/datafusion/core/tests/sqllogictests/src/setup.rs @@ -24,15 +24,13 @@ use datafusion::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }, - datasource::MemTable, prelude::{CsvReadOptions, SessionContext}, - test_util, }; use std::fs::File; use std::io::Write; use std::sync::Arc; -use crate::{utils, TestContext}; +use crate::TestContext; #[cfg(feature = "avro")] pub async fn register_avro_tables(ctx: &mut crate::TestContext) { @@ -67,7 +65,6 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) { } pub async fn register_aggregate_tables(ctx: &SessionContext) { - register_decimal_table(ctx); register_median_test_tables(ctx); register_test_data(ctx); } @@ -173,14 +170,6 @@ fn register_test_data(ctx: &SessionContext) { ctx.register_batch("test", data).unwrap(); } -fn register_decimal_table(ctx: &SessionContext) { - let batch_decimal = utils::make_decimal(); - let schema = batch_decimal.schema(); - let partitions = vec![vec![batch_decimal]]; - let provider = Arc::new(MemTable::try_new(schema, partitions).unwrap()); - ctx.register_table("d_table", provider).unwrap(); -} - pub async fn register_scalar_tables(ctx: &SessionContext) { register_nan_table(ctx) } diff --git a/datafusion/core/tests/sqllogictests/src/utils.rs b/datafusion/core/tests/sqllogictests/src/utils.rs deleted file mode 100644 index 4d064a76e2ad2..0000000000000 --- a/datafusion/core/tests/sqllogictests/src/utils.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use datafusion::arrow::{ - array::{Array, Decimal128Builder}, - datatypes::{Field, Schema}, - record_batch::RecordBatch, -}; -use std::sync::Arc; - -// TODO: move this to datafusion::test_utils? -pub fn make_decimal() -> RecordBatch { - let mut decimal_builder = Decimal128Builder::with_capacity(20); - for i in 110000..110010 { - decimal_builder.append_value(i as i128); - } - for i in 100000..100010 { - decimal_builder.append_value(-i as i128); - } - let array = decimal_builder - .finish() - .with_precision_and_scale(10, 3) - .unwrap(); - let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), true)]); - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap() -} diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index fe16c8e8860cb..fb86c6460136a 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -38,6 +38,11 @@ STORED AS CSV WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +statement ok +CREATE TABLE d_table (c1 decimal(10,3)) as values +(110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009), +(-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009) + ####### # Error tests ####### From 67ca40b18d42a28bc859a58d1f810d4bcf1ba59d Mon Sep 17 00:00:00 2001 From: jiangzhx Date: Wed, 14 Jun 2023 13:37:56 +0800 Subject: [PATCH 3/4] port register_median_test_tables to sql --- .../core/tests/sqllogictests/src/setup.rs | 77 +------------------ .../sqllogictests/test_files/aggregate.slt | 47 +++++++---- 2 files changed, 34 insertions(+), 90 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs b/datafusion/core/tests/sqllogictests/src/setup.rs index 5e6a1d1141ae7..1769461c3b685 100644 --- a/datafusion/core/tests/sqllogictests/src/setup.rs +++ b/datafusion/core/tests/sqllogictests/src/setup.rs @@ -17,10 +17,7 @@ use datafusion::{ arrow::{ - array::{ - ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }, + array::{Float64Array, Int64Array}, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }, @@ -65,81 +62,9 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) { } pub async fn register_aggregate_tables(ctx: &SessionContext) { - register_median_test_tables(ctx); register_test_data(ctx); } -fn register_median_test_tables(ctx: &SessionContext) { - // Register median tables - let items: Vec<(&str, DataType, ArrayRef)> = vec![ - ( - "i8", - DataType::Int8, - Arc::new(Int8Array::from(vec![i8::MIN, i8::MIN, 100, i8::MAX])), - ), - ( - "i16", - DataType::Int16, - Arc::new(Int16Array::from(vec![i16::MIN, i16::MIN, 100, i16::MAX])), - ), - ( - "i32", - DataType::Int32, - Arc::new(Int32Array::from(vec![i32::MIN, i32::MIN, 100, i32::MAX])), - ), - ( - "i64", - DataType::Int64, - Arc::new(Int64Array::from(vec![i64::MIN, i64::MIN, 100, i64::MAX])), - ), - ( - "u8", - DataType::UInt8, - Arc::new(UInt8Array::from(vec![u8::MIN, u8::MIN, 100, u8::MAX])), - ), - ( - "u16", - DataType::UInt16, - Arc::new(UInt16Array::from(vec![u16::MIN, u16::MIN, 100, u16::MAX])), - ), - ( - "u32", - DataType::UInt32, - Arc::new(UInt32Array::from(vec![u32::MIN, u32::MIN, 100, u32::MAX])), - ), - ( - "u64", - DataType::UInt64, - Arc::new(UInt64Array::from(vec![u64::MIN, u64::MIN, 100, u64::MAX])), - ), - ( - "f32", - DataType::Float32, - Arc::new(Float32Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])), - ), - ( - "f64", - DataType::Float64, - Arc::new(Float64Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])), - ), - ( - "f64_nan", - DataType::Float64, - Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, f64::NAN])), - ), - ]; - - for (name, data_type, values) in items { - let batch = RecordBatch::try_new( - Arc::new(Schema::new(vec![Field::new("a", data_type, false)])), - vec![values], - ) - .unwrap(); - let table_name = &format!("median_{name}"); - ctx.register_batch(table_name, batch).unwrap(); - } -} - fn register_test_data(ctx: &SessionContext) { let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Int64, true), diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index fb86c6460136a..f5fedcb390b6c 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -43,6 +43,25 @@ CREATE TABLE d_table (c1 decimal(10,3)) as values (110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009), (-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009) +statement ok +CREATE TABLE median_table ( + col_i8 TINYINT, + col_i16 SMALLINT, + col_i32 INT, + col_i64 BIGINT, + col_u8 TINYINT UNSIGNED, + col_u16 SMALLINT UNSIGNED, + col_u32 INT UNSIGNED, + col_u64 BIGINT UNSIGNED, + col_f32 FLOAT, + col_f64 DOUBLE, + col_f64_nan DOUBLE +) as VALUES +( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 1.1, 1.1, 1.1 ), +( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 4.4, 4.4, arrow_cast('NAN','Float64') ), +( 100, 100, 100, arrow_cast(100,'Int64'), 100,100,100, arrow_cast(100,'UInt64'), 3.3, 3.3, arrow_cast('NAN','Float64') ), +( 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535, 4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64') ) + ####### # Error tests ####### @@ -334,73 +353,73 @@ SELECT median(c12) FROM aggregate_test_100 # median_i8 query I -SELECT median(a) FROM median_i8 +SELECT median(col_i8) FROM median_table ---- -14 # median_i16 query I -SELECT median(a) FROM median_i16 +SELECT median(col_i16) FROM median_table ---- -16334 # median_i32 query I -SELECT median(a) FROM median_i32 +SELECT median(col_i32) FROM median_table ---- -1073741774 # median_i64 query I -SELECT median(a) FROM median_i64 +SELECT median(col_i64) FROM median_table ---- -4611686018427387854 # median_u8 query I -SELECT median(a) FROM median_u8 +SELECT median(col_u8) FROM median_table ---- 50 # median_u16 query I -SELECT median(a) FROM median_u16 +SELECT median(col_u16) FROM median_table ---- 50 # median_u32 query I -SELECT median(a) FROM median_u32 +SELECT median(col_u32) FROM median_table ---- 50 # median_u64 query I -SELECT median(a) FROM median_u64 +SELECT median(col_u64) FROM median_table ---- 50 # median_f32 query R -SELECT median(a) FROM median_f32 +SELECT median(col_f32) FROM median_table ---- -3.3 +2.75 # median_f64 query R -SELECT median(a) FROM median_f64 +SELECT median(col_f64) FROM median_table ---- -3.3 +2.75 # median_f64_nan query R -SELECT median(a) FROM median_f64_nan +SELECT median(col_f64_nan) FROM median_table ---- NaN # approx_median_f64_nan query R -SELECT approx_median(a) FROM median_f64_nan +SELECT approx_median(col_f64_nan) FROM median_table ---- NaN From 0b349f3da79ed5617bcd49c6ef7c27bc0603e76f Mon Sep 17 00:00:00 2001 From: jiangzhx Date: Wed, 14 Jun 2023 13:42:27 +0800 Subject: [PATCH 4/4] port register_test_data to sql --- .../core/tests/sqllogictests/src/main.rs | 4 --- .../core/tests/sqllogictests/src/setup.rs | 36 +------------------ .../sqllogictests/test_files/aggregate.slt | 4 +++ 3 files changed, 5 insertions(+), 39 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 18ee15cc8753f..f71e98b54be83 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -262,10 +262,6 @@ async fn context_for_test_file(relative_path: &Path) -> Option { let file_name = relative_path.file_name().unwrap().to_str().unwrap(); match file_name { - "aggregate.slt" => { - info!("Registering aggregate tables"); - setup::register_aggregate_tables(test_ctx.session_ctx()).await; - } "scalar.slt" => { info!("Registering scalar tables"); setup::register_scalar_tables(test_ctx.session_ctx()).await; diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs b/datafusion/core/tests/sqllogictests/src/setup.rs index 1769461c3b685..34365f509a53f 100644 --- a/datafusion/core/tests/sqllogictests/src/setup.rs +++ b/datafusion/core/tests/sqllogictests/src/setup.rs @@ -17,7 +17,7 @@ use datafusion::{ arrow::{ - array::{Float64Array, Int64Array}, + array::Float64Array, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }, @@ -61,40 +61,6 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) { .unwrap(); } -pub async fn register_aggregate_tables(ctx: &SessionContext) { - register_test_data(ctx); -} - -fn register_test_data(ctx: &SessionContext) { - let schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Int64, true), - Field::new("c2", DataType::Int64, true), - ])); - - let data = RecordBatch::try_new( - schema, - vec![ - Arc::new(Int64Array::from(vec![ - Some(0), - Some(1), - None, - Some(3), - Some(3), - ])), - Arc::new(Int64Array::from(vec![ - None, - Some(1), - Some(1), - Some(2), - Some(2), - ])), - ], - ) - .unwrap(); - - ctx.register_batch("test", data).unwrap(); -} - pub async fn register_scalar_tables(ctx: &SessionContext) { register_nan_table(ctx) } diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index f5fedcb390b6c..bf1ab2cbd1fb1 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -62,6 +62,10 @@ CREATE TABLE median_table ( ( 100, 100, 100, arrow_cast(100,'Int64'), 100,100,100, arrow_cast(100,'UInt64'), 3.3, 3.3, arrow_cast('NAN','Float64') ), ( 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535, 4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64') ) +statement ok +CREATE TABLE test (c1 BIGINT,c2 BIGINT) as values +(0,null), (1,1), (null,1), (3,2), (3,2) + ####### # Error tests #######