Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions datafusion/core/tests/sqllogictests/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ use crate::engines::postgres::Postgres;

mod engines;
mod setup;
mod utils;

const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files/";
const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_";
Expand Down Expand Up @@ -263,10 +262,6 @@ async fn context_for_test_file(relative_path: &Path) -> Option<TestContext> {

let file_name = relative_path.file_name().unwrap().to_str().unwrap();
match file_name {
"aggregate.slt" => {
info!("Registering aggregate tables");
setup::register_aggregate_tables(test_ctx.session_ctx()).await;
}
"scalar.slt" => {
info!("Registering scalar tables");
setup::register_scalar_tables(test_ctx.session_ctx()).await;
Expand Down
137 changes: 2 additions & 135 deletions datafusion/core/tests/sqllogictests/src/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,17 @@

use datafusion::{
arrow::{
array::{
ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
},
array::Float64Array,
datatypes::{DataType, Field, Schema},
record_batch::RecordBatch,
},
datasource::MemTable,
prelude::{CsvReadOptions, SessionContext},
test_util,
};
use std::fs::File;
use std::io::Write;
use std::sync::Arc;

use crate::{utils, TestContext};
use crate::TestContext;

#[cfg(feature = "avro")]
pub async fn register_avro_tables(ctx: &mut crate::TestContext) {
Expand Down Expand Up @@ -66,134 +61,6 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) {
.unwrap();
}

/// Registers every table needed by the aggregate test file on `ctx`.
///
/// Delegates, in order, to the CSV-backed `aggregate_test_100` helper, the
/// decimal table helper, the median table helper and the `test` table helper.
pub async fn register_aggregate_tables(ctx: &SessionContext) {
    // Only the CSV registration is asynchronous; the rest are in-memory.
    register_aggregate_test_100(ctx).await;

    register_decimal_table(ctx);
    register_median_test_tables(ctx);
    register_test_data(ctx);
}

/// Registers one in-memory table per entry in the list below.
///
/// Every table is named `median_<name>` and has a single non-nullable column
/// called `a` of the listed data type, holding the listed values.
///
/// Panics if a record batch cannot be built or a table cannot be registered.
fn register_median_test_tables(ctx: &SessionContext) {
    // (table-name suffix, column type, column values)
    let items: Vec<(&str, DataType, ArrayRef)> = vec![
        (
            "i8",
            DataType::Int8,
            Arc::new(Int8Array::from(vec![i8::MIN, i8::MIN, 100, i8::MAX])),
        ),
        (
            "i16",
            DataType::Int16,
            Arc::new(Int16Array::from(vec![i16::MIN, i16::MIN, 100, i16::MAX])),
        ),
        (
            "i32",
            DataType::Int32,
            Arc::new(Int32Array::from(vec![i32::MIN, i32::MIN, 100, i32::MAX])),
        ),
        (
            "i64",
            DataType::Int64,
            Arc::new(Int64Array::from(vec![i64::MIN, i64::MIN, 100, i64::MAX])),
        ),
        (
            "u8",
            DataType::UInt8,
            Arc::new(UInt8Array::from(vec![u8::MIN, u8::MIN, 100, u8::MAX])),
        ),
        (
            "u16",
            DataType::UInt16,
            Arc::new(UInt16Array::from(vec![u16::MIN, u16::MIN, 100, u16::MAX])),
        ),
        (
            "u32",
            DataType::UInt32,
            Arc::new(UInt32Array::from(vec![u32::MIN, u32::MIN, 100, u32::MAX])),
        ),
        (
            "u64",
            DataType::UInt64,
            Arc::new(UInt64Array::from(vec![u64::MIN, u64::MIN, 100, u64::MAX])),
        ),
        (
            "f32",
            DataType::Float32,
            Arc::new(Float32Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
        ),
        (
            "f64",
            DataType::Float64,
            Arc::new(Float64Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
        ),
        (
            "f64_nan",
            DataType::Float64,
            Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, f64::NAN])),
        ),
    ];

    items.into_iter().for_each(|(name, data_type, values)| {
        // Single-column schema: `a`, not nullable.
        let schema = Arc::new(Schema::new(vec![Field::new("a", data_type, false)]));
        let batch = RecordBatch::try_new(schema, vec![values]).unwrap();
        ctx.register_batch(&format!("median_{name}"), batch).unwrap();
    });
}

/// Registers the in-memory table `test` with two nullable `Int64` columns.
///
/// Rows (`c1`, `c2`): (0, NULL), (1, 1), (NULL, 1), (3, 2), (3, 2).
///
/// Panics if the record batch cannot be built or the table cannot be registered.
fn register_test_data(ctx: &SessionContext) {
    let c1 = Int64Array::from(vec![Some(0), Some(1), None, Some(3), Some(3)]);
    let c2 = Int64Array::from(vec![None, Some(1), Some(1), Some(2), Some(2)]);

    let fields = vec![
        Field::new("c1", DataType::Int64, true),
        Field::new("c2", DataType::Int64, true),
    ];

    let data = RecordBatch::try_new(
        Arc::new(Schema::new(fields)),
        vec![Arc::new(c1), Arc::new(c2)],
    )
    .unwrap();

    ctx.register_batch("test", data).unwrap();
}

/// Registers the batch produced by `utils::make_decimal()` as the in-memory
/// table `d_table`, using the batch's own schema and a single partition.
///
/// Panics if the `MemTable` cannot be created or the table cannot be registered.
fn register_decimal_table(ctx: &SessionContext) {
    let batch = utils::make_decimal();
    // `schema()` is evaluated before `batch` is moved into the partition list.
    let table = MemTable::try_new(batch.schema(), vec![vec![batch]]).unwrap();
    ctx.register_table("d_table", Arc::new(table)).unwrap();
}

/// Registers `<test data dir>/csv/aggregate_test_100.csv` as the table
/// `aggregate_test_100`, read with the schema from `test_util::aggr_test_schema()`.
///
/// Panics if the CSV registration fails.
async fn register_aggregate_test_100(ctx: &SessionContext) {
    let test_data = test_util::arrow_test_data();
    let schema = test_util::aggr_test_schema();

    let csv_path = format!("{test_data}/csv/aggregate_test_100.csv");
    let read_options = CsvReadOptions::new().schema(&schema);

    ctx.register_csv("aggregate_test_100", &csv_path, read_options)
        .await
        .unwrap();
}

/// Registers the tables needed by the scalar test file on `ctx`.
///
/// Currently this only forwards to `register_nan_table`.
pub async fn register_scalar_tables(ctx: &SessionContext) {
    register_nan_table(ctx)
}
Expand Down
40 changes: 0 additions & 40 deletions datafusion/core/tests/sqllogictests/src/utils.rs

This file was deleted.

78 changes: 64 additions & 14 deletions datafusion/core/tests/sqllogictests/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,56 @@
# specific language governing permissions and limitations
# under the License.

#######
# Setup test data table
#######
# aggregate_test_100: external CSV table (with header row) backed by
# '../../testing/data/csv/aggregate_test_100.csv'; columns c1..c13.
statement ok
CREATE EXTERNAL TABLE aggregate_test_100 (
c1 VARCHAR NOT NULL,
c2 TINYINT NOT NULL,
c3 SMALLINT NOT NULL,
c4 SMALLINT,
c5 INT,
c6 BIGINT NOT NULL,
c7 SMALLINT NOT NULL,
c8 INT NOT NULL,
c9 INT UNSIGNED NOT NULL,
c10 BIGINT UNSIGNED NOT NULL,
c11 FLOAT NOT NULL,
c12 DOUBLE NOT NULL,
c13 VARCHAR NOT NULL
)
STORED AS CSV
WITH HEADER ROW
LOCATION '../../testing/data/csv/aggregate_test_100.csv'

# d_table: one decimal(10,3) column with 20 rows,
# 110.000 through 110.009 and -100.000 through -100.009.
statement ok
CREATE TABLE d_table (c1 decimal(10,3)) as values
(110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009),
(-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009)

# median_table: one column per numeric type, four rows.
# Integer columns hold (MIN, MIN, 100, MAX) of their type; the float columns
# hold 1.1, 4.4, 3.3, 2.2; col_f64_nan holds 1.1 followed by three NaN values.
statement ok
CREATE TABLE median_table (
col_i8 TINYINT,
col_i16 SMALLINT,
col_i32 INT,
col_i64 BIGINT,
col_u8 TINYINT UNSIGNED,
col_u16 SMALLINT UNSIGNED,
col_u32 INT UNSIGNED,
col_u64 BIGINT UNSIGNED,
col_f32 FLOAT,
col_f64 DOUBLE,
col_f64_nan DOUBLE
) as VALUES
( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 1.1, 1.1, 1.1 ),
( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 4.4, 4.4, arrow_cast('NAN','Float64') ),
( 100, 100, 100, arrow_cast(100,'Int64'), 100,100,100, arrow_cast(100,'UInt64'), 3.3, 3.3, arrow_cast('NAN','Float64') ),
( 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535, 4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64') )

# test: two BIGINT columns, five rows, with one NULL in each column.
statement ok
CREATE TABLE test (c1 BIGINT,c2 BIGINT) as values
(0,null), (1,1), (null,1), (3,2), (3,2)

#######
# Error tests
Expand Down Expand Up @@ -307,73 +357,73 @@ SELECT median(c12) FROM aggregate_test_100

# median_i8
query I
SELECT median(a) FROM median_i8
SELECT median(col_i8) FROM median_table
----
-14

# median_i16
query I
SELECT median(a) FROM median_i16
SELECT median(col_i16) FROM median_table
----
-16334

# median_i32
query I
SELECT median(a) FROM median_i32
SELECT median(col_i32) FROM median_table
----
-1073741774

# median_i64
query I
SELECT median(a) FROM median_i64
SELECT median(col_i64) FROM median_table
----
-4611686018427387854

# median_u8
query I
SELECT median(a) FROM median_u8
SELECT median(col_u8) FROM median_table
----
50

# median_u16
query I
SELECT median(a) FROM median_u16
SELECT median(col_u16) FROM median_table
----
50

# median_u32
query I
SELECT median(a) FROM median_u32
SELECT median(col_u32) FROM median_table
----
50

# median_u64
query I
SELECT median(a) FROM median_u64
SELECT median(col_u64) FROM median_table
----
50

# median_f32
query R
SELECT median(a) FROM median_f32
SELECT median(col_f32) FROM median_table
----
3.3
2.75
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The expected value changes from 3.3 to 2.75
because the test data has been changed from
1.1, 4.4, 5.5, 3.3, 2.2
to
1.1, 4.4, 3.3, 2.2
This makes it easier to create all the test data in one table, because the other test data only has 4 rows.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this makes sense -- thank you for the explanation.


# median_f64
query R
SELECT median(a) FROM median_f64
SELECT median(col_f64) FROM median_table
----
3.3
2.75
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change from 3.3 to 2.75 has the same cause as described in the previous comment.


# median_f64_nan
query R
SELECT median(a) FROM median_f64_nan
SELECT median(col_f64_nan) FROM median_table
----
NaN

# approx_median_f64_nan
query R
SELECT approx_median(a) FROM median_f64_nan
SELECT approx_median(col_f64_nan) FROM median_table
----
NaN

Expand Down