From 1280c6edd8e8f7e0c0dcf78f967c6e91064052ee Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 27 Jul 2021 09:31:08 -0400 Subject: [PATCH 1/3] Add a test grouping on null --- datafusion/tests/sql.rs | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index d9f7c6ea41211..0e373c91ffdfc 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -2966,6 +2966,45 @@ async fn query_count_distinct() -> Result<()> { Ok(()) } +#[tokio::test] +async fn query_group_on_null() -> Result<()> { + let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, true)])); + + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![ + Some(0), + Some(3), + None, + Some(1), + Some(3), + ]))], + )?; + + let table = MemTable::try_new(schema, vec![vec![data]])?; + + let mut ctx = ExecutionContext::new(); + ctx.register_table("test", Arc::new(table))?; + let sql = "SELECT COUNT(*), c1 FROM test GROUP BY c1"; + + let actual = execute_to_batches(&mut ctx, sql).await; + + // this is incorrect: the results should also + // include a row for NULL (c1=NULL, count = 1) + // https://github.com/apache/arrow-datafusion/issues/782 + let expected = vec![ + "+-----------------+----+", + "| COUNT(UInt8(1)) | c1 |", + "+-----------------+----+", + "| 2 | 3 |", + "| 2 | 0 |", + "| 1 | 1 |", + "+-----------------+----+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} + #[tokio::test] async fn query_on_string_dictionary() -> Result<()> { // Test to ensure DataFusion can operate on dictionary types @@ -3019,6 +3058,12 @@ async fn query_on_string_dictionary() -> Result<()> { let expected = vec![vec!["2"]]; assert_eq!(expected, actual); + // grouping + let sql = "SELECT d1, COUNT(*) FROM test group by d1"; + let actual = execute(&mut ctx, sql).await; + let expected = vec![vec!["one", "1"], vec!["three", "1"]]; + assert_eq!(expected, actual); + Ok(()) } From 5ed43ac9af6a4255856a7e83f280eeafe1f3afa6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 27 Jul 2021 09:33:12 -0400 Subject: [PATCH 2/3] Remove mistakenly added test --- datafusion/tests/sql.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 0e373c91ffdfc..ea7f7f36c679f 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -3058,12 +3058,6 @@ async fn query_on_string_dictionary() -> Result<()> { let expected = vec![vec!["2"]]; assert_eq!(expected, actual); - // grouping - let sql = "SELECT d1, COUNT(*) FROM test group by d1"; - let actual = execute(&mut ctx, sql).await; - let expected = vec![vec!["one", "1"], vec!["three", "1"]]; - assert_eq!(expected, actual); - Ok(()) } From 9f2067afca1c0d5c9fd4f2ea8806dd4dd4c67257 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 27 Jul 2021 11:00:26 -0400 Subject: [PATCH 3/3] use sort blind comparison --- datafusion/tests/sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index ea7f7f36c679f..ffa25115a4aa0 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -3001,7 +3001,7 @@ async fn query_group_on_null() -> Result<()> { "| 1 | 1 |", "+-----------------+----+", ]; - assert_batches_eq!(expected, &actual); + assert_batches_sorted_eq!(expected, &actual); Ok(()) }