diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 1f43c5f8e154e..7eebb23d9cc9c 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -19,108 +19,294 @@ ## Array expressions Tests ############# + +### Tables + + +statement ok +CREATE TABLE values( + a INT, + b INT, + c INT, + d FLOAT, + e VARCHAR +) AS VALUES + (1, 1, 2, 1.1, 'Lorem'), + (2, 3, 4, 2.2, 'ipsum'), + (3, 5, 6, 3.3, 'dolor'), + (4, 7, 8, 4.4, 'sit'), + (NULL, 9, 10, 5.5, 'amet'), + (5, NULL, 12, 6.6, ','), + (6, 11, NULL, 7.7, 'consectetur'), + (7, 13, 14, NULL, 'adipiscing'), + (8, 15, 16, 8.8, NULL) +; + +statement ok +CREATE TABLE arrays +AS VALUES + (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')), + (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')), + (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')), + (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')), + (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')), + (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')), + (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL) +; + +statement ok +CREATE TABLE arrays_values +AS VALUES + (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'), + (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'), + (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'), + (NULL, 44, 5, '@'), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'), + (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL) +; + +statement ok +CREATE TABLE arrays_values_without_nulls +AS VALUES + (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'), + (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'), + (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok') +; + +# arrays table +query ??? +select column1, column2, column3 from arrays; +---- +[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r] +[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t] +NULL [13.3, 14.4, 15.5] [a, m, e, t] +[[11, 12], [13, 14]] NULL [,] +[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL + +# values table +query IIIRT +select a, b, c, d, e from values; +---- +1 1 2 1.1 Lorem +2 3 4 2.2 ipsum +3 5 6 3.3 dolor +4 7 8 4.4 sit +NULL 9 10 5.5 amet +5 NULL 12 6.6 , +6 11 NULL 7.7 consectetur +7 13 14 NULL adipiscing +8 15 16 8.8 NULL + +# arrays_values table +query ?IIT +select column1, column2, column3, column4 from arrays_values; +---- +[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 . +[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok +NULL 44 5 @ +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ +[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL + +# arrays_values_without_nulls table +query ?II +select column1, column2, column3 from arrays_values_without_nulls; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 + +### Array function tests + + ## make_array -# array scalar function #1 -query ??? rowsort +# make_array scalar function #1 +query ??? select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); ---- [1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] -# array scalar function #2 -query ??? rowsort +# make_array scalar function #2 +query ??? select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); ---- [1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] -# array scalar function #3 -query ?? rowsort +# make_array scalar function #3 +query ?? select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); ---- [[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] -# array scalar function #4 -query ?? rowsort +# make_array scalar function #4 +query ?? select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); ---- [[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] -# array scalar function #5 -query ? rowsort +# make_array scalar function #5 +query ? select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) ---- [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] -# array scalar function #6 -query ? rowsort +# make_array scalar function #6 +query ? select make_array() ---- [] -# array scalar function #7 -query ?? rowsort +# make_array scalar function #7 +query ?? select make_array(make_array()), make_array(make_array(make_array())) ---- [[]] [[[]]] -# array scalar function with nulls -query ??? rowsort +# make_array scalar function with nulls +query ??? select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); ---- [1, , 3] [, 2.0, ] [h, , l, , o] -# array scalar function with nulls #2 -query ?? rowsort +# make_array scalar function with nulls #2 +query ?? select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); ---- [1, 2, ] [[, 2], [, 3]] -# array scalar function with nulls #3 -query ??? rowsort +# make_array scalar function with nulls #3 +query ??? select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); ---- [] [] [[], []] +# make_array with columns #1 +query ???? +select make_array(a), make_array(b, c), make_array(d), make_array(e) from values; +---- +[1] [1, 2] [1.1] [Lorem] +[2] [3, 4] [2.2] [ipsum] +[3] [5, 6] [3.3] [dolor] +[4] [7, 8] [4.4] [sit] +[0] [9, 10] [5.5] [amet] +[5] [0, 12] [6.6] [,] +[6] [11, 0] [7.7] [consectetur] +[7] [13, 14] [0.0] [adipiscing] +[8] [15, 16] [8.8] [] + +# make_array with columns #2 +query ? +select make_array(a, b, c, d) from values; +---- +[1.0, 1.0, 2.0, 1.1] +[2.0, 3.0, 4.0, 2.2] +[3.0, 5.0, 6.0, 3.3] +[4.0, 7.0, 8.0, 4.4] +[0.0, 9.0, 10.0, 5.5] +[5.0, 0.0, 12.0, 6.6] +[6.0, 11.0, 0.0, 7.7] +[7.0, 13.0, 14.0, 0.0] +[8.0, 15.0, 16.0, 8.8] + ## array_append # array_append scalar function #2 -query ? rowsort +query ? select array_append(make_array(), 4); ---- [4] # array_append scalar function #2 -query ?? rowsort +query ?? select array_append(make_array(), make_array()), array_append(make_array(), make_array(4)); ---- [[]] [[4]] # array_append scalar function #3 -query ??? rowsort +query ??? select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +# array_append with columns +query ? +select array_append(column1, column2) from arrays_values; +---- +[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] +[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] +[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + +# array_append with columns and scalars +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[, 5.5, 6.6, 100.1] [i, p, , u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] +[10.1, , 12.2, 100.1] [s, i, t, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] +[100.1] [,, .] +[16.6, 17.7, 18.8, 100.1] [.] + ## array_prepend # array_prepend scalar function #1 -query ? rowsort +query ? select array_prepend(4, make_array()); ---- [4] # array_prepend scalar function #2 -query ?? rowsort +query ?? select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array()); ---- [[]] [[4]] # array_prepend scalar function #3 -query ??? rowsort +query ??? select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +# array_prepend with columns +query ? +select array_prepend(column2, column1) from arrays_values; +---- +[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] +[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[44] +[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +# array_prepend with columns and scalars +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, , 5.5, 6.6] [., i, p, , u, m] +[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] +[100.1, 10.1, , 12.2] [., s, i, t] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] +[100.1] [., ,] +[100.1, 16.6, 17.7, 18.8] [.] + ## array_fill # array_fill scalar function #1 @@ -144,37 +330,37 @@ select array_fill(1, make_array()) ## array_concat # array_concat scalar function #1 -query ?? rowsort +query ?? select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); ---- [1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] # array_concat scalar function #2 -query ? rowsort +query ? select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); ---- [[1, 2], [3, 4], [5, 6], [7, 8]] # array_concat scalar function #3 -query ? rowsort +query ? select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); ---- [[1], [2], [3], [4], [5], [6], [7], [8], [9]] # array_concat scalar function #4 -query ? rowsort +query ? select array_concat(make_array([[1]]), make_array([[2]])); ---- [[[1]], [[2]]] # array_concat scalar function #5 -query ? rowsort +query ? select array_concat(make_array(2, 3), make_array()); ---- [2, 3] # array_concat scalar function #6 -query ? rowsort +query ? select array_concat(make_array(), make_array(2, 3)); ---- [2, 3] @@ -193,12 +379,50 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, ---- 4 5 2 +# array_position with columns +query II +select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; +---- +1 1 +2 2 +3 3 +4 4 + +# array_position with columns and scalars +query II +select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; +---- +3 NULL +NULL NULL +NULL NULL +NULL NULL + +## array_positions + # array_positions scalar function -query ??? rowsort +query ??? select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); ---- [3, 4] [5] [1, 2, 3] +# array_positions with columns +query ? +select array_positions(column1, column2) from arrays_values_without_nulls; +---- +[1] +[2] +[3] +[4] + +# array_positions with columns and scalars +query ?? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; +---- +[4] [1] +[] [] +[] [3] +[] [] + ## array_replace # array_replace scalar function @@ -210,7 +434,7 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, ## array_to_string # array_to_string scalar function -query TTT rowsort +query TTT select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); ---- h,e,l,l,o 1-2-3-4-5 1|2|3 @@ -228,13 +452,13 @@ Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name: select array_to_string(make_array(), ',') # array_to_string scalar function with nulls #1 -query TTT rowsort +query TTT select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); ---- h,l,o 1-3-5 2|3 # array_to_string scalar function with nulls #2 -query TTT rowsort +query TTT select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); ---- h,-,-,-,o nil-2-nil-4-5 1|0|3 @@ -288,19 +512,19 @@ select trim_array(make_array(), 0), trim_array(make_array(), 1) ## array_length # array_length scalar function -query III rowsort +query III select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); ---- 5 3 3 # array_length scalar function #2 -query III rowsort +query III select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); ---- 5 3 3 # array_length scalar function #3 -query III rowsort +query III select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); ---- NULL NULL 2 @@ -312,7 +536,7 @@ Error during planning: Cannot automatically convert List\(Field \{ name: "item", select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4); # array_length scalar function #5 -query III rowsort +query III select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2) ---- 0 0 NULL @@ -337,14 +561,14 @@ caused by Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8 select array_dims(make_array()), array_dims(make_array(make_array())) +## array_ndims + # array_ndims scalar function -query III rowsort +query III select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]])); ---- 1 2 5 -## array_ndims - # array_ndims scalar function #2 query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) caused by @@ -352,183 +576,106 @@ Error during planning: Cannot automatically convert List\(Field \{ name: "item", select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); # array_ndims scalar function #3 -query II rowsort +query II select array_ndims(make_array()), array_ndims(make_array(make_array())) ---- 1 2 -## array concatenate opeartor - -# array concatenate operator #1 (like array_concat scalar function) -query ?? rowsort -select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array concatenate operator #2 (like array_append scalar function) -query ??? rowsort -select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array concatenate operator #3 (like array_prepend scalar function) -query ??? rowsort -select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# make_array - -query ? -select make_array(1, 2.0) ----- -[1.0, 2.0] - -query ? -select make_array(null, 1.0) ----- -[, 1.0] - -query ? -select make_array(1, 2.0, null, 3) ----- -[1.0, 2.0, , 3.0] - -query ? -select make_array(1.0, '2', null) ----- -[1.0, 2, ] - -statement ok -create table foo1 (x int, y double) as values (1, 2.0); - -query ? -select make_array(x, y) from foo1; ----- -[1.0, 2.0] - -statement ok -create table foo2 (x float, y varchar) as values (1.0, '1'); - -query ? -select make_array(x, y) from foo2; ----- -[1.0, 1] - -# array_contains +## array_contains # array_contains scalar function #1 -query BBB rowsort +query BBB select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]); ---- true true true # array_contains scalar function #2 -query BB rowsort +query BB select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]); ---- true true # array_contains scalar function #3 -query BBB rowsort +query BBB select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]); ---- false false false # array_contains scalar function #4 -query BB rowsort +query BB select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]); ---- false false # array_contains scalar function #5 -query BB rowsort +query BB select array_contains([true, true, false, true, false], [true, false, false]), array_contains([true, false, true], [true, true]); ---- true true # array_contains scalar function #6 -query BB rowsort +query BB select array_contains(make_array(true, true, true), make_array(false, false)), array_contains([false, false, false], [true, true]); ---- false false -## array_contains array +### Array operators tests -statement ok -CREATE TABLE t -AS VALUES -(make_array(1,2,3), make_array(1,2,3)), -(make_array(1,2,3), make_array(2,3)), -(make_array(2,3), make_array(1,2,3)), -(null, make_array(1,2,3)), -(make_array(2,3), null) -; +## array concatenate operator +# array concatenate operator with scalars #1 (like array_concat scalar function) query ?? -SELECT - column1, - column2 -FROM t +select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); ---- -[1, 2, 3] [1, 2, 3] -[1, 2, 3] [2, 3] -[2, 3] [1, 2, 3] -NULL [1, 2, 3] -[2, 3] NULL - +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] -# incorrect answer (one row) to https://github.com/apache/arrow-datafusion/issues/6709 +# array concatenate operator with scalars #2 (like array_append scalar function) +query ??? +select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] -query BB -SELECT - array_contains(column1, column2) as c12, - array_contains(column1, column2) as c21 -FROM t +# array concatenate operator with scalars #3 (like array_prepend scalar function) +query ??? +select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); ---- -true true +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] -statement ok -drop table t +### Array casting tests -## array_contains array (nested) +## make_array +# make_array scalar function #1 +query ? +select make_array(1, 2.0) +---- +[1.0, 2.0] -statement ok -CREATE TABLE t -AS VALUES -(make_array(make_array(2),make_array(3,4)), make_array(3,4)), -(make_array(make_array(2),make_array(3,4)), make_array(4,3)), -(make_array(make_array(2),make_array(3,4)), make_array(3)), -(make_array(make_array(2),make_array(3,4)), make_array(2)) -; +# make_array scalar function #2 +query ? +select make_array(null, 1.0) +---- +[, 1.0] +# make_array scalar function #3 +query ? +select make_array(1, 2.0, null, 3) +---- +[1.0, 2.0, , 3.0] -query ?? -SELECT - column1, - column2 -FROM t +# make_array scalar function #4 +query ? +select make_array(1.0, '2', null) ---- -[[2], [3, 4]] [3, 4] -[[2], [3, 4]] [4, 3] -[[2], [3, 4]] [3] -[[2], [3, 4]] [2] +[1.0, 2, ] -# incorrect answer (one row) to https://github.com/apache/arrow-datafusion/issues/6709 +### FixedSizeListArray -query BB -SELECT - array_contains(column1, column2) as c12, - array_contains(column1, column2) as c21 -FROM t ----- -true true statement ok CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION 'tests/data/fixed_size_list_array.parquet'; @@ -566,3 +713,19 @@ query ? select make_array(f0) from fixed_size_list_array ---- [[1, 2], [3, 4]] + + +### Delete tables + + +statement ok +drop table values; + +statement ok +drop table arrays; + +statement ok +drop table arrays_values; + +statement ok +drop table arrays_values_without_nulls; diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 69054622757d6..103f1047fa48c 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -488,9 +488,9 @@ impl BuiltinScalarFunction { )))), BuiltinScalarFunction::ArrayLength => Ok(UInt8), BuiltinScalarFunction::ArrayNdims => Ok(UInt8), - BuiltinScalarFunction::ArrayPosition => Ok(UInt8), + BuiltinScalarFunction::ArrayPosition => Ok(UInt64), BuiltinScalarFunction::ArrayPositions => { - Ok(List(Arc::new(Field::new("item", UInt8, true)))) + Ok(List(Arc::new(Field::new("item", UInt64, true)))) } BuiltinScalarFunction::ArrayPrepend => Ok(List(Arc::new(Field::new( "item", diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index bddeef526a4d0..cd174918db379 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -18,7 +18,7 @@ //! Array expressions use arrow::array::*; -use arrow::buffer::Buffer; +use arrow::buffer::{Buffer, OffsetBuffer}; use arrow::compute; use arrow::datatypes::{DataType, Field}; use core::any::type_name; @@ -197,15 +197,53 @@ pub fn make_array(values: &[ColumnarValue]) -> Result { macro_rules! append { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let mut offsets: Vec = vec![0]; + let mut values = + downcast_arg!(new_empty_array($ELEMENT.data_type()), $ARRAY_TYPE).clone(); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); - let cat = compute::concat(&[child_array, element])?; - let mut scalars = vec![]; - for i in 0..cat.len() { - scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?)); + for (arr, el) in $ARRAY.iter().zip(element.iter()) { + let last_offset: i32 = offsets.last().copied().ok_or_else(|| { + DataFusionError::Internal(format!("offsets should not be empty",)) + })?; + match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + values = downcast_arg!( + compute::concat(&[ + &values, + child_array, + &$ARRAY_TYPE::from(vec![el]) + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.push(last_offset + child_array.len() as i32 + 1i32); + } + None => { + values = downcast_arg!( + compute::concat(&[ + &values, + &$ARRAY_TYPE::from(vec![el.clone()]) + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.push(last_offset + 1i32); + } + } } - scalars + + let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), true)); + + Arc::new(ListArray::try_new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + )?) }}; } @@ -221,7 +259,7 @@ pub fn array_append(args: &[ArrayRef]) -> Result { let arr = as_list_array(&args[0])?; let element = &args[1]; - let scalars = match (arr.value_type(), element.data_type()) { + let res = match (arr.value_type(), element.data_type()) { (DataType::Utf8, DataType::Utf8) => append!(arr, element, StringArray), (DataType::LargeUtf8, DataType::LargeUtf8) => append!(arr, element, LargeStringArray), (DataType::Boolean, DataType::Boolean) => append!(arr, element, BooleanArray), @@ -243,20 +281,58 @@ pub fn array_append(args: &[ArrayRef]) -> Result { } }; - Ok(array(scalars.as_slice())?.into_array(1)) + Ok(res) } macro_rules! prepend { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let mut offsets: Vec = vec![0]; + let mut values = + downcast_arg!(new_empty_array($ELEMENT.data_type()), $ARRAY_TYPE).clone(); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); - let cat = compute::concat(&[element, child_array])?; - let mut scalars = vec![]; - for i in 0..cat.len() { - scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?)); + for (arr, el) in $ARRAY.iter().zip(element.iter()) { + let last_offset: i32 = offsets.last().copied().ok_or_else(|| { + DataFusionError::Internal(format!("offsets should not be empty",)) + })?; + match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + values = downcast_arg!( + compute::concat(&[ + &values, + &$ARRAY_TYPE::from(vec![el]), + child_array + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.push(last_offset + child_array.len() as i32 + 1i32); + } + None => { + values = downcast_arg!( + compute::concat(&[ + &values, + &$ARRAY_TYPE::from(vec![el.clone()]) + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.push(last_offset + 1i32); + } + } } - scalars + + let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), true)); + + Arc::new(ListArray::try_new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + )?) }}; } @@ -272,7 +348,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { let element = &args[0]; let arr = as_list_array(&args[1])?; - let scalars = match (arr.value_type(), element.data_type()) { + let res = match (arr.value_type(), element.data_type()) { (DataType::Utf8, DataType::Utf8) => prepend!(arr, element, StringArray), (DataType::LargeUtf8, DataType::LargeUtf8) => prepend!(arr, element, LargeStringArray), (DataType::Boolean, DataType::Boolean) => prepend!(arr, element, BooleanArray), @@ -294,7 +370,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { } }; - Ok(array(scalars.as_slice())?.into_array(1)) + Ok(res) } /// Array_concat/Array_cat SQL function @@ -420,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) -> Result { macro_rules! position { ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); - let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); - - match child_array + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); + $ARRAY .iter() - .skip($INDEX) - .position(|x| x == Some(element)) - { - Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some( - (value + $INDEX + 1) as u8, - )))), - None => Ok(ColumnarValue::Scalar(ScalarValue::Null)), - } + .zip(element.iter()) + .zip($INDEX.iter()) + .map(|((arr, el), i)| { + let index = match i { + Some(i) => { + if i <= 0 { + 0 + } else { + i - 1 + } + } + None => { + return Err(DataFusionError::Execution( + "initial position must not be null".to_string(), + )) + } + }; + + match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + + match child_array + .iter() + .skip(index as usize) + .position(|x| x == el) + { + Some(value) => Ok(Some(value as u64 + index as u64 + 1u64)), + None => Ok(None), + } + } + None => Ok(None), + } + }) + .collect::>()? }}; } /// Array_position SQL function -pub fn array_position(args: &[ColumnarValue]) -> Result { - let arr = match &args[0] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - ColumnarValue::Array(arr) => arr.clone(), - }; - - let element = match &args[1] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - _ => { - return Err(DataFusionError::Internal( - "Array_position function requires scalar element".to_string(), - )) - } - }; +pub fn array_position(args: &[ArrayRef]) -> Result { + let arr = as_list_array(&args[0])?; + let element = &args[1]; - let mut index: usize = 0; + let mut index = Int64Array::from_value(0, arr.len()); if args.len() == 3 { - let scalar = - match &args[2] { - ColumnarValue::Scalar(scalar) => scalar.clone(), - _ => return Err(DataFusionError::Internal( - "Array_position function requires positive integer scalar element" - .to_string(), - )), - }; - - index = - match scalar { - ScalarValue::Int8(Some(value)) => value as usize, - ScalarValue::Int16(Some(value)) => value as usize, - ScalarValue::Int32(Some(value)) => value as usize, - ScalarValue::Int64(Some(value)) => value as usize, - ScalarValue::UInt8(Some(value)) => value as usize, - ScalarValue::UInt16(Some(value)) => value as usize, - ScalarValue::UInt32(Some(value)) => value as usize, - ScalarValue::UInt64(Some(value)) => value as usize, - _ => return Err(DataFusionError::Internal( - "Array_position function requires positive integer scalar element" - .to_string(), - )), - }; - - if index == 0 { - index = 0; - } else { - index -= 1; - } + index = as_int64_array(&args[2])?.clone(); } - match arr.data_type() { + let res = match arr.data_type() { DataType::List(field) => match field.data_type() { DataType::Utf8 => position!(arr, element, index, StringArray), DataType::LargeUtf8 => position!(arr, element, index, LargeStringArray), @@ -502,50 +562,75 @@ pub fn array_position(args: &[ColumnarValue]) -> Result { DataType::UInt16 => position!(arr, element, index, UInt16Array), DataType::UInt32 => position!(arr, element, index, UInt32Array), DataType::UInt64 => position!(arr, element, index, UInt64Array), - data_type => Err(DataFusionError::NotImplemented(format!( - "Array_position is not implemented for types '{data_type:?}'." - ))), + data_type => { + return Err(DataFusionError::NotImplemented(format!( + "Array_position is not implemented for types '{data_type:?}'." + ))) + } }, - data_type => Err(DataFusionError::NotImplemented(format!( - "Array is not type '{data_type:?}'." - ))), - } + data_type => { + return Err(DataFusionError::NotImplemented(format!( + "Array is not type '{data_type:?}'." + ))) + } + }; + + Ok(Arc::new(res)) } macro_rules! positions { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); - let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); - - let mut res = vec![]; - for (i, x) in child_array.iter().enumerate() { - if x == Some(element) { - res.push(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![ - Some((i + 1) as u8), - ])))); - } + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); + let mut offsets: Vec = vec![0]; + let mut values = + downcast_arg!(new_empty_array(&DataType::UInt64), UInt64Array).clone(); + for comp in $ARRAY + .iter() + .zip(element.iter()) + .map(|(arr, el)| match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + let res = child_array + .iter() + .enumerate() + .filter(|(_, x)| *x == el) + .flat_map(|(i, _)| Some((i + 1) as u64)) + .collect::(); + + Ok(res) + } + None => Ok(downcast_arg!( + new_empty_array(&DataType::UInt64), + UInt64Array + ) + .clone()), + }) + .collect::>>()? + { + let last_offset: i32 = offsets.last().copied().ok_or_else(|| { + DataFusionError::Internal(format!("offsets should not be empty",)) + })?; + values = + downcast_arg!(compute::concat(&[&values, &comp,])?.clone(), UInt64Array) + .clone(); + offsets.push(last_offset + comp.len() as i32); } - res + let field = Arc::new(Field::new("item", DataType::UInt64, true)); + + Arc::new(ListArray::try_new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + )?) }}; } /// Array_positions SQL function -pub fn array_positions(args: &[ColumnarValue]) -> Result { - let arr = match &args[0] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - ColumnarValue::Array(arr) => arr.clone(), - }; - - let element = match &args[1] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - _ => { - return Err(DataFusionError::Internal( - "Array_positions function requires scalar element".to_string(), - )) - } - }; +pub fn array_positions(args: &[ArrayRef]) -> Result { + let arr = as_list_array(&args[0])?; + let element = &args[1]; let res = match arr.data_type() { DataType::List(field) => match field.data_type() { @@ -575,7 +660,7 @@ pub fn array_positions(args: &[ColumnarValue]) -> Result { } }; - array(res.as_slice()) + Ok(res) } macro_rules! remove { @@ -1465,29 +1550,22 @@ mod tests { #[test] fn test_array_position() { // array_position([1, 2, 3, 4], 3) = 3 - let list_array = return_array(); - let array = array_position(&[ - list_array, - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ]) - .expect("failed to initialize function array_position") - .into_array(1); - let result = - as_uint8_array(&array).expect("failed to initialize function array_position"); + let list_array = return_array().into_array(1); + let array = array_position(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) + .expect("failed to initialize function array_position"); + let result = as_uint64_array(&array) + .expect("failed to initialize function array_position"); - assert_eq!(result, &UInt8Array::from(vec![3])); + assert_eq!(result, &UInt64Array::from(vec![3])); } #[test] fn test_array_positions() { // array_positions([1, 2, 3, 4], 3) = [3] - let list_array = return_array(); - let array = array_positions(&[ - list_array, - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ]) - .expect("failed to initialize function array_position") - .into_array(1); + let list_array = return_array().into_array(1); + let array = + array_positions(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) + .expect("failed to initialize function array_position"); let result = as_list_array(&array).expect("failed to initialize function array_position"); @@ -1497,7 +1575,7 @@ mod tests { result .value(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap() .values() ); diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 08916d89c9862..215582a1a8be9 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -423,11 +423,11 @@ pub fn create_physical_fun( Arc::new(|args| make_scalar_function(array_expressions::array_ndims)(args)) } BuiltinScalarFunction::ArrayPosition => { - Arc::new(array_expressions::array_position) - } - BuiltinScalarFunction::ArrayPositions => { - Arc::new(array_expressions::array_positions) + Arc::new(|args| make_scalar_function(array_expressions::array_position)(args)) } + BuiltinScalarFunction::ArrayPositions => Arc::new(|args| { + make_scalar_function(array_expressions::array_positions)(args) + }), BuiltinScalarFunction::ArrayPrepend => { Arc::new(|args| make_scalar_function(array_expressions::array_prepend)(args)) }