From 2941e7196c6713f21008909d9abc331a37c6fdc7 Mon Sep 17 00:00:00 2001 From: izveigor Date: Fri, 30 Jun 2023 12:16:46 +0300 Subject: [PATCH 1/4] test: sqllogictests with columns for array_append, array_prepend, array_position and array_positions --- .../tests/sqllogictests/test_files/array.slt | 478 ++++++++++++------ 1 file changed, 315 insertions(+), 163 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 0d99e6cbb3a1d..dedbe025150af 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -19,108 +19,216 @@ ## Array expressions Tests ############# + +### Tables + + +statement ok +CREATE TABLE values( + a INT, + b INT, + c INT, + d FLOAT, + e VARCHAR +) AS VALUES + (1, 1, 2, 1.1, 'Lorem'), + (2, 3, 4, 2.2, 'ipsum'), + (3, 5, 6, 3.3, 'dolor'), + (4, 7, 8, 4.4, 'sit') +; + +statement ok +CREATE TABLE arrays +AS VALUES + (make_array(make_array(1,2),make_array(3,4)), make_array(1.1,2.2,3.3), make_array('L', 'o', 'r', 'e', 'm')), + (make_array(make_array(3,4),make_array(5,6)), make_array(4.4,5.5,6.6), make_array('i', 'p', 's', 'u', 'm')), + (make_array(make_array(5,6),make_array(7,8)), make_array(7.7,8.8,9.9), make_array('d', 'o', 'l', 'o', 'r')), + (make_array(make_array(7,8),make_array(9,10)), make_array(10.1,11.1,12.2), make_array('s', 'i', 't')) +; + +statement ok +CREATE TABLE arrays_values +AS VALUES + (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'), + (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'), + (make_array(31, 32, 33, 34, 35, 36, 37, 38, 39, 40), 34, 4, 'ok') +; + +# arrays table +query ??? +select column1, column2, column3 from arrays; +---- +[[1, 2], [3, 4]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [4.4, 5.5, 6.6] [i, p, s, u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, o, l, o, r] +[[7, 8], [9, 10]] [10.1, 11.1, 12.2] [s, i, t] + +# values table +query IIIRT +select a, b, c, d, e from values; +---- +1 1 2 1.1 Lorem +2 3 4 2.2 ipsum +3 5 6 3.3 dolor +4 7 8 4.4 sit + +# arrays_values table +query ?II +select column1, column2, column3 from arrays_values; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 +[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 +[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 +[31, 32, 33, 34, 35, 36, 37, 38, 39, 40] 34 4 + +### Array function tests + + ## make_array -# array scalar function #1 -query ??? rowsort +# make_array scalar function #1 +query ??? select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o'); ---- [1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] -# array scalar function #2 -query ??? rowsort +# make_array scalar function #2 +query ??? select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]); ---- [1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]] -# array scalar function #3 -query ?? rowsort +# make_array scalar function #3 +query ?? select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]); ---- [[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] -# array scalar function #4 -query ?? rowsort +# make_array scalar function #4 +query ?? select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o'); ---- [[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o] -# array scalar function #5 -query ? rowsort +# make_array scalar function #5 +query ? select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12)))) ---- [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] -# array scalar function #6 -query ? rowsort +# make_array scalar function #6 +query ? select make_array() ---- [] -# array scalar function #7 -query ?? rowsort +# make_array scalar function #7 +query ?? select make_array(make_array()), make_array(make_array(make_array())) ---- [[]] [[[]]] -# array scalar function with nulls -query ??? rowsort +# make_array scalar function with nulls +query ??? select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); ---- [1, , 3] [, 2.0, ] [h, , l, , o] -# array scalar function with nulls #2 -query ?? rowsort +# make_array scalar function with nulls #2 +query ?? select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3)); ---- [1, 2, ] [[, 2], [, 3]] -# array scalar function with nulls #3 -query ??? rowsort +# make_array scalar function with nulls #3 +query ??? select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL)); ---- [] [] [[], []] +# make_array with columns #1 +query ???? +select make_array(a), make_array(b, c), make_array(d), make_array(e) from values; +---- +[1] [1, 2] [1.1] [Lorem] +[2] [3, 4] [2.2] [ipsum] +[3] [5, 6] [3.3] [dolor] +[4] [7, 8] [4.4] [sit] + +# make_array with columns #2 +query ? +select make_array(a, b, c, d) from values; +---- +[1.0, 1.0, 2.0, 1.1] +[2.0, 3.0, 4.0, 2.2] +[3.0, 5.0, 6.0, 3.3] +[4.0, 7.0, 8.0, 4.4] + ## array_append # array_append scalar function #2 -query ? rowsort +query ? select array_append(make_array(), 4); ---- [4] # array_append scalar function #2 -query ?? rowsort +query ?? select array_append(make_array(), make_array()), array_append(make_array(), make_array(4)); ---- [[]] [[4]] # array_append scalar function #3 -query ??? rowsort +query ??? select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +# array_append with columns +query ? +select array_append(column1, column2) from arrays_values; +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 1, 12, 23, 34] + +# array_append with columns and scalars +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from arrays; +---- +[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.1, 11.1, 12.2, 100.1, 100.1, 100.1, 100.1] [L, o, r, e, m, i, p, s, u, m, d, o, l, o, r, s, i, t, ., ., ., .] + ## array_prepend # array_prepend scalar function #1 -query ? rowsort +query ? select array_prepend(4, make_array()); ---- [4] # array_prepend scalar function #2 -query ?? rowsort +query ?? select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array()); ---- [[]] [[4]] # array_prepend scalar function #3 -query ??? rowsort +query ??? select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o')); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +# array_prepend with columns +query ? +select array_prepend(column2, column1) from arrays_values; +---- +[1, 12, 23, 34, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40] + +# array_prepend with columns and scalars +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; +---- +[100.1, 100.1, 100.1, 100.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.1, 11.1, 12.2] [., ., ., ., L, o, r, e, m, i, p, s, u, m, d, o, l, o, r, s, i, t] + ## array_fill # array_fill scalar function #1 @@ -144,41 +252,51 @@ select array_fill(1, make_array()) ## array_concat # array_concat scalar function #1 -query ?? rowsort +query ?? select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4])); ---- [1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] # array_concat scalar function #2 -query ? rowsort +query ? select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8))); ---- [[1, 2], [3, 4], [5, 6], [7, 8]] # array_concat scalar function #3 -query ? rowsort +query ? select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9])); ---- [[1], [2], [3], [4], [5], [6], [7], [8], [9]] # array_concat scalar function #4 -query ? rowsort +query ? select array_concat(make_array([[1]]), make_array([[2]])); ---- [[[1]], [[2]]] # array_concat scalar function #5 -query ? rowsort +query ? select array_concat(make_array(2, 3), make_array()); ---- [2, 3] # array_concat scalar function #6 -query ? rowsort +query ? select array_concat(make_array(), make_array(2, 3)); ---- [2, 3] +# array_concat with columns +query ??? +select array_concat(column1, column2), array_concat(column2, column2), array_concat(column3, column3) from arrays; +---- + +# array_concat with columns and scalars +query ??? +select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)), array_concat(column3, make_array('.', '.', '.')) from arrays; +---- + ## array_position # array_position scalar function #1 @@ -193,12 +311,34 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, ---- 4 5 2 +# array_position with columns +query ??? +select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values; +---- + +# array_position with columns and scalars +query ??? +select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values; +---- + +## array_positions + # array_positions scalar function -query ??? rowsort +query ??? select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); ---- [3, 4] [5] [1, 2, 3] +# array_positions with columns +query ??? +select array_positions(column1, column2) from arrays_values; +---- + +# array_positions with columns and scalars +query ??? +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values; +---- + ## array_replace # array_replace scalar function @@ -207,10 +347,20 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, ---- [1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] +# array_replace with columns +query ??? +select array_replace(column1, column2, column3) from arrays_values; +---- + +# array_replace with columns and scalars +query ??? +select array_replace(column1, column2, 55), array_replace(column1, 22, column3), array_replace(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column2, column3) from arrays_values; +---- + ## array_to_string # array_to_string scalar function -query TTT rowsort +query TTT select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); ---- h,e,l,l,o 1-2-3-4-5 1|2|3 @@ -228,17 +378,27 @@ Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name: select array_to_string(make_array(), ',') # array_to_string scalar function with nulls #1 -query TTT rowsort +query TTT select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); ---- h,l,o 1-3-5 2|3 # array_to_string scalar function with nulls #2 -query TTT rowsort +query TTT select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); ---- h,-,-,-,o nil-2-nil-4-5 1|0|3 +# array_to_string with columns +query ? +select array_to_string(column1, column4) from arrays_values; +---- + +# array_to_string with columns and scalars +query ? +select array_to_string(column1, '_') from arrays_values; +---- + ## cardinality # cardinality scalar function @@ -259,6 +419,11 @@ select cardinality(make_array()), cardinality(make_array(make_array())) ---- 0 0 +# cardinality with columns +query I +select cardinality(column1), cardinality(column2), cardinality(column3) from arr; +---- + ## trim_array # trim_array scalar function #1 @@ -285,22 +450,32 @@ select trim_array(make_array(), 0), trim_array(make_array(), 1) ---- [] [] +# trim_array with columns +query ? +select trim_array(column1, column3) from arrays_values; +---- + +# trim_array with columns and scalars +query ?? +select trim_array(column1, 5), trim_array(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column3) from arrays_values; +---- + ## array_length # array_length scalar function -query III rowsort +query III select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); ---- 5 3 3 # array_length scalar function #2 -query III rowsort +query III select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1); ---- 5 3 3 # array_length scalar function #3 -query III rowsort +query III select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2); ---- NULL NULL 2 @@ -312,11 +487,21 @@ Error during planning: Cannot automatically convert List\(Field \{ name: "item", select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4); # array_length scalar function #5 -query III rowsort +query III select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2) ---- 0 0 NULL +# array_length with columns +query ? +select array_length(column1, column3) from arrays_values; +---- + +# array_length with columns and scalars +query ??? +select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values; +---- + ## array_dims # array_dims scalar function @@ -337,14 +522,19 @@ caused by Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8 select array_dims(make_array()), array_dims(make_array(make_array())) +# array_dims with columns +query I +select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; +---- + +## array_ndims + # array_ndims scalar function -query III rowsort +query III select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]])); ---- 1 2 5 -## array_ndims - # array_ndims scalar function #2 query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) caused by @@ -352,182 +542,144 @@ Error during planning: Cannot automatically convert List\(Field \{ name: "item", select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); # array_ndims scalar function #3 -query II rowsort +query II select array_ndims(make_array()), array_ndims(make_array(make_array())) ---- 1 2 -## array concatenate opeartor - -# array concatenate operator #1 (like array_concat scalar function) -query ?? rowsort -select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); ----- -[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] - -# array concatenate operator #2 (like array_append scalar function) -query ??? rowsort -select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# array concatenate operator #3 (like array_prepend scalar function) -query ??? rowsort -select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); ----- -[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - -# make_array - -query ? -select make_array(1, 2.0) ----- -[1.0, 2.0] - -query ? -select make_array(null, 1.0) ----- -[, 1.0] - -query ? -select make_array(1, 2.0, null, 3) ----- -[1.0, 2.0, , 3.0] - -query ? -select make_array(1.0, '2', null) ----- -[1.0, 2, ] - -statement ok -create table foo1 (x int, y double) as values (1, 2.0); - -query ? -select make_array(x, y) from foo1; ----- -[1.0, 2.0] - -statement ok -create table foo2 (x float, y varchar) as values (1.0, '1'); - -query ? -select make_array(x, y) from foo2; +# array_ndims with columns +query II +select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; ---- -[1.0, 1] - -# array_contains - +## array_contains # array_contains scalar function #1 -query BBB rowsort +query BBB select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]); ---- true true true # array_contains scalar function #2 -query BB rowsort +query BB select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]); ---- true true # array_contains scalar function #3 -query BBB rowsort +query BBB select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]); ---- false false false # array_contains scalar function #4 -query BB rowsort +query BB select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]); ---- false false # array_contains scalar function #5 -query BB rowsort +query BB select array_contains([true, true, false, true, false], [true, false, false]), array_contains([true, false, true], [true, true]); ---- true true # array_contains scalar function #6 -query BB rowsort +query BB select array_contains(make_array(true, true, true), make_array(false, false)), array_contains([false, false, false], [true, true]); ---- false false -## array_contains array +# array_contains with columns +query BBB +select array_contains(column1, column1), array_contains(column2, column2), array_contains(column3, column3) from arrays; +---- +true true +# array_contains with columns and scalars +query BB +select array_contains(column1, array[1, 2]), array_contains(array[5, 6, 7, 8], column1) from arr; +---- -statement ok -CREATE TABLE t -AS VALUES -(make_array(1,2,3), make_array(1,2,3)), -(make_array(1,2,3), make_array(2,3)), -(make_array(2,3), make_array(1,2,3)), -(null, make_array(1,2,3)), -(make_array(2,3), null) -; + +### Array operators tests +## array concatenate operator + +# array concatenate operator with scalars #1 (like array_concat scalar function) query ?? -SELECT - column1, - column2 -FROM t +select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]); ---- -[1, 2, 3] [1, 2, 3] -[1, 2, 3] [2, 3] -[2, 3] [1, 2, 3] -NULL [1, 2, 3] -[2, 3] NULL - +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] -# incorrect answer (one row) to https://github.com/apache/arrow-datafusion/issues/6709 +# array concatenate operator with scalars #2 (like array_append scalar function) +query ??? +select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o'; +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] -query BB -SELECT - array_contains(column1, column2) as c12, - array_contains(column1, column2) as c21 -FROM t +# array concatenate operator with scalars #3 (like array_prepend scalar function) +query ??? +select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o'); ---- -true true +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] -statement ok -drop table t +### Array casting tests -## array_contains array (nested) +## make_array +# make_array scalar function #1 +query ? +select make_array(1, 2.0) +---- +[1.0, 2.0] -statement ok -CREATE TABLE t -AS VALUES -(make_array(make_array(2),make_array(3,4)), make_array(3,4)), -(make_array(make_array(2),make_array(3,4)), make_array(4,3)), -(make_array(make_array(2),make_array(3,4)), make_array(3)), -(make_array(make_array(2),make_array(3,4)), make_array(2)) -; +# make_array scalar function #2 +query ? +select make_array(null, 1.0) +---- +[, 1.0] +# make_array scalar function #3 +query ? +select make_array(1, 2.0, null, 3) +---- +[1.0, 2.0, , 3.0] -query ?? -SELECT - column1, - column2 -FROM t +# make_array scalar function #4 +query ? +select make_array(1.0, '2', null) ---- -[[2], [3, 4]] [3, 4] -[[2], [3, 4]] [4, 3] -[[2], [3, 4]] [3] -[[2], [3, 4]] [2] +[1.0, 2, ] +# make_array with columns #1 +query ? +select make_array(column1, column2) from arrays; +---- +[1.0, 2.0] -# incorrect answer (one row) to https://github.com/apache/arrow-datafusion/issues/6709 +# make_array with columns #2 +query ? +select make_array(column2, column3) from arrays; +---- +[1.0, 1] -query BB -SELECT - array_contains(column1, column2) as c12, - array_contains(column1, column2) as c21 -FROM t +# make_array with columns #3 +query ? +select make_array(column1, column2, column3) from arrays; ---- -true true + +### Delete tables + + +statement ok +drop table values; + +statement ok +drop table arrays; + +statement ok +drop table arrays_values; From 2becd85e03c3b98015b9a4b73e5cb4096a5d2a13 Mon Sep 17 00:00:00 2001 From: izveigor Date: Sun, 2 Jul 2023 14:41:58 +0300 Subject: [PATCH 2/4] feat: column support for array_append and array_prepend --- .../tests/sqllogictests/test_files/array.slt | 120 +++++++++++++++--- .../physical-expr/src/array_expressions.rs | 114 ++++++++++++++--- 2 files changed, 194 insertions(+), 40 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index dedbe025150af..9c1c904a24a22 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -34,35 +34,59 @@ CREATE TABLE values( (1, 1, 2, 1.1, 'Lorem'), (2, 3, 4, 2.2, 'ipsum'), (3, 5, 6, 3.3, 'dolor'), - (4, 7, 8, 4.4, 'sit') + (4, 7, 8, 4.4, 'sit'), + (NULL, 9, 10, 5.5, 'amet'), + (5, NULL, 12, 6.6, ','), + (6, 11, NULL, 7.7, 'consectetur'), + (7, 13, 14, NULL, 'adipiscing'), + (8, 15, 16, 8.8, NULL) ; statement ok CREATE TABLE arrays AS VALUES - (make_array(make_array(1,2),make_array(3,4)), make_array(1.1,2.2,3.3), make_array('L', 'o', 'r', 'e', 'm')), - (make_array(make_array(3,4),make_array(5,6)), make_array(4.4,5.5,6.6), make_array('i', 'p', 's', 'u', 'm')), - (make_array(make_array(5,6),make_array(7,8)), make_array(7.7,8.8,9.9), make_array('d', 'o', 'l', 'o', 'r')), - (make_array(make_array(7,8),make_array(9,10)), make_array(10.1,11.1,12.2), make_array('s', 'i', 't')) + (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')), + (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')), + (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')), + (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')), + (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')), + (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')), + (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL) ; statement ok CREATE TABLE arrays_values +AS VALUES + (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), + (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'), + (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'), + (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'), + (NULL, 44, 5, '@'), + (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'), + (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'), + (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL) +; + +statement ok +CREATE TABLE arrays_values_without_nulls AS VALUES (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','), (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'), (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'), - (make_array(31, 32, 33, 34, 35, 36, 37, 38, 39, 40), 34, 4, 'ok') + (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok') ; # arrays table query ??? select column1, column2, column3 from arrays; ---- -[[1, 2], [3, 4]] [1.1, 2.2, 3.3] [L, o, r, e, m] -[[3, 4], [5, 6]] [4.4, 5.5, 6.6] [i, p, s, u, m] -[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, o, l, o, r] -[[7, 8], [9, 10]] [10.1, 11.1, 12.2] [s, i, t] +[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m] +[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m] +[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r] +[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t] +NULL [13.3, 14.4, 15.5] [a, m, e, t] +[[11, 12], [13, 14]] NULL [,] +[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL # values table query IIIRT @@ -72,15 +96,33 @@ select a, b, c, d, e from values; 2 3 4 2.2 ipsum 3 5 6 3.3 dolor 4 7 8 4.4 sit +NULL 9 10 5.5 amet +5 NULL 12 6.6 , +6 11 NULL 7.7 consectetur +7 13 14 NULL adipiscing +8 15 16 8.8 NULL # arrays_values table +query ?IIT +select column1, column2, column3, column4 from arrays_values; +---- +[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 , +[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 . +[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 - +[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok +NULL 44 5 @ +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $ +[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^ +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL + +# arrays_values_without_nulls table query ?II -select column1, column2, column3 from arrays_values; +select column1, column2, column3 from arrays_values_without_nulls; ---- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 [11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 -[31, 32, 33, 34, 35, 36, 37, 38, 39, 40] 34 4 +[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ### Array function tests @@ -155,6 +197,11 @@ select make_array(a), make_array(b, c), make_array(d), make_array(e) from values [2] [3, 4] [2.2] [ipsum] [3] [5, 6] [3.3] [dolor] [4] [7, 8] [4.4] [sit] +[0] [9, 10] [5.5] [amet] +[5] [0, 12] [6.6] [,] +[6] [11, 0] [7.7] [consectetur] +[7] [13, 14] [0.0] [adipiscing] +[8] [15, 16] [8.8] [] # make_array with columns #2 query ? @@ -164,6 +211,11 @@ select make_array(a, b, c, d) from values; [2.0, 3.0, 4.0, 2.2] [3.0, 5.0, 6.0, 3.3] [4.0, 7.0, 8.0, 4.4] +[0.0, 9.0, 10.0, 5.5] +[5.0, 0.0, 12.0, 6.6] +[6.0, 11.0, 0.0, 7.7] +[7.0, 13.0, 14.0, 0.0] +[8.0, 15.0, 16.0, 8.8] ## array_append @@ -189,13 +241,26 @@ select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3 query ? select array_append(column1, column2) from arrays_values; ---- -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 1, 12, 23, 34] +[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] +[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] +[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] # array_append with columns and scalars query ?? select array_append(column2, 100.1), array_append(column3, '.') from arrays; ---- -[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.1, 11.1, 12.2, 100.1, 100.1, 100.1, 100.1] [L, o, r, e, m, i, p, s, u, m, d, o, l, o, r, s, i, t, ., ., ., .] +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[, 5.5, 6.6, 100.1] [i, p, , u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] +[10.1, , 12.2, 100.1] [s, i, t, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .] +[100.1] [,, .] +[16.6, 17.7, 18.8, 100.1] [.] ## array_prepend @@ -221,13 +286,26 @@ select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, query ? select array_prepend(column2, column1) from arrays_values; ---- -[1, 12, 23, 34, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40] +[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] +[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[44] +[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] # array_prepend with columns and scalars query ?? select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; ---- -[100.1, 100.1, 100.1, 100.1, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.1, 11.1, 12.2] [., ., ., ., L, o, r, e, m, i, p, s, u, m, d, o, l, o, r, s, i, t] +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, , 5.5, 6.6] [., i, p, , u, m] +[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] +[100.1, 10.1, , 12.2] [., s, i, t] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t] +[100.1] [., ,] +[100.1, 16.6, 17.7, 18.8] [.] ## array_fill @@ -313,12 +391,12 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, # array_position with columns query ??? -select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values; +select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; ---- # array_position with columns and scalars query ??? -select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values; +select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; ---- ## array_positions @@ -331,12 +409,12 @@ select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3 # array_positions with columns query ??? -select array_positions(column1, column2) from arrays_values; +select array_positions(column1, column2) from arrays_values_without_nulls; ---- # array_positions with columns and scalars query ??? -select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values; +select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; ---- ## array_replace @@ -396,7 +474,7 @@ select array_to_string(column1, column4) from arrays_values; # array_to_string with columns and scalars query ? -select array_to_string(column1, '_') from arrays_values; +select array_to_string(column1, '_'), array_to_string(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), column4) from arrays_values; ---- ## cardinality diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 911c94b06d765..9ab3aa942131b 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -18,7 +18,7 @@ //! Array expressions use arrow::array::*; -use arrow::buffer::Buffer; +use arrow::buffer::{Buffer, OffsetBuffer}; use arrow::compute; use arrow::datatypes::{DataType, Field}; use core::any::type_name; @@ -197,15 +197,53 @@ pub fn make_array(values: &[ColumnarValue]) -> Result { macro_rules! append { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let mut offsets: Vec = vec![0]; + let mut values = + downcast_arg!(new_empty_array($ELEMENT.data_type()), $ARRAY_TYPE).clone(); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); - let cat = compute::concat(&[child_array, element])?; - let mut scalars = vec![]; - for i in 0..cat.len() { - scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?)); + for (arr, el) in $ARRAY.iter().zip(element.iter()) { + let last_offset: i32 = offsets.last().copied().ok_or_else(|| { + DataFusionError::Internal(format!("offsets should not be empty",)) + })?; + match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + values = downcast_arg!( + compute::concat(&[ + &values, + child_array, + &$ARRAY_TYPE::from(vec![el]) + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.extend([last_offset + child_array.len() as i32 + 1i32]); + } + None => { + values = downcast_arg!( + compute::concat(&[ + &values, + &$ARRAY_TYPE::from(vec![el.clone()]) + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.extend([last_offset + 1i32]); + } + } } - scalars + + let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), true)); + + Arc::new(ListArray::try_new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + )?) }}; } @@ -221,7 +259,7 @@ pub fn array_append(args: &[ArrayRef]) -> Result { let arr = as_list_array(&args[0])?; let element = &args[1]; - let scalars = match (arr.value_type(), element.data_type()) { + let res = match (arr.value_type(), element.data_type()) { (DataType::Utf8, DataType::Utf8) => append!(arr, element, StringArray), (DataType::LargeUtf8, DataType::LargeUtf8) => append!(arr, element, LargeStringArray), (DataType::Boolean, DataType::Boolean) => append!(arr, element, BooleanArray), @@ -243,20 +281,58 @@ pub fn array_append(args: &[ArrayRef]) -> Result { } }; - Ok(array(scalars.as_slice())?.into_array(1)) + Ok(res) } macro_rules! prepend { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); + let mut offsets: Vec = vec![0]; + let mut values = + downcast_arg!(new_empty_array($ELEMENT.data_type()), $ARRAY_TYPE).clone(); + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); - let cat = compute::concat(&[element, child_array])?; - let mut scalars = vec![]; - for i in 0..cat.len() { - scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?)); + for (arr, el) in $ARRAY.iter().zip(element.iter()) { + let last_offset: i32 = offsets.last().copied().ok_or_else(|| { + DataFusionError::Internal(format!("offsets should not be empty",)) + })?; + match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + values = downcast_arg!( + compute::concat(&[ + &values, + &$ARRAY_TYPE::from(vec![el]), + child_array + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.extend([last_offset + child_array.len() as i32 + 1i32]); + } + None => { + values = downcast_arg!( + compute::concat(&[ + &values, + &$ARRAY_TYPE::from(vec![el.clone()]) + ])? + .clone(), + $ARRAY_TYPE + ) + .clone(); + offsets.extend([last_offset + 1i32]); + } + } } - scalars + + let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), true)); + + Arc::new(ListArray::try_new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + )?) }}; } @@ -272,7 +348,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { let element = &args[0]; let arr = as_list_array(&args[1])?; - let scalars = match (arr.value_type(), element.data_type()) { + let res = match (arr.value_type(), element.data_type()) { (DataType::Utf8, DataType::Utf8) => prepend!(arr, element, StringArray), (DataType::LargeUtf8, DataType::LargeUtf8) => prepend!(arr, element, LargeStringArray), (DataType::Boolean, DataType::Boolean) => prepend!(arr, element, BooleanArray), @@ -294,7 +370,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { } }; - Ok(array(scalars.as_slice())?.into_array(1)) + Ok(res) } /// Array_concat/Array_cat SQL function From 340bcdc2e83e5c38fe8f79e556920fc7b4c4e3c4 Mon Sep 17 00:00:00 2001 From: izveigor Date: Mon, 3 Jul 2023 14:19:23 +0300 Subject: [PATCH 3/4] feat: column support for array_position and array_positions --- .../tests/sqllogictests/test_files/array.slt | 121 ++------- datafusion/expr/src/built_in_function.rs | 4 +- .../physical-expr/src/array_expressions.rs | 232 +++++++++--------- datafusion/physical-expr/src/functions.rs | 8 +- 4 files changed, 146 insertions(+), 219 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 9c1c904a24a22..87649251f6bcd 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -365,16 +365,6 @@ select array_concat(make_array(), make_array(2, 3)); ---- [2, 3] -# array_concat with columns -query ??? -select array_concat(column1, column2), array_concat(column2, column2), array_concat(column3, column3) from arrays; ----- - -# array_concat with columns and scalars -query ??? -select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), array_concat(column2, make_array(1.1, 2.2, 3.3)), array_concat(column3, make_array('.', '.', '.')) from arrays; ----- - ## array_position # array_position scalar function #1 @@ -390,14 +380,22 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 4 5 2 # array_position with columns -query ??? +query II select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; ---- +1 1 +2 2 +3 3 +4 4 # array_position with columns and scalars -query ??? +query II select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls; ---- +3 NULL +NULL NULL +NULL NULL +NULL NULL ## array_positions @@ -408,14 +406,22 @@ select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3 [3, 4] [5] [1, 2, 3] # array_positions with columns -query ??? +query ? select array_positions(column1, column2) from arrays_values_without_nulls; ---- +[1] +[2] +[3] +[4] # array_positions with columns and scalars -query ??? +query ?? select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls; ---- +[4] [1] +[] [] +[] [3] +[] [] ## array_replace @@ -425,16 +431,6 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, ---- [1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] -# array_replace with columns -query ??? -select array_replace(column1, column2, column3) from arrays_values; ----- - -# array_replace with columns and scalars -query ??? -select array_replace(column1, column2, 55), array_replace(column1, 22, column3), array_replace(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column2, column3) from arrays_values; ----- - ## array_to_string # array_to_string scalar function @@ -467,16 +463,6 @@ select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_ ---- h,-,-,-,o nil-2-nil-4-5 1|0|3 -# array_to_string with columns -query ? -select array_to_string(column1, column4) from arrays_values; ----- - -# array_to_string with columns and scalars -query ? -select array_to_string(column1, '_'), array_to_string(make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), column4) from arrays_values; ----- - ## cardinality # cardinality scalar function @@ -497,11 +483,6 @@ select cardinality(make_array()), cardinality(make_array(make_array())) ---- 0 0 -# cardinality with columns -query I -select cardinality(column1), cardinality(column2), cardinality(column3) from arr; ----- - ## trim_array # trim_array scalar function #1 @@ -528,16 +509,6 @@ select trim_array(make_array(), 0), trim_array(make_array(), 1) ---- [] [] -# trim_array with columns -query ? -select trim_array(column1, column3) from arrays_values; ----- - -# trim_array with columns and scalars -query ?? -select trim_array(column1, 5), trim_array(array[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], column3) from arrays_values; ----- - ## array_length # array_length scalar function @@ -570,16 +541,6 @@ select array_length(make_array()), array_length(make_array(), 1), array_length(m ---- 0 0 NULL -# array_length with columns -query ? -select array_length(column1, column3) from arrays_values; ----- - -# array_length with columns and scalars -query ??? -select array_length(array[array[1, 2], array[3, 4]], column3), array_length(column1, 1) from arrays_values; ----- - ## array_dims # array_dims scalar function @@ -600,11 +561,6 @@ caused by Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8 select array_dims(make_array()), array_dims(make_array(make_array())) -# array_dims with columns -query I -select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; ----- - ## array_ndims # array_ndims scalar function @@ -625,11 +581,6 @@ select array_ndims(make_array()), array_ndims(make_array(make_array())) ---- 1 2 -# array_ndims with columns -query II -select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; ----- - ## array_contains # array_contains scalar function #1 @@ -668,18 +619,6 @@ select array_contains(make_array(true, true, true), make_array(false, false)), a ---- false false -# array_contains with columns -query BBB -select array_contains(column1, column1), array_contains(column2, column2), array_contains(column3, column3) from arrays; ----- -true true - -# array_contains with columns and scalars -query BB -select array_contains(column1, array[1, 2]), array_contains(array[5, 6, 7, 8], column1) from arr; ----- - - ### Array operators tests @@ -733,23 +672,6 @@ select make_array(1.0, '2', null) ---- [1.0, 2, ] -# make_array with columns #1 -query ? -select make_array(column1, column2) from arrays; ----- -[1.0, 2.0] - -# make_array with columns #2 -query ? -select make_array(column2, column3) from arrays; ----- -[1.0, 1] - -# make_array with columns #3 -query ? -select make_array(column1, column2, column3) from arrays; ----- - ### Delete tables @@ -761,3 +683,6 @@ drop table arrays; statement ok drop table arrays_values; + +statement ok +drop table arrays_values_without_nulls; diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 2eaa2792b9db8..77a811d3236cb 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -482,9 +482,9 @@ impl BuiltinScalarFunction { )))), BuiltinScalarFunction::ArrayLength => Ok(UInt8), BuiltinScalarFunction::ArrayNdims => Ok(UInt8), - BuiltinScalarFunction::ArrayPosition => Ok(UInt8), + BuiltinScalarFunction::ArrayPosition => Ok(UInt64), BuiltinScalarFunction::ArrayPositions => { - Ok(List(Arc::new(Field::new("item", UInt8, true)))) + Ok(List(Arc::new(Field::new("item", UInt64, true)))) } BuiltinScalarFunction::ArrayPrepend => Ok(List(Arc::new(Field::new( "item", diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 9ab3aa942131b..41c44898a3983 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -219,7 +219,7 @@ macro_rules! append { $ARRAY_TYPE ) .clone(); - offsets.extend([last_offset + child_array.len() as i32 + 1i32]); + offsets.push(last_offset + child_array.len() as i32 + 1i32); } None => { values = downcast_arg!( @@ -231,7 +231,7 @@ macro_rules! append { $ARRAY_TYPE ) .clone(); - offsets.extend([last_offset + 1i32]); + offsets.push(last_offset + 1i32); } } } @@ -308,7 +308,7 @@ macro_rules! prepend { $ARRAY_TYPE ) .clone(); - offsets.extend([last_offset + child_array.len() as i32 + 1i32]); + offsets.push(last_offset + child_array.len() as i32 + 1i32); } None => { values = downcast_arg!( @@ -320,7 +320,7 @@ macro_rules! prepend { $ARRAY_TYPE ) .clone(); - offsets.extend([last_offset + 1i32]); + offsets.push(last_offset + 1i32); } } } @@ -496,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) -> Result { macro_rules! position { ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); - let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); - - match child_array + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); + $ARRAY .iter() - .skip($INDEX) - .position(|x| x == Some(element)) - { - Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some( - (value + $INDEX + 1) as u8, - )))), - None => Ok(ColumnarValue::Scalar(ScalarValue::Null)), - } + .zip(element.iter()) + .zip($INDEX.iter()) + .map(|((arr, el), i)| { + let index = match i { + Some(i) => { + if i <= 0 { + 0 + } else { + i - 1 + } + } + None => { + return Err(DataFusionError::Internal( + "initial position must not be null".to_string(), + )) + } + }; + + match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + + match child_array + .iter() + .skip(index as usize) + .position(|x| x == el) + { + Some(value) => Ok(Some(value as u64 + index as u64 + 1u64)), + None => Ok(None), + } + } + None => Ok(None), + } + }) + .collect::>()? }}; } /// Array_position SQL function -pub fn array_position(args: &[ColumnarValue]) -> Result { - let arr = match &args[0] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - ColumnarValue::Array(arr) => arr.clone(), - }; - - let element = match &args[1] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - _ => { - return Err(DataFusionError::Internal( - "Array_position function requires scalar element".to_string(), - )) - } - }; +pub fn array_position(args: &[ArrayRef]) -> Result { + let arr = as_list_array(&args[0])?; + let element = &args[1]; - let mut index: usize = 0; + let mut index = Int64Array::from_value(0, arr.len()); if args.len() == 3 { - let scalar = - match &args[2] { - ColumnarValue::Scalar(scalar) => scalar.clone(), - _ => return Err(DataFusionError::Internal( - "Array_position function requires positive integer scalar element" - .to_string(), - )), - }; - - index = - match scalar { - ScalarValue::Int8(Some(value)) => value as usize, - ScalarValue::Int16(Some(value)) => value as usize, - ScalarValue::Int32(Some(value)) => value as usize, - ScalarValue::Int64(Some(value)) => value as usize, - ScalarValue::UInt8(Some(value)) => value as usize, - ScalarValue::UInt16(Some(value)) => value as usize, - ScalarValue::UInt32(Some(value)) => value as usize, - ScalarValue::UInt64(Some(value)) => value as usize, - _ => return Err(DataFusionError::Internal( - "Array_position function requires positive integer scalar element" - .to_string(), - )), - }; - - if index == 0 { - index = 0; - } else { - index -= 1; - } + index = as_int64_array(&args[2])?.clone(); } - match arr.data_type() { + let res = match arr.data_type() { DataType::List(field) => match field.data_type() { DataType::Utf8 => position!(arr, element, index, StringArray), DataType::LargeUtf8 => position!(arr, element, index, LargeStringArray), @@ -578,50 +562,75 @@ pub fn array_position(args: &[ColumnarValue]) -> Result { DataType::UInt16 => position!(arr, element, index, UInt16Array), DataType::UInt32 => position!(arr, element, index, UInt32Array), DataType::UInt64 => position!(arr, element, index, UInt64Array), - data_type => Err(DataFusionError::NotImplemented(format!( - "Array_position is not implemented for types '{data_type:?}'." - ))), + data_type => { + return Err(DataFusionError::NotImplemented(format!( + "Array_position is not implemented for types '{data_type:?}'." + ))) + } }, - data_type => Err(DataFusionError::NotImplemented(format!( - "Array is not type '{data_type:?}'." - ))), - } + data_type => { + return Err(DataFusionError::NotImplemented(format!( + "Array is not type '{data_type:?}'." + ))) + } + }; + + Ok(Arc::new(res)) } macro_rules! positions { ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{ - let child_array = - downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE); - let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0); - - let mut res = vec![]; - for (i, x) in child_array.iter().enumerate() { - if x == Some(element) { - res.push(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![ - Some((i + 1) as u8), - ])))); - } + let element = downcast_arg!($ELEMENT, $ARRAY_TYPE); + let mut offsets: Vec = vec![0]; + let mut values = + downcast_arg!(new_empty_array(&DataType::UInt64), UInt64Array).clone(); + for comp in $ARRAY + .iter() + .zip(element.iter()) + .map(|(arr, el)| match arr { + Some(arr) => { + let child_array = downcast_arg!(arr, $ARRAY_TYPE); + let res = child_array + .iter() + .enumerate() + .filter(|(_, x)| *x == el) + .flat_map(|(i, _)| Some((i + 1) as u64)) + .collect::(); + + Ok(res) + } + None => Ok(downcast_arg!( + new_empty_array(&DataType::UInt64), + UInt64Array + ) + .clone()), + }) + .collect::>>()? + { + let last_offset: i32 = offsets.last().copied().ok_or_else(|| { + DataFusionError::Internal(format!("offsets should not be empty",)) + })?; + values = + downcast_arg!(compute::concat(&[&values, &comp,])?.clone(), UInt64Array) + .clone(); + offsets.push(last_offset + comp.len() as i32); } - res + let field = Arc::new(Field::new("item", DataType::UInt64, true)); + + Arc::new(ListArray::try_new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + )?) }}; } /// Array_positions SQL function -pub fn array_positions(args: &[ColumnarValue]) -> Result { - let arr = match &args[0] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - ColumnarValue::Array(arr) => arr.clone(), - }; - - let element = match &args[1] { - ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), - _ => { - return Err(DataFusionError::Internal( - "Array_positions function requires scalar element".to_string(), - )) - } - }; +pub fn array_positions(args: &[ArrayRef]) -> Result { + let arr = as_list_array(&args[0])?; + let element = &args[1]; let res = match arr.data_type() { DataType::List(field) => match field.data_type() { @@ -651,7 +660,7 @@ pub fn array_positions(args: &[ColumnarValue]) -> Result { } }; - array(res.as_slice()) + Ok(res) } macro_rules! remove { @@ -1541,29 +1550,22 @@ mod tests { #[test] fn test_array_position() { // array_position([1, 2, 3, 4], 3) = 3 - let list_array = return_array(); - let array = array_position(&[ - list_array, - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ]) - .expect("failed to initialize function array_position") - .into_array(1); - let result = - as_uint8_array(&array).expect("failed to initialize function array_position"); + let list_array = return_array().into_array(1); + let array = array_position(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) + .expect("failed to initialize function array_position"); + let result = as_uint64_array(&array) + .expect("failed to initialize function array_position"); - assert_eq!(result, &UInt8Array::from(vec![3])); + assert_eq!(result, &UInt64Array::from(vec![3])); } #[test] fn test_array_positions() { // array_positions([1, 2, 3, 4], 3) = [3] - let list_array = return_array(); - let array = array_positions(&[ - list_array, - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ]) - .expect("failed to initialize function array_position") - .into_array(1); + let list_array = return_array().into_array(1); + let array = + array_positions(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) + .expect("failed to initialize function array_position"); let result = as_list_array(&array).expect("failed to initialize function array_position"); @@ -1573,7 +1575,7 @@ mod tests { result .value(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap() .values() ); diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 3221b6f2932ce..1391bacb2503f 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -403,11 +403,11 @@ pub fn create_physical_fun( Arc::new(|args| make_scalar_function(array_expressions::array_ndims)(args)) } BuiltinScalarFunction::ArrayPosition => { - Arc::new(array_expressions::array_position) - } - BuiltinScalarFunction::ArrayPositions => { - Arc::new(array_expressions::array_positions) + Arc::new(|args| make_scalar_function(array_expressions::array_position)(args)) } + BuiltinScalarFunction::ArrayPositions => Arc::new(|args| { + make_scalar_function(array_expressions::array_positions)(args) + }), BuiltinScalarFunction::ArrayPrepend => { Arc::new(|args| make_scalar_function(array_expressions::array_prepend)(args)) } From fc9b93aec1420300105bf13112b8028682088653 Mon Sep 17 00:00:00 2001 From: izveigor Date: Wed, 5 Jul 2023 21:59:47 +0300 Subject: [PATCH 4/4] fix: error type --- datafusion/physical-expr/src/array_expressions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 33fb76f4a4992..cd174918db379 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -511,7 +511,7 @@ macro_rules! position { } } None => { - return Err(DataFusionError::Internal( + return Err(DataFusionError::Execution( "initial position must not be null".to_string(), )) }