diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index 3d4076800e1e..54dec8ca18f4 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -40,13 +40,13 @@ make_udf_expr_and_func!( ArrayRemove, array_remove, array element, - "removes the first element from the array equal to the given value.", + "removes the first element from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries.", array_remove_udf ); #[user_doc( doc_section(label = "Array Functions"), - description = "Removes the first element from the array equal to the given value.", + description = "Removes the first element from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries.", syntax_example = "array_remove(array, element)", sql_example = r#"```sql > select array_remove([1, 2, 2, 3, 2, 1, 4], 2); @@ -55,6 +55,13 @@ make_udf_expr_and_func!( +----------------------------------------------+ | [1, 2, 3, 2, 1, 4] | +----------------------------------------------+ + +> select array_remove([1, 2, NULL, 2, 4], 2); ++---------------------------------------------------+ +| array_remove(List([1,2,NULL,2,4]),Int64(2)) | ++---------------------------------------------------+ +| [1, NULL, 2, 4] | ++---------------------------------------------------+ ```"#, argument( name = "array", @@ -130,14 +137,14 @@ make_udf_expr_and_func!( ArrayRemoveN, array_remove_n, array element max, - "removes the first `max` elements from the array equal to the given value.", + "removes the first `max` elements from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries.", array_remove_n_udf ); #[user_doc( doc_section(label = "Array Functions"), - description = "Removes the first `max` elements from the array equal to the given value.", - syntax_example = "array_remove_n(array, element, max))", + description = "Removes the first `max` elements from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries.", + syntax_example = "array_remove_n(array, element, max)", sql_example = r#"```sql > select array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2); +---------------------------------------------------------+ @@ -145,6 +152,13 @@ make_udf_expr_and_func!( +---------------------------------------------------------+ | [1, 3, 2, 1, 4] | +---------------------------------------------------------+ + +> select array_remove_n([1, 2, NULL, 2, 4], 2, 2); ++----------------------------------------------------------+ +| array_remove_n(List([1,2,NULL,2,4]),Int64(2),Int64(2)) | ++----------------------------------------------------------+ +| [1, NULL, 4] | ++----------------------------------------------------------+ ```"#, argument( name = "array", @@ -225,13 +239,13 @@ make_udf_expr_and_func!( ArrayRemoveAll, array_remove_all, array element, - "removes all elements from the array equal to the given value.", + "removes all elements from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries.", array_remove_all_udf ); #[user_doc( doc_section(label = "Array Functions"), - description = "Removes all elements from the array equal to the given value.", + description = "Removes all elements from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries.", syntax_example = "array_remove_all(array, element)", sql_example = r#"```sql > select array_remove_all([1, 2, 2, 3, 2, 1, 4], 2); @@ -240,6 +254,13 @@ make_udf_expr_and_func!( +--------------------------------------------------+ | [1, 3, 1, 4] | +--------------------------------------------------+ + +> select array_remove_all([1, 2, NULL, 2, 4], 2); ++-----------------------------------------------------+ +| array_remove_all(List([1,2,NULL,2,4]),Int64(2)) | ++-----------------------------------------------------+ +| [1, NULL, 4] | ++-----------------------------------------------------+ ```"#, argument( name = "array", diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index 56d78ac473f1..0cd69ead4c33 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -209,46 +209,46 @@ select log(-1), log(0), sqrt(-1); ## Array Expressions -| Syntax | Description | -| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| array_any_value(array) | Returns the first non-null element in the array. `array_any_value([NULL, 1, 2, 3]) -> 1` | -| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | -| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | -| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | -| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | -| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | -| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | -| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | -| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | -| empty(array) | Returns true for an empty array or false for a non-empty array. `empty([1]) -> false` | -| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | -| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | -| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | -| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | -| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | -| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | -| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | -| array_prepend(element, array) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | -| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | -| array_remove(array, element) | Removes the first element from the array equal to the given value. `array_remove([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 2, 3, 2, 1, 4]` | -| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `array_remove_n([1, 2, 2, 3, 2, 1, 4], 2, 2) -> [1, 3, 2, 1, 4]` | -| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `array_remove_all([1, 2, 2, 3, 2, 1, 4], 2) -> [1, 3, 1, 4]` | -| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | -| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | -| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | -| array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | -| array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | -| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | -| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | -| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | -| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2]` | -| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 0, 0]` | -| array_sort(array, desc, null_first) | Returns sorted array. `array_sort([3, 1, 2, 5, 4]) -> [1, 2, 3, 4, 5]` | -| cardinality(array/map) | Returns the total number of elements in the array or map. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | -| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | -| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | -| string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. `string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | -| trim_array(array, n) | Deprecated | +| Syntax | Description | +| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| array_any_value(array) | Returns the first non-null element in the array. `array_any_value([NULL, 1, 2, 3]) -> 1` | +| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | +| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | +| array_has(array, element) | Returns true if the array contains the element `array_has([1,2,3], 1) -> true` | +| array_has_all(array, sub-array) | Returns true if all elements of sub-array exist in array `array_has_all([1,2,3], [1,3]) -> true` | +| array_has_any(array, sub-array) | Returns true if any elements exist in both arrays `array_has_any([1,2,3], [1,4]) -> true` | +| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | +| array_distinct(array) | Returns distinct values from the array after removing duplicates. `array_distinct([1, 3, 2, 3, 1, 2, 4]) -> [1, 2, 3, 4]` | +| array_element(array, index) | Extracts the element with the index n from the array `array_element([1, 2, 3, 4], 3) -> 3` | +| empty(array) | Returns true for an empty array or false for a non-empty array. `empty([1]) -> false` | +| flatten(array) | Converts an array of arrays to a flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]` | +| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | +| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | +| array_pop_front(array) | Returns the array without the first element. `array_pop_front([1, 2, 3]) -> [2, 3]` | +| array_pop_back(array) | Returns the array without the last element. `array_pop_back([1, 2, 3]) -> [1, 2]` | +| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | +| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | +| array_prepend(element, array) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | +| array_repeat(element, count) | Returns an array containing element `count` times. `array_repeat(1, 3) -> [1, 1, 1]` | +| array_remove(array, element) | Removes the first element from the array equal to the given value. `NULL` elements already in the array are preserved when removing a non-`NULL` value, and `array_remove(array, NULL)` returns `NULL`. `array_remove([1, 2, NULL, 2, 4], 2) -> [1, NULL, 2, 4]` | +| array_remove_n(array, element, max) | Removes the first `max` elements from the array equal to the given value. `NULL` elements already in the array are preserved when removing a non-`NULL` value, and `array_remove_n(array, NULL, max)` returns `NULL`. `array_remove_n([1, 2, NULL, 2, 4], 2, 2) -> [1, NULL, 4]` | +| array_remove_all(array, element) | Removes all elements from the array equal to the given value. `NULL` elements already in the array are preserved when removing a non-`NULL` value, and `array_remove_all(array, NULL)` returns `NULL`. `array_remove_all([1, 2, NULL, 2, 4], 2) -> [1, NULL, 4]` | +| array_replace(array, from, to) | Replaces the first occurrence of the specified element with another specified element. `array_replace([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 2, 3, 2, 1, 4]` | +| array_replace_n(array, from, to, max) | Replaces the first `max` occurrences of the specified element with another specified element. `array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2) -> [1, 5, 5, 3, 2, 1, 4]` | +| array_replace_all(array, from, to) | Replaces all occurrences of the specified element with another specified element. `array_replace_all([1, 2, 2, 3, 2, 1, 4], 2, 5) -> [1, 5, 5, 3, 5, 1, 4]` | +| array_slice(array, begin,end) | Returns a slice of the array. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6) -> [3, 4, 5, 6]` | +| array_slice(array, begin, end, stride) | Returns a slice of the array with added stride feature. `array_slice([1, 2, 3, 4, 5, 6, 7, 8], 3, 6, 2) -> [3, 5, 6]` | +| array_to_string(array, delimiter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | +| array_intersect(array1, array2) | Returns an array of the elements in the intersection of array1 and array2. `array_intersect([1, 2, 3, 4], [5, 6, 3, 4]) -> [3, 4]` | +| array_union(array1, array2) | Returns an array of the elements in the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` | +| array_except(array1, array2) | Returns an array of the elements that appear in the first array but not in the second. `array_except([1, 2, 3, 4], [5, 6, 3, 4]) -> [1, 2]` | +| array_resize(array, size, value) | Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. `array_resize([1, 2, 3], 5, 0) -> [1, 2, 3, 0, 0]` | +| array_sort(array, desc, null_first) | Returns sorted array. `array_sort([3, 1, 2, 5, 4]) -> [1, 2, 3, 4, 5]` | +| cardinality(array/map) | Returns the total number of elements in the array or map. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | +| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | +| range(start [, stop, step]) | Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` | +| string_to_array(array, delimiter, null_string) | Splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`. `string_to_array('abc#def#ghi', '#', ' ') -> ['abc', 'def', 'ghi']` | +| trim_array(array, n) | Deprecated | ## Regular Expressions diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 254151c2c20e..6b39ea263fea 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3880,7 +3880,7 @@ _Alias of [array_prepend](#array_prepend)._ ### `array_remove` -Removes the first element from the array equal to the given value. +Removes the first element from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries. ```sql array_remove(array, element) @@ -3900,6 +3900,13 @@ array_remove(array, element) +----------------------------------------------+ | [1, 2, 3, 2, 1, 4] | +----------------------------------------------+ + +> select array_remove([1, 2, NULL, 2, 4], 2); ++---------------------------------------------------+ +| array_remove(List([1,2,NULL,2,4]),Int64(2)) | ++---------------------------------------------------+ +| [1, NULL, 2, 4] | ++---------------------------------------------------+ ``` #### Aliases @@ -3908,7 +3915,7 @@ array_remove(array, element) ### `array_remove_all` -Removes all elements from the array equal to the given value. +Removes all elements from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries. ```sql array_remove_all(array, element) @@ -3928,6 +3935,13 @@ array_remove_all(array, element) +--------------------------------------------------+ | [1, 3, 1, 4] | +--------------------------------------------------+ + +> select array_remove_all([1, 2, NULL, 2, 4], 2); ++-----------------------------------------------------+ +| array_remove_all(List([1,2,NULL,2,4]),Int64(2)) | ++-----------------------------------------------------+ +| [1, NULL, 4] | ++-----------------------------------------------------+ ``` #### Aliases @@ -3936,10 +3950,10 @@ array_remove_all(array, element) ### `array_remove_n` -Removes the first `max` elements from the array equal to the given value. +Removes the first `max` elements from the array equal to the given value. NULL elements already in the array are preserved when removing a non-NULL value. If `element` evaluates to NULL, the result is NULL rather than removing NULL entries. ```sql -array_remove_n(array, element, max)) +array_remove_n(array, element, max) ``` #### Arguments @@ -3957,6 +3971,13 @@ array_remove_n(array, element, max)) +---------------------------------------------------------+ | [1, 3, 2, 1, 4] | +---------------------------------------------------------+ + +> select array_remove_n([1, 2, NULL, 2, 4], 2, 2); ++----------------------------------------------------------+ +| array_remove_n(List([1,2,NULL,2,4]),Int64(2),Int64(2)) | ++----------------------------------------------------------+ +| [1, NULL, 4] | ++----------------------------------------------------------+ ``` #### Aliases