Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions be/src/vec/exprs/lambda_function/varray_map_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ class ArrayMapFunction : public LambdaFunction {
auto type_array = array_column_type_name.type;
if (type_array->is_nullable()) {
// get the nullmap of nullable column
const auto& column_array_nullmap =
assert_cast<const ColumnNullable&>(*column_array).get_null_map_column();
// Hold an owning pointer to the null-map column rather than a reference, because `column_array` is reassigned below and the nullable column that owns the null map may be freed.
auto column_array_nullmap =
assert_cast<const ColumnNullable&>(*column_array).get_null_map_column_ptr();

// get the array column from nullable column
column_array = assert_cast<const ColumnNullable*>(column_array.get())
Expand All @@ -146,8 +147,9 @@ class ArrayMapFunction : public LambdaFunction {
->get_nested_type();

// need to union nullmap from all columns
VectorizedUtils::update_null_map(outside_null_map->get_data(),
column_array_nullmap.get_data());
VectorizedUtils::update_null_map(
outside_null_map->get_data(),
assert_cast<const ColumnUInt8&>(*column_array_nullmap).get_data());
}

// here is the array column
Expand Down
5 changes: 4 additions & 1 deletion be/src/vec/functions/array/function_array_element.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ class FunctionArrayElement : public IFunction {
UInt8* dst_null_map = dst_null_column->get_data().data();
const UInt8* src_null_map = nullptr;
ColumnsWithTypeAndName args;
block.replace_by_position(
arguments[0],
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const());
auto col_left = block.get_by_position(arguments[0]);
if (col_left.column->is_nullable()) {
auto null_col = check_and_get_column<ColumnNullable>(*col_left.column);
Expand Down Expand Up @@ -332,7 +335,7 @@ class FunctionArrayElement : public IFunction {
const UInt8* src_null_map, UInt8* dst_null_map) const {
// check array nested column type and get data
auto left_column = arguments[0].column->convert_to_full_column_if_const();
const auto& array_column = reinterpret_cast<const ColumnArray&>(*left_column);
const auto& array_column = assert_cast<const ColumnArray&>(*left_column);
const auto& offsets = array_column.get_offsets();
DCHECK(offsets.size() == input_rows_count);
const UInt8* nested_null_map = nullptr;
Expand Down
3 changes: 3 additions & 0 deletions regression-test/data/function_p0/test_array_map.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --

232 changes: 232 additions & 0 deletions regression-test/suites/function_p0/test_array_map.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for ARRAY_MAP evaluated over a nullable array column.
//
// Setup: two alias functions, a wide table (`mock_table`) with nullable
// array columns, and a view (`mock_view`) whose CTEs apply ARRAY_MAP lambdas
// to the nullable array column `bk` and to the derived array `bt`.
// The single inserted row sets only aa/ab/ac/ad, so `bk` is NULL for it.
//
// Every object created here is dropped first so the suite can be re-run
// against a database that still holds the objects from a previous run.
suite("test_array_map") {
    // Drop the view before the table and functions it depends on.
    sql """
        DROP VIEW IF EXISTS mock_view;
    """

    sql """
        drop table if exists mock_table;
    """

    // Alias functions persist in the database across runs; without these
    // drops the CREATE ALIAS FUNCTION statements below fail on a second run.
    sql """ DROP FUNCTION IF EXISTS clean_html_entity_test(string); """
    sql """ DROP FUNCTION IF EXISTS clean_html_tag_test(string); """

    // Alias that decodes a handful of HTML entities and normalizes a few
    // characters through nested REPLACE calls.
    sql """
        CREATE ALIAS FUNCTION clean_html_entity_test(string) WITH PARAMETER(html) AS
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(
        REPLACE(html, '&amp;', '&'),
        '&lt;', '<'
        ),
        '&gt;', '>'
        ),
        '&quot;', '"'
        ),
        '&apos;', '\\\''
        ),'&euro;', '€'
        ),
        '&nbsp;', ' '
        ), "Ⅰ", "I"), "Ⅱ", "II"), "Ⅲ", "III"),".", ". ");
    """
    // Alias that strips anything that looks like an HTML tag.
    sql """ CREATE ALIAS FUNCTION clean_html_tag_test(string) WITH PARAMETER(html) AS REGEXP_REPLACE(html, '</?[^>]+>', ''); """

    // Base table: all columns nullable, including the array columns `bg` and `bk`.
    sql """
        CREATE TABLE `mock_table` (
        `aa` varchar(255) NULL,
        `ab` varchar(255) NULL,
        `ac` varchar(255) NULL,
        `ad` text NULL,
        `ae` text NULL,
        `af` text NULL,
        `ag` text NULL,
        `ah` text NULL,
        `ai` text NULL,
        `aj` varchar(255) NULL,
        `ak` text NULL,
        `al` text NULL,
        `am` text NULL,
        `an` text NULL,
        `ao` text NULL,
        `ap` text NULL,
        `aq` text NULL,
        `ar` text NULL,
        `as` text NULL,
        `at` text NULL,
        `au` text NULL,
        `av` bigint NULL,
        `aw` text NULL,
        `ax` varchar(255) NULL,
        `ay` text NULL,
        `az` varchar(255) NULL,
        `ba` varchar(255) NULL,
        `bb` varchar(255) NULL,
        `bc` int NULL,
        `bd` int NULL,
        `be` varchar(255) NULL,
        `bf` varchar(255) NULL,
        `bg` array<varchar(255)> NULL,
        `bh` json NULL,
        `bi` varchar(255) NULL,
        `bj` varchar(255) NULL,
        `bk` array<varchar(255)> NULL,
        `bl` boolean NULL,
        INDEX idx_ag (`ag`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_ad (`ad`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_ah (`ah`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_ac (`ac`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_ak (`ak`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_al (`al`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_am (`am`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_ag_ngrambf (`ag`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
        INDEX idx_ad_ngrambf (`ad`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
        INDEX idx_ac_ngrambf (`ac`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
        INDEX idx_ah_ngrambf (`ah`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
        INDEX idx_bi (`bi`) USING INVERTED,
        INDEX idx_ar (`ar`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
        INDEX idx_ar_ngrambf (`ar`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
        INDEX idx_bl (`bl`) USING INVERTED
        ) ENGINE=OLAP
        UNIQUE KEY(`aa`)
        DISTRIBUTED BY HASH(`aa`) BUCKETS 16
        PROPERTIES (
        "replication_allocation" = "tag.location.default: 1",
        "min_load_replica_num" = "-1",
        "is_being_synced" = "false",
        "storage_medium" = "hdd",
        "storage_format" = "V2",
        "inverted_index_storage_format" = "V1",
        "enable_unique_key_merge_on_write" = "true",
        "light_schema_change" = "true",
        "disable_auto_compaction" = "false",
        "enable_single_replica_compaction" = "false",
        "group_commit_interval_ms" = "10000",
        "group_commit_data_bytes" = "134217728",
        "enable_mow_light_delete" = "false"
        );
    """

    // View under test. CTE `bm` maps a lambda over the nullable array `bk`
    // (producing `bt`); CTE `bw` maps three more lambdas over `bt`; CTE `cj`
    // derives further array and string columns from those results.
    sql """
        CREATE VIEW `mock_view` AS
        WITH
        bm AS (SELECT
        `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`,
        CASE WHEN YEAR(`as`) >= 1970 THEN `as` ELSE NULL END as `bn`,
        CASE WHEN YEAR(`au`) >= 1970 THEN `au` ELSE NULL END as `bo`,
        CASE WHEN YEAR(`at`) >= 1970 THEN `at` ELSE NULL END as `bp`,
        LENGTH(`aw`) as `bq`,
        TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))) as `br`,
        TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))) as `bs`,
        ARRAY_MAP(x-> if(least((left(x, 5) = '6841-'), (length(x) = 10)), concat_ws('-', substring(x, 1, 7), substring(x, 8)), if(least((left(x, 5) = '6841-'), (length(x) = 9)), concat_ws('-', substring(x, 1, 6), substring(x, 7)), x)), `bk`) as `bt`,
        ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bu`,
        ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bv`
        FROM mock_table),
        bw AS (SELECT
        `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`,
        CASE
        WHEN LENGTH(`bn`) = 10 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd')
        WHEN LENGTH(`bn`) = 19 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss')
        WHEN LENGTH(`bn`) = 26 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
        ELSE NULL
        END AS `bx`,
        CASE
        WHEN LENGTH(`bo`) = 10 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd')
        WHEN LENGTH(`bo`) = 19 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss')
        WHEN LENGTH(`bo`) = 26 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
        ELSE NULL
        END AS `by`,
        CASE
        WHEN LENGTH(`bp`) = 10 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd')
        WHEN LENGTH(`bp`) = 19 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss')
        WHEN LENGTH(`bp`) = 26 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
        ELSE NULL
        END AS `bz`,
        ARRAY_REMOVE(
        ARRAY_COMPACT(
        ARRAY_UNION(
        ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -1), '-'), `bt`),
        ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -2), '-'), `bt`),
        ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -3), '-'), `bt`))), '') as `ca`,
        SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ') as `cb`,
        SPLIT_BY_STRING(`bu`, ' ') as `cc`,
        array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `cd`,
        array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `ce`,
        SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ') as `cf`,
        SPLIT_BY_STRING(`bv`, ' ') as `cg`,
        array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ch`,
        array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ci`
        FROM bm),
        cj AS (SELECT
        `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`, `bx`, `by`, `bz`, `ca`, `cb`, `cc`, `cd`, `ce`, `cf`, `cg`, `ch`, `ci`,
        ARRAY_COMPACT(ARRAY_EXCEPT(`bt`, `ca`)) as `ck`,
        ARRAY_COMPACT(ARRAY_UNION(`bt`, `ca`)) as `cl`,
        CASE
        WHEN SIZE(`cc`) = 0 THEN `bs`
        WHEN `cd`=1 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ")
        WHEN `cd`=2 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ")
        WHEN `cd` >2 AND `ce` = size(`cb`) and `ce` - `cd` > 1 THEN
        CASE
        WHEN element_at(`cc`, 1) = element_at(`cc`, `cd`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`-1), "")
        ELSE ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`), " ")
        END
        ELSE ARRAY_JOIN(`cc`, " ")
        END AS `cm`,
        CASE
        WHEN size(`cc`) = 0 THEN "tokenize_failed"
        WHEN `cd` = 0 THEN "en"
        WHEN `cd`=1 AND `ce` = size(`cb`) THEN "zh"
        WHEN `cd`=1 AND `ce` < size(`cb`) THEN "zh_en"
        WHEN `cd`=2 AND `ce` < size(`cb`) THEN "zh_en"
        WHEN `cd` >2 AND `ce` = size(`cb`) THEN "en_zh"
        ELSE "mixed"
        END AS `cn`,
        CASE
        WHEN SIZE(`cg`) = 0 THEN `br`
        WHEN `ch`=1 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ")
        WHEN `ch`=2 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ")
        WHEN `ch` >2 AND `ci` = size(`cf`) and `ci` - `ch` > 1 THEN
        CASE
        WHEN element_at(`cg`, 1) = element_at(`cg`, `ch`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`-1), "")
        ELSE ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`), " ")
        END
        ELSE ARRAY_JOIN(`cg`, " ")
        END AS `co`,
        CASE
        WHEN size(`cg`) = 0 THEN "tokenize_failed"
        WHEN `ch` = 0 THEN "en"
        WHEN `ch`=1 AND `ci` = size(`cf`) THEN "zh"
        WHEN `ch`=1 AND `ci` < size(`cf`) THEN "zh_en"
        WHEN `ch`=2 AND `ci` < size(`cf`) THEN "zh_en"
        WHEN `ch` >2 AND `ci` = size(`cf`) THEN "en_zh"
        ELSE "mixed"
        END AS `cp`
        FROM bw)
        SELECT * FROM cj;
    """

    // One row with only aa/ab/ac/ad populated; every other column,
    // including the array column `bk` fed to ARRAY_MAP, is NULL.
    sql """
        insert into mock_table(aa, ab,ac,ad) values('1','2','3','4');
    """


    // The query against the view is currently disabled (marked FIXME by the
    // original author; the reason is not recorded in this file).
    /*
    FIXME
    qt_sql """
    SELECT * FROM mock_view LIMIT 530000,1000;
    """
    */

}
Loading