Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ Status RowGroupReader::init(
const string& predicate_col_name = predicate_col_names[i];
int slot_id = predicate_col_slot_ids[i];
auto field = const_cast<FieldSchema*>(schema.get_column(predicate_col_name));
if (_can_filter_by_dict(slot_id,
if (!_lazy_read_ctx.has_complex_type &&
_can_filter_by_dict(slot_id,
_row_group_meta.columns[field->physical_column_index].meta_data)) {
_dict_filter_cols.emplace_back(std::make_pair(predicate_col_name, slot_id));
} else {
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/exec/format/parquet/vparquet_group_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class RowGroupReader {
std::unordered_map<std::string, VExprContextSPtr> predicate_missing_columns;
// lazy read missing columns or all missing columns
std::unordered_map<std::string, VExprContextSPtr> missing_columns;
// Page-index filtering, lazy read and dict filtering should be turned off when a complex-type column is read
bool has_complex_type = false;
};

/**
Expand Down
6 changes: 3 additions & 3 deletions be/src/vec/exec/format/parquet/vparquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ Status ParquetReader::set_fill_columns(
_lazy_read_ctx.all_read_columns.emplace_back(read_col);
PrimitiveType column_type = schema.get_column(read_col)->type.type;
if (column_type == TYPE_ARRAY || column_type == TYPE_MAP || column_type == TYPE_STRUCT) {
_has_complex_type = true;
_lazy_read_ctx.has_complex_type = true;
}
if (predicate_columns.size() > 0) {
auto iter = predicate_columns.find(read_col);
Expand Down Expand Up @@ -450,7 +450,7 @@ Status ParquetReader::set_fill_columns(
}
}

if (!_has_complex_type && _enable_lazy_mat &&
if (!_lazy_read_ctx.has_complex_type && _enable_lazy_mat &&
_lazy_read_ctx.predicate_columns.first.size() > 0 &&
_lazy_read_ctx.lazy_read_columns.size() > 0) {
_lazy_read_ctx.can_lazy_read = true;
Expand Down Expand Up @@ -736,7 +736,7 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
_statistics.read_rows += row_group.num_rows;
};

if (_has_complex_type || _lazy_read_ctx.conjuncts.empty() ||
if (_lazy_read_ctx.has_complex_type || _lazy_read_ctx.conjuncts.empty() ||
_colname_to_value_range == nullptr || _colname_to_value_range->empty()) {
read_whole_row_group();
return Status::OK();
Expand Down
2 changes: 0 additions & 2 deletions be/src/vec/exec/format/parquet/vparquet_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,6 @@ class ParquetReader : public GenericReader {
RowRange _whole_range = RowRange(0, 0);
const std::vector<int64_t>* _delete_rows = nullptr;
int64_t _delete_rows_index = 0;
// should turn off filtering by page index and lazy read if having complex type
bool _has_complex_type = false;

// Used for column lazy read.
RowGroupReader::LazyReadContext _lazy_read_ctx;
Expand Down
34 changes: 29 additions & 5 deletions regression-test/data/external_table_p2/hive/test_complex_types.out

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,22 @@ suite("test_complex_types", "p2,external,hive,external_remote,external_remote_hi

qt_array_last """select max(array_last(i -> i > 0, capacity)) from byd where array_last(i -> i > 0, capacity) < 0.99"""

qt_null_struct_element_orc """select count(struct_element(favor, 'tip')) from byd where id % 13 = 0"""

qt_map_key_select_orc """select id, singles["p0X72J-mkMe40O-vOa-opfI"] as map_key from byd where singles["p0X72J-mkMe40O-vOa-opfI"] is not null"""

qt_map_keys_orc """select map_keys(singles) from byd where id = 1077"""

qt_map_values_orc """select map_values(singles) from byd where id = 1433"""

qt_map_contains_key_orc """select * from byd where map_contains_key(singles, 'B0mXFX-QvgUgo7-Dih-6rDu') = 1"""

qt_array_max_orc """select count(array_max(capacity)) from byd where array_max(capacity) > 0.99"""

qt_array_filter_orc """select count(array_size(array_filter(i -> (i > 0.99), capacity))) from byd where array_size(array_filter(i -> (i > 0.99), capacity))"""

qt_array_last_orc """select max(array_last(i -> i > 0, capacity)) from byd where array_last(i -> i > 0, capacity) < 0.99"""

qt_offsets_check """select * from complex_offsets_check order by id"""

qt_map_with_nullable_key """select * from parquet_all_types limit 1"""
Expand Down