Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,10 @@ std::tuple<bool, orc::Literal, orc::PredicateDataType> OrcReader::_make_orc_lite
// only get the predicate_type
return std::make_tuple(true, orc::Literal(true), predicate_type);
}
// this only happens when the literals of in_predicate contains null value, like in (1, null)
if (literal->get_column_ptr()->is_null_at(0)) {
return std::make_tuple(false, orc::Literal(false), predicate_type);
}
auto literal_data = literal->get_column_ptr()->get_data_at(0);
auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()];
auto slot_type = slot->type();
Expand Down Expand Up @@ -666,12 +670,13 @@ bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) {
return false;
}

bool at_least_one_child_can_push_down = false;
for (size_t i = 1; i < expr->children().size(); ++i) {
if (!_check_literal_can_push_down(expr, i)) {
return false;
if (_check_literal_can_push_down(expr, i)) {
at_least_one_child_can_push_down = true;
}
}
return true;
return at_least_one_child_can_push_down;
}

// check if the expr can be pushed down to orc reader
Expand Down Expand Up @@ -774,12 +779,17 @@ void OrcReader::_build_filter_in(const VExprSPtr& expr,
for (size_t i = 1; i < expr->children().size(); ++i) {
DCHECK(expr->children()[i]->is_literal());
const auto* literal = static_cast<const VLiteral*>(expr->children()[i].get());
DCHECK(_vliteral_to_orc_literal.contains(literal));
auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
literals.emplace_back(orc_literal);
if (_vliteral_to_orc_literal.contains(literal)) {
auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
literals.emplace_back(orc_literal);
}
}
DCHECK(!literals.empty());
builder->in(slot_ref->expr_name(), predicate_type, literals);
if (literals.size() == 1) {
builder->equals(slot_ref->expr_name(), predicate_type, literals[0]);
} else {
builder->in(slot_ref->expr_name(), predicate_type, literals);
}
}

void OrcReader::_build_is_null(const VExprSPtr& expr,
Expand Down
3 changes: 3 additions & 0 deletions be/test/vec/exec/orc_reader_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ TEST_F(OrcReaderTest, test_build_search_argument) {
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(decimalv3(12,2), decimalv3(12,2))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":30},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":3},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_totalprice"}},{"1":{"i32":10},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"19":{"rec":{"1":{"str":"173665.47"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":1},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_custkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":36901}}},"20":{"i32":-1},"29":{"tf":0}}]}})|",
// select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 1 + 1;
R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}}]}})|",
// select count(o_orderkey) from tpch1_orc.orders where o_orderkey in (null, 25);
R"|({"1":{"lst":["rec",4,{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":3},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":15},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":25}}},"20":{"i32":-1},"29":{"tf":0}}]}})|",
// SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT';
R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":9}}}}]},"3":{"i64":9}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"like(text, varchar(9))"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":8},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_comment"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"%delayed%"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(text, varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":23},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-URGENT"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|",
// select count(o_orderkey) from tpch1_orc.orders where o_orderkey between 1 and 100 or random() > 0.5;
Expand Down Expand Up @@ -143,6 +145,7 @@ TEST_F(OrcReaderTest, test_build_search_argument) {
"leaf-0 = (o_orderkey < 2), leaf-1 = (o_custkey < 36901), expr = (or leaf-0 (not "
"leaf-1))",
"leaf-0 = (o_orderkey < 2), expr = leaf-0",
"leaf-0 = (o_orderkey = 25), expr = leaf-0",
CANNOT_PUSH_DOWN_ERROR,
CANNOT_PUSH_DOWN_ERROR,
CANNOT_PUSH_DOWN_ERROR,
Expand Down
102 changes: 102 additions & 0 deletions regression-test/data/external_table_p0/hive/test_hive_orc.out
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,57 @@ tablets tinyint_col 179 182 182 187 183 181 177 183 177 187 183 202 202 186 528
-- !predicate_pushdown8 --
1533

-- !predicate_pushdown_in1 --
0

-- !predicate_pushdown_in2 --
1634

-- !predicate_pushdown_in3 --
1597

-- !orc_all_types_tinyint_col_is_null --
366

-- !orc_all_types_smallint_col_is_null --
359

-- !orc_all_types_int_col_is_null --
361

-- !orc_all_types_bigint_col_is_null --
377

-- !orc_all_types_boolean_col_is_null --
369

-- !orc_all_types_float_col_is_null --
340

-- !orc_all_types_double_col_is_null --
395

-- !orc_all_types_string_col_is_null --
347

-- !orc_all_types_binary_col_is_null --
362

-- !orc_all_types_timestamp_col_is_null --
339

-- !orc_all_types_decimal_col_is_null --
367

-- !orc_all_types_char_col_is_null --
0

-- !orc_all_types_varchar_col_is_null --
0

-- !orc_all_types_date_col_is_null --
378

-- !select_top50 --
4 55 999742610 400899305488827731 false 6.5976813E8 7.8723304616937395E17 \N base tennis pit vertical friday 2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07 [7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col
2 49 999613702 105493714032727452 \N 6.3322381E8 9.8642324410240179E17 Unveil bright recruit participate. Suspect impression camera mathematical revelation. Fault live2 elbow debt west hydrogen current. how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col 2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", "kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col
Expand Down Expand Up @@ -321,6 +372,57 @@ tablets tinyint_col 179 182 182 187 183 181 177 183 177 187 183 202 202 186 528
-- !predicate_pushdown8 --
1533

-- !predicate_pushdown_in1 --
0

-- !predicate_pushdown_in2 --
1634

-- !predicate_pushdown_in3 --
1597

-- !orc_all_types_tinyint_col_is_null --
366

-- !orc_all_types_smallint_col_is_null --
359

-- !orc_all_types_int_col_is_null --
361

-- !orc_all_types_bigint_col_is_null --
377

-- !orc_all_types_boolean_col_is_null --
369

-- !orc_all_types_float_col_is_null --
340

-- !orc_all_types_double_col_is_null --
395

-- !orc_all_types_string_col_is_null --
347

-- !orc_all_types_binary_col_is_null --
362

-- !orc_all_types_timestamp_col_is_null --
339

-- !orc_all_types_decimal_col_is_null --
367

-- !orc_all_types_char_col_is_null --
0

-- !orc_all_types_varchar_col_is_null --
0

-- !orc_all_types_date_col_is_null --
378

-- !select_top50 --
4 55 999742610 400899305488827731 false 6.5976813E8 7.8723304616937395E17 \N base tennis pit vertical friday 2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07 [7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col
2 49 999613702 105493714032727452 \N 6.3322381E8 9.8642324410240179E17 Unveil bright recruit participate. Suspect impression camera mathematical revelation. Fault live2 elbow debt west hydrogen current. how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col 2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", "kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,28 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock
qt_predicate_pushdown6 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderstatus <> 'F' AND o_custkey < 54321; """
qt_predicate_pushdown7 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; """
qt_predicate_pushdown8 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR o_clerk = 'Clerk#000000470'; """

qt_predicate_pushdown_in1 """ select count(*) from orc_all_types where boolean_col in (null); """
qt_predicate_pushdown_in2 """ select count(*) from orc_all_types where boolean_col in (null, 0); """
qt_predicate_pushdown_in3 """ select count(*) from orc_all_types where boolean_col in (null, 1); """

def test_col_is_null = { String col ->
"qt_orc_all_types_${col}_is_null" """ select count(*) from orc_all_types where ${col} is null; """
}
test_col_is_null("tinyint_col")
test_col_is_null("smallint_col")
test_col_is_null("int_col")
test_col_is_null("bigint_col")
test_col_is_null("boolean_col")
test_col_is_null("float_col")
test_col_is_null("double_col")
test_col_is_null("string_col")
test_col_is_null("binary_col")
test_col_is_null("timestamp_col")
test_col_is_null("decimal_col")
test_col_is_null("char_col")
test_col_is_null("varchar_col")
test_col_is_null("date_col")
}

String enabled = context.config.otherConfigs.get("enableHiveTest")
Expand Down
Loading