diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index f9ff83b1bb3312..554ecc13afdbac 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -63,6 +63,13 @@ set(EXEC_FILES csv_scan_node.cpp csv_scanner.cpp es_scan_node.cpp + es_http_scan_node.cpp + es_http_scanner.cpp + es/es_predicate.cpp + es/es_scan_reader.cpp + es/es_scroll_query.cpp + es/es_scroll_parser.cpp + es/es_query_builder.cpp spill_sort_node.cc union_node.cpp union_node_ir.cpp @@ -101,6 +108,8 @@ add_library(Exec STATIC ) # TODO: why is this test disabled? +#ADD_BE_TEST(es/es_query_builder_test) +#ADD_BE_TEST(es/es_scan_reader_test) #ADD_BE_TEST(new_olap_scan_node_test) #ADD_BE_TEST(pre_aggregation_node_test) #ADD_BE_TEST(hash_table_test) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp new file mode 100644 index 00000000000000..a7f0bc1f453929 --- /dev/null +++ b/be/src/exec/es/es_predicate.cpp @@ -0,0 +1,414 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exec/es/es_predicate.h" + +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "common/logging.h" +#include "exec/es/es_query_builder.h" +#include "exprs/expr.h" +#include "exprs/expr_context.h" +#include "exprs/in_predicate.h" + +#include "gen_cpp/PlanNodes_types.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "runtime/client_cache.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/datetime_value.h" +#include "runtime/large_int_value.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" + +#include "service/backend_options.h" +#include "util/debug_util.h" +#include "util/runtime_profile.h" + +namespace doris { + +using namespace std; + +std::string ExtLiteral::value_to_string() { + std::stringstream ss; + switch (_type) { + case TYPE_TINYINT: + ss << std::to_string(get_byte()); + break; + case TYPE_SMALLINT: + ss << std::to_string(get_short()); + break; + case TYPE_INT: + ss << std::to_string(get_int()); + break; + case TYPE_BIGINT: + ss << std::to_string(get_long()); + break; + case TYPE_FLOAT: + ss << std::to_string(get_float()); + break; + case TYPE_DOUBLE: + ss << std::to_string(get_double()); + break; + case TYPE_CHAR: + case TYPE_VARCHAR: + ss << get_string(); + break; + case TYPE_DATE: + case TYPE_DATETIME: + ss << get_date_string(); + break; + case TYPE_BOOLEAN: + ss << std::to_string(get_bool()); + break; + case TYPE_DECIMAL: + ss << get_decimal_string(); + break; + case TYPE_DECIMALV2: + ss << get_decimalv2_string(); + break; + case TYPE_LARGEINT: + ss << get_largeint_string(); + break; + default: + DCHECK(false); + break; + } + return ss.str(); +} + +ExtLiteral::~ExtLiteral(){ +} + +int8_t ExtLiteral::get_byte() { + DCHECK(_type == TYPE_TINYINT); + return *(reinterpret_cast(_value)); +} + +int16_t ExtLiteral::get_short() { + DCHECK(_type == TYPE_SMALLINT); + return *(reinterpret_cast(_value)); +} + +int32_t ExtLiteral::get_int() 
{ + DCHECK(_type == TYPE_INT); + return *(reinterpret_cast(_value)); +} + +int64_t ExtLiteral::get_long() { + DCHECK(_type == TYPE_BIGINT); + return *(reinterpret_cast(_value)); +} + +float ExtLiteral::get_float() { + DCHECK(_type == TYPE_FLOAT); + return *(reinterpret_cast(_value)); +} + +double ExtLiteral::get_double() { + DCHECK(_type == TYPE_DOUBLE); + return *(reinterpret_cast(_value)); +} + +std::string ExtLiteral::get_string() { + DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR); + return (reinterpret_cast(_value))->to_string(); +} + +std::string ExtLiteral::get_date_string() { + DCHECK(_type == TYPE_DATE || _type == TYPE_DATETIME); + DateTimeValue date_value = *reinterpret_cast(_value); + if (_type == TYPE_DATE) { + date_value.cast_to_date(); + } + + char str[MAX_DTVALUE_STR_LEN]; + date_value.to_string(str); + return std::string(str, strlen(str)); +} + +bool ExtLiteral::get_bool() { + DCHECK(_type == TYPE_BOOLEAN); + return *(reinterpret_cast(_value)); +} + +std::string ExtLiteral::get_decimal_string() { + DCHECK(_type == TYPE_DECIMAL); + return reinterpret_cast(_value)->to_string(); +} + +std::string ExtLiteral::get_decimalv2_string() { + DCHECK(_type == TYPE_DECIMALV2); + return reinterpret_cast(_value)->to_string(); +} + +std::string ExtLiteral::get_largeint_string() { + DCHECK(_type == TYPE_LARGEINT); + return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); +} + +EsPredicate::EsPredicate(ExprContext* context, + const TupleDescriptor* tuple_desc) : + _context(context), + _disjuncts_num(0), + _tuple_desc(tuple_desc), + _es_query_status(Status::OK) { +} + +EsPredicate::~EsPredicate() { + for(int i=0; i < _disjuncts.size(); i++) { + delete _disjuncts[i]; + } + _disjuncts.clear(); +} + +Status EsPredicate::build_disjuncts_list() { + return build_disjuncts_list(_context->root()); +} + +// make sure to build by build_disjuncts_list +const vector& EsPredicate::get_predicate_list(){ + return _disjuncts; +} + +static bool ignore_cast(const 
SlotDescriptor* slot, const Expr* expr) { + if (slot->type().is_date_type() && expr->type().is_date_type()) { + return true; + } + if (slot->type().is_string_type() && expr->type().is_string_type()) { + return true; + } + return false; +} + +static bool is_literal_node(const Expr* expr) { + switch (expr->node_type()) { + case TExprNodeType::BOOL_LITERAL: + case TExprNodeType::INT_LITERAL: + case TExprNodeType::LARGE_INT_LITERAL: + case TExprNodeType::FLOAT_LITERAL: + case TExprNodeType::DECIMAL_LITERAL: + case TExprNodeType::STRING_LITERAL: + case TExprNodeType::DATE_LITERAL: + return true; + default: + return false; + } +} + +Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { + if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { + if (conjunct->children().size() != 2) { + return Status("build disjuncts failed: number of childs is not 2"); + } + + SlotRef* slot_ref = nullptr; + TExprOpcode::type op; + Expr* expr = nullptr; + if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { + expr = conjunct->get_child(1); + slot_ref = (SlotRef*)(conjunct->get_child(0)); + op = conjunct->op(); + } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { + expr = conjunct->get_child(0); + slot_ref = (SlotRef*)(conjunct->get_child(1)); + op = conjunct->op(); + } else { + return Status("build disjuncts failed: no SLOT_REF child"); + } + + const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); + if (slot_desc == nullptr) { + return Status("build disjuncts failed: slot_desc is null"); + } + + if (!is_literal_node(expr)) { + return Status("build disjuncts failed: expr is not literal type"); + } + + ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); + ExtPredicate* predicate = new ExtBinaryPredicate( + TExprNodeType::BINARY_PRED, + slot_desc->col_name(), + slot_desc->type(), + op, + literal); + + _disjuncts.push_back(predicate); + return Status::OK; + } + + if (is_match_func(conjunct)) { + Expr* expr = 
conjunct->get_child(1); + ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); + vector query_conditions; + query_conditions.emplace_back(literal); + vector cols; //TODO + ExtPredicate* predicate = new ExtFunction( + TExprNodeType::FUNCTION_CALL, + conjunct->fn().name.function_name, + cols, + query_conditions); + if (_es_query_status.ok()) { + _es_query_status + = BooleanQueryBuilder::check_es_query(*(ExtFunction *)predicate); + if (!_es_query_status.ok()) { + delete predicate; + return _es_query_status; + } + } + _disjuncts.push_back(predicate); + + return Status::OK; + } + + if (TExprNodeType::FUNCTION_CALL == conjunct->node_type()) { + std::string fname = conjunct->fn().name.function_name; + if (fname != "like") { + return Status("build disjuncts failed: function name is not like"); + } + + SlotRef* slot_ref = nullptr; + Expr* expr = nullptr; + if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { + expr = conjunct->get_child(1); + slot_ref = (SlotRef*)(conjunct->get_child(0)); + } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { + expr = conjunct->get_child(0); + slot_ref = (SlotRef*)(conjunct->get_child(1)); + } else { + return Status("build disjuncts failed: no SLOT_REF child"); + } + + const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); + if (slot_desc == nullptr) { + return Status("build disjuncts failed: slot_desc is null"); + } + + PrimitiveType type = expr->type().type; + if (type != TYPE_VARCHAR && type != TYPE_CHAR) { + return Status("build disjuncts failed: like value is not a string"); + } + + ExtLiteral literal(type, _context->get_value(expr, NULL)); + ExtPredicate* predicate = new ExtLikePredicate( + TExprNodeType::LIKE_PRED, + slot_desc->col_name(), + slot_desc->type(), + literal); + + _disjuncts.push_back(predicate); + return Status::OK; + } + + if (TExprNodeType::IN_PRED == conjunct->node_type()) { + // the op code maybe FILTER_NEW_IN, it means there is function in list + // 
like col_a in (abs(1)) + if (TExprOpcode::FILTER_IN != conjunct->op() + && TExprOpcode::FILTER_NOT_IN != conjunct->op()) { + return Status("build disjuncts failed: " + "opcode in IN_PRED is neither FILTER_IN nor FILTER_NOT_IN"); + } + + vector in_pred_values; + const InPredicate* pred = dynamic_cast(conjunct); + const Expr* expr = Expr::expr_without_cast(pred->get_child(0)); + if (expr->node_type() != TExprNodeType::SLOT_REF) { + return Status("build disjuncts failed: node type is not slot ref"); + } + + const SlotDescriptor* slot_desc = get_slot_desc((const SlotRef *)expr); + if (slot_desc == nullptr) { + return Status("build disjuncts failed: slot_desc is null"); + } + + if (pred->get_child(0)->type().type != slot_desc->type().type) { + if (!ignore_cast(slot_desc, pred->get_child(0))) { + return Status("build disjuncts failed"); + } + } + + HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin(); + while (iter->has_next()) { + if (nullptr == iter->get_value()) { + return Status("build disjuncts failed: hybird set has a null value"); + } + + ExtLiteral literal(slot_desc->type().type, const_cast(iter->get_value())); + in_pred_values.emplace_back(literal); + iter->next(); + } + + ExtPredicate* predicate = new ExtInPredicate( + TExprNodeType::IN_PRED, + pred->is_not_in(), + slot_desc->col_name(), + slot_desc->type(), + in_pred_values); + _disjuncts.push_back(predicate); + + return Status::OK; + } + + if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) { + if (TExprOpcode::COMPOUND_OR != conjunct->op()) { + return Status("build disjuncts failed: op is not COMPOUND_OR"); + } + Status status = build_disjuncts_list(conjunct->get_child(0)); + if (!status.ok()) { + return status; + } + status = build_disjuncts_list(conjunct->get_child(1)); + if (!status.ok()) { + return status; + } + + return Status::OK; + } + + // if go to here, report error + std::stringstream ss; + ss << "build disjuncts failed: node type " << conjunct->node_type() << " is not supported"; 
+ return Status(ss.str()); +} + +bool EsPredicate::is_match_func(const Expr* conjunct) { + if (TExprNodeType::FUNCTION_CALL == conjunct->node_type() + && conjunct->fn().name.function_name == "esquery") { + return true; + } + return false; +} + +const SlotDescriptor* EsPredicate::get_slot_desc(const SlotRef* slotRef) { + const SlotDescriptor* slot_desc = nullptr; + for (SlotDescriptor* slot : _tuple_desc->slots()) { + if (slot->id() == slotRef->slot_id()) { + slot_desc = slot; + break; + } + } + return slot_desc; +} + +} diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h new file mode 100644 index 00000000000000..6b24754e6710a5 --- /dev/null +++ b/be/src/exec/es/es_predicate.h @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef BE_EXEC_ES_PREDICATE_H +#define BE_EXEC_ES_PREDICATE_H + +#include +#include + +#include "exprs/slot_ref.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/Opcodes_types.h" +#include "gen_cpp/PaloExternalDataSourceService_types.h" +#include "runtime/descriptors.h" +#include "runtime/tuple.h" +#include "runtime/primitive_type.h" + +namespace doris { + +class Status; +class ExprContext; +class ExtBinaryPredicate; + +class ExtLiteral { +public: + ExtLiteral(PrimitiveType type, void *value) : + _type(type), + _value(value) { + _str = value_to_string(); + } + ~ExtLiteral(); + const std::string& to_string() const { + return _str; + } + +private: + int8_t get_byte(); + int16_t get_short(); + int32_t get_int(); + int64_t get_long(); + float get_float(); + double get_double(); + std::string get_string(); + std::string get_date_string(); + bool get_bool(); + std::string get_decimal_string(); + std::string get_decimalv2_string(); + std::string get_largeint_string(); + + std::string value_to_string(); + + PrimitiveType _type; + void* _value; + std::string _str; +}; + +struct ExtColumnDesc { + ExtColumnDesc(const std::string& name, const TypeDescriptor& type) : + name(name), + type(type) { + } + + std::string name; + TypeDescriptor type; +}; + +struct ExtPredicate { + ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { + } + + TExprNodeType::type node_type; +}; + +struct ExtBinaryPredicate : public ExtPredicate { + ExtBinaryPredicate( + TExprNodeType::type node_type, + const std::string& name, + const TypeDescriptor& type, + TExprOpcode::type op, + const ExtLiteral& value) : + ExtPredicate(node_type), + col(name, type), + op(op), + value(value) { + } + + ExtColumnDesc col; + TExprOpcode::type op; + ExtLiteral value; +}; + +struct ExtInPredicate : public ExtPredicate { + ExtInPredicate( + TExprNodeType::type node_type, + bool is_not_in, + const std::string& name, + const TypeDescriptor& type, + const std::vector& values) : + 
ExtPredicate(node_type), + is_not_in(is_not_in), + col(name, type), + values(values) { + } + + bool is_not_in; + ExtColumnDesc col; + std::vector values; +}; + +struct ExtLikePredicate : public ExtPredicate { + ExtLikePredicate( + TExprNodeType::type node_type, + const std::string& name, + const TypeDescriptor& type, + ExtLiteral value) : + ExtPredicate(node_type), + col(name, type), + value(value) { + } + + ExtColumnDesc col; + ExtLiteral value; +}; + +struct ExtIsNullPredicate : public ExtPredicate { + ExtIsNullPredicate( + TExprNodeType::type node_type, + const std::string& name, + const TypeDescriptor& type, + bool is_not_null, + ExtLiteral value) : + ExtPredicate(node_type), + col(name, type), + is_not_null(is_not_null) { + } + + ExtColumnDesc col; + bool is_not_null; +}; + +struct ExtFunction : public ExtPredicate { + ExtFunction(TExprNodeType::type node_type, + const std::string& func_name, + std::vector cols, + std::vector values) : + ExtPredicate(node_type), + func_name(func_name), + cols(cols), + values(values) { + } + + const std::string& func_name; + std::vector cols; + const std::vector values; +}; + +class EsPredicate { +public: + EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc); + ~EsPredicate(); + const std::vector& get_predicate_list(); + Status build_disjuncts_list(); + // public for tests + EsPredicate(const std::vector& all_predicates) { + _disjuncts = all_predicates; + }; + + Status get_es_query_status() { + return _es_query_status; + } + +private: + Status build_disjuncts_list(const Expr* conjunct); + bool is_match_func(const Expr* conjunct); + const SlotDescriptor* get_slot_desc(const SlotRef* slotRef); + + ExprContext* _context; + int _disjuncts_num; + const TupleDescriptor* _tuple_desc; + std::vector _disjuncts; + Status _es_query_status; +}; + +} + +#endif diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp new file mode 100644 index 00000000000000..8fc260c6715601 --- /dev/null +++ 
b/be/src/exec/es/es_query_builder.cpp @@ -0,0 +1,375 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/es/es_query_builder.h" + +#include +#include "rapidjson/rapidjson.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" +#include "common/logging.h" + +namespace doris { + +ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str(es_query_str) { + +} +ESQueryBuilder::ESQueryBuilder(const ExtFunction& es_query) { + auto first = es_query.values.front(); + _es_query_str = first.to_string(); +} + +// note: call this function must invoke BooleanQueryBuilder::check_es_query to check validation +void ESQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document scratch_document; + scratch_document.Parse(_es_query_str.c_str()); + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value query_key; + rapidjson::Value query_value; + //{ "term": { "dv": "2" } } + rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); + // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue + query_key.CopyFrom(first->name, allocator); + // if we found one key, 
then end loop as QueryDSL only support one `query` root + query_value.CopyFrom(first->value, allocator); + // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics + query->AddMember(query_key, query_value, allocator); +} + +TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& term) : _field(field), _term(term) { + +} + +TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) { + _field = binary_predicate.col.name; + _term = binary_predicate.value.to_string(); +} + +void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value term_node(rapidjson::kObjectType); + term_node.SetObject(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value term_value(_term.c_str(), allocator); + term_node.AddMember(field_value, term_value, allocator); + query->AddMember("term", term_node, allocator); +} + +RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) { + _field = range_predicate.col.name; + _value = range_predicate.value.to_string(); + _op = range_predicate.op; +} + +void RangeQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value value(_value.c_str(), allocator); + rapidjson::Value op_node(rapidjson::kObjectType); + op_node.SetObject(); + switch (_op) { + case TExprOpcode::LT: + op_node.AddMember("lt", value, allocator); + break; + case TExprOpcode::LE: + op_node.AddMember("le", value, allocator); + break; + case TExprOpcode::GT: + op_node.AddMember("gt", value, allocator); + break; + case TExprOpcode::GE: + op_node.AddMember("ge", value, allocator); + break; + default: + break; + } + rapidjson::Value field_node(rapidjson::kObjectType); + 
field_node.SetObject(); + field_node.AddMember(field_value, op_node, allocator); + query->AddMember("range", field_node, allocator); +} + +void WildCardQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value term_node(rapidjson::kObjectType); + term_node.SetObject(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value term_value(_like_value.c_str(), allocator); + term_node.AddMember(field_value, term_value, allocator); + query->AddMember("wildcard", term_node, allocator); +} +WildCardQueryBuilder::WildCardQueryBuilder(const ExtLikePredicate& like_predicate) { + _like_value = like_predicate.value.to_string(); + std::replace(_like_value.begin(), _like_value.end(), '_', '?'); + std::replace(_like_value.begin(), _like_value.end(), '%', '*'); + _field = like_predicate.col.name; +} + +void TermsInSetQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value terms_node(rapidjson::kObjectType); + rapidjson::Value values_node(rapidjson::kArrayType); + for (auto& value : _values) { + rapidjson::Value value_value(value.c_str(), allocator); + values_node.PushBack(value_value, allocator); + } + rapidjson::Value field_value(_field.c_str(), allocator); + terms_node.AddMember(field_value, values_node, allocator); + query->AddMember("terms", terms_node, allocator); +} + +TermsInSetQueryBuilder::TermsInSetQueryBuilder(const ExtInPredicate& in_predicate) { + _field = in_predicate.col.name; + for (auto& value : in_predicate.values) { + _values.push_back(value.to_string()); + } +} + +void MatchAllQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value match_all_node(rapidjson::kObjectType); + 
match_all_node.SetObject(); + query->AddMember("match_all", match_all_node, allocator); +} + +BooleanQueryBuilder::BooleanQueryBuilder() { + +} +BooleanQueryBuilder::~BooleanQueryBuilder() { + for (auto clause : _must_clauses) { + delete clause; + clause = nullptr; + } + for (auto clause : _must_not_clauses) { + delete clause; + clause = nullptr; + } + for (auto clause : _filter_clauses) { + delete clause; + clause = nullptr; + } + for (auto clause : _should_clauses) { + delete clause; + clause = nullptr; + } +} + +BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predicates) { + for (auto predicate : predicates) { + switch (predicate->node_type) { + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + switch (binary_predicate->op) { + case TExprOpcode::EQ: { + TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); + _should_clauses.push_back(term_query); + break; + } + case TExprOpcode::NE:{ // process NE + TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(term_query); + _should_clauses.push_back(bool_query); + break; + } + case TExprOpcode::LT: + case TExprOpcode::LE: + case TExprOpcode::GT: + case TExprOpcode::GE: { + RangeQueryBuilder* range_query = new RangeQueryBuilder(*binary_predicate); + _should_clauses.push_back(range_query); + break; + } + default: + break; + } + break; + } + case TExprNodeType::IN_PRED: { + ExtInPredicate* in_predicate = (ExtInPredicate *)predicate; + bool is_not_in = in_predicate->is_not_in; + if (is_not_in) { // process not in predicate + TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(*in_predicate); + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(terms_predicate); + _should_clauses.push_back(bool_query); + } else { // process in predicate + TermsInSetQueryBuilder* terms_query= new 
TermsInSetQueryBuilder(*in_predicate); + _should_clauses.push_back(terms_query); + } + break; + } + case TExprNodeType::LIKE_PRED: { + ExtLikePredicate* like_predicate = (ExtLikePredicate *)predicate; + WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(*like_predicate); + _should_clauses.push_back(wild_card_query); + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction *)predicate; + if ("esquery" == function_predicate->func_name ) { + ESQueryBuilder* es_query = new ESQueryBuilder(*function_predicate); + _should_clauses.push_back(es_query); + }; + break; + } + default: + break; + } + } +} + +void BooleanQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType &allocator = document->GetAllocator(); + rapidjson::Value root_node_object(rapidjson::kObjectType); + if (_filter_clauses.size() > 0) { + rapidjson::Value filter_node(rapidjson::kArrayType); + for (auto must_clause : _filter_clauses) { + rapidjson::Value must_clause_query(rapidjson::kObjectType); + must_clause_query.SetObject(); + must_clause->to_json(document, &must_clause_query); + filter_node.PushBack(must_clause_query, allocator); + } + root_node_object.AddMember("filter", filter_node, allocator); + } + + if (_should_clauses.size() > 0) { + rapidjson::Value should_node(rapidjson::kArrayType); + for (auto should_clause : _should_clauses) { + rapidjson::Value should_clause_query(rapidjson::kObjectType); + should_clause_query.SetObject(); + should_clause->to_json(document, &should_clause_query); + should_node.PushBack(should_clause_query, allocator); + } + root_node_object.AddMember("should", should_node, allocator); + } + + if (_must_not_clauses.size() > 0) { + rapidjson::Value must_not_node(rapidjson::kArrayType); + for (auto must_not_clause : _must_not_clauses) { + rapidjson::Value must_not_clause_query(rapidjson::kObjectType); + must_not_clause_query.SetObject(); + 
must_not_clause->to_json(document, &must_not_clause_query); + must_not_node.PushBack(must_not_clause_query, allocator); + } + root_node_object.AddMember("must_not", must_not_node, allocator); + } + query->AddMember("bool", root_node_object, allocator); +} + +void BooleanQueryBuilder::should(QueryBuilder* filter) { + _should_clauses.push_back(filter); +} +void BooleanQueryBuilder::filter(QueryBuilder* filter) { + _filter_clauses.push_back(filter); +} +void BooleanQueryBuilder::must(QueryBuilder* filter) { + _filter_clauses.push_back(filter); +} +void BooleanQueryBuilder::must_not(QueryBuilder* filter) { + _must_not_clauses.push_back(filter); +} + +Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { + const std::string& esquery_str = extFunction.values.front().to_string(); + rapidjson::Document scratch_document; + scratch_document.Parse(esquery_str.c_str()); + rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator(); + rapidjson::Value query_key; + // { "term": { "dv": "2" } } + if (!scratch_document.HasParseError()) { + if (!scratch_document.IsObject()) { + return Status(TStatusCode::ES_REQUEST_ERROR, "esquery must be a object"); + } + rapidjson::SizeType object_count = scratch_document.MemberCount(); + if (object_count != 1) { + return Status(TStatusCode::ES_REQUEST_ERROR, "esquery must only one root"); + } + // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue + rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); + query_key.CopyFrom(first->name, allocator); + if (!query_key.IsString()) { + // if we found one key, then end loop as QueryDSL only support one `query` root + return Status(TStatusCode::ES_REQUEST_ERROR, "esquery root key must be string"); + } + } else { + return Status(TStatusCode::ES_REQUEST_ERROR, "malformed esquery json"); + } + return Status::OK; +} + +void BooleanQueryBuilder::validate(const std::vector& espredicates, std::vector* result) { + int 
conjunct_size = espredicates.size(); + result->reserve(conjunct_size); + for (auto espredicate : espredicates) { + bool flag = true; + for (auto predicate : espredicate->get_predicate_list()) { + switch (predicate->node_type) { + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + TExprOpcode::type op = binary_predicate->op; + if (op != TExprOpcode::EQ && op != TExprOpcode::NE + && op != TExprOpcode::LT && op != TExprOpcode::LE + && op != TExprOpcode::GT && op != TExprOpcode::GE) { + flag = false; + } + break; + } + case TExprNodeType::LIKE_PRED: + case TExprNodeType::IN_PRED: { + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction *)predicate; + if ("esquery" == function_predicate->func_name ) { + Status st = check_es_query(*function_predicate); + if (!st.ok()) { + flag = false; + } + } else { + flag = false; + } + break; + } + default: { + flag = false; + break; + } + } + if (!flag) { + break; + } + } + result->push_back(flag); + } +} + +void BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document* root, rapidjson::Value* query) { + if (predicates.size() == 0) { + MatchAllQueryBuilder match_all_query; + match_all_query.to_json(root, query); + return; + } + root->SetObject(); + BooleanQueryBuilder bool_query; + for (auto es_predicate : predicates) { + vector or_predicates = es_predicate->get_predicate_list(); + BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); + bool_query.must(inner_bool_query); + } + bool_query.to_json(root, query);} +} diff --git a/be/src/exec/es/es_query_builder.h b/be/src/exec/es/es_query_builder.h new file mode 100644 index 00000000000000..e7c5e563356069 --- /dev/null +++ b/be/src/exec/es/es_query_builder.h @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "rapidjson/document.h" +#include "exec/es/es_predicate.h" +#include "common/status.h" + +namespace doris { + +class QueryBuilder { + +public: + virtual void to_json(rapidjson::Document* document, rapidjson::Value* query) = 0; + virtual ~QueryBuilder() { + }; +}; + +// process esquery(fieldA, json dsl) function +class ESQueryBuilder : public QueryBuilder { +public: + ESQueryBuilder(const std::string& es_query_str); + ESQueryBuilder(const ExtFunction& es_query); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; +private: + std::string _es_query_str; +}; + +// process field = value +class TermQueryBuilder : public QueryBuilder { + +public: + TermQueryBuilder(const std::string& field, const std::string& term); + TermQueryBuilder(const ExtBinaryPredicate& binary_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; + +private: + std::string _field; + std::string _term; +}; + +// process range predicate field >= value or field < value etc. 
+class RangeQueryBuilder : public QueryBuilder { + +public: + RangeQueryBuilder(const ExtBinaryPredicate& range_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; +private: + std::string _field; + std::string _value; + TExprOpcode::type _op; +}; + +// process in predicate : field in [value1, value2] +class TermsInSetQueryBuilder : public QueryBuilder { + +public: + TermsInSetQueryBuilder(const ExtInPredicate& in_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; +private: + std::string _field; + std::vector _values; +}; + +// process like predicate : field like "a%b%c_" +class WildCardQueryBuilder : public QueryBuilder { + +public: + WildCardQueryBuilder(const ExtLikePredicate& like_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; + +private: + std::string _like_value; + std::string _field; +}; + +// no predicates: all doccument match +class MatchAllQueryBuilder : public QueryBuilder { + +public: + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; +}; + +// proccess bool compound query, and play the role of a bridge for transferring predicates to es native query +class BooleanQueryBuilder : public QueryBuilder { + +public: + BooleanQueryBuilder(const std::vector& predicates); + BooleanQueryBuilder(); + ~BooleanQueryBuilder(); + // class method for transfer predicate to es query value, invoker should enclose this value with `query` + static void to_query(const std::vector& predicates, rapidjson::Document* root, rapidjson::Value* query); + // validate esquery syntax + static Status check_es_query(const ExtFunction& extFunction); + // decide which predicate can process + static void validate(const std::vector& espredicates, std::vector* result); + +private: + // add child query + void should(QueryBuilder* filter); + void filter(QueryBuilder* filter); + void must(QueryBuilder* filter); + void must_not(QueryBuilder* 
filter); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; + + std::vector _must_clauses; + std::vector _must_not_clauses; + std::vector _filter_clauses; + std::vector _should_clauses; +}; + +} diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp new file mode 100644 index 00000000000000..e2f17a0908d1a7 --- /dev/null +++ b/be/src/exec/es/es_scan_reader.cpp @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exec/es/es_scan_reader.h" + +#include +#include +#include + +#include "common/logging.h" +#include "common/status.h" +#include "exec/es/es_scroll_query.h" + +namespace doris { +const std::string REUQEST_SCROLL_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,_id,hits.hits._source.fields"; +const std::string REQUEST_SCROLL_PATH = "_scroll"; +const std::string REQUEST_PREFERENCE_PREFIX = "&preference=_shards:"; +const std::string REQUEST_SEARCH_SCROLL_PATH = "/_search/scroll"; +const std::string REQUEST_SEPARATOR = "/"; +const std::string REQUEST_SCROLL_TIME = "5m"; + +ESScanReader::ESScanReader(const std::string& target, const std::map& props) { + _target = target; + _index = props.at(KEY_INDEX); + _type = props.at(KEY_TYPE); + if (props.find(KEY_USER_NAME) != props.end()) { + _user_name = props.at(KEY_USER_NAME); + } + if (props.find(KEY_PASS_WORD) != props.end()){ + _passwd = props.at(KEY_PASS_WORD); + } + if (props.find(KEY_SHARD) != props.end()) { + _shards = props.at(KEY_SHARD); + } + if (props.find(KEY_QUERY) != props.end()) { + _query = props.at(KEY_QUERY); + } + std::string batch_size_str = props.at(KEY_BATCH_SIZE); + _batch_size = atoi(batch_size_str.c_str()); + _init_scroll_url = _target + REQUEST_SEPARATOR + _index + REQUEST_SEPARATOR + _type + "/_search?scroll=" + REQUEST_SCROLL_TIME + REQUEST_PREFERENCE_PREFIX + _shards + "&" + REUQEST_SCROLL_FILTER_PATH; + _next_scroll_url = _target + REQUEST_SEARCH_SCROLL_PATH + "?" 
+ REUQEST_SCROLL_FILTER_PATH; + _eos = false; +} + +ESScanReader::~ESScanReader() { +} + +Status ESScanReader::open() { + _is_first = true; + RETURN_IF_ERROR(_network_client.init(_init_scroll_url)); + _network_client.set_basic_auth(_user_name, _passwd); + _network_client.set_content_type("application/json"); + // phase open, we cached the first response for `get_next` phase + Status status = _network_client.execute_post_request(_query, &_cached_response); + if (!status.ok() || _network_client.get_http_status() != 200) { + std::stringstream ss; + ss << "Failed to connect to ES server, errmsg is: " << status.get_error_msg(); + LOG(WARNING) << ss.str(); + return Status(ss.str()); + } + VLOG(1) << "open _cached response: " << _cached_response; + return Status::OK; +} + +Status ESScanReader::get_next(bool* scan_eos, std::unique_ptr& scroll_parser) { + std::string response; + // if is first scroll request, should return the cached response + *scan_eos = true; + if (_eos) { + return Status::OK; + } + + if (_is_first) { + response = _cached_response; + _is_first = false; + } else { + RETURN_IF_ERROR(_network_client.init(_next_scroll_url)); + _network_client.set_basic_auth(_user_name, _passwd); + _network_client.set_content_type("application/json"); + _network_client.set_timeout_ms(5 * 1000); + RETURN_IF_ERROR(_network_client.execute_post_request( + ESScrollQueryBuilder::build_next_scroll_body(_scroll_id, REQUEST_SCROLL_TIME), &response)); + long status = _network_client.get_http_status(); + if (status == 404) { + LOG(WARNING) << "request scroll search failure 404[" + << ", response: " << (response.empty() ? "empty response" : response); + return Status("No search context found for " + _scroll_id); + } + if (status != 200) { + LOG(WARNING) << "request scroll search failure[" + << "http status" << status + << ", response: " << (response.empty() ? 
"empty response" : response); + if (status == 404) { + return Status("No search context found for " + _scroll_id); + } + return Status("request scroll search failure: " + (response.empty() ? "empty response" : response)); + } + } + + scroll_parser.reset(new ScrollParser()); + Status status = scroll_parser->parse(response); + if (!status.ok()){ + _eos = true; + LOG(WARNING) << status.get_error_msg(); + return status; + } + + _scroll_id = scroll_parser->get_scroll_id(); + if (scroll_parser->get_total() == 0) { + _eos = true; + return Status::OK; + } + + if (scroll_parser->get_size() < _batch_size) { + _eos = true; + } else { + _eos = false; + } + + *scan_eos = false; + return Status::OK; +} + +Status ESScanReader::close() { + if (_scroll_id.empty()) { + return Status::OK; + } + + std::string scratch_target = _target + REQUEST_SEARCH_SCROLL_PATH; + RETURN_IF_ERROR(_network_client.init(scratch_target)); + _network_client.set_basic_auth(_user_name, _passwd); + _network_client.set_method(DELETE); + _network_client.set_content_type("application/json"); + _network_client.set_timeout_ms(5 * 1000); + std::string response; + RETURN_IF_ERROR(_network_client.execute_delete_request(ESScrollQueryBuilder::build_clear_scroll_body(_scroll_id), &response)); + if (_network_client.get_http_status() == 200) { + return Status::OK; + } else { + return Status("es_scan_reader delete scroll context failure"); + } +} +} diff --git a/be/src/exec/es/es_scan_reader.h b/be/src/exec/es/es_scan_reader.h new file mode 100644 index 00000000000000..b03701c11e772c --- /dev/null +++ b/be/src/exec/es/es_scan_reader.h @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "exec/es/es_scroll_parser.h" +#include "http/http_client.h" + +using std::string; + +namespace doris { + +class Status; + +class ESScanReader { + +public: + static constexpr const char* KEY_USER_NAME = "user"; + static constexpr const char* KEY_PASS_WORD = "password"; + static constexpr const char* KEY_HOST_PORT = "host_port"; + static constexpr const char* KEY_INDEX = "index"; + static constexpr const char* KEY_TYPE = "type"; + static constexpr const char* KEY_SHARD = "shard_id"; + static constexpr const char* KEY_QUERY = "query"; + static constexpr const char* KEY_BATCH_SIZE = "batch_size"; + ESScanReader(const std::string& target, const std::map& props); + ~ESScanReader(); + + // launch the first scroll request, this method will cache the first scroll response, and return the this cached response when invoke get_next + Status open(); + // invoke get_next to get next batch documents from elasticsearch + Status get_next(bool *eos, std::unique_ptr& parser); + // clear scroll context from elasticsearch + Status close(); + +private: + std::string _target; + std::string _user_name; + std::string _passwd; + std::string _scroll_id; + HttpClient _network_client; + std::string _index; + std::string _type; + // push down filter + std::string _query; + // elaticsearch shards to fetch document + std::string _shards; + // distinguish the first scroll phase 
and the following scroll + bool _is_first; + std::string _init_scroll_url; + std::string _next_scroll_url; + bool _eos; + int _batch_size; + + std::string _cached_response; +}; +} + diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp new file mode 100644 index 00000000000000..e1ee317f777cf5 --- /dev/null +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/es/es_scroll_parser.h" + +#include +#include +#include + +#include "common/logging.h" +#include "common/status.h" +#include "runtime/mem_pool.h" +#include "runtime/mem_tracker.h" +#include "util/string_parser.hpp" + +namespace doris { + +static const char* FIELD_SCROLL_ID = "_scroll_id"; +static const char* FIELD_HITS = "hits"; +static const char* FIELD_INNER_HITS = "hits"; +static const char* FIELD_SOURCE = "_source"; +static const char* FIELD_TOTAL = "total"; + +static const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " + "Expected value of type $0 based on column metadata. 
This likely indicates a " + "problem with the data source library."; +static const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " + "$1 bytes for $2."; +static const string ERROR_COL_DATA_IS_ARRAY = "Data source returned an array for the type $0" + "based on column metadata."; + +#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \ + do { \ + if (col.IsArray()) { \ + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, type_to_string(type))); \ + } \ + } while (false) + + +#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type) \ + do { \ + if (!col.IsString()) { \ + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); \ + } \ + } while (false) + + +#define RETURN_ERROR_IF_PARSING_FAILED(result, type) \ + do { \ + if (result != StringParser::PARSE_SUCCESS) { \ + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); \ + } \ + } while (false) + +template +static Status get_int_value(const rapidjson::Value &col, PrimitiveType type, void* slot) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); + return Status::OK; + } + + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + StringParser::ParseResult result; + const std::string& val = col.GetString(); + size_t len = col.GetStringLength(); + T v = StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); + + if (sizeof(T) < 16) { + *reinterpret_cast(slot) = v; + } else { + DCHECK(sizeof(T) == 16); + memcpy(slot, &v, sizeof(v)); + } + + return Status::OK; +} + +template +static Status get_float_value(const rapidjson::Value &col, PrimitiveType type, void* slot) { + DCHECK(sizeof(T) == 4 || sizeof(T) == 8); + if (col.IsNumber()) { + *reinterpret_cast(slot) = (T)(sizeof(T) == 4 ? 
col.GetFloat() : col.GetDouble()); + return Status::OK; + } + + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + StringParser::ParseResult result; + const std::string& val = col.GetString(); + size_t len = col.GetStringLength(); + T v = StringParser::string_to_float(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); + *reinterpret_cast(slot) = v; + + return Status::OK; +} + +ScrollParser::ScrollParser() : + _scroll_id(""), + _total(0), + _size(0), + _line_index(0) { +} + +ScrollParser::~ScrollParser() { +} + +Status ScrollParser::parse(const std::string& scroll_result) { + _document_node.Parse(scroll_result.c_str()); + if (_document_node.HasParseError()) { + std::stringstream ss; + ss << "Parsing json error, json is: " << scroll_result; + return Status(ss.str()); + } + + if (!_document_node.HasMember(FIELD_SCROLL_ID)) { + return Status("Document has not a scroll id field"); + } + + const rapidjson::Value &scroll_node = _document_node[FIELD_SCROLL_ID]; + _scroll_id = scroll_node.GetString(); + // { hits: { total : 2, "hits" : [ {}, {}, {} ]}} + const rapidjson::Value &outer_hits_node = _document_node[FIELD_HITS]; + const rapidjson::Value &field_total = outer_hits_node[FIELD_TOTAL]; + _total = field_total.GetInt(); + if (_total == 0) { + return Status::OK; + } + + VLOG(1) << "es_scan_reader total hits: " << _total << " documents"; + const rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; + if (!inner_hits_node.IsArray()) { + return Status("inner hits node is not an array"); + } + + rapidjson::Document::AllocatorType& a = _document_node.GetAllocator(); + _inner_hits_node.CopyFrom(inner_hits_node, a); + _size = _inner_hits_node.Size(); + + return Status::OK; +} + +int ScrollParser::get_size() { + return _size; +} + +const std::string& ScrollParser::get_scroll_id() { + return _scroll_id; +} + +int ScrollParser::get_total() { + return _total; +} + +Status 
ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, + Tuple* tuple, MemPool* tuple_pool, bool* line_eof) { + *line_eof = true; + if (_size <= 0 || _line_index >= _size) { + return Status::OK; + } + + const rapidjson::Value& obj = _inner_hits_node[_line_index++]; + const rapidjson::Value& line = obj[FIELD_SOURCE]; + if (!line.IsObject()) { + return Status("Parse inner hits failed"); + } + + tuple->init(tuple_desc->byte_size()); + for (int i = 0; i < tuple_desc->slots().size(); ++i) { + const SlotDescriptor* slot_desc = tuple_desc->slots()[i]; + + if (!slot_desc->is_materialized()) { + continue; + } + + const char* col_name = slot_desc->col_name().c_str(); + rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); + if (itr == line.MemberEnd()) { + tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } + + tuple->set_not_null(slot_desc->null_indicator_offset()); + const rapidjson::Value &col = line[col_name]; + + void* slot = tuple->get_slot(slot_desc->tuple_offset()); + PrimitiveType type = slot_desc->type().type; + switch (type) { + case TYPE_CHAR: + case TYPE_VARCHAR: { + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); + if (UNLIKELY(buffer == NULL)) { + string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", + val_size, "string slot"); + return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); + } + memcpy(buffer, val.data(), val_size); + reinterpret_cast(slot)->ptr = buffer; + reinterpret_cast(slot)->len = val_size; + break; + } + + case TYPE_TINYINT: { + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_SMALLINT: { + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; + 
} + break; + } + + case TYPE_INT: { + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_BIGINT: { + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_LARGEINT: { + Status status = get_int_value<__int128>(col, type, slot); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_DOUBLE: { + Status status = get_float_value(col, type, slot); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_FLOAT: { + Status status = get_float_value(col, type, slot); + if (!status.ok()) { + return status; + } + break; + } + + case TYPE_BOOLEAN: { + if (col.IsBool()) { + *reinterpret_cast(slot) = col.GetBool(); + break; + } + + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetInt(); + break; + } + + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + StringParser::ParseResult result; + bool b = + StringParser::string_to_bool(val.c_str(), val_size, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); + *reinterpret_cast(slot) = b; + break; + } + + case TYPE_DATE: + case TYPE_DATETIME: { + if (col.IsNumber()) { + if (!reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); + } + + if (type == TYPE_DATE) { + reinterpret_cast(slot)->cast_to_date(); + } else { + reinterpret_cast(slot)->set_type(TIME_DATETIME); + } + break; + } + + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + DateTimeValue* ts_slot = reinterpret_cast(slot); + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + if (!ts_slot->from_date_str(val.c_str(), val_size)) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); + } + + if 
(ts_slot->year() < 1900) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); + } + + if (type == TYPE_DATE) { + ts_slot->cast_to_date(); + } else { + ts_slot->to_datetime(); + } + break; + } + + default: { + DCHECK(false); + break; + } + } + } + + *line_eof = false; + return Status::OK; +} +} diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h new file mode 100644 index 00000000000000..5af75a85eec2aa --- /dev/null +++ b/be/src/exec/es/es_scroll_parser.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include "rapidjson/document.h" +#include "runtime/descriptors.h" +#include "runtime/tuple.h" + +namespace doris { + +class Status; + +class ScrollParser { + +public: + ScrollParser(); + ~ScrollParser(); + + Status parse(const std::string& scroll_result); + Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, + MemPool* mem_pool, bool* line_eof); + + const std::string& get_scroll_id(); + int get_total(); + int get_size(); + +private: + + std::string _scroll_id; + int _total; + int _size; + rapidjson::SizeType _line_index; + + rapidjson::Document _document_node; + rapidjson::Value _inner_hits_node; +}; +} diff --git a/be/src/exec/es/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp new file mode 100644 index 00000000000000..1c405136e749d3 --- /dev/null +++ b/be/src/exec/es/es_scroll_query.cpp @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exec/es/es_scroll_query.h" + +#include +#include + +#include "common/logging.h" +#include "exec/es/es_query_builder.h" +#include "exec/es/es_scan_reader.h" +#include "rapidjson/document.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" + +namespace doris { + +ESScrollQueryBuilder::ESScrollQueryBuilder() { + +} + +ESScrollQueryBuilder::~ESScrollQueryBuilder() { + +} + +std::string ESScrollQueryBuilder::build_next_scroll_body(const std::string& scroll_id, const std::string& scroll) { + rapidjson::Document scroll_dsl; + rapidjson::Document::AllocatorType &allocator = scroll_dsl.GetAllocator(); + scroll_dsl.SetObject(); + rapidjson::Value scroll_id_value(scroll_id.c_str(), allocator); + scroll_dsl.AddMember("scroll_id", scroll_id_value, allocator); + rapidjson::Value scroll_value(scroll.c_str(), allocator); + scroll_dsl.AddMember("scroll", scroll_value, allocator); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + scroll_dsl.Accept(writer); + return buffer.GetString(); +} +std::string ESScrollQueryBuilder::build_clear_scroll_body(const std::string& scroll_id) { + rapidjson::Document delete_scroll_dsl; + rapidjson::Document::AllocatorType &allocator = delete_scroll_dsl.GetAllocator(); + delete_scroll_dsl.SetObject(); + rapidjson::Value scroll_id_value(scroll_id.c_str(), allocator); + delete_scroll_dsl.AddMember("scroll_id", scroll_id_value, allocator); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + delete_scroll_dsl.Accept(writer); + return buffer.GetString(); +} + +std::string ESScrollQueryBuilder::build(const std::map& properties, + const std::vector& fields, + std::vector& predicates) { + rapidjson::Document es_query_dsl; + rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); + es_query_dsl.SetObject(); + // generate the filter caluse + rapidjson::Document scratch_document; + rapidjson::Value query_node(rapidjson::kObjectType); + query_node.SetObject(); + 
BooleanQueryBuilder::to_query(predicates, &scratch_document, &query_node); + // note: add `query` for this value.... + es_query_dsl.AddMember("query", query_node, allocator); + // just filter the selected fields for reducing the network cost + if (fields.size() > 0) { + rapidjson::Value source_node(rapidjson::kArrayType); + for (auto iter = fields.begin(); iter != fields.end(); iter++) { + rapidjson::Value field(iter->c_str(), allocator); + source_node.PushBack(field, allocator); + } + es_query_dsl.AddMember("_source", source_node, allocator); + } + int size = atoi(properties.at(ESScanReader::KEY_BATCH_SIZE).c_str()); + rapidjson::Value sort_node(rapidjson::kArrayType); + // use the scroll-scan mode for scan index documents + rapidjson::Value field("_doc", allocator); + sort_node.PushBack(field, allocator); + es_query_dsl.AddMember("sort", sort_node, allocator); + // number of docuements returned + es_query_dsl.AddMember("size", size, allocator); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + es_query_dsl.Accept(writer); + std::string es_query_dsl_json = buffer.GetString(); + return es_query_dsl_json; + +} +} diff --git a/be/src/exec/es/es_scroll_query.h b/be/src/exec/es/es_scroll_query.h new file mode 100644 index 00000000000000..0f6c20457ad713 --- /dev/null +++ b/be/src/exec/es/es_scroll_query.h @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "exec/es/es_predicate.h" + +namespace doris { + +class ESScrollQueryBuilder { + +public: + ESScrollQueryBuilder(); + ~ESScrollQueryBuilder(); + // build the query DSL for elasticsearch + static std::string build_next_scroll_body(const std::string& scroll_id, const std::string& scroll); + static std::string build_clear_scroll_body(const std::string& scroll_id); + // @note: predicates should processed before pass it to this method, + // tie breaker for predicate wheather can push down es can reference the push-down filters + static std::string build(const std::map& properties, + const std::vector& fields, std::vector& predicates); +}; +} diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp new file mode 100644 index 00000000000000..5467b4966d4d20 --- /dev/null +++ b/be/src/exec/es_http_scan_node.cpp @@ -0,0 +1,448 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/es_http_scan_node.h"
+
+#include
+#include
+
+#include "common/object_pool.h"
+#include "exec/es/es_predicate.h"
+#include "exec/es/es_query_builder.h"
+#include "exec/es/es_scan_reader.h"
+#include "exec/es/es_scroll_query.h"
+#include "exprs/expr.h"
+#include "runtime/runtime_state.h"
+#include "runtime/row_batch.h"
+#include "runtime/dpp_sink_internal.h"
+#include "service/backend_options.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+// Creates the scan node for the tuple named by tnode.es_scan_node.tuple_id.
+// The batch queue shared with the scanner threads is capped at 1024 batches.
+EsHttpScanNode::EsHttpScanNode(
+        ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) :
+    ScanNode(pool, tnode, descs),
+    _tuple_id(tnode.es_scan_node.tuple_id),
+    _runtime_state(nullptr),
+    _tuple_desc(nullptr),
+    _num_running_scanners(0),
+    _scan_finished(false),
+    _eos(false),
+    _max_buffered_batches(1024),
+    _wait_scanner_timer(nullptr) {
+}
+
+EsHttpScanNode::~EsHttpScanNode() {
+}
+
+// Runs ScanNode::init and copies the ES properties carried by the plan node.
+// The `state` argument is not used here.
+Status EsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ScanNode::init(tnode));
+
+    // use TEsScanNode
+    _properties = tnode.es_scan_node.properties;
+    return Status::OK;
+}
+
+// Resolves the tuple descriptor for _tuple_id, records the column names of
+// all materialized slots and registers the "WaitScannerTime" timer.
+// Returns an error status when the tuple descriptor cannot be found.
+Status EsHttpScanNode::prepare(RuntimeState* state) {
+    VLOG_QUERY << "EsHttpScanNode prepare";
+    RETURN_IF_ERROR(ScanNode::prepare(state));
+
+    _runtime_state = state;
+    _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
+    if (_tuple_desc == nullptr) {
+        std::stringstream ss;
+        ss << "Failed to get tuple descriptor, _tuple_id=" << _tuple_id;
+        return Status(ss.str());
+    }
+
+    // set up column name vector for ESScrollQueryBuilder
+    for (auto slot_desc : _tuple_desc->slots()) {
+        if (!slot_desc->is_materialized()) {
+            continue;
+        }
+        _column_names.push_back(slot_desc->col_name());
+    }
+
+    _wait_scanner_timer = ADD_TIMER(runtime_profile(), "WaitScannerTime");
+
+    return Status::OK;
+}
+
+// build predicate
+// Wraps every conjunct in an EsPredicate (owned by _pool). A predicate whose
+// disjunct list builds successfully is kept in _predicates, and the index of
+// its conjunct is stored at the same position in _predicate_to_conjunct.
+// A predicate that fails to build is skipped, unless get_es_query_status()
+// also reports an error, in which case that error is returned.
+Status EsHttpScanNode::build_conjuncts_list() {
+    Status status = Status::OK;
+    for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
+        EsPredicate* predicate = _pool->add(
+                new EsPredicate(_conjunct_ctxs[i], _tuple_desc));
+        status = predicate->build_disjuncts_list();
+        if (status.ok()) {
+            _predicates.push_back(predicate);
+            _predicate_to_conjunct.push_back(i);
+        } else {
+            VLOG(1) << status.get_error_msg();
+            status = predicate->get_es_query_status();
+            if (!status.ok()) {
+                LOG(WARNING) << status.get_error_msg();
+                return status;
+            }
+        }
+    }
+
+    return Status::OK;
+}
+
+// Opens the node: evaluates constant conjuncts, decides which conjuncts are
+// handed to ES, removes those from _conjunct_ctxs and starts the scanners.
+Status EsHttpScanNode::open(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
+    RETURN_IF_CANCELLED(state);
+
+    // if conjunct is constant, compute direct and set eos = true
+    // NOTE(review): _eos only short-circuits get_next(); the scanners below
+    // are still started when a constant conjunct is NULL/false.
+    for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
+        if (_conjunct_ctxs[conj_idx]->root()->is_constant()) {
+            void* value = _conjunct_ctxs[conj_idx]->get_value(NULL);
+            if (value == NULL || *reinterpret_cast(value) == false) {
+                _eos = true;
+            }
+        }
+    }
+
+    RETURN_IF_ERROR(build_conjuncts_list());
+
+    // remove those predicates which ES cannot support
+    // list[i] tells whether _predicates[i] passed validation; walking
+    // backwards keeps the remaining indexes valid while erasing.
+    std::vector list;
+    BooleanQueryBuilder::validate(_predicates, &list);
+    DCHECK(list.size() == _predicate_to_conjunct.size());
+    for(int i = list.size() - 1; i >= 0; i--) {
+        if(!list[i]) {
+            _predicate_to_conjunct.erase(_predicate_to_conjunct.begin() + i);
+            _predicates.erase(_predicates.begin() + i);
+        }
+    }
+
+    // filter the conjuncts and ES will process them later
+    // _predicate_to_conjunct holds ascending conjunct indexes, so erasing from
+    // the back does not shift the entries that are still to be removed.
+    for (int i = _predicate_to_conjunct.size() - 1; i >= 0; i--) {
+        int conjunct_index = _predicate_to_conjunct[i];
+        _conjunct_ctxs[conjunct_index]->close(_runtime_state);
+        _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index);
+    }
+
+    RETURN_IF_ERROR(start_scanners());
+
+    return Status::OK;
+}
+
+// Starts one scanner_worker thread per scan range and waits on each worker's
+// promise before starting the next one. Returns the first non-OK status.
+// NOTE(review): f.get() blocks until the worker calls set_value(); if the
+// worker only does so after finishing its scan, the scanners run one after
+// another and nothing drains _batch_queue in the meantime -- confirm.
+// NOTE(review): on an early return _num_running_scanners keeps the full
+// count, because the workers that were never launched cannot decrement it.
+Status EsHttpScanNode::start_scanners() {
+    {
+        std::unique_lock l(_batch_queue_lock);
+        _num_running_scanners = _scan_ranges.size();
+    }
+
+    for (int i = 0; i < _scan_ranges.size(); i++) {
+        std::promise p;
+        std::future f = p.get_future();
+        _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, i,
+                _scan_ranges.size(), std::ref(p));
+        Status status = f.get();
+        if (!status.ok()) return status;
+    }
+    return Status::OK;
+}
+
+// Hands the next queued batch to the caller.
+// Sets *eos once _eos or _scan_finished is set, once no scanner is running
+// and the queue is empty, or when the row limit is reached. Returns
+// _process_status when a scanner failed or the query was cancelled.
+Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch,
+        bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    if (state->is_cancelled()) {
+        std::unique_lock l(_batch_queue_lock);
+        if (update_status(Status::CANCELLED)) {
+            _queue_writer_cond.notify_all();
+        }
+    }
+
+    if (_eos) {
+        *eos = true;
+        return Status::OK;
+    }
+
+    if (_scan_finished.load()) {
+        *eos = true;
+        return Status::OK;
+    }
+
+    std::shared_ptr scanner_batch;
+    {
+        std::unique_lock l(_batch_queue_lock);
+        // Wait in one-second slices so cancellation is noticed even if no
+        // scanner notifies the condition variable.
+        while (_process_status.ok() &&
+               !_runtime_state->is_cancelled() &&
+               _num_running_scanners > 0 &&
+               _batch_queue.empty()) {
+            SCOPED_TIMER(_wait_scanner_timer);
+            _queue_reader_cond.wait_for(l, std::chrono::seconds(1));
+        }
+        if (!_process_status.ok()) {
+            // Some scanner process failed.
+            return _process_status;
+        }
+        if (_runtime_state->is_cancelled()) {
+            if (update_status(Status::CANCELLED)) {
+                _queue_writer_cond.notify_all();
+            }
+            return _process_status;
+        }
+        if (!_batch_queue.empty()) {
+            scanner_batch = _batch_queue.front();
+            _batch_queue.pop_front();
+        }
+    }
+
+    // All scanner has been finished, and all cached batch has been read
+    if (scanner_batch == nullptr) {
+        _scan_finished.store(true);
+        *eos = true;
+        return Status::OK;
+    }
+
+    // notify one scanner
+    _queue_writer_cond.notify_one();
+
+    // get scanner's batch memory
+    row_batch->acquire_state(scanner_batch.get());
+    _num_rows_returned += row_batch->num_rows();
+    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+
+    // This is first time reach limit.
+    // Only valid when query 'select * from table1 limit 20'
+    if (reached_limit()) {
+        // trim the rows that exceed the limit from this batch
+        int num_rows_over = _num_rows_returned - _limit;
+        row_batch->set_num_rows(row_batch->num_rows() - num_rows_over);
+        _num_rows_returned -= num_rows_over;
+        COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+
+        _scan_finished.store(true);
+        _queue_writer_cond.notify_all();
+        *eos = true;
+    } else {
+        *eos = false;
+    }
+
+    if (VLOG_ROW_IS_ON) {
+        for (int i = 0; i < row_batch->num_rows(); ++i) {
+            TupleRow* row = row_batch->get_row(i);
+            VLOG_ROW << "EsHttpScanNode output row: "
+                << Tuple::to_string(row->get_tuple(0), *_tuple_desc);
+        }
+    }
+
+    return Status::OK;
+}
+
+// Marks the scan as finished, wakes every waiter, joins all scanner threads
+// and drops the batches that are still queued. Does nothing when the node is
+// already closed.
+Status EsHttpScanNode::close(RuntimeState* state) {
+    if (is_closed()) {
+        return Status::OK;
+    }
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    _scan_finished.store(true);
+    _queue_writer_cond.notify_all();
+    _queue_reader_cond.notify_all();
+    for (int i = 0; i < _scanner_threads.size(); ++i) {
+        _scanner_threads[i].join();
+    }
+
+    _batch_queue.clear();
+
+    return ExecNode::close(state);
+}
+
+// This function is called after plan node has been prepared.
+// Stores the scan ranges assigned to this node; each range is later handled
+// by its own scanner thread.
+Status EsHttpScanNode::set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) {
+    _scan_ranges = scan_ranges;
+    return Status::OK;
+}
+
+// Writes the node name to `out`; the indentation level is ignored.
+void EsHttpScanNode::debug_string(int ident_level, std::stringstream* out) const {
+    (*out) << "EsHttpScanNode";
+}
+
+// Drains an already-opened scanner: fills row batches with the rows that
+// pass `conjunct_ctxs` and pushes every non-empty batch to _batch_queue.
+//
+// The caller (scanner_worker) is responsible for calling scanner->open()
+// first, so that the result of opening can be reported before the scan runs.
+//
+// Returns OK when the scanner reaches eof, when the node has finished, or
+// when another scanner already recorded a failure; returns CANCELLED when the
+// query was cancelled; otherwise the error reported by the scanner.
+Status EsHttpScanNode::scanner_scan(
+        std::unique_ptr<EsHttpScanner> scanner,
+        const std::vector<ExprContext*>& conjunct_ctxs,
+        EsScanCounter* counter) {
+    bool scanner_eof = false;
+
+    while (!scanner_eof) {
+        // Fill one row batch
+        std::shared_ptr<RowBatch> row_batch(
+                new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker()));
+
+        // create new tuple buffer for row_batch
+        MemPool* tuple_pool = row_batch->tuple_data_pool();
+        int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size();
+        void* tuple_buffer = tuple_pool->allocate(tuple_buffer_size);
+        if (tuple_buffer == nullptr) {
+            return Status("Allocate memory for row batch failed.");
+        }
+
+        Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buffer);
+        while (!scanner_eof) {
+            RETURN_IF_CANCELLED(_runtime_state);
+            // If we have finished all works
+            if (_scan_finished.load()) {
+                return Status::OK;
+            }
+
+            // This row batch has been filled up, and break this
+            if (row_batch->is_full()) {
+                break;
+            }
+
+            int row_idx = row_batch->add_row();
+            TupleRow* row = row_batch->get_row(row_idx);
+            // scan node is the first tuple of tuple row
+            row->set_tuple(0, tuple);
+            memset(tuple, 0, _tuple_desc->num_null_bytes());
+
+            // Get from scanner
+            RETURN_IF_ERROR(scanner->get_next(tuple, tuple_pool, &scanner_eof));
+            if (scanner_eof) {
+                continue;
+            }
+
+            // eval conjuncts of this row.
+            // data() instead of &conjunct_ctxs[0]: the vector is empty when
+            // every conjunct was pushed down to ES.
+            if (eval_conjuncts(conjunct_ctxs.data(), conjunct_ctxs.size(), row)) {
+                row_batch->commit_last_row();
+                // advance to the next tuple slot; a filtered row reuses the
+                // current slot instead
+                char* new_tuple = reinterpret_cast<char*>(tuple);
+                new_tuple += _tuple_desc->byte_size();
+                tuple = reinterpret_cast<Tuple*>(new_tuple);
+                counter->num_rows_returned++;
+            } else {
+                counter->num_rows_filtered++;
+            }
+        }
+
+        // Row batch has been filled, push this to the queue
+        if (row_batch->num_rows() > 0) {
+            std::unique_lock<std::mutex> l(_batch_queue_lock);
+            // back-pressure: wait while the queue is full
+            while (_process_status.ok() &&
+                   !_scan_finished.load() &&
+                   !_runtime_state->is_cancelled() &&
+                   _batch_queue.size() >= _max_buffered_batches) {
+                _queue_writer_cond.wait_for(l, std::chrono::seconds(1));
+            }
+            // Process already set failed, so we just return OK
+            if (!_process_status.ok()) {
+                return Status::OK;
+            }
+            // Scan already finished, just return
+            if (_scan_finished.load()) {
+                return Status::OK;
+            }
+            // Runtime state is canceled, just return cancel
+            if (_runtime_state->is_cancelled()) {
+                return Status::CANCELLED;
+            }
+            // Queue size must be smaller than _max_buffered_batches
+            _batch_queue.push_back(row_batch);
+
+            // Notify the reader that a batch is available
+            _queue_reader_cond.notify_one();
+        }
+    }
+
+    return Status::OK;
+}
+
+// Returns "hostname:port" of the ES host to query, preferring the host that
+// runs on this backend and falling back to the first host in the list.
+// Returns an empty string when `es_hosts` is empty.
+static std::string get_host_port(const std::vector<TNetworkAddress>& es_hosts) {
+    if (es_hosts.empty()) {
+        return std::string();
+    }
+
+    const std::string localhost = BackendOptions::get_localhost();
+
+    const TNetworkAddress* host = &es_hosts[0];
+    for (const auto& es_host : es_hosts) {
+        if (es_host.hostname == localhost) {
+            host = &es_host;
+            break;
+        }
+    }
+
+    std::string host_port = host->hostname;
+    host_port += ":";
+    host_port += std::to_string(host->port);
+    return host_port;
+}
+
+// Thread entry point for the scanner of scan range `start_idx`.
+//
+// `p_status` receives the result of setting up and opening the scanner. It is
+// fulfilled BEFORE the scan itself starts, so that start_scanners() is not
+// blocked for the whole scan while nobody consumes _batch_queue. Errors that
+// happen during the scan are published through _process_status instead.
+void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise<Status>& p_status) {
+    DCHECK(start_idx < length);
+
+    EsScanCounter counter;
+    // Declared before the scanner: EsHttpScanner keeps references to both
+    // containers, so they must outlive it.
+    std::vector<ExprContext*> scanner_expr_ctxs;
+    std::map<std::string, std::string> properties(_properties);
+    std::unique_ptr<EsHttpScanner> scanner;
+
+    const TEsScanRange& es_scan_range =
+            _scan_ranges[start_idx].scan_range.es_scan_range;
+
+    // Clone expr context
+    Status status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state,
+                                              &scanner_expr_ctxs);
+    if (!status.ok()) {
+        // Do not scan with a possibly incomplete set of conjuncts.
+        LOG(WARNING) << "Clone conjuncts failed.";
+    } else if (es_scan_range.es_hosts.empty()) {
+        status = Status("No ES host in scan range.");
+    } else {
+        // Collect the information from the scan range into properties
+        properties[ESScanReader::KEY_INDEX] = es_scan_range.index;
+        if (es_scan_range.__isset.type) {
+            properties[ESScanReader::KEY_TYPE] = es_scan_range.type;
+        }
+        properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id);
+        properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size());
+        properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts);
+        properties[ESScanReader::KEY_QUERY]
+            = ESScrollQueryBuilder::build(properties, _column_names, _predicates);
+
+        scanner.reset(new EsHttpScanner(
+                    _runtime_state, runtime_profile(), _tuple_id,
+                    properties, scanner_expr_ctxs, &counter));
+        status = scanner->open();
+    }
+
+    // Report the start-up result. The promise lives on the stack of
+    // start_scanners(), so p_status must not be touched after this line.
+    p_status.set_value(status);
+
+    // start scanner to scan
+    if (status.ok()) {
+        status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter);
+    }
+    if (!status.ok()) {
+        LOG(WARNING) << "Scanner[" << start_idx << "] process failed. status="
+            << status.get_error_msg();
+    }
+
+    // Update stats
+    _runtime_state->update_num_rows_load_success(counter.num_rows_returned);
+    _runtime_state->update_num_rows_load_filtered(counter.num_rows_filtered);
+
+    // scanner is going to finish
+    {
+        std::lock_guard<std::mutex> l(_batch_queue_lock);
+        if (!status.ok()) {
+            update_status(status);
+        }
+        // This scanner will finish
+        _num_running_scanners--;
+    }
+    _queue_reader_cond.notify_all();
+    // If one scanner failed, others don't need scan any more
+    if (!status.ok()) {
+        _queue_writer_cond.notify_all();
+    }
+}
+}
diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h
new file mode 100644
index 00000000000000..555a44a31bde5e
--- /dev/null
+++ b/be/src/exec/es_http_scan_node.h
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef BE_EXEC_ES_HTTP_SCAN_NODE_H
+#define BE_EXEC_ES_HTTP_SCAN_NODE_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common/status.h"
+#include "exec/scan_node.h"
+#include "exec/es_http_scanner.h"
+#include "gen_cpp/PaloInternalService_types.h"
+
+namespace doris {
+
+class RuntimeState;
+class PartRangeKey;
+class PartitionInfo;
+// NOTE(review): no EsHttpScanCounter is used in this header; the counter type
+// referenced below is EsScanCounter -- this forward declaration looks unused.
+class EsHttpScanCounter;
+class EsPredicate;
+
+// Scan node that reads an Elasticsearch table over HTTP.
+// open() starts one scanner thread per scan range; the threads push row
+// batches into _batch_queue and get_next() hands them to the parent node.
+class EsHttpScanNode : public ScanNode {
+public:
+    EsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    virtual ~EsHttpScanNode();
+
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
+    virtual Status prepare(RuntimeState* state) override;
+    virtual Status open(RuntimeState* state) override;
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override;
+    virtual Status close(RuntimeState* state) override;
+    virtual Status set_scan_ranges(const std::vector& scan_ranges) override;
+
+protected:
+    // Write debug string of this into out.
+    virtual void debug_string(int indentation_level, std::stringstream* out) const override;
+
+private:
+    // Update process status to one failed status,
+    // NOTE: Must hold the mutex of this scan node
+    // Returns true only when this call replaced an OK status, i.e. the first
+    // failure wins and later ones are ignored.
+    bool update_status(const Status& new_status) {
+        if (_process_status.ok()) {
+            _process_status = new_status;
+            return true;
+        }
+        return false;
+    }
+
+    // Create scanners to do scan job
+    Status start_scanners();
+
+    // One scanner worker. This scanner will handle 'length' ranges starting from start_idx
+    // The worker reports its status through p_status.
+    void scanner_worker(int start_idx, int length, std::promise& p_status);
+
+    // Scan one range
+    Status scanner_scan(std::unique_ptr scanner,
+                        const std::vector& conjunct_ctxs,
+                        EsScanCounter* counter);
+
+    // Build one EsPredicate per conjunct; see _predicates below.
+    Status build_conjuncts_list();
+
+    TupleId _tuple_id;
+    RuntimeState* _runtime_state;
+    TupleDescriptor* _tuple_desc;
+
+    // Number of scanner threads that have not finished yet; guarded by
+    // _batch_queue_lock.
+    int _num_running_scanners;
+    // Set once the consumer needs no more batches (eos, limit or close).
+    std::atomic _scan_finished;
+    // Set in open() when a constant conjunct evaluates to NULL or false.
+    bool _eos;
+    // Upper bound on the size of _batch_queue.
+    int _max_buffered_batches;
+    RuntimeProfile::Counter* _wait_scanner_timer;
+
+    // NOTE(review): not initialized by the constructor and not referenced in
+    // es_http_scan_node.cpp -- candidate for removal.
+    bool _all_scanners_finished;
+    // First failure reported by any scanner; guarded by _batch_queue_lock.
+    Status _process_status;
+
+    std::vector _scanner_threads;
+    std::map _properties;
+    std::vector _scan_ranges;
+    std::vector _column_names;
+
+    std::mutex _batch_queue_lock;
+    // Signalled by scanners when a batch is queued or a scanner finishes.
+    std::condition_variable _queue_reader_cond;
+    // Signalled by the reader when queue space is freed or the scan stops.
+    std::condition_variable _queue_writer_cond;
+    std::deque> _batch_queue;
+    // Predicates that are pushed down to ES.
+    std::vector _predicates;
+
+    // _predicate_to_conjunct[i] is the index in _conjunct_ctxs of the conjunct
+    // that _predicates[i] was built from.
+    std::vector _predicate_to_conjunct;
+};
+
+}
+
+#endif
diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp
new file mode 100644
index 00000000000000..331ede8902ea2e
--- /dev/null
+++ b/be/src/exec/es_http_scanner.cpp
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/es_http_scanner.h"
+
+#include
+#include
+
+#include "runtime/descriptors.h"
+#include "runtime/exec_env.h"
+#include "runtime/mem_tracker.h"
+#include "runtime/raw_value.h"
+#include "runtime/runtime_state.h"
+#include "runtime/tuple.h"
+#include "exprs/expr.h"
+
+namespace doris {
+
+// Creates a scanner for one ES scan range.
+// `properties` and `conjunct_ctxs` are stored by reference, so the caller
+// must keep both alive for the lifetime of the scanner.
+// NOTE(review): outside BE_TEST, _mem_pool is bound to the instance tracker
+// rather than to _mem_tracker -- confirm this is intended.
+EsHttpScanner::EsHttpScanner(
+        RuntimeState* state,
+        RuntimeProfile* profile,
+        TupleId tuple_id,
+        const std::map<std::string, std::string>& properties,
+        const std::vector<ExprContext*>& conjunct_ctxs,
+        EsScanCounter* counter) :
+    _state(state),
+    _profile(profile),
+    _tuple_id(tuple_id),
+    _properties(properties),
+    _conjunct_ctxs(conjunct_ctxs),
+    _next_range(0),
+    _line_eof(false),
+    _batch_eof(false),
+#if BE_TEST
+    _mem_tracker(new MemTracker()),
+    _mem_pool(_mem_tracker.get()),
+#else
+    _mem_tracker(new MemTracker(-1, "EsHttp Scanner", state->instance_mem_tracker())),
+    _mem_pool(_state->instance_mem_tracker()),
+#endif
+    _tuple_desc(nullptr),
+    _counter(counter),
+    _es_reader(nullptr),
+    _es_scroll_parser(nullptr),
+    _rows_read_counter(nullptr),
+    _read_timer(nullptr),
+    _materialize_timer(nullptr) {
+}
+
+EsHttpScanner::~EsHttpScanner() {
+    close();
+}
+
+// Resolves the tuple descriptor, creates and opens the ES reader and
+// registers the profile counters.
+// Returns an error status when the tuple descriptor is unknown, when the
+// host/port property is missing, or when the reader fails to open.
+Status EsHttpScanner::open() {
+    _tuple_desc = _state->desc_tbl().get_tuple_descriptor(_tuple_id);
+    if (_tuple_desc == nullptr) {
+        std::stringstream ss;
+        ss << "Unknown tuple descriptor, tuple_id=" << _tuple_id;
+        return Status(ss.str());
+    }
+
+    // find() instead of at(): a missing key must become an error status, not
+    // an exception thrown inside the scanner thread.
+    const auto host_it = _properties.find(ESScanReader::KEY_HOST_PORT);
+    if (host_it == _properties.end()) {
+        return Status("Es host_port property is missing.");
+    }
+    _es_reader.reset(new ESScanReader(host_it->second, _properties));
+
+    RETURN_IF_ERROR(_es_reader->open());
+
+    _rows_read_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT);
+    _read_timer = ADD_TIMER(_profile, "TotalRawReadTime(*)");
+    _materialize_timer = ADD_TIMER(_profile, "MaterializeTupleTime(*)");
+
+    return Status::OK;
+}
+
+// Materializes the next row into `tuple`, fetching a new batch from the ES
+// reader whenever the current one is exhausted. Sets *eof to true once the
+// reader reports that no more batches are available.
+Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) {
+    SCOPED_TIMER(_read_timer);
+    if (_line_eof && _batch_eof) {
+        *eof = true;
+        return Status::OK;
+    }
+
+    while (!_batch_eof) {
+        // fetch the next batch on first use or when the current one is done
+        if (_line_eof || _es_scroll_parser == nullptr) {
+            RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, _es_scroll_parser));
+            if (_batch_eof) {
+                *eof = true;
+                return Status::OK;
+            }
+        }
+
+        SCOPED_TIMER(_materialize_timer);
+        RETURN_IF_ERROR(_es_scroll_parser->fill_tuple(
+                    _tuple_desc, tuple, tuple_pool, &_line_eof));
+        if (!_line_eof) {
+            // count only rows that were actually produced; an end-of-batch
+            // probe is not a row
+            COUNTER_UPDATE(_rows_read_counter, 1);
+            break;
+        }
+    }
+
+    return Status::OK;
+}
+
+// Closes the ES reader, if one was created, and the conjunct contexts.
+// Called from the destructor.
+void EsHttpScanner::close() {
+    if (_es_reader != nullptr) {
+        _es_reader->close();
+    }
+
+    Expr::close(_conjunct_ctxs, _state);
+}
+
+}
diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h
new file mode 100644
index 00000000000000..ed4cf9bd8b4294
--- /dev/null
+++ b/be/src/exec/es_http_scanner.h
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef BE_EXEC_ES_HTTP_SCANNER_H
+#define BE_EXEC_ES_HTTP_SCANNER_H
+
+#include
+#include
+#include
+#include
+#include
+
+#include "common/status.h"
+#include "common/global_types.h"
+#include "exec/es/es_scan_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "gen_cpp/Types_types.h"
+#include "runtime/mem_pool.h"
+#include "util/runtime_profile.h"
+
+namespace doris {
+
+class Tuple;
+class SlotDescriptor;
+class RuntimeState;
+class ExprContext;
+class TextConverter;
+class TupleDescriptor;
+class TupleRow;
+class RowDescriptor;
+class MemTracker;
+class RuntimeProfile;
+
+// Row counters of one scanner; both start at zero.
+struct EsScanCounter {
+    EsScanCounter() : num_rows_returned(0), num_rows_filtered(0) {
+    }
+
+    // rows that passed the conjuncts
+    int64_t num_rows_returned;
+    // rows rejected by the conjuncts
+    int64_t num_rows_filtered;
+};
+
+// Reads the rows of one ES scan range through an ESScanReader and
+// materializes them into tuples.
+class EsHttpScanner {
+public:
+    // `properties` and `conjunct_ctxs` are kept by reference (see the members
+    // below); the caller must keep them alive as long as the scanner.
+    EsHttpScanner(
+        RuntimeState* state,
+        RuntimeProfile* profile,
+        TupleId tuple_id,
+        const std::map& properties,
+        const std::vector& conjunct_ctxs,
+        EsScanCounter* counter);
+    // Calls close().
+    ~EsHttpScanner();
+
+    // Must be called before get_next().
+    Status open();
+
+    // Fills `tuple` with the next row, or sets *eof when the range is exhausted.
+    Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof);
+
+    void close();
+
+private:
+
+    RuntimeState* _state;
+    RuntimeProfile* _profile;
+    TupleId _tuple_id;
+    const std::map& _properties;
+    const std::vector& _conjunct_ctxs;
+
+    // NOTE(review): only initialized, never read in es_http_scanner.cpp.
+    int _next_range;
+    // true when the current batch has no more rows
+    bool _line_eof;
+    // true when the reader has no more batches
+    bool _batch_eof;
+
+    // NOTE(review): _slot_descs and _row_desc are not referenced in
+    // es_http_scanner.cpp.
+    std::vector _slot_descs;
+    std::unique_ptr _row_desc;
+
+    std::unique_ptr _mem_tracker;
+    MemPool _mem_pool;
+
+    const TupleDescriptor* _tuple_desc;
+    // NOTE(review): stored by the constructor but not updated by the scanner.
+    EsScanCounter* _counter;
+    std::unique_ptr _es_reader;
+    std::unique_ptr _es_scroll_parser;
+
+    // Profile
+    RuntimeProfile::Counter* _rows_read_counter;
+    RuntimeProfile::Counter* _read_timer;
+    RuntimeProfile::Counter* _materialize_timer;
+};
+
+}
+
+#endif
diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp
index 679d42c21d9249..c934cf5fff20f7 100644
--- a/be/src/exec/exec_node.cpp
+++ b/be/src/exec/exec_node.cpp
@@ -31,6 +31,7 @@
 #include "exec/new_partitioned_aggregation_node.h"
 #include "exec/csv_scan_node.h"
 #include "exec/es_scan_node.h"
+#include "exec/es_http_scan_node.h"
 #include "exec/pre_aggregation_node.h"
 #include "exec/hash_join_node.h"
 #include "exec/broker_scan_node.h"
@@ -366,6 +367,10 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
         *node = pool->add(new EsScanNode(pool, tnode, descs));
         return Status::OK;
 
+    case TPlanNodeType::ES_HTTP_SCAN_NODE:
+        *node = pool->add(new EsHttpScanNode(pool, tnode, descs));
+        return Status::OK;
+
     case TPlanNodeType::SCHEMA_SCAN_NODE:
         *node = pool->add(new SchemaScanNode(pool, tnode, descs));
         return Status::OK;
@@ -515,6 +520,7 @@ void ExecNode::collect_scan_nodes(vector* nodes) {
     collect_nodes(TPlanNodeType::OLAP_SCAN_NODE, nodes);
     collect_nodes(TPlanNodeType::BROKER_SCAN_NODE, nodes);
     collect_nodes(TPlanNodeType::ES_SCAN_NODE, nodes);
+    collect_nodes(TPlanNodeType::ES_HTTP_SCAN_NODE, nodes);
 }
 
 void ExecNode::init_runtime_profile(const std::string& name) {
diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp
index 439885f66b5ef9..9ca09ca9e573eb 100644
--- a/be/src/exprs/expr.cpp
+++ b/be/src/exprs/expr.cpp
@@ -700,6 +700,13 @@ TExprNodeType::type Expr::type_without_cast(const Expr* expr) {
     return expr->_node_type;
 }
 
+// Strips any chain of CAST nodes from the top of `expr` and returns the first
+// expression that is not a cast; `expr` itself is returned when it is no cast.
+const Expr* Expr::expr_without_cast(const Expr* expr) {
+    if (expr->_opcode == TExprOpcode::CAST) {
+        return expr_without_cast(expr->_children[0]);
+    }
+    return expr;
+}
+
 doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) {
     if (!is_constant()) {
         return NULL;
diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h
index 7d1118acdcc2c1..a32bfd3bbd4e65 100644
--- a/be/src/exprs/expr.h
+++ b/be/src/exprs/expr.h
@@ -181,6 +181,8 @@ class Expr {
 
     static TExprNodeType::type type_without_cast(const Expr* expr);
 
+    // Returns the expression below any leading CAST nodes of `expr`.
+    static const Expr* expr_without_cast(const Expr* expr);
+
     // Returns true if expr doesn't contain slotrefs, ie, can be evaluated
     // with get_value(NULL). The default implementation returns true if all of
     // the children are constant.
diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h
index cbf2b6ea991134..de57638857be3f 100644
--- a/be/src/exprs/expr_context.h
+++ b/be/src/exprs/expr_context.h
@@ -176,6 +176,7 @@ class ExprContext {
     friend class InPredicate;
     friend class OlapScanNode;
     friend class EsScanNode;
+    friend class EsPredicate;
 
     /// FunctionContexts for each registered expression. The FunctionContexts are created
     /// and owned by this ExprContext.
diff --git a/be/src/exprs/in_predicate.h b/be/src/exprs/in_predicate.h
index aec176730f764b..4b3c6fa5729bb2 100644
--- a/be/src/exprs/in_predicate.h
+++ b/be/src/exprs/in_predicate.h
@@ -55,7 +55,7 @@ class InPredicate : public Predicate {
     // if add to children, when List is long, copy is a expensive op.
     void insert(void* value);
 
-    HybirdSetBase* hybird_set() {
+    HybirdSetBase* hybird_set() const {
         return _hybird_set.get();
     }
 
diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp
index 88c4374fe6f618..f56592125d427e 100644
--- a/be/src/http/http_client.cpp
+++ b/be/src/http/http_client.cpp
@@ -145,18 +145,24 @@ size_t HttpClient::on_response_data(const void* data, size_t length) {
 //     return execute(callback);
 // }
 
-Status HttpClient::execute_post_request(const std::string& post_data, std::string* response) {
+Status HttpClient::execute_post_request(const std::string& payload, std::string* response) {
     set_method(POST);
-    set_post_body(post_data);
+    set_payload(payload);
     return execute(response);
 }
 
+// Sends `payload` with the DELETE method and stores the reply in `response`.
+Status HttpClient::execute_delete_request(const std::string& payload, std::string* response) {
+    set_method(DELETE);
+    set_payload(payload);
+    return execute(response);
+}
+
 Status HttpClient::execute(const std::function& callback) {
     _callback = &callback;
     auto code = curl_easy_perform(_curl);
     if (code != CURLE_OK) {
         LOG(WARNING) << "fail to execute HTTP client, errmsg=" << _to_errmsg(code);
-        return Status("fail to execute HTTP client");
+        return Status(_to_errmsg(code));
     }
     return Status::OK;
 }
diff --git a/be/src/http/http_client.h b/be/src/http/http_client.h
index 83a27b8d63646f..d54bc680d33aaf 100644
--- a/be/src/http/http_client.h
+++ b/be/src/http/http_client.h
@@ -61,8 +61,7 @@ class HttpClient {
         curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _header_list);
     }
 
-    // you must set CURLOPT_POSTFIELDSIZE before CURLOPT_COPYPOSTFIELDS options, otherwise will cause request hanging up
-    void set_post_body(const std::string& post_body) {
+    // Sets the request body. CURLOPT_POSTFIELDSIZE must be set before
+    // CURLOPT_COPYPOSTFIELDS, otherwise the request hangs.
+    void set_payload(const std::string& post_body) {
         curl_easy_setopt(_curl, CURLOPT_POSTFIELDSIZE, (long)post_body.length());
         curl_easy_setopt(_curl, CURLOPT_COPYPOSTFIELDS, post_body.c_str());
     }
@@ -114,7 +113,9 @@ class HttpClient {
     // a file to local_path
     Status download(const std::string& local_path);
 
-    Status execute_post_request(const std::string& post_data, std::string* response);
+    Status execute_post_request(const std::string& payload, std::string* response);
+
+    Status execute_delete_request(const std::string& payload, std::string* response);
 
     // execute a simple method, and its response is saved in response argument
     Status execute(std::string* response);
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index 15219e09ec2944..e88cfb44783209 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -124,7 +124,7 @@ class SlotDescriptor {
         return _slot_size;
     }
 
-    std::string col_name() const {
+    const std::string& col_name() const {
         return _col_name;
     }
 
diff --git a/be/src/runtime/large_int_value.h b/be/src/runtime/large_int_value.h
index 4ced5bb0d43242..4b7d4f1a6df21e 100644
--- a/be/src/runtime/large_int_value.h
+++ b/be/src/runtime/large_int_value.h
@@ -52,6 +52,13 @@ class LargeIntValue {
         *len = (buffer + *len) - d;
         return d;
     }
+
+    // Convenience overload: formats `value` into a local 64-byte buffer with
+    // the buffer-based to_string() above and returns the digits as a string.
+    static std::string to_string(__int128 value) {
+        char buf[64] = {0};
+        int len = 64;
+        char *str = to_string(value, buf, &len);
+        return std::string(str, len);
+    }
 };
 
 std::ostream& operator<<(std::ostream& os, __int128 const& value);
diff --git a/be/src/runtime/string_value.cpp b/be/src/runtime/string_value.cpp
index 71a91faba50ea1..8ac089236fbc2f 100644
--- a/be/src/runtime/string_value.cpp
+++ b/be/src/runtime/string_value.cpp
@@ -27,6 +27,10 @@ std::string StringValue::debug_string() const {
     return std::string(ptr, len);
 }
 
+// Copies the `len` bytes starting at `ptr` into a std::string.
+std::string StringValue::to_string() const {
+    return std::string(ptr, len);
+}
+
 std::ostream& operator<<(std::ostream& os, const StringValue& string_value) {
     return os << string_value.debug_string();
 }
diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h
index f840f0d905dcb8..d03604625c4f1d 100644
--- a/be/src/runtime/string_value.h
+++ b/be/src/runtime/string_value.h
@@ -105,6 +105,8 @@ struct StringValue {
 
     std::string debug_string() const;
 
+    std::string to_string() const;
+
     // Returns the substring starting at start_pos until the end of string.
     StringValue substring(int start_pos) const;
 
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 0c3884e4a65a11..541a178c86d927 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -88,3 +88,4 @@ add_library(Util STATIC
 #ADD_BE_TEST(bit-util-test)
 #ADD_BE_TEST(rle-test)
 ##ADD_BE_TEST(perf-counters-test)
+##ADD_BE_TEST(es-scan-reader-test)
diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt
index 7b683602ce0b1c..79e33f5b8f516a 100644
--- a/be/test/exec/CMakeLists.txt
+++ b/be/test/exec/CMakeLists.txt
@@ -44,6 +44,10 @@ ADD_BE_TEST(broker_reader_test)
 ADD_BE_TEST(broker_scanner_test)
 ADD_BE_TEST(broker_scan_node_test)
 ADD_BE_TEST(es_scan_node_test)
+ADD_BE_TEST(es_http_scan_node_test)
+ADD_BE_TEST(es_predicate_test)
+ADD_BE_TEST(es_query_builder_test)
+ADD_BE_TEST(es_scan_reader_test)
 ADD_BE_TEST(olap_table_info_test)
 ADD_BE_TEST(olap_table_sink_test)
 #ADD_BE_TEST(schema_scan_node_test)
diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp
new file mode
100644
index 00000000000000..e3fd63a9d2e64f
--- /dev/null
+++ b/be/test/exec/es_http_scan_node_test.cpp
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/es_http_scan_node.h"
+
+#include
+#include
+
+#include "common/object_pool.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/mem_pool.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "runtime/row_batch.h"
+#include "runtime/string_value.h"
+#include "runtime/tuple_row.h"
+#include "util/runtime_profile.h"
+#include "util/debug_util.h"
+
+using std::vector;
+
+namespace doris {
+
+// mock
+// Fixture that builds a descriptor table with one ES table holding a single
+// materialized INT slot, and a plan node that describes an ES HTTP scan of it.
+class EsHttpScanNodeTest : public testing::Test {
+public:
+    EsHttpScanNodeTest() : _runtime_state("EsHttpScanNodeTest") {
+        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        TDescriptorTable t_desc_table;
+
+        // table descriptors
+        TTableDescriptor t_table_desc;
+        t_table_desc.id = 0;
+        t_table_desc.tableType = TTableType::ES_TABLE;
+        t_table_desc.numCols = 1;
+        t_table_desc.numClusteringCols = 0;
+        t_table_desc.__isset.esTable = true;
+        t_desc_table.tableDescriptors.push_back(t_table_desc);
+        t_desc_table.__isset.tableDescriptors = true;
+
+        // TSlotDescriptor
+        // byte 0 of the tuple is the null-indicator byte, data starts at 1
+        int offset = 1;
+        int i = 0;
+        // id
+        {
+            TSlotDescriptor t_slot_desc;
+            t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift());
+            t_slot_desc.__set_columnPos(i);
+            t_slot_desc.__set_byteOffset(offset);
+            t_slot_desc.__set_nullIndicatorByte(0);
+            t_slot_desc.__set_nullIndicatorBit(-1);
+            t_slot_desc.__set_slotIdx(i);
+            t_slot_desc.__set_isMaterialized(true);
+            t_desc_table.slotDescriptors.push_back(t_slot_desc);
+            offset += sizeof(int);
+        }
+
+        TTupleDescriptor t_tuple_desc;
+        t_tuple_desc.id = 0;
+        t_tuple_desc.byteSize = offset;
+        t_tuple_desc.numNullBytes = 1;
+        t_tuple_desc.tableId = 0;
+        t_tuple_desc.__isset.tableId = true;
+        t_desc_table.__isset.slotDescriptors = true;
+        t_desc_table.tupleDescriptors.push_back(t_tuple_desc);
+
+        DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl);
+        _runtime_state.set_desc_tbl(_desc_tbl);
+
+        // Node Id
+        _tnode.node_id = 0;
+        // the node under test is the ES HTTP scan node, not a schema scan
+        _tnode.node_type = TPlanNodeType::ES_HTTP_SCAN_NODE;
+        _tnode.num_children = 0;
+        _tnode.limit = -1;
+        _tnode.row_tuples.push_back(0);
+        _tnode.nullable_tuples.push_back(false);
+        _tnode.es_scan_node.tuple_id = 0;
+        std::map<std::string, std::string> properties;
+        _tnode.es_scan_node.__set_properties(properties);
+        _tnode.__isset.es_scan_node = true;
+    }
+
+protected:
+    virtual void SetUp() {
+    }
+    virtual void TearDown() {
+    }
+    TPlanNode _tnode;
+    ObjectPool _obj_pool;
+    DescriptorTbl* _desc_tbl;
+    RuntimeState _runtime_state;
+};
+
+// Walks the node through init/prepare/set_scan_ranges/open/close with a single
+// scan range. open() is expected to fail -- presumably because the ES host
+// "unknown" cannot be reached -- and close() must still succeed afterwards.
+TEST_F(EsHttpScanNodeTest, normal_use) {
+
+    EsHttpScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
+    Status status = scan_node.init(_tnode, &_runtime_state);
+    ASSERT_TRUE(status.ok());
+
+    status = scan_node.prepare(&_runtime_state);
+    ASSERT_TRUE(status.ok());
+
+    // scan range
+    TEsScanRange es_scan_range;
+    es_scan_range.__set_index("index1");
+    es_scan_range.__set_type("docs");
+    es_scan_range.__set_shard_id(0);
+    TNetworkAddress es_host;
+    es_host.__set_hostname("unknown");
+    es_host.__set_port(8200);
+    std::vector<TNetworkAddress> es_hosts;
+    es_hosts.push_back(es_host);
+    es_scan_range.__set_es_hosts(es_hosts);
+    TScanRange scan_range;
+    scan_range.__set_es_scan_range(es_scan_range);
+    TScanRangeParams scan_range_params;
+    scan_range_params.__set_scan_range(scan_range);
+    std::vector<TScanRangeParams> scan_ranges;
+    scan_ranges.push_back(scan_range_params);
+
+    status = scan_node.set_scan_ranges(scan_ranges);
+    ASSERT_TRUE(status.ok());
+
+    status = scan_node.open(&_runtime_state);
+    ASSERT_FALSE(status.ok());
+
+    status = scan_node.close(&_runtime_state);
+    ASSERT_TRUE(status.ok());
+}
+
+}
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
+
diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp
new file mode 100644
index 00000000000000..3c18bf1af4c454
--- /dev/null
+++ b/be/test/exec/es_predicate_test.cpp
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/es/es_predicate.h"
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "common/logging.h"
+#include "common/status.h"
+#include "exprs/binary_predicate.h"
+#include "gen_cpp/Exprs_types.h"
+#include "exec/es/es_query_builder.h"
+#include "rapidjson/document.h"
+#include "rapidjson/rapidjson.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+#include "runtime/mem_tracker.h"
+#include "runtime/primitive_type.h"
+#include "runtime/runtime_state.h"
+#include "runtime/string_value.h"
+
+namespace doris {
+
+class RuntimeState;
+// Fixture: registers one ES table whose tuple 0 holds a single materialized INT slot named "id".
+class EsPredicateTest : public testing::Test {
+public:
+    EsPredicateTest() : _runtime_state("EsPredicateTest") {
+        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        TDescriptorTable t_desc_table;
+
+        // table descriptors
+        TTableDescriptor t_table_desc;
+        t_table_desc.id = 0;
+        t_table_desc.tableType = TTableType::ES_TABLE;
+        t_table_desc.numCols = 1;
+        t_table_desc.numClusteringCols = 0;
+        t_table_desc.__isset.esTable = true;
+        t_desc_table.tableDescriptors.push_back(t_table_desc);
+        t_desc_table.__isset.tableDescriptors = true;
+
+        // TSlotDescriptor
+        int offset = 1;
+        int i = 0;
+        // id
+        {
+            TSlotDescriptor t_slot_desc;
+            t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift());
+            t_slot_desc.__set_columnPos(i);
+            t_slot_desc.__set_byteOffset(offset);
+            t_slot_desc.__set_nullIndicatorByte(0);
+            t_slot_desc.__set_nullIndicatorBit(-1);
+            t_slot_desc.__set_slotIdx(i);
+            t_slot_desc.__set_isMaterialized(true);
+            t_slot_desc.colName = "id";
+            t_desc_table.slotDescriptors.push_back(t_slot_desc);
+            offset += sizeof(int);
+        }
+
+        TTupleDescriptor t_tuple_desc;
+        t_tuple_desc.id = 0;
+        t_tuple_desc.byteSize = offset;
+        t_tuple_desc.numNullBytes = 1;
+        t_tuple_desc.tableId = 0;
+        t_tuple_desc.__isset.tableId = true;
+        t_desc_table.__isset.slotDescriptors = true;
+        t_desc_table.tupleDescriptors.push_back(t_tuple_desc);
+
+        DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl);
+        _runtime_state.set_desc_tbl(_desc_tbl);
+    }
+
+    Status build_expr_context_list(std::vector<ExprContext*>& conjunct_ctxs);
+    void init();
+    void SetUp() override {}
+    void TearDown() override {}
+
+private:
+    // _desc_tbl is filled in by DescriptorTbl::create() in the constructor.
+    ObjectPool _obj_pool;
+    DescriptorTbl* _desc_tbl;
+    RuntimeState _runtime_state;
+};
+
+Status EsPredicateTest::build_expr_context_list(std::vector<ExprContext*>& conjunct_ctxs) {
+    // One conjunct made of three nodes, pushed in the order: BINARY_PRED(GT), SLOT_REF(slot 0), INT_LITERAL 10.
+    TExpr texpr;
+    {
+        TExprNode node0;
+        node0.opcode = TExprOpcode::GT;
+        node0.child_type = TPrimitiveType::BIGINT;
+        node0.node_type = TExprNodeType::BINARY_PRED;
+        node0.num_children = 2;
+        node0.__isset.opcode = true;
+        node0.__isset.child_type = true;
+        node0.type = gen_type_desc(TPrimitiveType::BOOLEAN);
+        texpr.nodes.emplace_back(node0);
+
+        TExprNode node1;
+        node1.node_type = TExprNodeType::SLOT_REF;
+        node1.type = gen_type_desc(TPrimitiveType::INT);
+        node1.__isset.slot_ref = true;
+        node1.num_children = 0;
+        node1.slot_ref.slot_id = 0;
+        node1.slot_ref.tuple_id = 0;
+        node1.output_column = true;
+        node1.__isset.output_column = true;
+        texpr.nodes.emplace_back(node1);
+
+        TExprNode node2;
+        TIntLiteral intLiteral;
+        intLiteral.value = 10;
+        node2.node_type = TExprNodeType::INT_LITERAL;
+        node2.type = gen_type_desc(TPrimitiveType::BIGINT);
+        node2.__isset.int_literal = true;
+        node2.int_literal = intLiteral;
+        texpr.nodes.emplace_back(node2);
+    }
+
+    std::vector<TExpr> conjuncts;
+    conjuncts.emplace_back(texpr);
+    Status status = Expr::create_expr_trees(&_obj_pool, conjuncts, &conjunct_ctxs);
+
+    return status;
+}
+
+TEST_F(EsPredicateTest, normal) {
+    std::vector<ExprContext*> conjunct_ctxs;
+    Status status = build_expr_context_list(conjunct_ctxs);
+    ASSERT_TRUE(status.ok());  // fail here rather than on a confusing JSON mismatch below
+    TupleDescriptor *tuple_desc = _desc_tbl->get_tuple_descriptor(0);
+    std::vector<EsPredicate*> predicates;
+    for (size_t i = 0; i < conjunct_ctxs.size(); ++i) {
+        EsPredicate* predicate = new EsPredicate(conjunct_ctxs[i], tuple_desc);
+        if (predicate->build_disjuncts_list().ok()) {
+            predicates.push_back(predicate);
+        }
+    }
+
+    rapidjson::Document document;
+    rapidjson::Value compound_bool_value(rapidjson::kObjectType);
+    compound_bool_value.SetObject();
+    BooleanQueryBuilder::to_query(predicates, &document, &compound_bool_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    compound_bool_value.Accept(writer);
+    std::string actual_bool_json = buffer.GetString();
+    std::string expected_json = "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"range\":{\"id\":{\"gt\":\"10\"}}}]}}]}}";
+    LOG(INFO) << "compound bool query" << actual_bool_json;
+    ASSERT_STREQ(expected_json.c_str(), actual_bool_json.c_str());
+}
+
+
+} // end namespace doris
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/be/test/exec/es_query_builder_test.cpp b/be/test/exec/es_query_builder_test.cpp
new file mode 100644
index 00000000000000..f6d7938a9ff702
--- /dev/null
+++ b/be/test/exec/es_query_builder_test.cpp
@@ -0,0 +1,459 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "common/logging.h"
+#include "exec/es/es_query_builder.h"
+#include "exec/es/es_predicate.h"
+#include "rapidjson/document.h"
+#include "rapidjson/rapidjson.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+#include "runtime/string_value.h"
+
+namespace doris {
+// Empty fixture: each test serializes one query builder to JSON and compares it with a literal string.
+class BooleanQueryBuilderTest : public testing::Test {
+public:
+    BooleanQueryBuilderTest() { }
+    virtual ~BooleanQueryBuilderTest() { }
+};
+
+TEST_F(BooleanQueryBuilderTest, term_query) {
+    // content = "wyf"
+    char str[] = "wyf";
+    StringValue value(str, 3);
+    ExtLiteral term_literal(TYPE_VARCHAR, &value);
+    TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(3);
+    std::string name = "content";
+    ExtBinaryPredicate term_predicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::EQ, term_literal);
+    TermQueryBuilder term_query(term_predicate);
+    rapidjson::Document document;
+    rapidjson::Value term_value(rapidjson::kObjectType);
+    term_value.SetObject();
+    term_query.to_json(&document, &term_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    term_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    //LOG(INFO) << "term query" << actual_json;
+    ASSERT_STREQ("{\"term\":{\"content\":\"wyf\"}}", actual_json.c_str());
+}
+
+TEST_F(BooleanQueryBuilderTest, range_query) {
+    // k >= a  NOTE(review): the ES range query documents "gte"; the "ge" below pins the builder's output - confirm.
+    char str[] = "a";
+    StringValue value(str, 1);
+    ExtLiteral term_literal(TYPE_VARCHAR, &value);
+    TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(1);
+    std::string name = "k";
+    ExtBinaryPredicate range_predicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::GE, term_literal);
+    RangeQueryBuilder range_query(range_predicate);
+    rapidjson::Document document;
+    rapidjson::Value range_value(rapidjson::kObjectType);
+    range_value.SetObject();
+    range_query.to_json(&document, &range_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    range_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    //LOG(INFO) << "range query" << actual_json;
+    ASSERT_STREQ("{\"range\":{\"k\":{\"ge\":\"a\"}}}", actual_json.c_str());
+}
+
+TEST_F(BooleanQueryBuilderTest, es_query) {
+    // esquery('random', "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}")
+    char str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}";
+    int length = (int)strlen(str);
+    TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(length);
+    std::string name = "random";
+    ExtColumnDesc col_des(name, type_desc);
+    std::vector<ExtColumnDesc> cols = {col_des};
+    StringValue value(str, length);
+    ExtLiteral term_literal(TYPE_VARCHAR, &value);
+    std::vector<ExtLiteral> values = {term_literal};
+    std::string function_name = "esquery";
+    ExtFunction function_predicate(TExprNodeType::FUNCTION_CALL, function_name, cols, values);
+    ESQueryBuilder es_query(function_predicate);
+    rapidjson::Document document;
+    rapidjson::Value es_query_value(rapidjson::kObjectType);
+    es_query_value.SetObject();
+    es_query.to_json(&document, &es_query_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    es_query_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    //LOG(INFO) << "es query" << actual_json;
+    ASSERT_STREQ("{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}}", actual_json.c_str());
+}
+
+TEST_F(BooleanQueryBuilderTest, like_query) {
+    // content like 'a%e%g_'
+    char str[] = "a%e%g_";
+    int length = (int)strlen(str);
+    LOG(INFO) << "length " << length;
+    TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(length);
+    StringValue value(str, length);
+    ExtLiteral like_literal(TYPE_VARCHAR, &value);
+    std::string name = "content";
+    ExtLikePredicate like_predicate(TExprNodeType::LIKE_PRED, name, type_desc, like_literal);
+    WildCardQueryBuilder like_query(like_predicate);
+    rapidjson::Document document;
+    rapidjson::Value like_query_value(rapidjson::kObjectType);
+    like_query_value.SetObject();
+    like_query.to_json(&document, &like_query_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    like_query_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    // LOG(INFO) << "wildcard query" << actual_json;
+    ASSERT_STREQ("{\"wildcard\":{\"content\":\"a*e*g?\"}}", actual_json.c_str());
+}
+
+TEST_F(BooleanQueryBuilderTest, terms_in_query) {
+    // dv in ["2.0", "4.0", "8.0"]
+    std::string terms_in_field = "dv";
+    int terms_in_field_length = terms_in_field.length();
+    TypeDescriptor terms_in_col_type_desc = TypeDescriptor::create_varchar_type(terms_in_field_length);
+
+    char value_1[] = "2.0";
+    int value_1_length = (int)strlen(value_1);
+    StringValue string_value_1(value_1, value_1_length);
+    ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1);
+
+    char value_2[] = "4.0";
+    int value_2_length = (int)strlen(value_2);
+    StringValue string_value_2(value_2, value_2_length);
+    ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2);
+
+    char value_3[] = "8.0";
+    int value_3_length = (int)strlen(value_3);
+    StringValue string_value_3(value_3, value_3_length);
+    ExtLiteral term_literal_3(TYPE_VARCHAR, &string_value_3);
+
+    std::vector<ExtLiteral> terms_values = {term_literal_1, term_literal_2, term_literal_3};
+    ExtInPredicate in_predicate(TExprNodeType::IN_PRED, false, terms_in_field, terms_in_col_type_desc, terms_values);
+    TermsInSetQueryBuilder terms_query(in_predicate);
+    rapidjson::Document document;
+    rapidjson::Value in_query_value(rapidjson::kObjectType);
+    in_query_value.SetObject();
+    terms_query.to_json(&document, &in_query_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    in_query_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    //LOG(INFO) << "terms in sets query" << actual_json;
+    ASSERT_STREQ("{\"terms\":{\"dv\":[\"2.0\",\"4.0\",\"8.0\"]}}", actual_json.c_str());
+}
+
+TEST_F(BooleanQueryBuilderTest, match_all_query) {
+    // match all docs
+    MatchAllQueryBuilder match_all_query;
+    rapidjson::Document document;
+    rapidjson::Value match_all_query_value(rapidjson::kObjectType);
+    match_all_query_value.SetObject();
+    match_all_query.to_json(&document, &match_all_query_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    match_all_query_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    //LOG(INFO) << "match all query" << actual_json;
+    ASSERT_STREQ("{\"match_all\":{}}", actual_json.c_str());
+}
+
+
+TEST_F(BooleanQueryBuilderTest, bool_query) {
+    // content like 'a%e%g_'
+    char like_value[] = "a%e%g_";
+    int like_value_length = (int)strlen(like_value);
+    TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length);
+    StringValue like_term_value(like_value, like_value_length);
+    ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value);
+    std::string like_field_name = "content";
+    ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal);
+    // esquery("random", "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}")
+    char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}";
+    int es_query_length = (int)strlen(es_query_str);
+    StringValue value(es_query_str, es_query_length);
+    TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length);
+    std::string es_query_field_name = "random";
+    ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc);
+    std::vector<ExtColumnDesc> es_query_cols = {es_query_col_des};
+    StringValue es_query_value(es_query_str, es_query_length);
+    ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value);
+    std::vector<ExtLiteral> es_query_values = {es_query_term_literal};
+    std::string function_name = "esquery";
+    ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values);
+    // k >= a
+    char range_value_str[] = "a";
+    int range_value_length = (int)strlen(range_value_str);
+    StringValue range_value(range_value_str, range_value_length);
+    ExtLiteral range_literal(TYPE_VARCHAR, &range_value);
+    TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length);
+    std::string range_field_name = "k";
+    ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, TExprOpcode::GE, range_literal);
+    // content = "wyf"
+    char term_str[] = "wyf";
+    int term_value_length = (int)strlen(term_str);
+    StringValue term_value(term_str, term_value_length);
+    ExtLiteral term_literal(TYPE_VARCHAR, &term_value);
+    TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length);
+    std::string term_field_name = "content";
+    ExtBinaryPredicate* term_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, TExprOpcode::EQ, term_literal);
+
+    // content like 'a%e%g_' or esquery(random, ...) or k >= a or content = "wyf"
+    std::vector<ExtPredicate*> or_predicates = {like_predicate, function_predicate, range_predicate, term_predicate};
+    BooleanQueryBuilder bool_query(or_predicates);
+    rapidjson::Document document;
+    rapidjson::Value bool_query_value(rapidjson::kObjectType);
+    bool_query_value.SetObject();
+    bool_query.to_json(&document, &bool_query_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    bool_query_value.Accept(writer);
+    std::string actual_json = buffer.GetString();
+    std::string expected_json = "{\"bool\":{\"should\":[{\"wildcard\":{\"content\":\"a*e*g?\"}},{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}},{\"range\":{\"k\":{\"ge\":\"a\"}}},{\"term\":{\"content\":\"wyf\"}}]}}";
+    //LOG(INFO) << "bool query" << actual_json;
+    ASSERT_STREQ(expected_json.c_str(), actual_json.c_str());
+}
+
+TEST_F(BooleanQueryBuilderTest, compound_bool_query) {
+    // content like "a%e%g_" or esquery(random, '{"bool": {"must_not": {"exists": {"field": "f1"}}}}')
+    char like_value[] = "a%e%g_";
+    int like_value_length = (int)strlen(like_value);
+    TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length);
+    StringValue like_term_value(like_value, like_value_length);
+    ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value);
+    std::string like_field_name = "content";
+    ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal);
+
+    char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}";
+    int es_query_length = (int)strlen(es_query_str);
+    StringValue value(es_query_str, es_query_length);
+    TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length);
+    std::string es_query_field_name = "random";
+    ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc);
+    std::vector<ExtColumnDesc> es_query_cols = {es_query_col_des};
+    StringValue es_query_value(es_query_str, es_query_length);
+    ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value);
+    std::vector<ExtLiteral> es_query_values = {es_query_term_literal};
+    std::string function_name = "esquery";
+    ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values);
+    std::vector<ExtPredicate*> bool_predicates_1 = {like_predicate, function_predicate};
+    EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1);
+
+    // k >= "a"
+    char range_value_str[] = "a";
+    int range_value_length = (int)strlen(range_value_str);
+    StringValue range_value(range_value_str, range_value_length);
+    ExtLiteral range_literal(TYPE_VARCHAR, &range_value);
+    TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length);
+    std::string range_field_name = "k";
+    ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, TExprOpcode::GE, range_literal);
+
+    std::vector<ExtPredicate*> bool_predicates_2 = {range_predicate};
+    EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2);
+
+    // content != "wyf"
+    char term_str[] = "wyf";
+    int term_value_length = (int)strlen(term_str);
+    StringValue term_value(term_str, term_value_length);
+    ExtLiteral term_literal(TYPE_VARCHAR, &term_value);
+    TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length);
+    std::string term_field_name = "content";
+    ExtBinaryPredicate* term_ne_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, TExprOpcode::NE, term_literal);
+    std::vector<ExtPredicate*> bool_predicates_3 = {term_ne_predicate};
+    EsPredicate* bool_predicate_3 = new EsPredicate(bool_predicates_3);
+
+    // fv not in [8.0, 16.0]
+    std::string terms_in_field = "fv";
+    int terms_in_field_length = terms_in_field.length();
+    TypeDescriptor terms_in_col_type_desc = TypeDescriptor::create_varchar_type(terms_in_field_length);
+
+    char value_1[] = "8.0";
+    int value_1_length = (int)strlen(value_1);
+    StringValue string_value_1(value_1, value_1_length);
+    ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1);
+
+    char value_2[] = "16.0";
+    int value_2_length = (int)strlen(value_2);
+    StringValue string_value_2(value_2, value_2_length);
+    ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2);
+
+    std::vector<ExtLiteral> terms_values = {term_literal_1, term_literal_2};
+    ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, terms_in_col_type_desc, terms_values);
+    std::vector<ExtPredicate*> bool_predicates_4 = {in_predicate};
+    EsPredicate* bool_predicate_4 = new EsPredicate(bool_predicates_4);
+
+    // (content like "a%e%g_" or esquery(random, '{"bool": {"must_not": {"exists": {"field": "f1"}}}}')) and k >= "a" and content != "wyf" and fv not in [8.0, 16.0]
+    std::vector<EsPredicate*> and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3, bool_predicate_4};
+
+    rapidjson::Document document;
+    rapidjson::Value compound_bool_value(rapidjson::kObjectType);
+    compound_bool_value.SetObject();
+    BooleanQueryBuilder::to_query(and_bool_predicates, &document, &compound_bool_value);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    compound_bool_value.Accept(writer);
+    std::string actual_bool_json = buffer.GetString();
+    std::string expected_json = "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"wildcard\":{\"content\":\"a*e*g?\"}},{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}}]}},{\"bool\":{\"should\":[{\"range\":{\"k\":{\"ge\":\"a\"}}}]}},{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[{\"term\":{\"content\":\"wyf\"}}]}}]}},{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[{\"terms\":{\"fv\":[\"8.0\",\"16.0\"]}}]}}]}}]}}";
+    //LOG(INFO) << "compound bool query" << actual_bool_json;
+    ASSERT_STREQ(expected_json.c_str(), actual_bool_json.c_str());
+}
+TEST_F(BooleanQueryBuilderTest, validate_esquery) {
+    std::string function_name = "esquery";
+    char field[] = "random";
+    int field_length = (int)strlen(field);
+    TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(field_length);
+    ExtColumnDesc es_query_col_des(field, es_query_type_desc);
+    std::vector<ExtColumnDesc> es_query_cols = {es_query_col_des};
+    char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}";
+    int es_query_length = (int)strlen(es_query_str);
+    StringValue es_query_value(es_query_str, es_query_length);
+    ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value);
+    std::vector<ExtLiteral> es_query_values = {es_query_term_literal};
+    ExtFunction legal_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values);
+    auto st = BooleanQueryBuilder::check_es_query(legal_es_query);
+    ASSERT_TRUE(st.ok());
+    char empty_query[] = "{}";
+    int empty_query_length = (int)strlen(empty_query);
+    StringValue empty_query_value(empty_query, empty_query_length);
+    ExtLiteral empty_query_term_literal(TYPE_VARCHAR, &empty_query_value);
+    std::vector<ExtLiteral> empty_query_values = {empty_query_term_literal};
+    ExtFunction empty_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, empty_query_values);
+    st = BooleanQueryBuilder::check_es_query(empty_es_query);
+    ASSERT_STREQ(st.get_error_msg().c_str(), "esquery must only one root");
+    //LOG(INFO) <<"error msg:" << st.get_error_msg();
+    char malformed_query[] = "{\"bool\": {\"must_not\": {\"exists\": {";
+    int malformed_query_length = (int)strlen(malformed_query);
+    StringValue malformed_query_value(malformed_query, malformed_query_length);
+    ExtLiteral malformed_query_term_literal(TYPE_VARCHAR, &malformed_query_value);
+    std::vector<ExtLiteral> malformed_query_values = {malformed_query_term_literal};
+    ExtFunction malformed_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, malformed_query_values);
+    st = BooleanQueryBuilder::check_es_query(malformed_es_query);
+    ASSERT_STREQ(st.get_error_msg().c_str(), "malformed esquery json");
+    char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}";
+    int illegal_query_length = (int)strlen(illegal_query);
+    StringValue illegal_query_value(illegal_query, illegal_query_length);
+    ExtLiteral illegal_query_term_literal(TYPE_VARCHAR, &illegal_query_value);
+    std::vector<ExtLiteral> illegal_query_values = {illegal_query_term_literal};
+    ExtFunction illegal_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_query_values);
+    st = BooleanQueryBuilder::check_es_query(illegal_es_query);
+    ASSERT_STREQ(st.get_error_msg().c_str(), "esquery must only one root");
+    char illegal_key_query[] = "[\"22\"]";
+    int illegal_key_query_length = (int)strlen(illegal_key_query);
+    StringValue illegal_key_query_value(illegal_key_query, illegal_key_query_length);
+    ExtLiteral illegal_key_query_term_literal(TYPE_VARCHAR, &illegal_key_query_value);
+    std::vector<ExtLiteral> illegal_key_query_values = {illegal_key_query_term_literal};
+    ExtFunction illegal_key_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_key_query_values);
+    st = BooleanQueryBuilder::check_es_query(illegal_key_es_query);
+    ASSERT_STREQ(st.get_error_msg().c_str(), "esquery must be a object");
+}
+
+TEST_F(BooleanQueryBuilderTest, validate_partial) {
+    char like_value[] = "a%e%g_";
+    int like_value_length = (int)strlen(like_value);
+    TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length);
+    StringValue like_term_value(like_value, like_value_length);
+    ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value);
+    std::string like_field_name = "content";
+    ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal);
+
+    // k >= "a"
+    char range_value_str[] = "a";
+    int range_value_length = (int)strlen(range_value_str);
+    StringValue range_value(range_value_str, range_value_length);
+    ExtLiteral range_literal(TYPE_VARCHAR, &range_value);
+    TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length);
+    std::string range_field_name = "k";
+    ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, TExprOpcode::GE, range_literal);
+
+    std::vector<ExtPredicate*> bool_predicates_1 = {like_predicate, range_predicate};
+    EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1);
+
+    // fv not in [8.0, 16.0]
+    std::string terms_in_field = "fv";
+    int terms_in_field_length = terms_in_field.length();
+    TypeDescriptor terms_in_col_type_desc = TypeDescriptor::create_varchar_type(terms_in_field_length);
+
+    char value_1[] = "8.0";
+    int value_1_length = (int)strlen(value_1);
+    StringValue string_value_1(value_1, value_1_length);
+    ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1);
+
+    char value_2[] = "16.0";
+    int value_2_length = (int)strlen(value_2);
+    StringValue string_value_2(value_2, value_2_length);
+    ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2);
+
+    std::vector<ExtLiteral> terms_values = {term_literal_1, term_literal_2};
+    ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, terms_in_col_type_desc, terms_values);
+    std::vector<ExtPredicate*> bool_predicates_2 = {in_predicate};
+    EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2);
+
+    // content != "wyf"
+    char term_str[] = "wyf";
+    int term_value_length = (int)strlen(term_str);
+    StringValue term_value(term_str, term_value_length);
+    ExtLiteral term_literal(TYPE_VARCHAR, &term_value);
+    TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length);
+    std::string term_field_name = "content";
+    ExtBinaryPredicate* term_ne_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, TExprOpcode::NE, term_literal);
+
+    char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}";
+    int es_query_length = (int)strlen(es_query_str);
+    StringValue value(es_query_str, es_query_length);
+    TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length);
+    std::string es_query_field_name = "random";
+    ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc);
+    std::vector<ExtColumnDesc> es_query_cols = {es_query_col_des};
+    StringValue es_query_value(es_query_str, es_query_length);
+    ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value);
+    std::vector<ExtLiteral> es_query_values = {es_query_term_literal};
+    std::string function_name = "esquery";
+    ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values);
+    std::vector<ExtPredicate*> bool_predicates_3 = {term_ne_predicate, function_predicate};
+    EsPredicate* bool_predicate_3 = new EsPredicate(bool_predicates_3);
+
+    std::vector<EsPredicate*> and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3};
+    std::vector<bool> result;
+    BooleanQueryBuilder::validate(and_bool_predicates, &result);
+    std::vector<bool> expected = {true, true, true};
+    ASSERT_TRUE(result == expected);
+    char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}";
+    int illegal_query_length = (int)strlen(illegal_query);
+    StringValue illegal_query_value(illegal_query, illegal_query_length);
+    ExtLiteral illegal_query_term_literal(TYPE_VARCHAR, &illegal_query_value);
+    std::vector<ExtLiteral> illegal_query_values = {illegal_query_term_literal};
+    ExtFunction* illegal_function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_query_values);
+    std::vector<ExtPredicate*> illegal_bool_predicates_3 = {term_ne_predicate, illegal_function_predicate};
+    EsPredicate* illegal_bool_predicate_3 = new EsPredicate(illegal_bool_predicates_3);
+    std::vector<EsPredicate*> and_bool_predicates_1 = {bool_predicate_1, bool_predicate_2, illegal_bool_predicate_3};
+    std::vector<bool> result1;
+    BooleanQueryBuilder::validate(and_bool_predicates_1, &result1);
+    std::vector<bool> expected1 = {true, true, false};
+    ASSERT_TRUE(result1 == expected1);
+}
+} // namespace doris
+
+int main(int argc, char* argv[]) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/be/test/exec/es_scan_reader_test.cpp b/be/test/exec/es_scan_reader_test.cpp
new file mode 100644
index 00000000000000..2da96c025f8355
--- /dev/null
+++ b/be/test/exec/es_scan_reader_test.cpp
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/logging.h"
+#include "exec/es/es_scan_reader.h"
+#include "exec/es/es_scroll_query.h"
+#include "http/ev_http_server.h"
+#include "http/http_channel.h"
+#include "http/http_handler.h"
+#include "http/http_request.h"
+#include "rapidjson/document.h"
+#include "rapidjson/writer.h"
+#include "rapidjson/stringbuffer.h"
+
+namespace doris {
+// Mock search endpoint: for user "root", a POST returns one hit and a _scroll_id equal to the requested "size" (default 1); other methods get "test1".
+class RestSearchAction : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        std::string user;
+        std::string passwd;
+        if (!parse_basic_auth(*req, &user, &passwd) || user != "root") {
+            HttpChannel::send_basic_challenge(req, "abc");
+            return;
+        }
+        req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json");
+        if (req->method() == HttpMethod::POST) {
+            std::string post_body = req->get_request_body();
+            rapidjson::Document post_doc;
+            post_doc.Parse<0>(post_body.c_str());
+            int size = 1;
+            if (post_doc.IsObject() && post_doc.HasMember("size") && post_doc["size"].IsInt()) {  // HasMember/GetInt assert on other shapes
+                rapidjson::Value& size_value = post_doc["size"];
+                size = size_value.GetInt();
+            }
+            std::string _scroll_id(std::to_string(size));
+            rapidjson::Document search_result;
+            rapidjson::Document::AllocatorType &allocator = search_result.GetAllocator();
+            search_result.SetObject();
+            rapidjson::Value scroll_id_value(_scroll_id.c_str(), allocator);
+            search_result.AddMember("_scroll_id", scroll_id_value, allocator);
+
+            rapidjson::Value outer_hits(rapidjson::kObjectType);
+            outer_hits.AddMember("total", 10, allocator);
+            rapidjson::Value inner_hits(rapidjson::kArrayType);
+            rapidjson::Value source_document(rapidjson::kObjectType);
+            source_document.AddMember("id", 1, allocator);
+            rapidjson::Value value_node("1", allocator);
+            source_document.AddMember("value", value_node, allocator);
+            inner_hits.PushBack(source_document, allocator);
+            outer_hits.AddMember("hits", inner_hits, allocator);
+            search_result.AddMember("hits", outer_hits, allocator);
+
+            rapidjson::StringBuffer buffer;
+            rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+            search_result.Accept(writer);
+            // reply with the serialized first page
+            std::string search_result_json = buffer.GetString();
+            HttpChannel::send_reply(req, search_result_json);
+        } else {
+            std::string response = "test1";
+            HttpChannel::send_reply(req, response);
+        }
+    }
+};
+// Mock scroll endpoint: scroll ids are decimal counters; ids <= 10 yield one hit plus the next id, larger ids yield a hit-less page with id "11".
+class RestSearchScrollAction : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        std::string user;
+        std::string passwd;
+        if (!parse_basic_auth(*req, &user, &passwd) || user != "root") {
+            HttpChannel::send_basic_challenge(req, "abc");
+            return;
+        }
+        if (req->method() == HttpMethod::POST) {
+            std::string post_body = req->get_request_body();
+            rapidjson::Document post_doc;
+            post_doc.Parse<0>(post_body.c_str());
+            std::string scroll_id;
+            if (!post_doc.IsObject() || !post_doc.HasMember("scroll_id") || !post_doc["scroll_id"].IsString()) {
+                HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request");
+                return;
+            } else {
+                rapidjson::Value& scroll_id_value = post_doc["scroll_id"];
+                scroll_id = scroll_id_value.GetString();
+                int offset = atoi(scroll_id.c_str());
+                if (offset > 10) {
+                    rapidjson::Document end_search_result;
+                    rapidjson::Document::AllocatorType &allocator = end_search_result.GetAllocator();
+                    end_search_result.SetObject();
+                    rapidjson::Value end_scroll_id_value("11", allocator);
+                    end_search_result.AddMember("_scroll_id", end_scroll_id_value, allocator);
+
+                    rapidjson::Value outer_hits(rapidjson::kObjectType);
+                    outer_hits.AddMember("total", 10, allocator);
+                    end_search_result.AddMember("hits", outer_hits, allocator);
+                    rapidjson::StringBuffer buffer;
+                    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+                    end_search_result.Accept(writer);
+                    // reply with the terminal page (no inner "hits" array)
+                    std::string end_search_result_json = buffer.GetString();
+                    HttpChannel::send_reply(req, end_search_result_json);
+                    return;
+                } else {
+                    int start = offset + 1;
+                    rapidjson::Document search_result;
+                    rapidjson::Document::AllocatorType &allocator = search_result.GetAllocator();
+                    search_result.SetObject();
+                    rapidjson::Value next_scroll_id_value(std::to_string(start).c_str(), allocator);
+                    search_result.AddMember("_scroll_id", next_scroll_id_value, allocator);
+
+                    rapidjson::Value outer_hits(rapidjson::kObjectType);
+                    outer_hits.AddMember("total", 10, allocator);
+                    rapidjson::Value inner_hits(rapidjson::kArrayType);
+                    rapidjson::Value source_document(rapidjson::kObjectType);
+                    source_document.AddMember("id", start, allocator);
+                    rapidjson::Value value_node(std::to_string(start).c_str(), allocator);
+                    source_document.AddMember("value", value_node, allocator);
+                    inner_hits.PushBack(source_document, allocator);
+                    outer_hits.AddMember("hits", inner_hits, allocator);
+                    search_result.AddMember("hits", outer_hits, allocator);
+
+                    rapidjson::StringBuffer buffer;
+                    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+                    search_result.Accept(writer);
+                    // reply with the serialized next page
+                    std::string search_result_json = buffer.GetString();
+                    HttpChannel::send_reply(req, search_result_json);
+                    return;
+                }
+
+            }
+        }
+    }
+};
+// Mock clear-scroll endpoint: a DELETE whose body carries "scroll_id" is answered with {"succeeded":true,"num_freed":1}.
+class RestClearScrollAction : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        std::string user;
+        std::string passwd;
+        if (!parse_basic_auth(*req, &user, &passwd) || user != "root") {
+            HttpChannel::send_basic_challenge(req, "abc");
+            return;
+        }
+        if (req->method() == HttpMethod::DELETE) {
+            std::string post_body = req->get_request_body();
+            rapidjson::Document post_doc;
+            post_doc.Parse<0>(post_body.c_str());
+            std::string scroll_id;
+            if (!post_doc.IsObject() || !post_doc.HasMember("scroll_id")) {
+                HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request");
+                return;
+            } else {
+                rapidjson::Document clear_scroll_result;
+                rapidjson::Document::AllocatorType &allocator = clear_scroll_result.GetAllocator();
+                clear_scroll_result.SetObject();
+                clear_scroll_result.AddMember("succeeded", true, allocator);
+                clear_scroll_result.AddMember("num_freed", 1, allocator);
+                rapidjson::StringBuffer buffer;
+                rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+                clear_scroll_result.Accept(writer);
+                std::string clear_scroll_result_json = buffer.GetString();
+                HttpChannel::send_reply(req, clear_scroll_result_json);
+                return;
+            }
+        }
+    }
+};
+
+static RestSearchAction rest_search_action = RestSearchAction();
+static RestSearchScrollAction rest_search_scroll_action = RestSearchScrollAction();
+static RestClearScrollAction rest_clear_scroll_action = RestClearScrollAction();
+static EvHttpServer* mock_es_server = nullptr;
+// Fixture: starts one EvHttpServer on port 29386 for the whole test case and registers the three mock handlers.
+class MockESServerTest : public testing::Test {
+public:
+    MockESServerTest() { }
+    ~MockESServerTest() override { }
+
+    static void SetUpTestCase() {
+        mock_es_server = new EvHttpServer(29386);
+        mock_es_server->register_handler(POST, "/{index}/{type}/_search", &rest_search_action);
+        mock_es_server->register_handler(POST, "/_search/scroll", &rest_search_scroll_action);
+        mock_es_server->register_handler(DELETE, "/_search/scroll", &rest_clear_scroll_action);
+        mock_es_server->start();
+    }
+
+    static void TearDownTestCase() {
+        delete mock_es_server;
+    }
+};
+
+TEST_F(MockESServerTest, workflow) {
+    std::string target = "http://127.0.0.1:29386";
+    std::vector<std::string> fields = {"id", "value"};
+    std::map<std::string, std::string> props;
+    props[ESScanReader::KEY_INDEX] = "tindex";
+    props[ESScanReader::KEY_TYPE] = "doc";
+    props[ESScanReader::KEY_USER_NAME] = "root";
+    props[ESScanReader::KEY_PASS_WORD] = "root";
+    props[ESScanReader::KEY_SHARD] = "0";
+    props[ESScanReader::KEY_BATCH_SIZE] = "1";
+    std::vector<EsPredicate*> predicates;
+    props[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(props, fields, predicates);
+    ESScanReader reader(target, props);
+    auto st = reader.open();
+    // ASSERT_TRUE(st.ok());
+    bool eos = false;
+    std::unique_ptr<ScrollParser> parser = nullptr;
+    while(!eos){
+        st = reader.get_next(&eos, parser);
+        if (eos || !st.ok()) {  // also stop on error: a failing get_next may never set eos
+            break;
+        }
+    }
+    auto cst = reader.close();
+    ASSERT_TRUE(cst.ok());
+}
+}
+
+int main(int argc, char* argv[]) {
+
::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/http/http_client_test.cpp b/be/test/http/http_client_test.cpp index 343c60614466a8..e75a299142cf21 100644 --- a/be/test/http/http_client_test.cpp +++ b/be/test/http/http_client_test.cpp @@ -19,6 +19,7 @@ #include +#include "boost/algorithm/string.hpp" #include "common/logging.h" #include "http/ev_http_server.h" #include "http/http_channel.h" @@ -151,6 +152,20 @@ TEST_F(HttpClientTest, post_normal) { ASSERT_STREQ(response.c_str(), request_body.c_str()); } +TEST_F(HttpClientTest, post_failed) { + HttpClient client; + auto st = client.init("http://127.0.0.1:29386/simple_pos"); + ASSERT_TRUE(st.ok()); + client.set_method(POST); + client.set_basic_auth("test1", ""); + std::string response; + std::string request_body = "simple post body query"; + st = client.execute_post_request(request_body, &response); + ASSERT_FALSE(st.ok()); + std::string not_found = "404"; + ASSERT_TRUE(boost::algorithm::contains(st.get_error_msg(), not_found)); +} + } int main(int argc, char* argv[]) { diff --git a/fe/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/src/main/java/org/apache/doris/catalog/Catalog.java index a4b880033d1bc2..da9d5253b4d740 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/doris/catalog/Catalog.java @@ -3946,7 +3946,8 @@ public static void getDdlStmt(Table table, List createTableStmt, List schema, + public EsTable(long id, String name, List schema, Map properties, PartitionInfo partitionInfo) throws DdlException { super(id, name, TableType.ELASTICSEARCH, schema); @@ -85,7 +93,7 @@ private void validate(Map properties) throws DdlException { hosts = properties.get(HOSTS).trim(); seeds = hosts.split(","); - if (!Strings.isNullOrEmpty(properties.get(USER)) + if (!Strings.isNullOrEmpty(properties.get(USER)) && !Strings.isNullOrEmpty(properties.get(USER).trim())) { userName = properties.get(USER).trim(); } @@ -106,8 +114,16 @@ 
private void validate(Map properties) throws DdlException { && !Strings.isNullOrEmpty(properties.get(TYPE).trim())) { mappingType = properties.get(TYPE).trim(); } + if (!Strings.isNullOrEmpty(properties.get(TRANSPORT)) + && !Strings.isNullOrEmpty(properties.get(TRANSPORT).trim())) { + transport = properties.get(TRANSPORT).trim(); + if (!(TRANSPORT_HTTP.equals(transport) || TRANSPORT_THRIFT.equals(transport))) { + throw new DdlException("transport of ES table must be http(recommend) or thrift(reserved inner usage)," + + " but value is " + transport); + } + } } - + public TTableDescriptor toThrift() { TEsTable tEsTable = new TEsTable(); TTableDescriptor tTableDescriptor = new TTableDescriptor(getId(), TTableType.ES_TABLE, @@ -137,7 +153,8 @@ public int getSignature(int signatureVersion) { adler32.update(indexName.getBytes(charsetName)); // mysql table adler32.update(mappingType.getBytes(charsetName)); - + // transport + adler32.update(transport.getBytes(charsetName)); } catch (UnsupportedEncodingException e) { LOG.error("encoding error", e); return -1; @@ -156,6 +173,7 @@ public void write(DataOutput out) throws IOException { Text.writeString(out, mappingType); Text.writeString(out, partitionInfo.getType().name()); partitionInfo.write(out); + Text.writeString(out, transport); } @Override @@ -175,12 +193,13 @@ public void readFields(DataInput in) throws IOException { } else { throw new IOException("invalid partition type: " + partType); } + transport = Text.readString(in); } public String getHosts() { return hosts; } - + public String[] getSeeds() { return seeds; } @@ -201,6 +220,10 @@ public String getMappingType() { return mappingType; } + public String getTransport() { + return transport; + } + public PartitionInfo getPartitionInfo() { return partitionInfo; } diff --git a/fe/src/main/java/org/apache/doris/external/EsIndexState.java b/fe/src/main/java/org/apache/doris/external/EsIndexState.java index bcb692511ae9ef..4a1201e70dbc33 100644 --- 
a/fe/src/main/java/org/apache/doris/external/EsIndexState.java +++ b/fe/src/main/java/org/apache/doris/external/EsIndexState.java @@ -19,6 +19,7 @@ import java.util.List; import java.util.Map; +import java.util.Random; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -55,6 +56,27 @@ public EsIndexState(String indexName) { this.partitionDesc = null; this.partitionKey = null; } + + + public void addHttpAddress(Map nodesInfo) { + for (Map.Entry> entry : shardRoutings.entrySet()) { + List shardRoutings = entry.getValue(); + for (EsShardRouting shardRouting : shardRoutings) { + String nodeId = shardRouting.getNodeId(); + if (nodesInfo.containsKey(nodeId)) { + shardRouting.setHttpAddress(nodesInfo.get(nodeId).getPublishAddress()); + } else { + shardRouting.setHttpAddress(randomAddress(nodesInfo)); + } + } + } + } + + public TNetworkAddress randomAddress(Map nodesInfo) { + int seed = new Random().nextInt() % nodesInfo.size(); + EsNodeInfo[] nodeInfos = (EsNodeInfo[]) nodesInfo.values().toArray(); + return nodeInfos[seed].getPublishAddress(); + } public static EsIndexState parseIndexStateV55(String indexName, JSONObject indicesRoutingMap, JSONObject nodesMap, diff --git a/fe/src/main/java/org/apache/doris/external/EsMajorVersion.java b/fe/src/main/java/org/apache/doris/external/EsMajorVersion.java new file mode 100644 index 00000000000000..b71db8ae0fd6b4 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/external/EsMajorVersion.java @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.external; + + +/** + * Elasticsearch major version information, useful to check client's query compatibility with the Rest API. + * + * reference es-hadoop: + * + */ +public class EsMajorVersion { + public static final EsMajorVersion V_5_X = new EsMajorVersion((byte) 5, "5.x"); + public static final EsMajorVersion V_6_X = new EsMajorVersion((byte) 6, "6.x"); + public static final EsMajorVersion V_7_X = new EsMajorVersion((byte) 7, "7.x"); + public static final EsMajorVersion LATEST = V_7_X; + + public final byte major; + private final String version; + + private EsMajorVersion(byte major, String version) { + this.major = major; + this.version = version; + } + + public boolean after(EsMajorVersion version) { + return version.major < major; + } + + public boolean on(EsMajorVersion version) { + return version.major == major; + } + + public boolean notOn(EsMajorVersion version) { + return !on(version); + } + + public boolean onOrAfter(EsMajorVersion version) { + return version.major <= major; + } + + public boolean before(EsMajorVersion version) { + return version.major > major; + } + + public boolean onOrBefore(EsMajorVersion version) { + return version.major >= major; + } + + public static EsMajorVersion parse(String version) throws Exception { + if (version.startsWith("5.")) { + return new EsMajorVersion((byte) 5, version); + } + if (version.startsWith("6.")) { + return new EsMajorVersion((byte) 6, version); + } + if (version.startsWith("7.")) { + return new EsMajorVersion((byte) 7, version); + } + throw new 
Exception("Unsupported/Unknown Elasticsearch version [" + version + "]." + + "Highest supported version is [" + LATEST.version + "]. You may need to upgrade ES-Hadoop."); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + EsMajorVersion version = (EsMajorVersion) o; + + return major == version.major && + version.equals(version.version); + } + + @Override + public int hashCode() { + return major; + } + + @Override + public String toString() { + return version; + } +} diff --git a/fe/src/main/java/org/apache/doris/external/EsNodeInfo.java b/fe/src/main/java/org/apache/doris/external/EsNodeInfo.java new file mode 100644 index 00000000000000..61b513bbfacd9e --- /dev/null +++ b/fe/src/main/java/org/apache/doris/external/EsNodeInfo.java @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
package org.apache.doris.external;

import org.apache.doris.thrift.TNetworkAddress;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * This class represents one node with the http and potential thrift publish address
 */
public class EsNodeInfo {
    private final String id;
    private final String name;
    private final String host;
    private final String ip;
    private TNetworkAddress publishAddress;
    private final boolean hasHttp;
    private final boolean isClient;
    private final boolean isData;
    private final boolean isIngest;
    private boolean hasThrift;
    private TNetworkAddress thriftAddress;

    /**
     * Builds the node description from one entry of the "nodes" object of a nodes-info response.
     *
     * @param id  node id (the key of the entry)
     * @param map node attributes; the keys read are "version", "name", "host", "ip",
     *            "roles" (or "attributes" before 5.x), "http" and "attributes"
     * @throws Exception if the node version is unsupported or "publish_address" is malformed
     */
    public EsNodeInfo(String id, Map<?, ?> map) throws Exception {
        this.id = id;
        EsMajorVersion version = EsMajorVersion.parse((String) map.get("version"));
        this.name = (String) map.get("name");
        this.host = (String) map.get("host");
        this.ip = (String) map.get("ip");

        if (version.before(EsMajorVersion.V_5_X)) {
            Map<?, ?> attributes = (Map<?, ?>) map.get("attributes");
            if (attributes == null) {
                this.isClient = false;
                this.isData = true;
            } else {
                String data = (String) attributes.get("data");
                this.isClient = data == null || !Boolean.parseBoolean(data);
                this.isData = data == null || Boolean.parseBoolean(data);
            }
            this.isIngest = false;
        } else {
            List<?> roles = (List<?>) map.get("roles");
            if (roles == null) {
                // A node entry without "roles" is treated as having none instead of failing.
                roles = Collections.emptyList();
            }
            boolean holdsData = roles.contains("data");
            this.isClient = !holdsData;
            this.isData = holdsData;
            this.isIngest = roles.contains("ingest");
        }

        Map<?, ?> httpMap = (Map<?, ?>) map.get("http");
        String address = httpMap == null ? null : (String) httpMap.get("publish_address");
        this.publishAddress = address == null ? null : parsePublishAddress(address);
        this.hasHttp = address != null;

        Map<?, ?> attributesMap = (Map<?, ?>) map.get("attributes");
        String thriftPortStr = attributesMap == null ? null : (String) attributesMap.get("thrift_port");
        TNetworkAddress parsedThrift = null;
        if (thriftPortStr != null) {
            try {
                parsedThrift = new TNetworkAddress(this.ip, Integer.parseInt(thriftPortStr));
            } catch (NumberFormatException e) {
                // An unparsable port means the node offers no usable thrift endpoint.
                parsedThrift = null;
            }
        }
        this.thriftAddress = parsedThrift;
        this.hasThrift = parsedThrift != null;
    }

    /**
     * Parses an http publish address of the form "ip:port" or "hostname/ip:port".
     * The port is taken after the LAST ':' so that addresses containing further colons
     * (for example bracketed IPv6 literals) are not split in the wrong place.
     *
     * @throws IllegalArgumentException if no host or no port can be extracted
     *         (NumberFormatException for a non-numeric port)
     */
    private static TNetworkAddress parsePublishAddress(String address) {
        String hostAndPort = address.substring(address.lastIndexOf('/') + 1);
        int portSeparator = hostAndPort.lastIndexOf(':');
        if (portSeparator <= 0 || portSeparator == hostAndPort.length() - 1) {
            throw new IllegalArgumentException("invalid publish_address [" + address + "]");
        }
        String hostPart = hostAndPort.substring(0, portSeparator);
        int port = Integer.parseInt(hostAndPort.substring(portSeparator + 1));
        return new TNetworkAddress(hostPart, port);
    }

    public boolean hasHttp() {
        return hasHttp;
    }

    public boolean isClient() {
        return isClient;
    }

    public boolean isData() {
        return isData;
    }

    public boolean isIngest() {
        return isIngest;
    }

    public String getId() {
        return id;
    }

    public String getName() {
        return name;
    }

    public String getHost() {
        return host;
    }

    public TNetworkAddress getPublishAddress() {
        return publishAddress;
    }

    public boolean isHasThrift() {
        return hasThrift;
    }

    public TNetworkAddress getThriftAddress() {
        return thriftAddress;
    }

    /**
     * Compares every field except {@code isIngest} (same field set as {@link #hashCode()}).
     * Null-safe: "name", "host" and "ip" may be absent from the node description.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        EsNodeInfo other = (EsNodeInfo) o;
        return hasHttp == other.hasHttp
                && isClient == other.isClient
                && isData == other.isData
                && hasThrift == other.hasThrift
                && Objects.equals(id, other.id)
                && Objects.equals(name, other.name)
                && Objects.equals(host, other.host)
                && Objects.equals(ip, other.ip)
                && Objects.equals(publishAddress, other.publishAddress)
                && Objects.equals(thriftAddress, other.thriftAddress);
    }

    @Override
    public int hashCode() {
        // Objects.hashCode(x) is x.hashCode() for non-null x and 0 for null.
        int result = Objects.hashCode(id);
        result = 31 * result + Objects.hashCode(name);
        result = 31 * result + Objects.hashCode(host);
        result = 31 * result + Objects.hashCode(ip);
        result = 31 * result + Objects.hashCode(publishAddress);
        result = 31 * result + Objects.hashCode(thriftAddress);
        result = 31 * result + (hasHttp ? 1 : 0);
        result = 31 * result + (hasThrift ? 1 : 0);
        result = 31 * result + (isClient ? 1 : 0);
        result = 31 * result + (isData ? 1 : 0);
        return result;
    }

    @Override
    public String toString() {
        return "EsNodeInfo{"
                + "id='" + id + '\''
                + ", name='" + name + '\''
                + ", host='" + host + '\''
                + ", ip='" + ip + '\''
                + ", publishAddress=" + publishAddress
                + ", hasHttp=" + hasHttp
                + ", isClient=" + isClient
                + ", isData=" + isData
                + ", isIngest=" + isIngest
                + ", hasThrift=" + hasThrift
                + ", thriftAddress=" + thriftAddress
                + '}';
    }
}

// ---- remainder of this patch text, kept verbatim ----
// diff --git a/fe/src/main/java/org/apache/doris/external/EsRestClient.java b/fe/src/main/java/org/apache/doris/external/EsRestClient.java new file mode 100644 index 00000000000000..1159cadc646f30 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/external/EsRestClient.java @@ -0,0 +1,141 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.
// See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.external;

import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.util.Strings;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Minimal REST client for an Elasticsearch cluster reachable through a list of seed nodes.
 * Each request is tried on the seeds in turn, starting with the node that answered last,
 * until one of them returns a successful response.
 *
 * NOTE(review): the generic type arguments were missing from the text under review and are
 * restored here from usage; confirm against the upstream file.
 */
public class EsRestClient {
    private static final Logger LOG = LogManager.getLogger(EsRestClient.class);
    private ObjectMapper mapper;

    {
        mapper = new ObjectMapper();
        mapper.configure(DeserializationConfig.Feature.USE_ANNOTATIONS, false);
        mapper.configure(SerializationConfig.Feature.USE_ANNOTATIONS, false);
    }

    private static OkHttpClient networkClient = new OkHttpClient.Builder()
            .readTimeout(10, TimeUnit.SECONDS)
            .build();

    /** Value of the Authorization header, or null when no credentials were supplied. */
    private String basicAuth;

    /** Index into {@link #nodes} of the node that is tried first. */
    private int currentIndex = 0;
    private String[] nodes;

    /**
     * @param nodes        seed node base urls
     * @param authUser     user name; basic auth is only used when user and password are both non-empty
     * @param authPassword password
     */
    public EsRestClient(String[] nodes, String authUser, String authPassword) {
        this.nodes = nodes;
        if (!Strings.isEmpty(authUser) && !Strings.isEmpty(authPassword)) {
            basicAuth = Credentials.basic(authUser, authPassword);
        }
    }

    /**
     * Fetches the nodes of the cluster that expose an http endpoint.
     *
     * @return nodes keyed by node id; empty when the cluster could not be reached
     * @throws Exception if a node description cannot be parsed
     */
    public Map<String, EsNodeInfo> getHttpNodes() throws Exception {
        Map<String, Map<String, Object>> nodesData = get("_nodes/http", "nodes");
        if (nodesData == null) {
            return Collections.emptyMap();
        }
        Map<String, EsNodeInfo> httpNodes = new HashMap<>();
        for (Map.Entry<String, Map<String, Object>> entry : nodesData.entrySet()) {
            EsNodeInfo node = new EsNodeInfo(entry.getKey(), entry.getValue());
            if (node.hasHttp()) {
                httpNodes.put(node.getId(), node);
            }
        }
        return httpNodes;
    }

    /**
     * @return the raw cluster-state json restricted to {@code indexName}, or null when no
     *         node answered successfully
     */
    public String getIndexMetaData(String indexName) {
        String path = "_cluster/state?indices=" + indexName
                + "&metric=routing_table,nodes,metadata&expand_wildcards=open";
        return execute(path);
    }

    /**
     * execute request for specific path
     * @param path the path must not leading with '/'
     * @return the response body of the first node that answers successfully, or null when
     *         every node failed
     */
    private String execute(String path) {
        // Walk the whole seed list once per request. The previous cursor was advanced by the
        // constructor and again here, and was never reset, so seeds were skipped.
        for (int attempt = 0; attempt < nodes.length; attempt++) {
            int index = (currentIndex + attempt) % nodes.length;
            String node = nodes[index];
            Request.Builder builder = new Request.Builder()
                    .get()
                    .url(node + "/" + path);
            if (basicAuth != null) {
                // okhttp rejects a null header value, so only send the header with credentials.
                builder.addHeader("Authorization", basicAuth);
            }
            // try-with-resources releases the connection whatever the outcome.
            try (Response response = networkClient.newCall(builder.build()).execute()) {
                if (response.isSuccessful()) {
                    currentIndex = index;
                    return response.body().string();
                }
                LOG.warn("request node [{}] [{}] returned http status {}, try next nodes",
                        node, path, response.code());
            } catch (IOException e) {
                LOG.warn("request node [{}] [{}] failures {}, try next nodes", node, path, e);
            }
        }
        LOG.error("try all nodes [{}],no other nodes left", Arrays.toString(nodes));
        return null;
    }

    /**
     * Executes a GET on {@code q} and returns the parsed json.
     *
     * @param q   request path, without a leading '/'
     * @param key top-level member to return, or null for the whole document
     * @return the requested value, or null when the request failed or the member is absent
     */
    public <T> T get(String q, String key) {
        return parseContent(execute(q), key);
    }

    @SuppressWarnings("unchecked")
    private <T> T parseContent(String response, String key) {
        if (response == null) {
            // No node answered; there is nothing to parse.
            return null;
        }
        Map<String, Object> map = Collections.emptyMap();
        try {
            JsonParser jsonParser = mapper.getJsonFactory().createJsonParser(response);
            map = mapper.readValue(jsonParser, Map.class);
        } catch (IOException ex) {
            LOG.error("parse es response failure: [{}]", response);
        }
        return (T) (key != null ? map.get(key) : map);
    }

}

// ---- remainder of this patch text, kept verbatim ----
// diff --git a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java index 721edc9a26bae7..5f9e090a607583 100644 --- a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java +++ b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java
// @@ -28,12 +28,16 @@ public class EsShardRouting { private final int shardId; private final boolean isPrimary; private final TNetworkAddress address; + + private TNetworkAddress httpAddress; + private final String nodeId; - public EsShardRouting(String indexName, int shardId, boolean isPrimary, TNetworkAddress address) { + public EsShardRouting(String indexName, int shardId, boolean isPrimary, TNetworkAddress address, String nodeId) { this.indexName = indexName; this.shardId = shardId; this.isPrimary = isPrimary; this.address = address; + this.nodeId = nodeId; } public static EsShardRouting parseShardRoutingV55(String indexName, String shardKey,
// @@ -45,8 +49,8 @@ public static EsShardRouting parseShardRoutingV55(String indexName, String shard String thriftPort = nodeInfo.getJSONObject("attributes").getString("thrift_port"); TNetworkAddress addr = new TNetworkAddress(transportAddr[0], Integer.valueOf(thriftPort)); boolean isPrimary = shardInfo.getBoolean("primary"); - return new EsShardRouting(indexName, Integer.valueOf(shardKey), - isPrimary, addr); + return new EsShardRouting(indexName, Integer.valueOf(shardKey), + isPrimary, addr, nodeId); } public int getShardId() {
// @@ -64,4 +68,28 @@ public TNetworkAddress getAddress() { public String getIndexName() { return indexName; } + + public TNetworkAddress getHttpAddress() { + return httpAddress; + } + + public void setHttpAddress(TNetworkAddress httpAddress) { + this.httpAddress = httpAddress; + } + + public String getNodeId() { + return nodeId; + } + + @Override +
public String toString() { + return "EsShardRouting{" + + "indexName='" + indexName + '\'' + + ", shardId=" + shardId + + ", isPrimary=" + isPrimary + + ", address=" + address + + ", httpAddress=" + httpAddress + + ", nodeId='" + nodeId + '\'' + + '}'; + } } diff --git a/fe/src/main/java/org/apache/doris/external/EsStateStore.java b/fe/src/main/java/org/apache/doris/external/EsStateStore.java index 3a4822ce219dbf..8bab6180fc6bce 100644 --- a/fe/src/main/java/org/apache/doris/external/EsStateStore.java +++ b/fe/src/main/java/org/apache/doris/external/EsStateStore.java @@ -86,10 +86,22 @@ public void deRegisterTable(long tableId) { protected void runOneCycle() { for (EsTable esTable : esTables.values()) { try { - EsTableState esTableState = loadEsIndexMetadataV55(esTable); - if (esTableState != null) { - esTable.setEsTableState(esTableState); + EsRestClient client = new EsRestClient(esTable.getSeeds(), + esTable.getUserName(), esTable.getPasswd()); +// EsTableState esTableState = loadEsIndexMetadataV55(esTable); + String indexMetaData = client.getIndexMetaData(esTable.getIndexName()); + if (indexMetaData == null) { + continue; } + EsTableState esTableState = parseClusterState55(indexMetaData, esTable); + if (esTableState == null) { + continue; + } + if (EsTable.TRANSPORT_HTTP.equals(esTable.getTransport())) { + Map nodesInfo = client.getHttpNodes(); + esTableState.addHttpAddress(nodesInfo); + } + esTable.setEsTableState(esTableState); } catch (Throwable e) { LOG.error("errors while load table {} state from es", esTable.getName()); } diff --git a/fe/src/main/java/org/apache/doris/external/EsTableState.java b/fe/src/main/java/org/apache/doris/external/EsTableState.java index a7620cfd04a9cb..59b69aa2678c2b 100644 --- a/fe/src/main/java/org/apache/doris/external/EsTableState.java +++ b/fe/src/main/java/org/apache/doris/external/EsTableState.java @@ -18,9 +18,11 @@ package org.apache.doris.external; import java.util.Map; +import java.util.Random; import 
org.apache.doris.catalog.PartitionInfo; import com.google.common.collect.Maps; +import org.apache.doris.thrift.TNetworkAddress; /** * save the dynamic info parsed from es cluster state such as shard routing, partition info @@ -38,6 +40,22 @@ public EsTableState() { partitionedIndexStates = Maps.newHashMap(); unPartitionedIndexStates = Maps.newHashMap(); } + + public void addHttpAddress(Map nodesInfo) { + for (EsIndexState indexState : partitionedIndexStates.values()) { + indexState.addHttpAddress(nodesInfo); + } + for (EsIndexState indexState : unPartitionedIndexStates.values()) { + indexState.addHttpAddress(nodesInfo); + } + + } + + public TNetworkAddress randomAddress(Map nodesInfo) { + int seed = new Random().nextInt() % nodesInfo.size(); + EsNodeInfo[] nodeInfos = (EsNodeInfo[]) nodesInfo.values().toArray(); + return nodeInfos[seed].getPublishAddress(); + } public PartitionInfo getPartitionInfo() { return partitionInfo; diff --git a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java index 790b976ebb0d44..f54c9e0846fd30 100644 --- a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -128,7 +128,11 @@ public void finalize(Analyzer analyzer) throws UserException { @Override protected void toThrift(TPlanNode msg) { - msg.node_type = TPlanNodeType.ES_SCAN_NODE; + if (EsTable.TRANSPORT_HTTP.equals(table.getTransport())) { + msg.node_type = TPlanNodeType.ES_HTTP_SCAN_NODE; + } else { + msg.node_type = TPlanNodeType.ES_SCAN_NODE; + } Map properties = Maps.newHashMap(); properties.put(EsTable.USER, table.getUserName()); properties.put(EsTable.PASSWORD, table.getPasswd()); @@ -187,8 +191,11 @@ private List getShardLocations() throws UserException { // get backends Set colocatedBes = Sets.newHashSet(); int numBe = Math.min(3, backendMap.size()); - List shardAllocations = shardRouting.stream().map(e -> e.getAddress()) - 
.collect(Collectors.toList()); + List shardAllocations = new ArrayList<>(); + for (EsShardRouting item : shardRouting) { + shardAllocations.add(EsTable.TRANSPORT_HTTP.equals(table.getTransport()) ? item.getHttpAddress() : item.getAddress()); + } + Collections.shuffle(shardAllocations, random); for (TNetworkAddress address : shardAllocations) { colocatedBes.addAll(backendMap.get(address.getHostname())); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 69476511499d69..67fbef8f807116 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -43,7 +43,8 @@ enum TPlanNodeType { BROKER_SCAN_NODE, EMPTY_SET_NODE, UNION_NODE, - ES_SCAN_NODE + ES_SCAN_NODE, + ES_HTTP_SCAN_NODE } // phases of an execution node diff --git a/run-ut.sh b/run-ut.sh index 2963013b595029..bd6a6983a3db4a 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -163,6 +163,10 @@ ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lzop_test ${DORIS_TEST_BINARY_DIR}/exec/broker_scanner_test ${DORIS_TEST_BINARY_DIR}/exec/broker_scan_node_test ${DORIS_TEST_BINARY_DIR}/exec/es_scan_node_test +${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test +${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test +${DORIS_TEST_BINARY_DIR}/exec/es_scan_reader_test +${DORIS_TEST_BINARY_DIR}/exec/es_query_builder_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test