From 4358000cb4f0d666ffe7ec3ded31950d6272961a Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 3 Nov 2021 12:51:32 +0800 Subject: [PATCH 01/11] [feat](lateral-view) Support execution of lateral view stmt 1. Add table function node 2. Add 2 table functions: explode_split and explode_bitmap --- be/src/common/daemon.cpp | 2 + be/src/exec/CMakeLists.txt | 1 + be/src/exec/exec_node.cpp | 5 + be/src/exec/table_function_node.cpp | 341 ++++++++++++++++++ be/src/exec/table_function_node.h | 77 ++++ be/src/exprs/CMakeLists.txt | 9 +- be/src/exprs/scalar_fn_call.cpp | 53 +-- be/src/exprs/scalar_fn_call.h | 5 + .../table_function/dummy_table_functions.cpp | 49 +++ .../table_function/dummy_table_functions.h | 47 +++ .../exprs/table_function/explode_bitmap.cpp | 131 +++++++ be/src/exprs/table_function/explode_bitmap.h | 57 +++ .../table_function/explode_json_array.cpp | 202 +++++++++++ .../exprs/table_function/explode_json_array.h | 134 +++++++ be/src/exprs/table_function/explode_split.cpp | 115 ++++++ be/src/exprs/table_function/explode_split.h | 60 +++ be/src/exprs/table_function/table_function.h | 61 ++++ .../table_function/table_function_factory.cpp | 48 +++ .../table_function/table_function_factory.h | 36 ++ be/src/runtime/descriptors.h | 1 - be/src/util/bitmap_value.h | 130 +++++++ be/test/util/bitmap_value_test.cpp | 37 ++ .../doris/analysis/FunctionCallExpr.java | 2 +- .../apache/doris/analysis/LateralViewRef.java | 32 +- .../org/apache/doris/catalog/FunctionSet.java | 50 ++- .../doris/planner/DistributedPlanner.java | 1 + .../java/org/apache/doris/qe/Coordinator.java | 2 + 27 files changed, 1608 insertions(+), 80 deletions(-) create mode 100644 be/src/exec/table_function_node.cpp create mode 100644 be/src/exec/table_function_node.h create mode 100644 be/src/exprs/table_function/dummy_table_functions.cpp create mode 100644 be/src/exprs/table_function/dummy_table_functions.h create mode 100644 be/src/exprs/table_function/explode_bitmap.cpp create mode 100644 be/src/exprs/table_function/explode_bitmap.h create mode 100644 be/src/exprs/table_function/explode_json_array.cpp create mode 100644 be/src/exprs/table_function/explode_json_array.h create mode 100644 be/src/exprs/table_function/explode_split.cpp create mode 100644 be/src/exprs/table_function/explode_split.h create mode 100644 be/src/exprs/table_function/table_function.h create mode 100644 be/src/exprs/table_function/table_function_factory.cpp create mode 100644 be/src/exprs/table_function/table_function_factory.h diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index ac4e6457206093..b6863b50e51dde 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -40,6 +40,7 @@ #include "exprs/new_in_predicate.h" #include "exprs/operators.h" #include "exprs/string_functions.h" +#include "exprs/table_function/dummy_table_functions.h" #include "exprs/time_operators.h" #include "exprs/timestamp_functions.h" #include "exprs/topn_function.h" @@ -264,6 +265,7 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { HllFunctions::init(); HashFunctions::init(); TopNFunctions::init(); + DummyTableFunctions::init(); LOG(INFO) << CpuInfo::debug_string(); LOG(INFO) << DiskInfo::debug_string(); diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index e33e30d20697e8..f3856ec82c11c9 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -58,6 +58,7 @@ set(EXEC_FILES plain_text_line_reader.cpp csv_scan_node.cpp csv_scanner.cpp + table_function_node.cpp es_scan_node.cpp es_http_scan_node.cpp es_http_scanner.cpp diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 8d0c8bfc938596..dfa8d73d1de380 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -48,6 +48,7 @@ #include "exec/schema_scan_node.h" #include "exec/select_node.h" #include "exec/spill_sort_node.h" +#include "exec/table_function_node.h" #include "exec/topn_node.h" #include "exec/union_node.h" #include "exprs/expr_context.h" @@ -472,6 +473,10 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN *node = pool->add(new AssertNumRowsNode(pool, tnode, descs)); return Status::OK(); + case TPlanNodeType::TABLE_FUNCTION_NODE: + *node = pool->add(new TableFunctionNode(pool, tnode, descs)); + return Status::OK(); + default: map::const_iterator i = _TPlanNodeType_VALUES_TO_NAMES.find(tnode.node_type); diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp new file mode 100644 index 00000000000000..f1a1389dbfbfb7 --- /dev/null +++ b/be/src/exec/table_function_node.cpp @@ -0,0 +1,341 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/table_function_node.h" + +#include "exprs/expr.h" +#include "exprs/expr_context.h" +#include "runtime/descriptors.h" +#include "runtime/raw_value.h" +#include "runtime/row_batch.h" +#include "runtime/runtime_state.h" +#include "runtime/tuple_row.h" +#include "exprs/table_function/table_function_factory.h" + +namespace doris { + +TableFunctionNode::TableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs) { + +} + +TableFunctionNode::~TableFunctionNode() { + +} + +Status TableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { + RETURN_IF_ERROR(ExecNode::init(tnode, state)); + + for (const TExpr& texpr : tnode.table_function_node.fnCallExprList) { + ExprContext* ctx = nullptr; + RETURN_IF_ERROR(Expr::create_expr_tree(_pool, texpr, &ctx)); + _fn_ctxs.push_back(ctx); + + Expr* root = ctx->root(); + const std::string& tf_name = root->fn().name.function_name; + TableFunction* fn; + RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, _pool, &fn)); + fn->set_expr_context(ctx); + _fns.push_back(fn); + } + _fn_num = _fns.size(); + _fn_values.resize(_fn_num); + + // Prepare output slot ids + RETURN_IF_ERROR(_prepare_output_slot_ids(tnode)); + return Status::OK(); +} + +Status TableFunctionNode::_prepare_output_slot_ids(const TPlanNode& tnode) { + // Prepare output slot ids + if (tnode.table_function_node.outputSlotIds.empty()) { + return Status::InternalError("Output slots of table function node is empty"); + } + SlotId max_id = -1; + for (auto slot_id : tnode.table_function_node.outputSlotIds) { + if (slot_id > max_id) { + max_id = slot_id; + } + } + _output_slot_ids = std::vector(max_id + 1, false); + for (auto slot_id : tnode.table_function_node.outputSlotIds) { + _output_slot_ids[slot_id] = true; + } + + return Status::OK(); +} + +Status TableFunctionNode::prepare(RuntimeState* state) { + RETURN_IF_ERROR(ExecNode::prepare(state)); + + RETURN_IF_ERROR(Expr::prepare(_fn_ctxs, state, _row_descriptor, expr_mem_tracker())); + for (auto fn : _fns) { + RETURN_IF_ERROR(fn->prepare()); + } + return Status::OK(); +} + +Status TableFunctionNode::open(RuntimeState* state) { + SCOPED_TIMER(_runtime_profile->total_time_counter()); + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(ExecNode::open(state)); + + RETURN_IF_ERROR(Expr::open(_fn_ctxs, state)); + for (auto fn : _fns) { + RETURN_IF_ERROR(fn->open()); + } + + RETURN_IF_ERROR(_children[0]->open(state)); + return Status::OK(); +} + +Status TableFunctionNode::_process_next_child_row() { + if (_cur_child_offset == _cur_child_batch->num_rows()) { + _child_batch_exhausted = true; + return Status::OK(); + } + _cur_child_tuple_row = _cur_child_batch->get_row(_cur_child_offset++); + for (TableFunction* fn : _fns) { + RETURN_IF_ERROR(fn->process(_cur_child_tuple_row)); + } + + _child_batch_exhausted = false; + return Status::OK(); +} + +// Returns the index of fn of the last eos counted from back to front +// eg: there are 3 functions in `_fns` +// eos: false, true, true +// return: 1 +// +// eos: false, false, true +// return: 2 +// +// eos: false, false, false +// return: -1 +// +// eos: true, true, true +// return: 0 +// +// return: +// 0: all fns are eos +// -1: all fns are not eos +// >0: some of fns are eos +int TableFunctionNode::_find_last_fn_eos_idx() { + for (int i = _fn_num - 1; i >=0; --i) { + if (!_fns[i]->eos()) { + if (i == _fn_num - 1) { + return -1; + } else { + return i + 1; + } + } + } + // all eos + return 0; +} + +// Roll to reset the table function. +// Eg: +// There are 3 functions f1, f2 and f3 in `_fns`. +// If `last_eos_idx` is 1, which means f2 and f3 are eos. +// So we need to forward f1, and reset f2 and f3. +bool TableFunctionNode::_roll_table_functions(int last_eos_idx) { + bool fn_eos = false; + int i = last_eos_idx - 1; + for (; i >= 0; --i) { + _fns[i]->forward(&fn_eos); + if (!fn_eos) { + break; + } + } + if (i == -1) { + // after forward, all functions are eos. + // we should process next child row to get more table function results. + return false; + } + + for (int j = i + 1; j < _fn_num; ++j) { + _fns[j]->reset(); + } + + return true; +} + +Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); + SCOPED_TIMER(_runtime_profile->total_time_counter()); + + const RowDescriptor& parent_rowdesc = row_batch->row_desc(); + const RowDescriptor& child_rowdesc = _children[0]->row_desc(); + if (_parent_tuple_desc_size == -1) { + _parent_tuple_desc_size = parent_rowdesc.tuple_descriptors().size(); + _child_tuple_desc_size = child_rowdesc.tuple_descriptors().size(); + for (int i = 0; i < _child_tuple_desc_size; ++i) { + _child_slot_sizes.push_back(child_rowdesc.tuple_descriptors()[i]->slots().size()); + } + } + + uint8_t* tuple_buffer = nullptr; + Tuple* tuple_ptr = nullptr; + Tuple* pre_tuple_ptr = nullptr; + int row_idx = 0; + + while (true) { + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(state->check_query_state("TableFunctionNode, while getting next batch.")); + + if (_cur_child_batch == nullptr) { + _cur_child_batch.reset(new RowBatch(child_rowdesc, state->batch_size(), mem_tracker().get())); + } + if (_child_batch_exhausted) { + // current child batch is exhausted, get next batch from child + RETURN_IF_ERROR(_children[0]->get_next(state, _cur_child_batch.get(), eos)); + if (*eos) { + break; + } + _cur_child_offset = 0; + RETURN_IF_ERROR(_process_next_child_row()); + if (_child_batch_exhausted) { + _cur_child_batch->reset(); + continue; + } + } + + while (true) { + int idx = _find_last_fn_eos_idx(); + if (idx == 0) { + // all table functions' results are exhausted, process next child row + RETURN_IF_ERROR(_process_next_child_row()); + if (_child_batch_exhausted) { + break; + } + } else if (idx < _fn_num && idx != -1) { + // some of table functions' results are exhausted + if (!_roll_table_functions(idx)) { + // continue to process next child row + continue; + } + } + + // get slots from every table function + // Notice that _fn_values[i] may be null if the table function has empty result set. + for (int i = 0; i < _fn_num; i++) { + RETURN_IF_ERROR(_fns[i]->get_value(&_fn_values[i])); + } + + // allocate memory for row batch for the first time + if (tuple_buffer == nullptr) { + int64_t tuple_buffer_size; + RETURN_IF_ERROR( + row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buffer_size, &tuple_buffer)); + tuple_ptr = reinterpret_cast(tuple_buffer); + } + + pre_tuple_ptr = tuple_ptr; + // The tuples order in parent row batch should be + // child1, child2, tf1, tf2, ... + TupleRow* parent_tuple_row = row_batch->get_row(row_idx++); + // 1. copy child tuples + int tuple_idx = 0; + for (int i = 0; i < _child_tuple_desc_size; tuple_idx++, i++) { + TupleDescriptor* child_tuple_desc = child_rowdesc.tuple_descriptors()[tuple_idx]; + TupleDescriptor* parent_tuple_desc = parent_rowdesc.tuple_descriptors()[tuple_idx]; + + for (int j = 0; j < _child_slot_sizes[i]; ++j) { + SlotDescriptor* child_slot_desc = child_tuple_desc->slots()[j]; + SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[j]; + + if (_output_slot_ids[parent_slot_desc->id()]) { + Tuple* child_tuple = _cur_child_tuple_row->get_tuple(child_rowdesc.get_tuple_idx(child_tuple_desc->id())); + void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); + RawValue::write(child_tuple->get_slot(child_slot_desc->tuple_offset()), dest_slot, parent_slot_desc->type(), row_batch->tuple_data_pool()); + tuple_ptr->set_not_null(parent_slot_desc->null_indicator_offset()); + } else { + tuple_ptr->set_null(parent_slot_desc->null_indicator_offset()); + } + } + parent_tuple_row->set_tuple(tuple_idx, tuple_ptr); + tuple_ptr = reinterpret_cast(reinterpret_cast(tuple_ptr) + parent_tuple_desc->byte_size()); + } + + // 2. copy funtion result + for (int i = 0; tuple_idx < _parent_tuple_desc_size; tuple_idx++, i++) { + TupleDescriptor* parent_tuple_desc = parent_rowdesc.tuple_descriptors()[tuple_idx]; + SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[0]; + void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); + // if (_fn_values[i] != nullptr && _output_slot_ids[parent_slot_desc->id()]) { + if (_fn_values[i] != nullptr) { + RawValue::write(_fn_values[i], dest_slot, parent_slot_desc->type(), row_batch->tuple_data_pool()); + tuple_ptr->set_not_null(parent_slot_desc->null_indicator_offset()); + } else { + tuple_ptr->set_null(parent_slot_desc->null_indicator_offset()); + } + parent_tuple_row->set_tuple(tuple_idx, tuple_ptr); + + tuple_ptr = reinterpret_cast(reinterpret_cast(tuple_ptr) + parent_tuple_desc->byte_size()); + } + + // 3. eval conjuncts + if (eval_conjuncts(&_conjunct_ctxs[0], _conjunct_ctxs.size(), parent_tuple_row)) { + row_batch->commit_last_row(); + ++_num_rows_returned; + } else { + tuple_ptr = pre_tuple_ptr; + } + + // Forward after write success. + // Because data in `_fn_values` points to the data saved in functions. + // And `forward` will change the data in functions. + bool tmp; + _fns[_fn_num - 1]->forward(&tmp); + + if (row_batch->at_capacity()) { + *eos = false; + break; + } + } + + if (row_batch->at_capacity()) { + break; + } + + if (_child_batch_exhausted) { + continue; + } + } + if (reached_limit()) { + int num_rows_over = _num_rows_returned - _limit; + row_batch->set_num_rows(row_batch->num_rows() - num_rows_over); + _num_rows_returned -= num_rows_over; + COUNTER_SET(_rows_returned_counter, _num_rows_returned); + *eos = true; + } + + return Status::OK(); +} + +Status TableFunctionNode::close(RuntimeState* state) { + if (is_closed()) { + return Status::OK(); + } + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE)); + Expr::close(_fn_ctxs, state); + return ExecNode::close(state); +} + +}; // namespace doris diff --git a/be/src/exec/table_function_node.h b/be/src/exec/table_function_node.h new file mode 100644 index 00000000000000..f91577e7d35eaf --- /dev/null +++ b/be/src/exec/table_function_node.h @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exec/exec_node.h" + +namespace doris { + +class MemPool; +class RowBatch; +class TableFunction; +class TupleRow; + +// TableFunctionNode +class TableFunctionNode : public ExecNode { +public: + TableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); + ~TableFunctionNode(); + + virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); + virtual Status prepare(RuntimeState* state); + virtual Status open(RuntimeState* state); + virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); + virtual Status close(RuntimeState* state); + +private: + + Status _prepare_output_slot_ids(const TPlanNode& tnode); + + // return: + // 0: all fns are eos + // -1: all fns are not eos + // >0: some of fns are eos + int _find_last_fn_eos_idx(); + + Status _process_next_child_row(); + + bool _roll_table_functions(int last_eos_idx); + +private: + + int64_t _cur_child_offset = 0; + TupleRow* _cur_child_tuple_row = nullptr; + std::shared_ptr _cur_child_batch; + // true means current child batch is completely consumed. + // we should get next batch from child node. + bool _child_batch_exhausted = true; + + std::vector _fn_ctxs; + std::vector _fns; + std::vector _fn_values; + int _fn_num = 0; + + // std::unordered_set _output_slot_ids; + std::vector _output_slot_ids; + + int _parent_tuple_desc_size = -1; + int _child_tuple_desc_size = -1; + std::vector _child_slot_sizes; +}; + +}; // namespace doris diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index 99ebbf401f88ad..5ebf1d1d265af4 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -70,4 +70,11 @@ add_library(Exprs bitmap_function.cpp hll_function.cpp grouping_sets_functions.cpp - topn_function.cpp) + topn_function.cpp + + table_function/explode_split.cpp + table_function/explode_bitmap.cpp + table_function/explode_json_array.cpp + table_function/table_function_factory.cpp + table_function/dummy_table_functions.cpp +) diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp index 2a7fc824dd3ad0..5e1d081f5705b7 100644 --- a/be/src/exprs/scalar_fn_call.cpp +++ b/be/src/exprs/scalar_fn_call.cpp @@ -72,7 +72,6 @@ Status ScalarFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, Exp } _fn_context_index = context->register_func(state, return_type, arg_types, varargs_buffer_size); - // _scalar_fn = OpcodeRegistry::instance()->get_function_ptr(_opcode); Status status = Status::OK(); if (_scalar_fn == nullptr) { if (SymbolsUtil::is_mangled(_fn.scalar_fn.symbol)) { @@ -88,61 +87,11 @@ Status ScalarFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, Exp // ret_type = ColumnType(thrift_to_type(_fn.ret_type)); std::string symbol = SymbolsUtil::mangle_user_function(_fn.scalar_fn.symbol, arg_types, _fn.has_var_args, nullptr); + status = UserFunctionCache::instance()->get_function_ptr( _fn.id, symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, &_cache_entry); } } -#if 0 - // If the codegen object hasn't been created yet and we're calling a builtin or native - // UDF with <= 8 non-variadic arguments, we can use the interpreted path and call the - // builtin without codegen. This saves us the overhead of creating the codegen object - // when it's not necessary (i.e., in plan fragments with no codegen-enabled operators). - // In addition, we can never codegen char arguments. - // TODO: codegen for char arguments - if (char_arg || (!state->codegen_created() && num_fixed_args() <= 8 && - (_fn.binary_type == TFunctionBinaryType::BUILTIN || - _fn.binary_type == TFunctionBinaryType::NATIVE))) { - // Builtins with char arguments must still have <= 8 arguments. - // TODO: delete when we have codegen for char arguments - if (char_arg) { - DCHECK(num_fixed_args() <= 8 && _fn.binary_type == TFunctionBinaryType::BUILTIN); - } - Status status = UserFunctionCache::instance()->GetSoFunctionPtr( - _fn.hdfs_location, _fn.scalar_fn.symbol, &_scalar_fn, &cache_entry_); - if (!status.ok()) { - if (_fn.binary_type == TFunctionBinaryType::BUILTIN) { - // Builtins symbols should exist unless there is a version mismatch. - status.SetErrorMsg(ErrorMsg(TErrorCode::MISSING_BUILTIN, - _fn.name.function_name, _fn.scalar_fn.symbol)); - return status; - } else { - DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::NATIVE); - return Status::InternalError(strings::Substitute("Problem loading UDF '$0':\n$1", - _fn.name.function_name, status.GetDetail())); - return status; - } - } - } else { - // If we got here, either codegen is enabled or we need codegen to run this function. - LlvmCodeGen* codegen; - RETURN_IF_ERROR(state->GetCodegen(&codegen)); - - if (_fn.binary_type == TFunctionBinaryType::IR) { - std::string local_path; - RETURN_IF_ERROR(UserFunctionCache::instance()->GetLocalLibPath( - _fn.hdfs_location, UserFunctionCache::TYPE_IR, &local_path)); - // Link the UDF module into this query's main module (essentially copy the UDF - // module into the main module) so the UDF's functions are available in the main - // module. - RETURN_IF_ERROR(codegen->LinkModule(local_path)); - } - - Function* ir_udf_wrapper; - RETURN_IF_ERROR(GetCodegendComputeFn(state, &ir_udf_wrapper)); - // TODO: don't do this for child exprs - codegen->AddFunctionToJit(ir_udf_wrapper, &_scalar_fn_wrapper); - } -#endif if (_fn.scalar_fn.__isset.prepare_fn_symbol) { RETURN_IF_ERROR(get_function(state, _fn.scalar_fn.prepare_fn_symbol, reinterpret_cast(&_prepare_fn))); diff --git a/be/src/exprs/scalar_fn_call.h b/be/src/exprs/scalar_fn_call.h index e94e9d04bcaf4e..cf8f1f7c85cfab 100644 --- a/be/src/exprs/scalar_fn_call.h +++ b/be/src/exprs/scalar_fn_call.h @@ -54,6 +54,11 @@ class ScalarFnCall : public Expr { return pool->add(new ScalarFnCall(*this)); } + // TODO: just for table function. + // It is not good to expose this field to public. + // We should refactor it after implementing real table functions. + int get_fn_context_index() const { return _fn_context_index; } + protected: friend class Expr; diff --git a/be/src/exprs/table_function/dummy_table_functions.cpp b/be/src/exprs/table_function/dummy_table_functions.cpp new file mode 100644 index 00000000000000..bb641001158b71 --- /dev/null +++ b/be/src/exprs/table_function/dummy_table_functions.cpp @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/table_function/dummy_table_functions.h" + +namespace doris { + +void DummyTableFunctions::init() {} + +StringVal DummyTableFunctions::explode_split(FunctionContext* context, const StringVal& str, + const StringVal& sep) { + return StringVal(); +} + +BigIntVal DummyTableFunctions::explode_bitmap(doris_udf::FunctionContext* context, + const doris_udf::StringVal& bitmap) { + return BigIntVal(); +} + +BigIntVal DummyTableFunctions::explode_json_array_int(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str) { + return BigIntVal(); +} + +DoubleVal DummyTableFunctions::explode_json_array_double(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str) { + return DoubleVal(); +} + +StringVal DummyTableFunctions::explode_json_array_string(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str) { + return StringVal(); +} + +} // namespace doris diff --git a/be/src/exprs/table_function/dummy_table_functions.h b/be/src/exprs/table_function/dummy_table_functions.h new file mode 100644 index 00000000000000..4723a52b9431a7 --- /dev/null +++ b/be/src/exprs/table_function/dummy_table_functions.h @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exprs/anyval_util.h" + +namespace doris { + +// Currently Doris does not support array types, so the definition of table function +// is still using the definition of the scalar function. +// The definition here is just to facilitate the query planning stage and the query execution preparation stage +// to make smooth use of the existing function framework +// But the execution logic of the table function is not here. So the function names here are prefixed with "dummy". +// TODO: refactor here after we support real array type. +class DummyTableFunctions { +public: + static void init(); + + static doris_udf::StringVal explode_split(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str, + const doris_udf::StringVal& sep); + static doris_udf::BigIntVal explode_bitmap(doris_udf::FunctionContext* context, + const doris_udf::StringVal& bitmap); + static doris_udf::BigIntVal explode_json_array_int(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::DoubleVal explode_json_array_double(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); + static doris_udf::StringVal explode_json_array_string(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); +}; +} // namespace doris + diff --git a/be/src/exprs/table_function/explode_bitmap.cpp b/be/src/exprs/table_function/explode_bitmap.cpp new file mode 100644 index 00000000000000..4749494667a485 --- /dev/null +++ b/be/src/exprs/table_function/explode_bitmap.cpp @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/table_function/explode_bitmap.h" + +#include "exprs/expr_context.h" +#include "exprs/scalar_fn_call.h" + +namespace doris { + +ExplodeBitmapTableFunction::ExplodeBitmapTableFunction() {} + +ExplodeBitmapTableFunction::~ExplodeBitmapTableFunction() { + if (_cur_iter != nullptr) { + delete _cur_iter; + _cur_iter = nullptr; + } + if (_cur_iter_end != nullptr) { + delete _cur_iter_end; + _cur_iter_end = nullptr; + } + if (_cur_bitmap_owned && _cur_bitmap != nullptr) { + delete _cur_bitmap; + _cur_bitmap = nullptr; + } +} + +Status ExplodeBitmapTableFunction::prepare() { + return Status::OK(); +} + +Status ExplodeBitmapTableFunction::open() { + return Status::OK(); +} + +Status ExplodeBitmapTableFunction::process(TupleRow* tuple_row) { + CHECK(1 == _expr_context->root()->get_num_children()) << _expr_context->root()->get_num_children(); + _eos = false; + _is_current_empty = false; + + StringVal bitmap_str = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); + if (bitmap_str.is_null) { + _is_current_empty = true; + } else { + if (bitmap_str.len == 0) { + _cur_bitmap = reinterpret_cast(bitmap_str.ptr); + _cur_bitmap_owned = false; + } else { + _cur_bitmap = new BitmapValue((char*) bitmap_str.ptr); + _cur_bitmap_owned = true; + } + if (_cur_bitmap->cardinality() == 0) { + _is_current_empty = true; + } else { + _reset_iterator(); + } + } + + return Status::OK(); +} + +void ExplodeBitmapTableFunction::_reset_iterator() { + DCHECK(_cur_bitmap->cardinality() > 0) << _cur_bitmap->cardinality(); + if (_cur_iter != nullptr) { + delete _cur_iter; + _cur_iter = nullptr; + } + _cur_iter = new BitmapValueIterator(*_cur_bitmap); + + if (_cur_iter_end != nullptr) { + delete _cur_iter_end; + _cur_iter_end = nullptr; + } + _cur_iter_end = new BitmapValueIterator(*_cur_bitmap, true); + + _cur_value = **_cur_iter; +} + +Status ExplodeBitmapTableFunction::reset() { + _eos = false; + if (!_is_current_empty) { + _reset_iterator(); + } + return Status::OK(); +} + +Status ExplodeBitmapTableFunction::get_value(void** output) { + if (_is_current_empty) { + *output = nullptr; + } else { + *output = &_cur_value; + } + return Status::OK(); +} + +Status ExplodeBitmapTableFunction::close() { + return Status::OK(); +} + +Status ExplodeBitmapTableFunction::forward(bool* eos) { + if (_is_current_empty) { + *eos = true; + _eos = true; + } else { + ++(*_cur_iter); + if (*_cur_iter == *_cur_iter_end) { + *eos = true; + _eos = true; + } else { + _cur_value = **_cur_iter; + *eos = false; + } + } + return Status::OK(); +} + +} // namespace doris diff --git a/be/src/exprs/table_function/explode_bitmap.h b/be/src/exprs/table_function/explode_bitmap.h new file mode 100644 index 00000000000000..818de9c386f3e7 --- /dev/null +++ b/be/src/exprs/table_function/explode_bitmap.h @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exprs/table_function/table_function.h" + +#include "util/bitmap_value.h" + +namespace doris { + +class ExplodeBitmapTableFunction : public TableFunction { +public: + ExplodeBitmapTableFunction(); + virtual ~ExplodeBitmapTableFunction(); + + virtual Status prepare() override; + virtual Status open() override; + virtual Status process(TupleRow* tuple_row) override; + virtual Status reset() override; + virtual Status get_value(void** output) override; + virtual Status close() override; + + virtual Status forward(bool* eos) override; + +private: + void _reset_iterator(); + +private: + + // Read from tuple row. + // if _cur_bitmap_owned is true, need to delete it when deconstruction + BitmapValue* _cur_bitmap = nullptr; + bool _cur_bitmap_owned = false; + // iterator of _cur_bitmap + BitmapValueIterator* _cur_iter = nullptr; + BitmapValueIterator* _cur_iter_end = nullptr; + // current value read from bitmap, it will be referenced by + // table function scan node. + uint64_t _cur_value = 0; +}; + +} // namespace doris diff --git a/be/src/exprs/table_function/explode_json_array.cpp b/be/src/exprs/table_function/explode_json_array.cpp new file mode 100644 index 00000000000000..e82c9ecccf32fe --- /dev/null +++ b/be/src/exprs/table_function/explode_json_array.cpp @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/table_function/explode_json_array.h" + +#include "exprs/expr_context.h" +#include "exprs/scalar_fn_call.h" + +namespace doris { + +std::string ParsedData::true_value = "true"; +std::string ParsedData::false_value = "false"; + +int ParsedData::set_output(ExplodeJsonArrayType type, rapidjson::Document& document) { + int size = document.GetArray().Size(); + switch (type) { + case ExplodeJsonArrayType::INT: { + _data.resize(size); + _backup_int.resize(size); + int i = 0; + for (auto& v : document.GetArray()) { + if (v.IsInt64()) { + _backup_int[i] = v.GetInt64(); + _data[i] = &_backup_int[i]; + } else { + _data[i] = nullptr; + } + ++i; + } + break; + } + case ExplodeJsonArrayType::DOUBLE: { + _data.resize(size); + _backup_double.resize(size); + int i = 0; + for (auto& v : document.GetArray()) { + if (v.IsDouble()) { + _backup_double[i] = v.GetDouble(); + _data[i] = &_backup_double[i]; + } else { + _data[i] = nullptr; + } + ++i; + } + break; + } + case ExplodeJsonArrayType::STRING: { + _data_string.clear(); + _backup_string.clear(); + _string_nulls.clear(); + int32_t wbytes = 0; + int i = 0; + for (auto& v : document.GetArray()) { + switch (v.GetType()) { + case rapidjson::Type::kStringType: + _backup_string.emplace_back(v.GetString(), v.GetStringLength()); + _data_string.emplace_back(_backup_string.back()); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kNumberType: + if (v.IsUint()) { + wbytes = sprintf(tmp_buf, "%u", v.GetUint()); + } else if (v.IsInt()) { + wbytes = sprintf(tmp_buf, "%d", v.GetInt()); + } else if (v.IsUint64()) { + wbytes = sprintf(tmp_buf, "%lu", v.GetUint64()); + } else if (v.IsInt64()) { + wbytes = sprintf(tmp_buf, "%ld", v.GetInt64()); + } else { + wbytes = sprintf(tmp_buf, "%f", v.GetDouble()); + } + _backup_string.emplace_back(tmp_buf, wbytes); + _data_string.emplace_back(_backup_string.back()); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kFalseType: + _data_string.emplace_back(true_value); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kTrueType: + _data_string.emplace_back(false_value); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kNullType: + _data_string.push_back({}); + _string_nulls.push_back(true); + break; + default: + _data_string.push_back({}); + _string_nulls.push_back(true); + break; + } + ++i; + } + break; + } + default: + CHECK(false) << type; + break; + } + return size; +} + +///////////////////////// +ExplodeJsonArrayTableFunction::ExplodeJsonArrayTableFunction(ExplodeJsonArrayType type) + : _type(type) { + +} + +ExplodeJsonArrayTableFunction::~ExplodeJsonArrayTableFunction() { +} + +Status ExplodeJsonArrayTableFunction::prepare() { + return Status::OK(); +} + +Status ExplodeJsonArrayTableFunction::open() { + return Status::OK(); +} + +Status ExplodeJsonArrayTableFunction::process(TupleRow* tuple_row) { + CHECK(1 == _expr_context->root()->get_num_children()) << _expr_context->root()->get_num_children(); + _is_current_empty = false; + _eos = false; + + StringVal text = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); + if (text.is_null || text.len == 0) { + // _set_null_output(); + _is_current_empty = true; + } else { + rapidjson::Document document; + document.Parse((char*) text.ptr, text.len); + if (UNLIKELY(document.HasParseError()) || !document.IsArray() || document.GetArray().Size() == 0) { + // _set_null_output(); + _is_current_empty = true; + } else { + _cur_size = _parsed_data.set_output(_type, document); + _cur_offset = 0; + // _eos = _cur_size == 0; + } + } + // _is_current_empty = _eos; + return Status::OK(); +} + +void ExplodeJsonArrayTableFunction::_set_null_output() { + _parsed_data.set_null_output(_type); + _cur_size = 1; + _cur_offset = 0; + _eos = false; +} + +Status ExplodeJsonArrayTableFunction::reset() { + _eos = false; + _cur_offset = 0; + return Status::OK(); +} + +Status ExplodeJsonArrayTableFunction::get_value(void** output) { + if (_is_current_empty) { + *output = nullptr; + } else { + _parsed_data.get_value(_type, _cur_offset, output); + } + return Status::OK(); +} + +Status ExplodeJsonArrayTableFunction::close() { + return Status::OK(); +} + +Status ExplodeJsonArrayTableFunction::forward(bool* eos) { + if (_is_current_empty) { + *eos = true; + _eos = true; + } else { + ++_cur_offset; + if (_cur_offset == _cur_size) { + *eos = true; + _eos = true; + } else { + *eos = false; + } + } + return Status::OK(); +} + +} // namespace doris diff --git a/be/src/exprs/table_function/explode_json_array.h b/be/src/exprs/table_function/explode_json_array.h new file mode 100644 index 00000000000000..3a5589e575ec6a --- /dev/null +++ b/be/src/exprs/table_function/explode_json_array.h @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exprs/table_function/table_function.h" + +#include +#include +#include "gutil/strings/stringpiece.h" +#include "runtime/string_value.h" + +namespace doris { + +enum ExplodeJsonArrayType { + INT = 0, + DOUBLE, + STRING +}; + +struct ParsedData { + static std::string true_value; + static std::string false_value; + + // The number parsed from json array + // the `_backup` saved the real number entity. + std::vector _data; + std::vector _data_string; + std::vector _backup_int; + std::vector _backup_double; + std::vector _backup_string; + std::vector _string_nulls; + char tmp_buf[128] = {0}; + + void reset(ExplodeJsonArrayType type) { + switch (type) { + case ExplodeJsonArrayType::INT: + _data.clear(); + _backup_int.clear(); + break; + case ExplodeJsonArrayType::DOUBLE: + _data.clear(); + _backup_double.clear(); + break; + case ExplodeJsonArrayType::STRING: + _data_string.clear(); + _backup_string.clear(); + _string_nulls.clear(); + break; + default: + CHECK(false) << type; + break; + } + } + + void set_null_output(ExplodeJsonArrayType type) { + switch (type) { + case ExplodeJsonArrayType::INT: + case ExplodeJsonArrayType::DOUBLE: + _data.resize(1); + _data[0] = nullptr; + break; + case ExplodeJsonArrayType::STRING: + _string_nulls.resize(1); + _string_nulls[0] = true; + break; + default: + CHECK(false) << type; + break; + } + } + + void get_value(ExplodeJsonArrayType type, int64_t offset, void** output) { + switch(type) { + case ExplodeJsonArrayType::INT: + case ExplodeJsonArrayType::DOUBLE: + *output = _data[offset]; + break; + case ExplodeJsonArrayType::STRING: + // LOG(INFO) << "cmy get_value offset: " << offset << ", is null: " << _string_nulls[offset] << ", data: " << (_string_nulls[offset] ? "null2" : _backup_string[offset]); + *output = _string_nulls[offset] ? nullptr : &_data_string[offset]; + break; + default: + CHECK(false) << type; + } + } + + int set_output(ExplodeJsonArrayType type, rapidjson::Document& document); +}; + +// Input: json array: [1,2,3,4,5]; +// Output: rows: 1,2,3,4,5 +// If json array contains non-numeric type, or is not a json array, will return null +class ExplodeJsonArrayTableFunction : public TableFunction { +public: + ExplodeJsonArrayTableFunction(ExplodeJsonArrayType type); + virtual ~ExplodeJsonArrayTableFunction(); + + virtual Status prepare() override; + virtual Status open() override; + virtual Status process(TupleRow* tuple_row) override; + virtual Status reset() override; + virtual Status get_value(void** output) override; + virtual Status close() override; + virtual Status forward(bool* eos) override; + +private: + void _set_null_output(); + +private: + ParsedData _parsed_data; + + ExplodeJsonArrayType _type; + // the size of _data + int64_t _cur_size = 0; + // current consumed offset of _data + int64_t _cur_offset = 0; +}; + +} // namespace doris diff --git a/be/src/exprs/table_function/explode_split.cpp b/be/src/exprs/table_function/explode_split.cpp new file mode 100644 index 00000000000000..03c53347f6660c --- /dev/null +++ b/be/src/exprs/table_function/explode_split.cpp @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/table_function/explode_split.h" + +#include "exprs/expr_context.h" +#include "exprs/scalar_fn_call.h" +#include "gutil/strings/split.h" + +namespace doris { + +ExplodeSplitTableFunction::ExplodeSplitTableFunction() { +} + +ExplodeSplitTableFunction::~ExplodeSplitTableFunction() { +} + +Status ExplodeSplitTableFunction::prepare() { + return Status::OK(); +} + +Status ExplodeSplitTableFunction::open() { + ScalarFnCall* fn_call = reinterpret_cast(_expr_context->root()); + FunctionContext* fn_ctx = _expr_context->fn_context(fn_call->get_fn_context_index()); + CHECK(2 == fn_ctx->get_num_constant_args()) << fn_ctx->get_num_constant_args(); + // check if the delimiter argument(the 2nd arg) is constant. + // if yes, cache it + if (fn_ctx->is_arg_constant(1)) { + _is_delimiter_constant = true; + StringVal* delimiter = reinterpret_cast(fn_ctx->get_constant_arg(1)); + _const_delimter = StringPiece((char*) delimiter->ptr, delimiter->len); + } + return Status::OK(); +} + +Status ExplodeSplitTableFunction::process(TupleRow* tuple_row) { + CHECK(2 == _expr_context->root()->get_num_children()) << _expr_context->root()->get_num_children(); + _is_current_empty = false; + _eos = false; + + _data.clear(); + StringVal text = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); + if (text.is_null || text.len == 0) { + _is_current_empty = true; + _cur_size = 0; + _cur_offset = 0; + } else { + if (_is_delimiter_constant) { + _backup = strings::Split(StringPiece((char*) text.ptr, text.len), _const_delimter); + } else { + StringVal delimiter = _expr_context->root()->get_child(1)->get_string_val(_expr_context, tuple_row); + _backup = strings::Split(StringPiece((char*) text.ptr, text.len), StringPiece((char*) delimiter.ptr, delimiter.len)); + } + for (const std::string str : _backup) { + _data.emplace_back(str); + } + _cur_size = _backup.size(); + _cur_offset = 0; + _is_current_empty = (_cur_size == 0); + } + return Status::OK(); +} + +Status ExplodeSplitTableFunction::reset() { + _eos = false; + if (!_is_current_empty) { + _cur_offset = 0; + } + return Status::OK(); +} + +Status ExplodeSplitTableFunction::get_value(void** output) { + if (_is_current_empty) { + *output = nullptr; + } else { + *output = &_data[_cur_offset]; + } + return Status::OK(); +} + +Status ExplodeSplitTableFunction::close() { + return Status::OK(); +} + +Status ExplodeSplitTableFunction::forward(bool* eos) { + if (_is_current_empty) { + *eos = true; + _eos = true; + } else { + ++_cur_offset; + if (_cur_offset == _cur_size) { + *eos = true; + _eos = true; + } else { + *eos = false; + } + } + return Status::OK(); +} + +} // namespace doris diff --git a/be/src/exprs/table_function/explode_split.h b/be/src/exprs/table_function/explode_split.h new file mode 100644 index 00000000000000..f62c0cfbc60cc2 --- /dev/null +++ b/be/src/exprs/table_function/explode_split.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exprs/table_function/table_function.h" + +#include "gutil/strings/stringpiece.h" +#include "runtime/string_value.h" + +namespace doris { + +class ExplodeSplitTableFunction : public TableFunction { +public: + ExplodeSplitTableFunction(); + virtual ~ExplodeSplitTableFunction(); + + virtual Status prepare() override; + virtual Status open() override; + virtual Status process(TupleRow* tuple_row) override; + virtual Status reset() override; + virtual Status get_value(void** output) override; + virtual Status close() override; + + virtual Status forward(bool* eos) override; + +private: + + // The string value splitted from source, and will be referenced by + // table function scan node. + // the `_backup` saved the real string entity. + std::vector _data; + std::vector _backup; + // the size of _data + int64_t _cur_size = 0; + // current consumed offset of _data + int64_t _cur_offset = 0; + + // indicate whether the delimiter is constant. + // if true, the constant delimiter will be saved in `_const_delimter` + bool _is_delimiter_constant = false; + StringPiece _const_delimter; + +}; + +} // namespace doris diff --git a/be/src/exprs/table_function/table_function.h b/be/src/exprs/table_function/table_function.h new file mode 100644 index 00000000000000..55b9a52cc57690 --- /dev/null +++ b/be/src/exprs/table_function/table_function.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" + +namespace doris { + +// TODO: think about how to manager memeory consumption of table functions. +// Currently, the memory allocated from table function is from malloc directly. +class TableFunctionState { +}; + +class ExprContext; +class TupleRow; +class TableFunction { +public: + virtual ~TableFunction() {} + + virtual Status prepare() = 0; + virtual Status open() = 0; + virtual Status process(TupleRow* tuple_row) = 0; + virtual Status reset() = 0; + virtual Status get_value(void** output) = 0; + virtual Status close() = 0; + + virtual Status forward(bool *eos) = 0; + +public: + bool eos() const { return _eos; } + + void set_expr_context(ExprContext* expr_context) { + _expr_context = expr_context; + } + +protected: + std::string _fn_name; + ExprContext* _expr_context; + // true if there is no more data can be read from this function. + bool _eos = false; + // true means the function result set from current row is empty(eg, source value is null or empty). + // so that when calling reset(), we can do nothing and keep eos as true. + bool _is_current_empty = false; +}; + +} // namespace doris diff --git a/be/src/exprs/table_function/table_function_factory.cpp b/be/src/exprs/table_function/table_function_factory.cpp new file mode 100644 index 00000000000000..fc6ead5ec3c428 --- /dev/null +++ b/be/src/exprs/table_function/table_function_factory.cpp @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/table_function/table_function_factory.h" + +#include "common/object_pool.h" +#include "exprs/table_function/explode_bitmap.h" +#include "exprs/table_function/explode_json_array.h" +#include "exprs/table_function/explode_split.h" + +namespace doris { + +Status TableFunctionFactory::get_fn(const std::string& fn_name, ObjectPool* pool, TableFunction** fn) { + if (fn_name == "explode_split") { + *fn = pool->add(new ExplodeSplitTableFunction()); + return Status::OK(); + } else if (fn_name == "explode_bitmap") { + *fn = pool->add(new ExplodeBitmapTableFunction()); + return Status::OK(); + } else if (fn_name == "explode_json_array_int") { + *fn = pool->add(new ExplodeJsonArrayTableFunction(ExplodeJsonArrayType::INT)); + return Status::OK(); + } else if (fn_name == "explode_json_array_double") { + *fn = pool->add(new ExplodeJsonArrayTableFunction(ExplodeJsonArrayType::DOUBLE)); + return Status::OK(); + } else if (fn_name == "explode_json_array_string") { + *fn = pool->add(new ExplodeJsonArrayTableFunction(ExplodeJsonArrayType::STRING)); + return Status::OK(); + } else { + return Status::NotSupported("Unknown table function: " + fn_name); + } +} + +} // namespace doris diff --git a/be/src/exprs/table_function/table_function_factory.h b/be/src/exprs/table_function/table_function_factory.h new file mode 100644 index 00000000000000..ca5bd05e35ac29 --- /dev/null +++ b/be/src/exprs/table_function/table_function_factory.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "exprs/table_function/table_function_factory.h" +#include "exprs/table_function/explode_split.h" + +#include "common/status.h" + +namespace doris { + +class ObjectPool; +class TableFunction; +class TableFunctionFactory { +public: + TableFunctionFactory() {} + ~TableFunctionFactory() {} + static Status get_fn(const std::string& fn_name, ObjectPool* pool, TableFunction** fn); +}; + +} // namespace doris diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index e201d8481d8c01..d668d424ffd69d 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -108,7 +108,6 @@ class SlotDescriptor { friend class TupleDescriptor; friend class SchemaScanner; friend class OlapTableSchemaParam; - friend class TupleDescriptor; const SlotId _id; const TypeDescriptor _type; diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index a3c54e0c4c71fb..7259685c02cd48 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1134,6 +1134,7 @@ inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const { // Represent the in-memory and on-disk structure of Doris's BITMAP data type. // Optimize for the case where the bitmap contains 0 or 1 element which is common // for streaming load scenario. +class BitmapValueIterator; class BitmapValue { public: // Construct an empty bitmap. @@ -1692,6 +1693,13 @@ class BitmapValue { return count; } + // Implement an iterator for convenience + friend class BitmapValueIterator; + typedef BitmapValueIterator b_iterator; + + b_iterator begin() const; + b_iterator end() const; + private: void _convert_to_smaller_type() { if (_type == BITMAP) { @@ -1717,6 +1725,128 @@ class BitmapValue { BitmapDataType _type; }; +// A simple implement of bitmap value iterator(Read only) +// Usage: +// BitmapValueIterator iter = bitmap_value.begin(); +// BitmapValueIterator end = bitmap_value.end(); +// for (; iter != end(); ++iter) { +// uint64_t v = *iter; +// ... do something with "v" ... +// } +class BitmapValueIterator { +public: + BitmapValueIterator() + : _bitmap(BitmapValue()) { + } + + BitmapValueIterator(const BitmapValue& bitmap, bool end = false) + : _bitmap(bitmap), _end(end) { + + switch (_bitmap._type) { + case BitmapValue::BitmapDataType::EMPTY: + _end = true; + break; + case BitmapValue::BitmapDataType::SINGLE: + _sv = _bitmap._sv; + break; + case BitmapValue::BitmapDataType::BITMAP: + _iter = new detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end); + break; + default: + CHECK(false) << _bitmap._type; + } + } + + BitmapValueIterator(const BitmapValueIterator& other) + : _bitmap(other._bitmap), _iter(other._iter), _sv(other._sv), _end(other._end) { + } + + ~BitmapValueIterator() { + if (_iter != nullptr) { + delete _iter; + _iter = nullptr; + } + } + + uint64_t operator*() const { + CHECK(!_end) << "should not get value of end iterator"; + switch (_bitmap._type) { + case BitmapValue::BitmapDataType::SINGLE: + return _sv; + case BitmapValue::BitmapDataType::BITMAP: + return *(*_iter); + default: + CHECK(false) << _bitmap._type; + } + return 0; + } + + BitmapValueIterator& operator++() { // ++i, must returned inc. value + CHECK(!_end) << "should not forward when iterator ends"; + switch (_bitmap._type) { + case BitmapValue::BitmapDataType::SINGLE: + _end = true; + break; + case BitmapValue::BitmapDataType::BITMAP: + ++(*_iter); + break; + default: + CHECK(false) << _bitmap._type; + } + return *this; + } + + BitmapValueIterator operator++(int) { // i++, must return orig. value + CHECK(!_end) << "should not forward when iterator ends"; + BitmapValueIterator orig(*this); + switch (_bitmap._type) { + case BitmapValue::BitmapDataType::SINGLE: + _end = true; + break; + case BitmapValue::BitmapDataType::BITMAP: + ++(*_iter); + break; + default: + CHECK(false) << _bitmap._type; + } + return orig; + } + + bool operator==(const BitmapValueIterator& other) const { + if (_end && other._end) return true; + + switch (_bitmap._type) { + case BitmapValue::BitmapDataType::EMPTY: + return other._bitmap._type == BitmapValue::BitmapDataType::EMPTY; + case BitmapValue::BitmapDataType::SINGLE: + return _sv == other._sv; + case BitmapValue::BitmapDataType::BITMAP: + return *_iter == *(other._iter); + default: + CHECK(false) << _bitmap._type; + } + return false; + } + + bool operator!=(const BitmapValueIterator& other) const { + return !(*this == other); + } + +private: + const BitmapValue& _bitmap; + detail::Roaring64MapSetBitForwardIterator* _iter = nullptr; + uint64_t _sv = 0; + bool _end = false; +}; + +inline BitmapValueIterator BitmapValue::begin() const { + return BitmapValueIterator(*this); +} + +inline BitmapValueIterator BitmapValue::end() const { + return BitmapValueIterator(*this, true); +} + } // namespace doris #endif //DORIS_BE_SRC_UTIL_BITMAP_VALUE_H diff --git a/be/test/util/bitmap_value_test.cpp b/be/test/util/bitmap_value_test.cpp index 997320510c0676..cb2083d77db49c 100644 --- a/be/test/util/bitmap_value_test.cpp +++ b/be/test/util/bitmap_value_test.cpp @@ -346,6 +346,43 @@ TEST(BitmapValueTest, bitmap_single_convert) { bitmap |= bitmap_u; ASSERT_EQ(BitmapValue::BITMAP, bitmap._type); } + +TEST(BitmapValueTest, bitmap_value_iterator_test) { + BitmapValue empty; + for (auto iter = empty.begin(); iter != empty.end(); ++iter) { + // should not goes here + ASSERT_TRUE(false); + } + + BitmapValue single(1024); + for (auto iter = single.begin(); iter != single.end(); ++iter) { + ASSERT_EQ(1024, *iter); + } + + int i = 0; + BitmapValue bitmap({0, 1025, 1026, UINT32_MAX, UINT64_MAX}); + for (auto iter = bitmap.begin(); iter != bitmap.end(); ++iter, ++i) { + switch (i) { + case 0: + ASSERT_EQ(0, *iter); + break; + case 1: + ASSERT_EQ(1025, *iter); + break; + case 2: + ASSERT_EQ(1026, *iter); + break; + case 3: + ASSERT_EQ(UINT32_MAX, *iter); + break; + case 4: + ASSERT_EQ(UINT64_MAX, *iter); + break; + default: + ASSERT_TRUE(false); + } + } +} } // namespace doris int main(int argc, char** argv) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 76a81c68de7a0b..34fd3ed0c23a0e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -688,7 +688,7 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { fn = getTableFunction(fnName.getFunction(), childTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); if (fn == null) { - throw new AnalysisException("Doris only support `explode_split(varchar, varchar)` table function"); + throw new AnalysisException("Current only support `explode_split`, `explode_bitmap()` and `explode_json_array_xx` table functions"); } return; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java index 41baabca4f361f..c22c8431835abd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java @@ -17,10 +17,10 @@ package org.apache.doris.analysis; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.FunctionSet; import org.apache.doris.catalog.InlineView; -import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.UserException; @@ -77,32 +77,31 @@ public void analyze(Analyzer analyzer) throws UserException { if (!(expr instanceof FunctionCallExpr)) { throw new AnalysisException("Only support function call expr in lateral view"); } + + analyzeFunctionExpr(analyzer); + + // analyze lateral view + desc = analyzer.registerTableRef(this); + explodeSlotRef = new SlotRef(new TableName(null, viewName), columnName); + explodeSlotRef.analyze(analyzer); + isAnalyzed = true; // true now that we have assigned desc + } + + private void analyzeFunctionExpr(Analyzer analyzer) throws AnalysisException { fnExpr = (FunctionCallExpr) expr; fnExpr.setTableFnCall(true); checkAndSupplyDefaultTableName(fnExpr); fnExpr.analyze(analyzer); - if (!fnExpr.getFnName().getFunction().equals(FunctionSet.EXPLODE_SPLIT)) { - throw new AnalysisException("Only support explode function in lateral view"); - } checkScalarFunction(fnExpr.getChild(0)); - if (!(fnExpr.getChild(1) instanceof StringLiteral)) { - throw new AnalysisException("Split separator of explode must be a string const"); - } fnExpr.getChild(0).collect(SlotRef.class, originSlotRefList); - // analyze lateral view - desc = analyzer.registerTableRef(this); - explodeSlotRef = new SlotRef(new TableName(null, viewName), columnName); - explodeSlotRef.analyze(analyzer); - isAnalyzed = true; // true now that we have assigned desc } @Override public TupleDescriptor createTupleDescriptor(Analyzer analyzer) throws AnalysisException { // Create a fake catalog table for the lateral view List columnList = Lists.newArrayList(); - columnList.add(new Column(columnName, Type.VARCHAR, - false, null, true, - null, "")); + columnList.add(new Column(columnName, fnExpr.getFn().getReturnType(), + false, null, true, null, "")); view = new InlineView(viewName, columnList); // Create the non-materialized tuple and set the fake table in it. @@ -180,3 +179,4 @@ private void checkScalarFunction(Expr child0) throws AnalysisException { } } } + diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index c74da9fe4dbd01..03327cec58e353 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -2140,19 +2140,51 @@ public List getBulitinFunctions() { return builtinFunctions; } - public static final String EXPLODE_SPLIT = "explode_split"; + public static final String EXPLODE_BITMAP = "explode_bitmap"; + public static final String EXPLODE_JSON_ARRAY_INT = "explode_json_array_int"; + public static final String EXPLODE_JSON_ARRAY_DOUBLE = "explode_json_array_double"; + public static final String EXPLODE_JSON_ARRAY_STRING = "explode_json_array_string"; private void initTableFunction() { - // init explode_split function - ArrayList argsType = Lists.newArrayList(); - argsType.add(Type.VARCHAR); - argsType.add(Type.VARCHAR); - Function explodeSplit = ScalarFunction.createBuiltin( - EXPLODE_SPLIT, Type.VARCHAR, Function.NullableMode.DEPEND_ON_ARGUMENT, argsType, false, - "", "", "", true); List explodeSplits = Lists.newArrayList(); - explodeSplits.add(explodeSplit); + explodeSplits.add(ScalarFunction.createBuiltin( + EXPLODE_SPLIT, Type.VARCHAR, Function.NullableMode.DEPEND_ON_ARGUMENT, + Lists.newArrayList(Type.VARCHAR, Type.VARCHAR), false, + "_ZN5doris19DummyTableFunctions13explode_splitEPN9doris_udf15FunctionContextERKNS1_9StringValES6_", + null, null, true)); tableFunctions.put(EXPLODE_SPLIT, explodeSplits); + + List explodeBitmaps = Lists.newArrayList(); + explodeBitmaps.add(ScalarFunction.createBuiltin( + EXPLODE_BITMAP, Type.BIGINT, Function.NullableMode.DEPEND_ON_ARGUMENT, + Lists.newArrayList(Type.BITMAP), false, + "_ZN5doris19DummyTableFunctions14explode_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE", + null, null, true)); + tableFunctions.put(EXPLODE_BITMAP, explodeBitmaps); + + List explodeJsonArrayInts = Lists.newArrayList(); + explodeJsonArrayInts.add(ScalarFunction.createBuiltin( + EXPLODE_JSON_ARRAY_INT, Type.BIGINT, Function.NullableMode.DEPEND_ON_ARGUMENT, + Lists.newArrayList(Type.VARCHAR), false, + "_ZN5doris19DummyTableFunctions22explode_json_array_intEPN9doris_udf15FunctionContextERKNS1_9StringValE", + null, null, true)); + tableFunctions.put(EXPLODE_JSON_ARRAY_INT, explodeJsonArrayInts); + + List explodeJsonArrayDoubles = Lists.newArrayList(); + explodeJsonArrayDoubles.add(ScalarFunction.createBuiltin( + EXPLODE_JSON_ARRAY_DOUBLE, Type.DOUBLE, Function.NullableMode.DEPEND_ON_ARGUMENT, + Lists.newArrayList(Type.VARCHAR), false, + "_ZN5doris19DummyTableFunctions25explode_json_array_doubleEPN9doris_udf15FunctionContextERKNS1_9StringValE", + null, null, true)); + tableFunctions.put(EXPLODE_JSON_ARRAY_DOUBLE, explodeJsonArrayDoubles); + + List explodeJsonArrayStrings = Lists.newArrayList(); + explodeJsonArrayStrings.add(ScalarFunction.createBuiltin( + EXPLODE_JSON_ARRAY_STRING, Type.VARCHAR, Function.NullableMode.DEPEND_ON_ARGUMENT, + Lists.newArrayList(Type.VARCHAR), false, + "_ZN5doris19DummyTableFunctions25explode_json_array_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE", + null, null, true)); + tableFunctions.put(EXPLODE_JSON_ARRAY_STRING, explodeJsonArrayStrings); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java index a77a3ceb981cb3..4e02aa8068a086 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/DistributedPlanner.java @@ -293,6 +293,7 @@ private PlanFragment createScanFragment(PlanNode node) throws UserException { private PlanFragment createTableFunctionFragment(PlanNode node, PlanFragment childFragment) { Preconditions.checkState(node instanceof TableFunctionNode); node.setChild(0, childFragment.getPlanRoot()); + node.setNumInstances(childFragment.getPlanRoot().getNumInstances()); childFragment.addPlanRoot(node); return childFragment; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 5e721897495599..a792231f8fa72b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -573,6 +573,7 @@ private void sendFragment() throws TException, RpcException, UserException { LOG.warn("catch a execute exception", e); exception = e; code = TStatusCode.THRIFT_RPC_ERROR; + BackendServiceProxy.getInstance().removeProxy(pair.first.brpcAddress); } catch (InterruptedException e) { LOG.warn("catch a interrupt exception", e); exception = e; @@ -2229,3 +2230,4 @@ public FRuntimeFilterTargetParam(TUniqueId id, TNetworkAddress host) { } + From 956a2bf54b607f67a4b1f590e0c1d107bdbdc0e7 Mon Sep 17 00:00:00 2001 From: morningman Date: Tue, 30 Nov 2021 17:28:10 +0800 Subject: [PATCH 02/11] add docs --- docs/.vuepress/sidebar/en.js | 10 + docs/.vuepress/sidebar/zh-CN.js | 10 + .../table-functions/explode-bitmap.md | 157 ++++++++++ .../table-functions/explode-json-array.md | 286 ++++++++++++++++++ .../table-functions/explode-split.md | 112 +++++++ .../Data Manipulation/Lateral-View.md | 87 ++++++ .../table-functions/explode-bitmap.md | 157 ++++++++++ .../table-functions/explode-json-array.md | 286 ++++++++++++++++++ .../table-functions/explode-split.md | 112 +++++++ .../Data Manipulation/Lateral-View.md | 86 ++++++ 10 files changed, 1303 insertions(+) create mode 100644 docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md create mode 100644 docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md create mode 100644 docs/en/sql-reference/sql-functions/table-functions/explode-split.md create mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/Lateral-View.md create mode 100644 docs/zh-CN/sql-reference/sql-functions/table-functions/explode-bitmap.md create mode 100644 docs/zh-CN/sql-reference/sql-functions/table-functions/explode-json-array.md create mode 100644 docs/zh-CN/sql-reference/sql-functions/table-functions/explode-split.md create mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/Lateral-View.md diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index a7721529b5c2b4..ca05fbe30e7cb4 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -435,6 +435,15 @@ module.exports = [ directoryPath: "hash-functions/", children: ["murmur_hash3_32"], }, + { + title: "table functions", + directoryPath: "table-functions/", + children: [ + "explode-bitmap", + "explode-split", + "explode-json-array" + ], + }, "window-function", "cast", "digital-masking", @@ -585,6 +594,7 @@ module.exports = [ "alter-routine-load", "insert", "UPDATE", + "lateral-view", ], }, { diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index 46f215408b45ed..023ab5d400c0b4 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -439,6 +439,15 @@ module.exports = [ directoryPath: "hash-functions/", children: ["murmur_hash3_32"], }, + { + title: "table functions", + directoryPath: "table-functions/", + children: [ + "explode-bitmap", + "explode-split", + "explode-json-array" + ], + }, "window-function", "cast", "digital-masking", @@ -588,6 +597,7 @@ module.exports = [ "alter-routine-load", "insert", "UPDATE", + "lateral-view", ], }, { diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md b/docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md new file mode 100644 index 00000000000000..8863928b4794d2 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md @@ -0,0 +1,157 @@ +--- +{ + "title": "explode_bitmap", + "language": "en" +} +--- + + + +# explode_bitmap + +## description + +Table functions must be used in conjunction with Lateral View. + +Expand a bitmap type. + +grammar: + +``` +explode_bitmap(bitmap) +``` + +## example + +Original table data: + +``` +mysql> select k1 from example1 order by k1; ++------+ +| k1 | ++------+ +| 1 | +| 2 | +| 3 | +| 4 | +| 5 | +| 6 | ++------+ +``` + +Lateral View: + +``` +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_empty()) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | +| 4 | NULL | +| 5 | NULL | +| 6 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_from_string("1")) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 2 | 1 | +| 3 | 1 | +| 4 | 1 | +| 5 | 1 | +| 6 | 1 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_from_string("1,2")) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 2 | +| 2 | 1 | +| 2 | 2 | +| 3 | 1 | +| 3 | 2 | +| 4 | 1 | +| 4 | 2 | +| 5 | 1 | +| 5 | 2 | +| 6 | 1 | +| 6 | 2 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_from_string("1,1000")) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 1000 | +| 2 | 1 | +| 2 | 1000 | +| 3 | 1 | +| 3 | 1000 | +| 4 | 1 | +| 4 | 1000 | +| 5 | 1 | +| 5 | 1000 | +| 6 | 1 | +| 6 | 1000 | ++------+------+ + +mysql> select k1, e1, e2 from example1 +lateral view explode_bitmap(bitmap_from_string("1,1000")) tmp1 as e1 +lateral view explode_split("a,b", ",") tmp2 as e2 order by k1, e1, e2; ++------+------+------+ +| k1 | e1 | e2 | ++------+------+------+ +| 1 | 1 | a | +| 1 | 1 | b | +| 1 | 1000 | a | +| 1 | 1000 | b | +| 2 | 1 | a | +| 2 | 1 | b | +| 2 | 1000 | a | +| 2 | 1000 | b | +| 3 | 1 | a | +| 3 | 1 | b | +| 3 | 1000 | a | +| 3 | 1000 | b | +| 4 | 1 | a | +| 4 | 1 | b | +| 4 | 1000 | a | +| 4 | 1000 | b | +| 5 | 1 | a | +| 5 | 1 | b | +| 5 | 1000 | a | +| 5 | 1000 | b | +| 6 | 1 | a | +| 6 | 1 | b | +| 6 | 1000 | a | +| 6 | 1000 | b | ++------+------+------+ +``` + +## keyword + + explode_bitmap \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md b/docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md new file mode 100644 index 00000000000000..e9c136dbff17fb --- /dev/null +++ b/docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md @@ -0,0 +1,286 @@ +--- +{ + "title": "explode_json_array", + "language": "en" +} +--- + + + +# `explode_json_array` + +## description + +Table functions must be used in conjunction with Lateral View. + +Expand a json array. According to the array element type, there are three function names. Corresponding to integer, floating point and string arrays respectively. + +grammar: + +``` +explode_json_array_int(json_str) +explode_json_array_double(json_str) +explode_json_array_string(json_str) +``` + +## example + +Original table data: + +``` +mysql> select k1 from example1 order by k1; ++------+ +| k1 | ++------+ +| 1 | +| 2 | +| 3 | +| 4 | +| 5 | +| 6 | ++------+ +``` + +Lateral View: + +``` +mysql> select k1, e1 from example1 lateral view explode_json_array_int('[]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('[1,2,3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 2 | +| 1 | 3 | +| 2 | 1 | +| 2 | 2 | +| 2 | 3 | +| 3 | 1 | +| 3 | 2 | +| 3 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | 1 | +| 1 | 3 | +| 2 | NULL | +| 2 | 1 | +| 2 | 3 | +| 3 | NULL | +| 3 | 1 | +| 3 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('["a","b","c"]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('{"a": 3}') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1,2,3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1.0,2.0,3.0]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 2 | +| 1 | 3 | +| 2 | 1 | +| 2 | 2 | +| 2 | 3 | +| 3 | 1 | +| 3 | 2 | +| 3 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('["a","b","c"]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('{"a": 3}') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('[]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('[1.0,2.0,3.0]') tmp1 as e1 order by k1, e1; ++------+----------+ +| k1 | e1 | ++------+----------+ +| 1 | 1.000000 | +| 1 | 2.000000 | +| 1 | 3.000000 | +| 2 | 1.000000 | +| 2 | 2.000000 | +| 2 | 3.000000 | +| 3 | 1.000000 | +| 3 | 2.000000 | +| 3 | 3.000000 | ++------+----------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 3 | +| 1 | b | +| 2 | 1 | +| 2 | 3 | +| 2 | b | +| 3 | 1 | +| 3 | 3 | +| 3 | b | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('["a","b","c"]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | a | +| 1 | b | +| 1 | c | +| 2 | a | +| 2 | b | +| 2 | c | +| 3 | a | +| 3 | b | +| 3 | c | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('{"a": 3}') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ +``` + +## keyword + + explode_json_array \ No newline at end of file diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-split.md b/docs/en/sql-reference/sql-functions/table-functions/explode-split.md new file mode 100644 index 00000000000000..0557b900128905 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/table-functions/explode-split.md @@ -0,0 +1,112 @@ +--- +{ + "title": "explode_split", + "language": "en" +} +--- + + + +# explode_split + +## description + +Table functions must be used in conjunction with Lateral View. + +Split a string into multiple substrings according to the specified delimiter. + +grammar: + +``` +explode_split(str, delimiter) +``` + +## example + +Original table data: + +``` +mysql> select * from example1 order by k1; ++------+---------+ +| k1 | k2 | ++------+---------+ +| 1 | | +| 2 | NULL | +| 3 | , | +| 4 | 1 | +| 5 | 1,2,3 | +| 6 | a, b, c | ++------+---------+ +``` + +Lateral View: + +``` +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 2 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 2 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 3 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 3 | | +| 3 | | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 4 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 4 | 1 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 5 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 5 | 1 | +| 5 | 2 | +| 5 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 6 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 6 | b | +| 6 | c | +| 6 | a | ++------+------+ +``` + +## keyword + + explode_split \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/Lateral-View.md b/docs/en/sql-reference/sql-statements/Data Manipulation/Lateral-View.md new file mode 100644 index 00000000000000..2d59eae27c13c9 --- /dev/null +++ b/docs/en/sql-reference/sql-statements/Data Manipulation/Lateral-View.md @@ -0,0 +1,87 @@ +--- +{ + "title": "Lateral View", + "language": "en" +} +--- + + + +# Lateral View + +## description + +Lateral view syntax can be used with Table Function to fulfill the requirement of expanding one row of data into multiple rows (column to rows). + +grammar: + +``` +... +FROM table_name +lateral_view_ref[ lateral_view_ref ...] + +lateral_view_ref: + +LATERAL VIEW table_function(...) view_alias as col_name +``` + +The Lateral view clause must follow the table name. Can contain multiple Lateral view clauses. `view_alias` is the name of the corresponding Lateral View. `col_name` is the name of the column produced by the table function `table_function`. + +Table functions currently supported: + +1. `explode_split` +2. `explode_bitmap` +3. `explode_json_array` + +For specific function descriptions, please refer to the corresponding syntax help documentation. + +The data in the table will be Cartesian product with the result set produced by each Lateral View and then return to the upper level. + +## example + +Here, only the syntax example of Lateral View is given. For the specific meaning and output result description, please refer to the help document of the corresponding table function. + +1. + +``` +select k1, e1 from tbl1 +lateral view explode_split(v1,',') tmp1 as e1 where e1 = "abc"; +``` + +2. + +``` +select k1, e1, e2 from tbl2 +lateral view explode_split(v1,',') tmp1 as e1 +lateral view explode_bitmap(bitmap1) tmp2 as e2 +where e2> 3; +``` + +3. + +``` +select k1, e1, e2 from tbl3 +lateral view explode_json_array_int("[1,2,3]") tmp1 as e1 +lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2; +``` + +## keyword + + lateral view \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-bitmap.md b/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-bitmap.md new file mode 100644 index 00000000000000..99f79711accae2 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-bitmap.md @@ -0,0 +1,157 @@ +--- +{ + "title": "explode_bitmap", + "language": "zh-CN" +} +--- + + + +# explode_bitmap + +## description + +表函数,需配合 Lateral View 使用。 + +展开一个bitmap类型。 + +语法: + +``` +explode_bitmap(bitmap) +``` + +## example + +原表数据: + +``` +mysql> select k1 from example1 order by k1; ++------+ +| k1 | ++------+ +| 1 | +| 2 | +| 3 | +| 4 | +| 5 | +| 6 | ++------+ +``` + +Lateral View: + +``` +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_empty()) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | +| 4 | NULL | +| 5 | NULL | +| 6 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_from_string("1")) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 2 | 1 | +| 3 | 1 | +| 4 | 1 | +| 5 | 1 | +| 6 | 1 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_from_string("1,2")) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 2 | +| 2 | 1 | +| 2 | 2 | +| 3 | 1 | +| 3 | 2 | +| 4 | 1 | +| 4 | 2 | +| 5 | 1 | +| 5 | 2 | +| 6 | 1 | +| 6 | 2 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_bitmap(bitmap_from_string("1,1000")) tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 1000 | +| 2 | 1 | +| 2 | 1000 | +| 3 | 1 | +| 3 | 1000 | +| 4 | 1 | +| 4 | 1000 | +| 5 | 1 | +| 5 | 1000 | +| 6 | 1 | +| 6 | 1000 | ++------+------+ + +mysql> select k1, e1, e2 from example1 +lateral view explode_bitmap(bitmap_from_string("1,1000")) tmp1 as e1 +lateral view explode_split("a,b", ",") tmp2 as e2 order by k1, e1, e2; ++------+------+------+ +| k1 | e1 | e2 | ++------+------+------+ +| 1 | 1 | a | +| 1 | 1 | b | +| 1 | 1000 | a | +| 1 | 1000 | b | +| 2 | 1 | a | +| 2 | 1 | b | +| 2 | 1000 | a | +| 2 | 1000 | b | +| 3 | 1 | a | +| 3 | 1 | b | +| 3 | 1000 | a | +| 3 | 1000 | b | +| 4 | 1 | a | +| 4 | 1 | b | +| 4 | 1000 | a | +| 4 | 1000 | b | +| 5 | 1 | a | +| 5 | 1 | b | +| 5 | 1000 | a | +| 5 | 1000 | b | +| 6 | 1 | a | +| 6 | 1 | b | +| 6 | 1000 | a | +| 6 | 1000 | b | ++------+------+------+ +``` + +## keyword + + explode_bitmap \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-json-array.md b/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-json-array.md new file mode 100644 index 00000000000000..8332d6a8e435ea --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-json-array.md @@ -0,0 +1,286 @@ +--- +{ + "title": "explode_json_array", + "language": "zh-CN" +} +--- + + + +# `explode_json_array` + +## description + +表函数,需配合 Lateral View 使用。 + +展开一个 json 数组。根据数组元素类型,有三种函数名称。分别对应整型、浮点和字符串数组。 + +语法: + +``` +explode_json_array_int(json_str) +explode_json_array_double(json_str) +explode_json_array_string(json_str) +``` + +## example + +原表数据: + +``` +mysql> select k1 from example1 order by k1; ++------+ +| k1 | ++------+ +| 1 | +| 2 | +| 3 | +| 4 | +| 5 | +| 6 | ++------+ +``` + +Lateral View: + +``` +mysql> select k1, e1 from example1 lateral view explode_json_array_int('[]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('[1,2,3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 2 | +| 1 | 3 | +| 2 | 1 | +| 2 | 2 | +| 2 | 3 | +| 3 | 1 | +| 3 | 2 | +| 3 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | 1 | +| 1 | 3 | +| 2 | NULL | +| 2 | 1 | +| 2 | 3 | +| 3 | NULL | +| 3 | 1 | +| 3 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('["a","b","c"]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_int('{"a": 3}') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1,2,3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1.0,2.0,3.0]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 2 | +| 1 | 3 | +| 2 | 1 | +| 2 | 2 | +| 2 | 3 | +| 3 | 1 | +| 3 | 2 | +| 3 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('["a","b","c"]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 1 | NULL | +| 1 | NULL | +| 2 | NULL | +| 2 | NULL | +| 2 | NULL | +| 3 | NULL | +| 3 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_double('{"a": 3}') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('[]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('[1.0,2.0,3.0]') tmp1 as e1 order by k1, e1; ++------+----------+ +| k1 | e1 | ++------+----------+ +| 1 | 1.000000 | +| 1 | 2.000000 | +| 1 | 3.000000 | +| 2 | 1.000000 | +| 2 | 2.000000 | +| 2 | 3.000000 | +| 3 | 1.000000 | +| 3 | 2.000000 | +| 3 | 3.000000 | ++------+----------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('[1,"b",3]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | 1 | +| 1 | 3 | +| 1 | b | +| 2 | 1 | +| 2 | 3 | +| 2 | b | +| 3 | 1 | +| 3 | 3 | +| 3 | b | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('["a","b","c"]') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | a | +| 1 | b | +| 1 | c | +| 2 | a | +| 2 | b | +| 2 | c | +| 3 | a | +| 3 | b | +| 3 | c | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_json_array_string('{"a": 3}') tmp1 as e1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | NULL | +| 2 | NULL | +| 3 | NULL | ++------+------+ +``` + +## keyword + + explode_json_array \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-split.md b/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-split.md new file mode 100644 index 00000000000000..d7c403186e2a92 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-split.md @@ -0,0 +1,112 @@ +--- +{ + "title": "explode_split", + "language": "zh-CN" +} +--- + + + +# explode_split + +## description + +表函数,需配合 Lateral View 使用。 + +将一个字符串按指定的分隔符分割成多个子串。 + +语法: + +``` +explode_split(str, delimiter) +``` + +## example + +原表数据: + +``` +mysql> select * from example1 order by k1; ++------+---------+ +| k1 | k2 | ++------+---------+ +| 1 | | +| 2 | NULL | +| 3 | , | +| 4 | 1 | +| 5 | 1,2,3 | +| 6 | a, b, c | ++------+---------+ +``` + +Lateral View: + +``` +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 1 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 1 | | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 2 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 2 | NULL | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 3 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 3 | | +| 3 | | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 4 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 4 | 1 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 5 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 5 | 1 | +| 5 | 2 | +| 5 | 3 | ++------+------+ + +mysql> select k1, e1 from example1 lateral view explode_split(k2, ',') tmp1 as e1 where k1 = 6 order by k1, e1; ++------+------+ +| k1 | e1 | ++------+------+ +| 6 | b | +| 6 | c | +| 6 | a | ++------+------+ +``` + +## keyword + + explode_split \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/Lateral-View.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/Lateral-View.md new file mode 100644 index 00000000000000..6c2719dae1e666 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/Lateral-View.md @@ -0,0 +1,86 @@ +--- +{ + "title": "Lateral View", + "language": "zh-CN" +} +--- + + + +# Lateral View + +## description + +Lateral view 语法可以搭配 Table Function,完成将一行数据扩展成多行(列转行)的需求。 + +语法: + +``` +... +FROM table_name +lateral_view_ref[ lateral_view_ref ...] + +lateral_view_ref: + +LATERAL VIEW table_function(...) view_alias as col_name +``` + +Lateral view 子句必须跟随在表名之后。可以包含多个 Lateral view 子句。`view_alias` 是对应 Lateral View 的名称。`col_name` 是表函数 `table_function` 产出的列名。 + +目前支持的表函数: + +1. `explode_split` +2. `explode_bitmap` +3. `explode_json_array` + +具体函数说明可参阅对应语法帮助文档。 + +table 中的数据会和各个 Lateral View 产生的结果集做笛卡尔积后返回上层。 + +## example + +这里只给出 Lateral View 的语法示例,具体含义和产出的结果说明,可参阅对应表函数帮助文档。 + +1. + +``` +select k1, e1 from tbl1 +lateral view explode_split(v1, ',') tmp1 as e1 where e1 = "abc"; +``` + +2. + +``` +select k1, e1, e2 from tbl2 +lateral view explode_split(v1, ',') tmp1 as e1 +lateral view explode_bitmap(bitmap1) tmp2 as e2 +where e2 > 3; +``` + +3. + +``` +select k1, e1, e2 from tbl3 +lateral view explode_json_array_int("[1,2,3]") tmp1 as e1 +lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2; +``` +## keyword + + lateral view From 05d3aa63370a6dfc09db212b6592a560a69b45a4 Mon Sep 17 00:00:00 2001 From: morningman Date: Tue, 30 Nov 2021 19:30:30 +0800 Subject: [PATCH 03/11] fix cast --- be/src/exprs/table_function/explode_split.cpp | 2 +- .../doris/analysis/FunctionCallExpr.java | 142 +++++++++--------- 2 files changed, 70 insertions(+), 74 deletions(-) diff --git a/be/src/exprs/table_function/explode_split.cpp b/be/src/exprs/table_function/explode_split.cpp index 03c53347f6660c..959dad41ed6828 100644 --- a/be/src/exprs/table_function/explode_split.cpp +++ b/be/src/exprs/table_function/explode_split.cpp @@ -54,7 +54,7 @@ Status ExplodeSplitTableFunction::process(TupleRow* tuple_row) { _data.clear(); StringVal text = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); - if (text.is_null || text.len == 0) { + if (text.is_null) { _is_current_empty = true; _cur_size = 0; _cur_offset = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 34fd3ed0c23a0e..a7754953ef2b22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -55,6 +55,7 @@ import java.text.StringCharacterIterator; import java.util.Arrays; import java.util.List; + // TODO: for aggregations, we need to unify the code paths for builtins and UDAs. public class FunctionCallExpr extends Expr { private static final Logger LOG = LogManager.getLogger(FunctionCallExpr.class); @@ -84,7 +85,7 @@ public class FunctionCallExpr extends Expr { private Expr originStmtFnExpr; private boolean isRewrote = false; - + public void setIsAnalyticFnCall(boolean v) { isAnalyticFnCall = v; } @@ -127,7 +128,7 @@ public FunctionCallExpr(FunctionName fnName, FunctionParams params) { } private FunctionCallExpr( - FunctionName fnName, FunctionParams params, boolean isMergeAggFn) { + FunctionName fnName, FunctionParams params, boolean isMergeAggFn) { super(); this.fnName = fnName; fnParams = params; @@ -158,7 +159,7 @@ protected FunctionCallExpr(FunctionCallExpr other) { super(other); fnName = other.fnName; isAnalyticFnCall = other.isAnalyticFnCall; - // aggOp = other.aggOp; + // aggOp = other.aggOp; // fnParams = other.fnParams; // Clone the params in a way that keeps the children_ and the params.exprs() // in sync. The children have already been cloned in the super c'tor. @@ -245,12 +246,12 @@ public String toSqlImpl() { } if (((FunctionCallExpr) expr).fnParams.isDistinct()) { sb.append("DISTINCT "); - } + } boolean isJsonFunction = false; int len = children.size(); List result = Lists.newArrayList(); if ((fnName.getFunction().equalsIgnoreCase("json_array")) || - (fnName.getFunction().equalsIgnoreCase("json_object"))) { + (fnName.getFunction().equalsIgnoreCase("json_object"))) { len = len - 1; isJsonFunction = true; } @@ -277,7 +278,7 @@ public FunctionParams getParams() { public boolean isScalarFunction() { Preconditions.checkState(fn != null); - return fn instanceof ScalarFunction ; + return fn instanceof ScalarFunction; } public boolean isAggregateFunction() { @@ -289,6 +290,7 @@ public boolean isBuiltin() { Preconditions.checkState(fn != null); return fn instanceof BuiltinAggregateFunction && !isAnalyticFnCall; } + /** * Returns true if this is a call to an aggregate function that returns * non-null on an empty input (e.g. count). @@ -296,7 +298,7 @@ public boolean isBuiltin() { public boolean returnsNonNullOnEmpty() { Preconditions.checkNotNull(fn); return fn instanceof AggregateFunction - && ((AggregateFunction) fn).returnsNonNullOnEmpty(); + && ((AggregateFunction) fn).returnsNonNullOnEmpty(); } public boolean isDistinct() { @@ -372,8 +374,8 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti } return; } - - if(fnName.getFunction().equalsIgnoreCase("json_array")) { + + if (fnName.getFunction().equalsIgnoreCase("json_array")) { String res = parseJsonDataType(false); if (children.size() == originChildSize) { children.add(new StringLiteral(res)); @@ -381,8 +383,8 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti return; } - if(fnName.getFunction().equalsIgnoreCase("json_object")) { - if ((children.size()&1) == 1 && (originChildSize == children.size())) { + if (fnName.getFunction().equalsIgnoreCase("json_object")) { + if ((children.size() & 1) == 1 && (originChildSize == children.size())) { throw new AnalysisException("json_object can't be odd parameters, need even parameters: " + this.toSql()); } String res = parseJsonDataType(true); @@ -395,7 +397,7 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti if (fnName.getFunction().equalsIgnoreCase("group_concat")) { if (children.size() > 2 || children.isEmpty()) { throw new AnalysisException( - "group_concat requires one or two parameters: " + this.toSql()); + "group_concat requires one or two parameters: " + this.toSql()); } if (fnParams.isDistinct()) { @@ -405,7 +407,7 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti Expr arg0 = getChild(0); if (!arg0.type.isStringType() && !arg0.type.isNull()) { throw new AnalysisException( - "group_concat requires first parameter to be of type STRING: " + this.toSql()); + "group_concat requires first parameter to be of type STRING: " + this.toSql()); } if (children.size() == 2) { @@ -490,7 +492,7 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti throw new AnalysisException("intersect_count function first argument should be of BITMAP type, but was " + inputType); } - for(int i = 2; i < children.size(); i++) { + for (int i = 2; i < children.size(); i++) { if (!getChild(i).isConstant()) { throw new AnalysisException("intersect_count function filter_values arg must be constant"); } @@ -667,7 +669,7 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { type = getChild(0).type.getMaxResolutionType(); } fn = getBuiltinFunction(analyzer, fnName.getFunction(), new Type[]{type}, - Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); + Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); } else if (fnName.getFunction().equalsIgnoreCase("count_distinct")) { Type compatibleType = this.children.get(0).getType(); for (int i = 1; i < this.children.size(); ++i) { @@ -690,34 +692,34 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { if (fn == null) { throw new AnalysisException("Current only support `explode_split`, `explode_bitmap()` and `explode_json_array_xx` table functions"); } - return; - } - // now first find function in built-in functions - if (Strings.isNullOrEmpty(fnName.getDb())) { - Type[] childTypes = collectChildReturnTypes(); - fn = getBuiltinFunction(analyzer, fnName.getFunction(), childTypes, - Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); - } - - // find user defined functions - if (fn == null) { - if (!analyzer.isUDFAllowed()) { - throw new AnalysisException( - "Does not support non-builtin functions, or function does not exist: " + this.toSqlImpl()); + } else { + // now first find function in built-in functions + if (Strings.isNullOrEmpty(fnName.getDb())) { + Type[] childTypes = collectChildReturnTypes(); + fn = getBuiltinFunction(analyzer, fnName.getFunction(), childTypes, + Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); } - String dbName = fnName.analyzeDb(analyzer); - if (!Strings.isNullOrEmpty(dbName)) { - // check operation privilege - if (!Catalog.getCurrentCatalog().getAuth().checkDbPriv( - ConnectContext.get(), dbName, PrivPredicate.SELECT)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "SELECT"); + // find user defined functions + if (fn == null) { + if (!analyzer.isUDFAllowed()) { + throw new AnalysisException( + "Does not support non-builtin functions, or function does not exist: " + this.toSqlImpl()); } - Database db = Catalog.getCurrentCatalog().getDbNullable(dbName); - if (db != null) { - Function searchDesc = new Function( - fnName, Arrays.asList(collectChildReturnTypes()), Type.INVALID, false); - fn = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); + + String dbName = fnName.analyzeDb(analyzer); + if (!Strings.isNullOrEmpty(dbName)) { + // check operation privilege + if (!Catalog.getCurrentCatalog().getAuth().checkDbPriv( + ConnectContext.get(), dbName, PrivPredicate.SELECT)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "SELECT"); + } + Database db = Catalog.getCurrentCatalog().getDbNullable(dbName); + if (db != null) { + Function searchDesc = new Function( + fnName, Arrays.asList(collectChildReturnTypes()), Type.INVALID, false); + fn = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); + } } } } @@ -729,7 +731,7 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { } if (fnName.getFunction().equalsIgnoreCase("from_unixtime") - || fnName.getFunction().equalsIgnoreCase("date_format")) { + || fnName.getFunction().equalsIgnoreCase("date_format")) { // if has only one child, it has default time format: yyyy-MM-dd HH:mm:ss.SSSSSS if (children.size() > 1) { final StringLiteral fmtLiteral = (StringLiteral) children.get(1); @@ -994,40 +996,34 @@ public int hashCode() { result = 31 * result + Objects.hashCode(fnParams); return result; } - public String forJSON(String str){ + + public String forJSON(String str) { final StringBuilder result = new StringBuilder(); StringCharacterIterator iterator = new StringCharacterIterator(str); char character = iterator.current(); - while (character != StringCharacterIterator.DONE){ - if( character == '\"' ){ - result.append("\\\""); - } - else if(character == '\\'){ - result.append("\\\\"); - } - else if(character == '/'){ - result.append("\\/"); - } - else if(character == '\b'){ - result.append("\\b"); - } - else if(character == '\f'){ - result.append("\\f"); - } - else if(character == '\n'){ - result.append("\\n"); - } - else if(character == '\r'){ - result.append("\\r"); - } - else if(character == '\t'){ - result.append("\\t"); - } - else { - result.append(character); - } - character = iterator.next(); + while (character != StringCharacterIterator.DONE) { + if (character == '\"') { + result.append("\\\""); + } else if (character == '\\') { + result.append("\\\\"); + } else if (character == '/') { + result.append("\\/"); + } else if (character == '\b') { + result.append("\\b"); + } else if (character == '\f') { + result.append("\\f"); + } else if (character == '\n') { + result.append("\\n"); + } else if (character == '\r') { + result.append("\\r"); + } else if (character == '\t') { + result.append("\\t"); + } else { + result.append(character); + } + character = iterator.next(); } - return result.toString(); - } + return result.toString(); + } } + From 56a5c61d94f61fb4ed46810cae7b615587b078ab Mon Sep 17 00:00:00 2001 From: morningman Date: Tue, 30 Nov 2021 19:47:27 +0800 Subject: [PATCH 04/11] fix ut --- .../apache/doris/analysis/FunctionCallExpr.java | 5 ++++- .../doris/planner/TableFunctionPlanTest.java | 16 ++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index a7754953ef2b22..d11b5377885fb4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -86,6 +86,9 @@ public class FunctionCallExpr extends Expr { private boolean isRewrote = false; + public static final String UNKNOWN_TABLE_FUNCTION_MSG = "Currently only support `explode_split`, `explode_bitmap()` " + + "and `explode_json_array_xx` table functions"; + public void setIsAnalyticFnCall(boolean v) { isAnalyticFnCall = v; } @@ -690,7 +693,7 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { fn = getTableFunction(fnName.getFunction(), childTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); if (fn == null) { - throw new AnalysisException("Current only support `explode_split`, `explode_bitmap()` and `explode_json_array_xx` table functions"); + throw new AnalysisException(UNKNOWN_TABLE_FUNCTION_MSG); } } else { // now first find function in built-in functions diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java index 27524676d40172..72740b7734f912 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java @@ -19,20 +19,20 @@ import org.apache.doris.analysis.CreateDbStmt; import org.apache.doris.analysis.CreateTableStmt; +import org.apache.doris.analysis.FunctionCallExpr; import org.apache.doris.catalog.Catalog; import org.apache.doris.qe.ConnectContext; import org.apache.doris.utframe.UtFrameUtils; import org.apache.commons.io.FileUtils; - -import java.io.File; -import java.util.UUID; - import org.junit.After; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import java.io.File; +import java.util.UUID; + public class TableFunctionPlanTest { private static String runningDir = "fe/mocked/TableFunctionPlanTest/" + UUID.randomUUID().toString() + "/"; private static ConnectContext ctx; @@ -172,15 +172,11 @@ public void testMultiLateralView() throws Exception { public void errorParam() throws Exception { String sql = "explain select k1, e1 from db1.tbl1 lateral view explode_split(k2) tmp as e1;"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql); - Assert.assertTrue(explainString.contains("Doris only support `explode_split(varchar, varchar)` table function")); + Assert.assertTrue(explainString.contains(FunctionCallExpr.UNKNOWN_TABLE_FUNCTION_MSG)); sql = "explain select k1, e1 from db1.tbl1 lateral view explode_split(k1) tmp as e1;"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql); - Assert.assertTrue(explainString.contains("Doris only support `explode_split(varchar, varchar)` table function")); - - sql = "explain select k1, e1 from db1.tbl1 lateral view explode_split(k1, k2) tmp as e1;"; - explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql); - Assert.assertTrue(explainString.contains("Split separator of explode must be a string const")); + Assert.assertTrue(explainString.contains(FunctionCallExpr.UNKNOWN_TABLE_FUNCTION_MSG)); } /* Case2 table function in where stmt From 4745d0be38fbe459f2347cae77746b21287a9a3b Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 1 Dec 2021 14:52:04 +0800 Subject: [PATCH 05/11] fix doc --- .../Data Manipulation/{Lateral-View.md => lateral-view.md} | 0 .../Data Manipulation/{Lateral-View.md => lateral-view.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/en/sql-reference/sql-statements/Data Manipulation/{Lateral-View.md => lateral-view.md} (100%) rename docs/zh-CN/sql-reference/sql-statements/Data Manipulation/{Lateral-View.md => lateral-view.md} (100%) diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/Lateral-View.md b/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md similarity index 100% rename from docs/en/sql-reference/sql-statements/Data Manipulation/Lateral-View.md rename to docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/Lateral-View.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Manipulation/Lateral-View.md rename to docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md From 012726a07dceb0e33c8ff60ce57d012d457b3c8b Mon Sep 17 00:00:00 2001 From: morningman Date: Tue, 7 Dec 2021 14:13:52 +0800 Subject: [PATCH 06/11] mod doc --- .../sql-statements/Data Manipulation/lateral-view.md | 11 +++++++++-- .../sql-statements/Data Manipulation/lateral-view.md | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md b/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md index 2d59eae27c13c9..1adacea210a82b 100644 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md +++ b/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md @@ -42,7 +42,7 @@ lateral_view_ref: LATERAL VIEW table_function(...) view_alias as col_name ``` -The Lateral view clause must follow the table name. Can contain multiple Lateral view clauses. `view_alias` is the name of the corresponding Lateral View. `col_name` is the name of the column produced by the table function `table_function`. +The Lateral view clause must follow the table name or subquery. Can contain multiple Lateral view clauses. `view_alias` is the name of the corresponding Lateral View. `col_name` is the name of the column produced by the table function `table_function`. Table functions currently supported: @@ -82,6 +82,13 @@ lateral view explode_json_array_int("[1,2,3]") tmp1 as e1 lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2; ``` +4. + +``` +select k1, e1 from (select k1, bitmap_union(members) as x from tbl1 where k1=10000 group by k1)tmp1 +lateral view explode_bitmap(x) tmp2 as e1; +``` + ## keyword - lateral view \ No newline at end of file + lateral view diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md index 6c2719dae1e666..2b6798e7c9d444 100644 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md +++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md @@ -42,7 +42,7 @@ lateral_view_ref: LATERAL VIEW table_function(...) view_alias as col_name ``` -Lateral view 子句必须跟随在表名之后。可以包含多个 Lateral view 子句。`view_alias` 是对应 Lateral View 的名称。`col_name` 是表函数 `table_function` 产出的列名。 +Lateral view 子句必须跟随在表名或子查询之后。可以包含多个 Lateral view 子句。`view_alias` 是对应 Lateral View 的名称。`col_name` 是表函数 `table_function` 产出的列名。 目前支持的表函数: @@ -81,6 +81,14 @@ select k1, e1, e2 from tbl3 lateral view explode_json_array_int("[1,2,3]") tmp1 as e1 lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2; ``` + +4. + +``` +select k1, e1 from (select k1, bitmap_union(members) as x from tbl1 where k1=10000 group by k1)tmp1 +lateral view explode_bitmap(x) tmp2 as e1; +``` + ## keyword lateral view From 5af4f7676330bf2fbe490691666da519fab13179 Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 8 Dec 2021 11:49:37 +0800 Subject: [PATCH 07/11] tmp --- be/src/exprs/slot_ref.cpp | 5 +++-- .../java/org/apache/doris/analysis/SlotRef.java | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp index 0068c037603ff2..204d20cdd013a6 100644 --- a/be/src/exprs/slot_ref.cpp +++ b/be/src/exprs/slot_ref.cpp @@ -20,6 +20,7 @@ #include #include "gen_cpp/Exprs_types.h" +#include "gutil/strings/substitute.h" #include "runtime/runtime_state.h" #include "util/types.h" @@ -62,7 +63,7 @@ Status SlotRef::prepare(const SlotDescriptor* slot_desc, const RowDescriptor& ro } _tuple_idx = row_desc.get_tuple_idx(slot_desc->parent()); if (_tuple_idx == RowDescriptor::INVALID_IDX) { - return Status::InternalError("can't support"); + return Status::InternalError(strings::Substitute("failed to get tuple idx with tuple id: $0, slot id: $1", slot_desc->parent(), _slot_id)); } _tuple_is_nullable = row_desc.tuple_is_nullable(_tuple_idx); _slot_offset = slot_desc->tuple_offset(); @@ -94,7 +95,7 @@ Status SlotRef::prepare(RuntimeState* state, const RowDescriptor& row_desc, Expr // TODO(marcel): get from runtime state _tuple_idx = row_desc.get_tuple_idx(slot_desc->parent()); if (_tuple_idx == RowDescriptor::INVALID_IDX) { - return Status::InternalError("can't support"); + return Status::InternalError(strings::Substitute("failed to get tuple idx when prepare with tuple id: $0, slot id: $1", slot_desc->parent(), _slot_id)); } DCHECK(_tuple_idx != RowDescriptor::INVALID_IDX); _tuple_is_nullable = row_desc.tuple_is_nullable(_tuple_idx); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java index 8c7d169e65ee89..c7b451dcc801b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java @@ -30,6 +30,7 @@ import com.google.common.base.MoreObjects; import com.google.common.base.Objects; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -374,6 +375,19 @@ public void getTableIdToColumnNames(Map> tableIdToColumnNames) } } + @Override + protected Expr substituteImpl(ExprSubstitutionMap smap, Analyzer analyzer) throws AnalysisException { + if (isAnalyzed && desc != null && desc.getParent().getTable() == null && desc.getSourceExprs() != null) { + List newSourceExprs = Lists.newArrayList(); + for (int i = 0; i < desc.getSourceExprs().size(); ++i) { + newSourceExprs.add(desc.getSourceExprs().get(i).substituteImpl(smap, analyzer)); + } + desc.setSourceExprs(newSourceExprs); + return this; + } + return super.substituteImpl(smap, analyzer); + } + public Table getTable() { Preconditions.checkState(desc != null); Table table = desc.getParent().getTable(); From 237342bc2cad8cf09778290e6982bf7b1e47068b Mon Sep 17 00:00:00 2001 From: morningman Date: Sat, 11 Dec 2021 21:40:48 +0800 Subject: [PATCH 08/11] review 1 --- be/src/exec/table_function_node.cpp | 4 ---- .../main/java/org/apache/doris/analysis/FunctionCallExpr.java | 2 +- .../main/java/org/apache/doris/analysis/LateralViewRef.java | 2 -- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index f1a1389dbfbfb7..f8806980cb0a37 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -313,10 +313,6 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo if (row_batch->at_capacity()) { break; } - - if (_child_batch_exhausted) { - continue; - } } if (reached_limit()) { int num_rows_over = _num_rows_returned - _limit; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index d11b5377885fb4..87f891913e5462 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -86,7 +86,7 @@ public class FunctionCallExpr extends Expr { private boolean isRewrote = false; - public static final String UNKNOWN_TABLE_FUNCTION_MSG = "Currently only support `explode_split`, `explode_bitmap()` " + + public static final String UNKNOWN_TABLE_FUNCTION_MSG = "Currently only support `explode_split`, `explode_bitmap` " + "and `explode_json_array_xx` table functions"; public void setIsAnalyticFnCall(boolean v) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java index c22c8431835abd..83268549372088 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java @@ -17,8 +17,6 @@ package org.apache.doris.analysis; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.InlineView; import org.apache.doris.common.AnalysisException; From 34550e0a55fc5ee44ddbe7df48a19a2fdb8c4a23 Mon Sep 17 00:00:00 2001 From: morningman Date: Sat, 11 Dec 2021 22:41:05 +0800 Subject: [PATCH 09/11] review2 --- .../exprs/table_function/explode_bitmap.cpp | 19 ++++++------------- be/src/exprs/table_function/explode_bitmap.h | 1 - .../exprs/table_function/explode_json_array.h | 4 ---- be/src/exprs/table_function/explode_split.h | 4 ---- be/src/exprs/table_function/table_function.h | 4 ++++ 5 files changed, 10 insertions(+), 22 deletions(-) diff --git a/be/src/exprs/table_function/explode_bitmap.cpp b/be/src/exprs/table_function/explode_bitmap.cpp index 4749494667a485..67fe561346f932 100644 --- a/be/src/exprs/table_function/explode_bitmap.cpp +++ b/be/src/exprs/table_function/explode_bitmap.cpp @@ -29,10 +29,6 @@ ExplodeBitmapTableFunction::~ExplodeBitmapTableFunction() { delete _cur_iter; _cur_iter = nullptr; } - if (_cur_iter_end != nullptr) { - delete _cur_iter_end; - _cur_iter_end = nullptr; - } if (_cur_bitmap_owned && _cur_bitmap != nullptr) { delete _cur_bitmap; _cur_bitmap = nullptr; @@ -51,6 +47,8 @@ Status ExplodeBitmapTableFunction::process(TupleRow* tuple_row) { CHECK(1 == _expr_context->root()->get_num_children()) << _expr_context->root()->get_num_children(); _eos = false; _is_current_empty = false; + _cur_size = 0; + _cur_offset = 0; StringVal bitmap_str = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); if (bitmap_str.is_null) { @@ -63,7 +61,8 @@ Status ExplodeBitmapTableFunction::process(TupleRow* tuple_row) { _cur_bitmap = new BitmapValue((char*) bitmap_str.ptr); _cur_bitmap_owned = true; } - if (_cur_bitmap->cardinality() == 0) { + _cur_size = _cur_bitmap->cardinality(); + if (_cur_size == 0) { _is_current_empty = true; } else { _reset_iterator(); @@ -80,13 +79,6 @@ void ExplodeBitmapTableFunction::_reset_iterator() { _cur_iter = nullptr; } _cur_iter = new BitmapValueIterator(*_cur_bitmap); - - if (_cur_iter_end != nullptr) { - delete _cur_iter_end; - _cur_iter_end = nullptr; - } - _cur_iter_end = new BitmapValueIterator(*_cur_bitmap, true); - _cur_value = **_cur_iter; } @@ -117,7 +109,8 @@ Status ExplodeBitmapTableFunction::forward(bool* eos) { _eos = true; } else { ++(*_cur_iter); - if (*_cur_iter == *_cur_iter_end) { + ++_cur_offset; + if (_cur_offset == _cur_size) { *eos = true; _eos = true; } else { diff --git a/be/src/exprs/table_function/explode_bitmap.h b/be/src/exprs/table_function/explode_bitmap.h index 818de9c386f3e7..b491eeae860d41 100644 --- a/be/src/exprs/table_function/explode_bitmap.h +++ b/be/src/exprs/table_function/explode_bitmap.h @@ -48,7 +48,6 @@ class ExplodeBitmapTableFunction : public TableFunction { bool _cur_bitmap_owned = false; // iterator of _cur_bitmap BitmapValueIterator* _cur_iter = nullptr; - BitmapValueIterator* _cur_iter_end = nullptr; // current value read from bitmap, it will be referenced by // table function scan node. uint64_t _cur_value = 0; diff --git a/be/src/exprs/table_function/explode_json_array.h b/be/src/exprs/table_function/explode_json_array.h index 3a5589e575ec6a..ae5c6578b3a856 100644 --- a/be/src/exprs/table_function/explode_json_array.h +++ b/be/src/exprs/table_function/explode_json_array.h @@ -125,10 +125,6 @@ class ExplodeJsonArrayTableFunction : public TableFunction { ParsedData _parsed_data; ExplodeJsonArrayType _type; - // the size of _data - int64_t _cur_size = 0; - // current consumed offset of _data - int64_t _cur_offset = 0; }; } // namespace doris diff --git a/be/src/exprs/table_function/explode_split.h b/be/src/exprs/table_function/explode_split.h index f62c0cfbc60cc2..ad80671954ed31 100644 --- a/be/src/exprs/table_function/explode_split.h +++ b/be/src/exprs/table_function/explode_split.h @@ -45,10 +45,6 @@ class ExplodeSplitTableFunction : public TableFunction { // the `_backup` saved the real string entity. std::vector _data; std::vector _backup; - // the size of _data - int64_t _cur_size = 0; - // current consumed offset of _data - int64_t _cur_offset = 0; // indicate whether the delimiter is constant. // if true, the constant delimiter will be saved in `_const_delimter` diff --git a/be/src/exprs/table_function/table_function.h b/be/src/exprs/table_function/table_function.h index 55b9a52cc57690..a2b8b00d1fa793 100644 --- a/be/src/exprs/table_function/table_function.h +++ b/be/src/exprs/table_function/table_function.h @@ -56,6 +56,10 @@ class TableFunction { // true means the function result set from current row is empty(eg, source value is null or empty). // so that when calling reset(), we can do nothing and keep eos as true. bool _is_current_empty = false; + // the position of current cursor + int64_t _cur_offset = 0; + // the size of current result + int64_t _cur_size = 0; }; } // namespace doris From ded6e8b034af1fda6003ca0ded51be5f04a6cde7 Mon Sep 17 00:00:00 2001 From: morningman Date: Sat, 11 Dec 2021 23:18:30 +0800 Subject: [PATCH 10/11] review 3 --- be/src/exec/table_function_node.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index f8806980cb0a37..d192354bb282ff 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -176,6 +176,9 @@ bool TableFunctionNode::_roll_table_functions(int last_eos_idx) { return true; } +// There are 2 while loops in this method. +// The outer loop is to get the next batch from child node. +// And the inner loop is to expand the row by table functions, and output row by row. Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); SCOPED_TIMER(_runtime_profile->total_time_counter()); From 428eb1cc3416a062c1f1ec764ce894b505e8e2c4 Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 15 Dec 2021 11:58:30 +0800 Subject: [PATCH 11/11] add ut --- .../doris/planner/TableFunctionPlanTest.java | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java index 72740b7734f912..861707709663f3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java @@ -55,6 +55,11 @@ public static void setUp() throws Exception { + "DUPLICATE KEY(k1) distributed by hash(k1) buckets 3 properties('replication_num' = '1');"; CreateTableStmt createTableStmt = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createTblStmtStr, ctx); Catalog.getCurrentCatalog().createTable(createTableStmt); + + createTblStmtStr = "create table db1.tbl2(k1 int, k2 varchar, v1 bitmap bitmap_union) " + + "distributed by hash(k1) buckets 3 properties('replication_num' = '1');"; + createTableStmt = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createTblStmtStr, ctx); + Catalog.getCurrentCatalog().createTable(createTableStmt); } // test planner @@ -381,29 +386,59 @@ public void aggColumnInlineViewInTB() throws Exception { Assert.assertTrue(explainString.contains("tuple ids: 1 3")); String formatString = explainString.replaceAll(" ", ""); Assert.assertTrue(formatString.contains( - "SlotDescriptor{id=0,col=k1,type=INT}\n" + "SlotDescriptor{id=0,col=k1,type=INT}\n" + "parent=0\n" + "materialized=true" )); Assert.assertTrue(formatString.contains( - "SlotDescriptor{id=1,col=k2,type=VARCHAR(*)}\n" + "SlotDescriptor{id=1,col=k2,type=VARCHAR(*)}\n" + "parent=0\n" + "materialized=true" )); Assert.assertTrue(formatString.contains( - "SlotDescriptor{id=2,col=null,type=INT}\n" + "SlotDescriptor{id=2,col=null,type=INT}\n" + "parent=1\n" + "materialized=true" )); Assert.assertTrue(formatString.contains( - "SlotDescriptor{id=3,col=null,type=VARCHAR(*)}\n" + "SlotDescriptor{id=3,col=null,type=VARCHAR(*)}\n" + "parent=1\n" + "materialized=true" )); Assert.assertTrue(formatString.contains( - "SlotDescriptor{id=6,col=e1,type=VARCHAR(*)}\n" + "SlotDescriptor{id=6,col=e1,type=VARCHAR(*)}\n" + "parent=3\n" + "materialized=true" )); } + + @Test + public void testExplodeBitmap() throws Exception { + String sql = "desc select k1, e1 from db1.tbl2 lateral view explode_bitmap(v1) tmp1 as e1 "; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); + System.out.println(explainString); + Assert.assertTrue(explainString.contains("table function: explode_bitmap(`default_cluster:db1`.`tbl2`.`v1`)")); + Assert.assertTrue(explainString.contains("output slot id: 1 2")); + } + + @Test + public void testExplodeJsonArray() throws Exception { + String sql = "desc select k1, e1 from db1.tbl2 lateral view explode_json_array_int('[1,2,3]') tmp1 as e1 "; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); + System.out.println(explainString); + Assert.assertTrue(explainString.contains("table function: explode_json_array_int('[1,2,3]')")); + Assert.assertTrue(explainString.contains("output slot id: 0 1")); + + sql = "desc select k1, e1 from db1.tbl2 lateral view explode_json_array_string('[\"a\",\"b\",\"c\"]') tmp1 as e1 "; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); + System.out.println(explainString); + Assert.assertTrue(explainString.contains("table function: explode_json_array_string('[\"a\",\"b\",\"c\"]')")); + Assert.assertTrue(explainString.contains("output slot id: 0 1")); + + sql = "desc select k1, e1 from db1.tbl2 lateral view explode_json_array_double('[1.1, 2.2, 3.3]') tmp1 as e1 "; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); + System.out.println(explainString); + Assert.assertTrue(explainString.contains("table function: explode_json_array_double('[1.1, 2.2, 3.3]')")); + Assert.assertTrue(explainString.contains("output slot id: 0 1")); + } }