From 84ef398265669b4c40f00d000b1226421cec675d Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 10 Oct 2023 16:04:29 +0800 Subject: [PATCH 1/4] update --- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index a236918e18d7b6..5b97b588464d0e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -702,7 +702,7 @@ public class SessionVariable implements Serializable, Writable { private boolean enablePipelineEngine = true; @VariableMgr.VarAttr(name = ENABLE_PIPELINE_X_ENGINE, fuzzy = false, varType = VariableAnnotation.EXPERIMENTAL) - private boolean enablePipelineXEngine = false; + private boolean enablePipelineXEngine = true; @VariableMgr.VarAttr(name = ENABLE_AGG_STATE, fuzzy = false, varType = VariableAnnotation.EXPERIMENTAL) public boolean enableAggState = false; From 4b6d83c1f5b49743e4f7e55331be5e9b27173ba5 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 10 Oct 2023 16:53:44 +0800 Subject: [PATCH 2/4] [coverage](test) Delete unused function to improve test coverage --- be/src/runtime/datetime_value.h | 78 -------------- be/src/vec/common/schema_util.cpp | 102 ------------------ be/src/vec/common/schema_util.h | 5 - be/src/vec/core/materialize_block.cpp | 13 --- be/src/vec/core/materialize_block.h | 1 - be/src/vec/core/sort_block.cpp | 90 ---------------- be/src/vec/core/sort_block.h | 18 ---- be/src/vec/core/types.h | 1 + .../org/apache/doris/qe/SessionVariable.java | 2 +- 9 files changed, 2 insertions(+), 308 deletions(-) diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index 88279af1b0e422..d63bb53ab733dc 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -17,21 +17,6 @@ #pragma once -#include -#include - -// IWYU pragma: no_include -#include -#include -#include - -#include "cctz/civil_time.h" -#include "cctz/time_zone.h" -#include "udf/udf.h" -#include "util/hash_util.hpp" -#include "util/timezone_utils.h" -#include "vec/runtime/vdatetime_value.h" - namespace doris { enum TimeUnit { @@ -57,72 +42,9 @@ enum TimeUnit { YEAR_MONTH }; -struct TimeInterval { - int64_t year; - int64_t month; - int64_t day; - int64_t hour; - int64_t minute; - int64_t second; - int64_t microsecond; - bool is_neg; - - TimeInterval() - : year(0), - month(0), - day(0), - hour(0), - minute(0), - second(0), - microsecond(0), - is_neg(false) {} - - TimeInterval(TimeUnit unit, int64_t count, bool is_neg_param) - : year(0), - month(0), - day(0), - hour(0), - minute(0), - second(0), - microsecond(0), - is_neg(is_neg_param) { - switch (unit) { - case YEAR: - year = count; - break; - case MONTH: - month = count; - break; - case WEEK: - day = 7 * count; - break; - case DAY: - day = count; - break; - case HOUR: - hour = count; - break; - case MINUTE: - minute = count; - break; - case SECOND: - second = count; - break; - case MICROSECOND: - microsecond = count; - break; - default: - break; - } - } -}; - enum TimeType { TIME_TIME = 1, TIME_DATE = 2, TIME_DATETIME = 3 }; // 9999-99-99 99:99:99.999999; 26 + 1('\0') const int MAX_DTVALUE_STR_LEN = 27; -constexpr size_t const_length(const char* str) { - return (str == nullptr || *str == 0) ? 0 : const_length(str + 1) + 1; -} } // namespace doris diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 55edd72c13fbf3..c18ec7aa221e06 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -154,48 +154,6 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co return Status::OK(); } -static void get_column_def(const vectorized::DataTypePtr& data_type, const std::string& name, - TColumnDef* column) { - if (!name.empty()) { - column->columnDesc.__set_columnName(name); - } - if (data_type->is_nullable()) { - const auto& real_type = static_cast(*data_type); - column->columnDesc.__set_isAllowNull(true); - get_column_def(real_type.get_nested_type(), "", column); - return; - } - column->columnDesc.__set_columnType(data_type->get_type_as_tprimitive_type()); - if (data_type->get_type_id() == TypeIndex::Array) { - TColumnDef child; - column->columnDesc.__set_children({}); - get_column_def(assert_cast(data_type.get())->get_nested_type(), "", - &child); - column->columnDesc.columnLength = - TabletColumn::get_field_length_by_type(column->columnDesc.columnType, 0); - column->columnDesc.children.push_back(child.columnDesc); - return; - } - if (data_type->get_type_id() == TypeIndex::Tuple) { - // TODO - // auto tuple_type = assert_cast(data_type.get()); - // DCHECK_EQ(tuple_type->get_elements().size(), tuple_type->get_element_names().size()); - // for (size_t i = 0; i < tuple_type->get_elements().size(); ++i) { - // TColumnDef child; - // get_column_def(tuple_type->get_element(i), tuple_type->get_element_names()[i], &child); - // column->columnDesc.children.push_back(child.columnDesc); - // } - // return; - } - if (data_type->get_type_id() == TypeIndex::String) { - return; - } - if (WhichDataType(*data_type).is_simple()) { - column->columnDesc.__set_columnLength(data_type->get_size_of_value_in_memory()); - return; - } -} - // send an empty add columns rpc, the rpc response will fill with base schema info // maybe we could seperate this rpc from add columns rpc Status send_fetch_full_base_schema_view_rpc(FullBaseSchemaView* schema_view) { @@ -232,66 +190,6 @@ Status send_fetch_full_base_schema_view_rpc(FullBaseSchemaView* schema_view) { static const std::regex COLUMN_NAME_REGEX( "^[_a-zA-Z@0-9\\s<>/][.a-zA-Z0-9_+-/>table_name); - req.__set_db_name(schema_view->db_name); - req.__set_table_id(schema_view->table_id); - // TODO(lhy) more configurable - req.__set_allow_type_conflict(true); - req.__set_addColumns({}); - // Deduplicate Column like `Level` and `level` - // TODO we will implement new version of dynamic column soon to handle this issue, - // also ignore column missmatch with regex - std::set dedup; - for (const auto& column_type_name : column_type_names) { - if (dedup.contains(to_lower(column_type_name.name))) { - continue; - } - if (!std::regex_match(column_type_name.name, COLUMN_NAME_REGEX)) { - continue; - } - dedup.insert(to_lower(column_type_name.name)); - TColumnDef col; - get_column_def(column_type_name.type, column_type_name.name, &col); - req.addColumns.push_back(col); - } - auto master_addr = ExecEnv::GetInstance()->master_info()->network_address; - Status rpc_st = ThriftRpcHelper::rpc( - master_addr.hostname, master_addr.port, - [&req, &res](FrontendServiceConnection& client) { client->addColumns(res, req); }, - config::txn_commit_rpc_timeout_ms); - if (!rpc_st.ok()) { - return Status::InternalError("Failed to do schema change, rpc error"); - } - // TODO(lhy) handle more status code - if (res.status.status_code != TStatusCode::OK) { - LOG(WARNING) << "failed to do schema change, code:" << res.status.status_code - << ", msg:" << res.status.error_msgs[0]; - return Status::InvalidArgument( - fmt::format("Failed to do schema change, {}", res.status.error_msgs[0])); - } - size_t sz = res.allColumns.size(); - if (sz < dedup.size()) { - return Status::InternalError( - fmt::format("Unexpected result columns {}, expected at least {}", - res.allColumns.size(), column_type_names.size())); - } - for (const auto& column : res.allColumns) { - schema_view->column_name_to_column[column.column_name] = column; - } - schema_view->schema_version = res.schema_version; - return Status::OK(); -} - Status unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_original_type, RuntimeState* state) { auto dynamic_col = block.get_by_position(dynamic_col_position).column->assume_mutable(); diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index 8a5b1423d6938d..634dae8339a8eb 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -96,13 +96,8 @@ struct FullBaseSchemaView { int32_t table_id = 0; std::string table_name; std::string db_name; - - bool empty() { return column_name_to_column.empty() && schema_version == -1; } }; -Status send_add_columns_rpc(ColumnsWithTypeAndName column_type_names, - FullBaseSchemaView* schema_view); - Status send_fetch_full_base_schema_view_rpc(FullBaseSchemaView* schema_view); // For tracking local schema change during load procedure diff --git a/be/src/vec/core/materialize_block.cpp b/be/src/vec/core/materialize_block.cpp index b48d140f325acb..cfa3047cbb414f 100644 --- a/be/src/vec/core/materialize_block.cpp +++ b/be/src/vec/core/materialize_block.cpp @@ -27,19 +27,6 @@ namespace doris::vectorized { -Block materialize_block(const Block& block) { - if (!block.columns()) return block; - - Block res = block; - size_t columns = res.columns(); - for (size_t i = 0; i < columns; ++i) { - auto& element = res.get_by_position(i); - element.column = element.column->convert_to_full_column_if_const(); - } - - return res; -} - void materialize_block_inplace(Block& block) { for (size_t i = 0; i < block.columns(); ++i) { block.replace_by_position_if_const(i); diff --git a/be/src/vec/core/materialize_block.h b/be/src/vec/core/materialize_block.h index d445d0eb43dff6..31126a37c0b288 100644 --- a/be/src/vec/core/materialize_block.h +++ b/be/src/vec/core/materialize_block.h @@ -26,7 +26,6 @@ namespace doris::vectorized { /** Converts columns-constants to full columns ("materializes" them). */ -Block materialize_block(const Block& block); void materialize_block_inplace(Block& block); template diff --git a/be/src/vec/core/sort_block.cpp b/be/src/vec/core/sort_block.cpp index be1068962ec486..91ca3d36430a10 100644 --- a/be/src/vec/core/sort_block.cpp +++ b/be/src/vec/core/sort_block.cpp @@ -42,26 +42,6 @@ ColumnsWithSortDescriptions get_columns_with_sort_description(const Block& block return res; } -struct PartialSortingLess { - const ColumnsWithSortDescriptions& columns; - - explicit PartialSortingLess(const ColumnsWithSortDescriptions& columns_) : columns(columns_) {} - - bool operator()(size_t a, size_t b) const { - for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); - ++it) { - int res = it->second.direction * - it->first->compare_at(a, b, *it->first, it->second.nulls_direction); - if (res < 0) { - return true; - } else if (res > 0) { - return false; - } - } - return false; - } -}; - void sort_block(Block& src_block, Block& dest_block, const SortDescription& description, UInt64 limit) { if (!src_block.columns()) { @@ -117,74 +97,4 @@ void sort_block(Block& src_block, Block& dest_block, const SortDescription& desc } } -void stable_get_permutation(const Block& block, const SortDescription& description, - IColumn::Permutation& out_permutation) { - if (!block.columns()) { - return; - } - - size_t size = block.rows(); - out_permutation.resize(size); - for (size_t i = 0; i < size; ++i) { - out_permutation[i] = i; - } - - ColumnsWithSortDescriptions columns_with_sort_desc = - get_columns_with_sort_description(block, description); - - std::stable_sort(out_permutation.begin(), out_permutation.end(), - PartialSortingLess(columns_with_sort_desc)); -} - -bool is_already_sorted(const Block& block, const SortDescription& description) { - if (!block.columns()) { - return true; - } - - size_t rows = block.rows(); - - ColumnsWithSortDescriptions columns_with_sort_desc = - get_columns_with_sort_description(block, description); - - PartialSortingLess less(columns_with_sort_desc); - - /** If the rows are not too few, then let's make a quick attempt to verify that the block is not sorted. - * Constants - at random. - */ - static constexpr size_t num_rows_to_try = 10; - if (rows > num_rows_to_try * 5) { - for (size_t i = 1; i < num_rows_to_try; ++i) { - size_t prev_position = rows * (i - 1) / num_rows_to_try; - size_t curr_position = rows * i / num_rows_to_try; - - if (less(curr_position, prev_position)) { - return false; - } - } - } - - for (size_t i = 1; i < rows; ++i) { - if (less(i, i - 1)) { - return false; - } - } - - return true; -} - -void stable_sort_block(Block& block, const SortDescription& description) { - if (!block.columns()) { - return; - } - - IColumn::Permutation perm; - stable_get_permutation(block, description, perm); - - size_t columns = block.columns(); - for (size_t i = 0; i < columns; ++i) { - block.safe_get_by_position(i).column = - block.safe_get_by_position(i).column->permute(perm, 0); - } -} - } // namespace doris::vectorized diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h index 6b24c160dcb339..db3592e9a72d3f 100644 --- a/be/src/vec/core/sort_block.h +++ b/be/src/vec/core/sort_block.h @@ -56,24 +56,6 @@ namespace doris::vectorized { void sort_block(Block& src_block, Block& dest_block, const SortDescription& description, UInt64 limit = 0); -/** Used only in StorageMergeTree to sort the data with INSERT. - * Sorting is stable. This is important for keeping the order of rows in the CollapsingMergeTree engine - * - because based on the order of rows it is determined whether to delete or leave groups of rows when collapsing. - * Collations are not supported. Partial sorting is not supported. - */ -void stable_sort_block(Block& block, const SortDescription& description); - -/** Same as stable_sort_block, but do not sort the block, but only calculate the permutation of the values, - * so that you can rearrange the column values yourself. - */ -void stable_get_permutation(const Block& block, const SortDescription& description, - IColumn::Permutation& out_permutation); - -/** Quickly check whether the block is already sorted. If the block is not sorted - returns false as fast as possible. - * Collations are not supported. - */ -bool is_already_sorted(const Block& block, const SortDescription& description); - using ColumnWithSortDescription = std::pair; using ColumnsWithSortDescriptions = std::vector; diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 51c41f8662ccd2..abb5c9255c641f 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -37,6 +37,7 @@ class QuantileState; struct decimal12_t; struct uint24_t; +struct StringRef; namespace vectorized { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5b97b588464d0e..a236918e18d7b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -702,7 +702,7 @@ public class SessionVariable implements Serializable, Writable { private boolean enablePipelineEngine = true; @VariableMgr.VarAttr(name = ENABLE_PIPELINE_X_ENGINE, fuzzy = false, varType = VariableAnnotation.EXPERIMENTAL) - private boolean enablePipelineXEngine = true; + private boolean enablePipelineXEngine = false; @VariableMgr.VarAttr(name = ENABLE_AGG_STATE, fuzzy = false, varType = VariableAnnotation.EXPERIMENTAL) public boolean enableAggState = false; From f7062cceecef9e320407c7b9f9c4bbdfd7ca5e0a Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 10 Oct 2023 21:57:14 +0800 Subject: [PATCH 3/4] update --- be/src/runtime/runtime_state.cpp | 5 +++++ be/src/runtime/runtime_state.h | 5 +---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index e43b8777c8671e..5e387ed16717a9 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -452,4 +452,9 @@ std::shared_ptr RuntimeState::get_ return _op_id_to_sink_local_state[id]; } +bool RuntimeState::enable_page_cache() const { + return !config::disable_storage_page_cache && + (_query_options.__isset.enable_page_cache && _query_options.enable_page_cache); +} + } // end namespace doris diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 02c048052a4595..9396c41d96d63a 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -356,10 +356,7 @@ class RuntimeState { return _query_options.__isset.skip_delete_bitmap && _query_options.skip_delete_bitmap; } - bool enable_page_cache() const { - return !config::disable_storage_page_cache && - (_query_options.__isset.enable_page_cache && _query_options.enable_page_cache); - } + bool enable_page_cache() const; int partitioned_hash_join_rows_threshold() const { if (!_query_options.__isset.partitioned_hash_join_rows_threshold) { From 735865a4f6b46bc3a027e9140fa3435c9f6e6ad3 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 11 Oct 2023 09:02:13 +0800 Subject: [PATCH 4/4] update --- be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h index c76fa95f4ad4a8..b993370a159895 100644 --- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h +++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h @@ -20,9 +20,12 @@ #include #include +#include "cctz/civil_time.h" +#include "cctz/time_zone.h" #include "exec/olap_common.h" #include "gutil/endian.h" #include "parquet_common.h" +#include "util/timezone_utils.h" #include "vec/data_types/data_type_decimal.h" #include "vec/exec/format/format_common.h" #include "vec/exec/format/parquet/schema_desc.h"