diff --git a/be/src/agent/agent_server.cpp b/be/src/agent/agent_server.cpp index b7cfc91cbcec98..54537f9a24e3bd 100644 --- a/be/src/agent/agent_server.cpp +++ b/be/src/agent/agent_server.cpp @@ -329,8 +329,8 @@ void AgentServer::submit_tasks( break; case TTaskType::ROLLUP: case TTaskType::SCHEMA_CHANGE: - case TTaskType::ALTER_TASK: - if (task.__isset.alter_tablet_req) { + case TTaskType::ALTER: + if (task.__isset.alter_tablet_req || task.__isset.alter_tablet_req_v2) { _alter_tablet_workers->submit_task(task); } else { status_code = TStatusCode::ANALYSIS_ERROR; diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 3bba1abcd03903..a4aaa9ddb78a94 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -554,7 +554,7 @@ void* TaskWorkerPool::_alter_tablet_worker_thread_callback(void* arg_this) { int64_t time_elapsed = time(nullptr) - agent_task_req.recv_time; if (time_elapsed > config::report_task_interval_seconds * 20) { LOG(INFO) << "task elapsed " << time_elapsed - << " since it is inserted to queue, it is timeout"; + << " seconds since it is inserted to queue, it is timeout"; is_task_timeout = true; } } @@ -564,7 +564,7 @@ void* TaskWorkerPool::_alter_tablet_worker_thread_callback(void* arg_this) { switch (task_type) { case TTaskType::SCHEMA_CHANGE: case TTaskType::ROLLUP: - case TTaskType::ALTER_TASK: + case TTaskType::ALTER: worker_pool_this->_alter_tablet(worker_pool_this, agent_task_req, signatrue, @@ -602,8 +602,8 @@ void TaskWorkerPool::_alter_tablet( case TTaskType::SCHEMA_CHANGE: process_name = "schema change"; break; - case TTaskType::ALTER_TASK: - process_name = "alter table"; + case TTaskType::ALTER: + process_name = "alter"; break; default: std::string task_name; @@ -621,7 +621,7 @@ void TaskWorkerPool::_alter_tablet( TSchemaHash new_schema_hash = 0; if (status == DORIS_SUCCESS) { OLAPStatus sc_status = OLAP_SUCCESS; - if (task_type == TTaskType::ALTER_TASK) { + if (task_type == TTaskType::ALTER) { new_tablet_id = agent_task_req.alter_tablet_req_v2.new_tablet_id; new_schema_hash = agent_task_req.alter_tablet_req_v2.new_schema_hash; EngineAlterTabletTask engine_task(agent_task_req.alter_tablet_req_v2, signature, task_type, &error_msgs, process_name); diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 6ceeac63d5db11..00776046c03319 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -219,7 +219,7 @@ class OlapTableSink : public DataSink { int64_t _db_id = -1; int64_t _table_id = -1; int _num_repicas = -1; - bool _need_gen_rollup = true; + bool _need_gen_rollup = false; std::string _db_name; std::string _table_name; int _tuple_desc_id = -1; diff --git a/be/src/exprs/timestamp_functions.cpp b/be/src/exprs/timestamp_functions.cpp index 2b62c3258a8d67..115f1544fe0a65 100644 --- a/be/src/exprs/timestamp_functions.cpp +++ b/be/src/exprs/timestamp_functions.cpp @@ -391,6 +391,7 @@ DateTimeVal TimestampFunctions::timestamp( return val; } +// FROM_UNIXTIME() StringVal TimestampFunctions::from_unix( FunctionContext* context, const IntVal& unix_time) { if (unix_time.is_null) { @@ -405,6 +406,7 @@ StringVal TimestampFunctions::from_unix( return AnyValUtil::from_string_temp(context, buf); } +// FROM_UNIXTIME() StringVal TimestampFunctions::from_unix( FunctionContext* context, const IntVal& unix_time, const StringVal& fmt) { if (unix_time.is_null || fmt.is_null) { @@ -422,10 +424,12 @@ StringVal TimestampFunctions::from_unix( return AnyValUtil::from_string_temp(context, buf); } +// 
UNIX_TIMESTAMP() IntVal TimestampFunctions::to_unix(FunctionContext* context) { return IntVal(context->impl()->state()->timestamp_ms() / 1000); } +// UNIX_TIMESTAMP() IntVal TimestampFunctions::to_unix( FunctionContext* context, const StringVal& string_val, const StringVal& fmt) { if (string_val.is_null || fmt.is_null) { diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index a900bbf75594f6..cf66348184120a 100755 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -1026,10 +1026,10 @@ void DataDir::perform_path_scan() { void DataDir::_process_garbage_path(const std::string& path) { if (check_dir_existed(path)) { - LOG(INFO) << "collect garbage dir path:" << path; + LOG(INFO) << "collect garbage dir path: " << path; OLAPStatus status = remove_all_dir(path); if (status != OLAP_SUCCESS) { - LOG(WARNING) << "remove garbage dir path:" << path << " failed"; + LOG(WARNING) << "remove garbage dir path: " << path << " failed"; } } } diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 7a78a48c659981..6499981607d681 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -130,19 +130,9 @@ OLAPStatus DeltaWriter::init() { writer_context.load_id = _req.load_id; RETURN_NOT_OK(RowsetFactory::create_rowset_writer(writer_context, &_rowset_writer)); - const std::vector& slots = _req.tuple_desc->slots(); - const TabletSchema& schema = _tablet->tablet_schema(); - for (size_t col_id = 0; col_id < schema.num_columns(); ++col_id) { - const TabletColumn& column = schema.column(col_id); - for (size_t i = 0; i < slots.size(); ++i) { - if (slots[i]->col_name() == column.name()) { - _col_ids.push_back(i); - } - } - } _tablet_schema = &(_tablet->tablet_schema()); _schema = new Schema(*_tablet_schema); - _mem_table = new MemTable(_schema, _tablet_schema, &_col_ids, + _mem_table = new MemTable(_schema, _tablet_schema, _req.slots, _req.tuple_desc, _tablet->keys_type()); _is_init = true; return OLAP_SUCCESS; @@ -161,7 +151,7 @@ OLAPStatus DeltaWriter::write(Tuple* tuple) { RETURN_NOT_OK(_mem_table->flush(_rowset_writer.get())); SAFE_DELETE(_mem_table); - _mem_table = new MemTable(_schema, _tablet_schema, &_col_ids, + _mem_table = new MemTable(_schema, _tablet_schema, _req.slots, _req.tuple_desc, _tablet->keys_type()); } return OLAP_SUCCESS; diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index 86f8adb8a4cd52..f9d1587f5ee187 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -47,6 +47,8 @@ struct WriteRequest { PUniqueId load_id; bool need_gen_rollup; TupleDescriptor* tuple_desc; + // slots are in order of tablet's schema + const std::vector* slots; }; class DeltaWriter { @@ -76,7 +78,6 @@ class DeltaWriter { MemTable* _mem_table; Schema* _schema; const TabletSchema* _tablet_schema; - std::vector _col_ids; bool _delta_written_success; }; diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 8aedd664a78f9d..a0054a89deeeda 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -27,12 +27,12 @@ namespace doris { MemTable::MemTable(Schema* schema, const TabletSchema* tablet_schema, - std::vector* col_ids, TupleDescriptor* tuple_desc, + const std::vector* slot_descs, TupleDescriptor* tuple_desc, KeysType keys_type) : _schema(schema), _tablet_schema(tablet_schema), _tuple_desc(tuple_desc), - _col_ids(col_ids), + _slot_descs(slot_descs), _keys_type(keys_type), _row_comparator(_schema) { _schema_size = _schema->schema_size(); @@ -58,11 +58,10 @@ size_t 
MemTable::memory_usage() { } void MemTable::insert(Tuple* tuple) { - const std::vector& slots = _tuple_desc->slots(); ContiguousRow row(_schema, _tuple_buf); - for (size_t i = 0; i < _col_ids->size(); ++i) { + for (size_t i = 0; i < _slot_descs->size(); ++i) { auto cell = row.cell(i); - const SlotDescriptor* slot = slots[(*_col_ids)[i]]; + const SlotDescriptor* slot = (*_slot_descs)[i]; bool is_null = tuple->is_null(slot->null_indicator_offset()); void* value = tuple->get_slot(slot->tuple_offset()); diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 79aac7ed40c0ec..1a262ce3ddb750 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -32,7 +32,7 @@ class RowCursor; class MemTable { public: MemTable(Schema* schema, const TabletSchema* tablet_schema, - std::vector* col_ids, TupleDescriptor* tuple_desc, + const std::vector* slot_descs, TupleDescriptor* tuple_desc, KeysType keys_type); ~MemTable(); size_t memory_usage(); @@ -43,7 +43,8 @@ class MemTable { Schema* _schema; const TabletSchema* _tablet_schema; TupleDescriptor* _tuple_desc; - std::vector* _col_ids; + // the slot in _slot_descs are in order of tablet's schema + const std::vector* _slot_descs; KeysType _keys_type; struct RowCursorComparator { diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 091d4897b8323d..9bf5bb01d9be81 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -981,6 +981,7 @@ bool SchemaChangeWithSorting::process( row_block_arr.push_back(new_row_block); } else { + LOG(INFO) << "new block num rows is: " << new_row_block->row_block_info().row_num; _row_block_allocator->release(new_row_block); new_row_block = nullptr; } @@ -1181,7 +1182,7 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe request.new_tablet_id, request.new_schema_hash); if (new_tablet == nullptr) { LOG(WARNING) << "fail to find new tablet." - << ", new_tablet=" << request.new_tablet_id + << " new_tablet=" << request.new_tablet_id << ", new_schema_hash=" << request.new_schema_hash; return OLAP_ERR_TABLE_NOT_FOUND; } @@ -1335,15 +1336,19 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe if (res != OLAP_SUCCESS) { break; } - res = _validate_alter_result(new_tablet, request); } while(0); + if (res == OLAP_SUCCESS) { + // _validate_alter_result should be outside the above while loop. + // to avoid requiring the header lock twice. + res = _validate_alter_result(new_tablet, request); + } + // if failed convert history data, then just remove the new tablet if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to alter tablet. base_tablet=" << base_tablet->full_name() << ", drop new_tablet=" << new_tablet->full_name(); // do not drop the new tablet and its data. 
GC thread will - // StorageEngine::instance()->tablet_manager()->drop_tablet(new_tablet->tablet_id(), new_tablet->schema_hash()); } return res; @@ -2134,7 +2139,7 @@ OLAPStatus SchemaChangeHandler::_validate_alter_result(TabletSharedPtr new_table Version max_continuous_version = {-1, 0}; VersionHash max_continuous_version_hash = 0; new_tablet->max_continuous_version_from_begining(&max_continuous_version, &max_continuous_version_hash); - LOG(INFO) << "find max continuous version " + LOG(INFO) << "find max continuous version of tablet=" << new_tablet->full_name() << ", start_version=" << max_continuous_version.first << ", end_version=" << max_continuous_version.second << ", version_hash=" << max_continuous_version_hash; diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index f4fc08ddd58bf9..b3d6694598aa5d 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -282,7 +282,8 @@ OLAPStatus Tablet::modify_rowsets(const vector& to_add, const RowsetSharedPtr Tablet::get_rowset_by_version(const Version& version) const { auto iter = _rs_version_map.find(version); if (iter == _rs_version_map.end()) { - LOG(INFO) << "no rowset for version:" << version.first << "-" << version.second; + LOG(INFO) << "no rowset for version:" << version.first << "-" << version.second + << ", tablet: " << full_name(); return nullptr; } RowsetSharedPtr rowset = iter->second; @@ -294,7 +295,8 @@ size_t Tablet::get_rowset_size_by_version(const Version& version) { << "invalid version:" << version.first << "-" << version.second; auto iter = _rs_version_map.find(version); if (iter == _rs_version_map.end()) { - LOG(WARNING) << "no rowset for version:" << version.first << "-" << version.second; + LOG(WARNING) << "no rowset for version:" << version.first << "-" << version.second + << ", tablet: " << full_name(); return -1; } RowsetSharedPtr rowset = iter->second; diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp index 54fb1eadd911a9..057975c1d2e4a9 100644 --- a/be/src/olap/task/engine_alter_tablet_task.cpp +++ b/be/src/olap/task/engine_alter_tablet_task.cpp @@ -33,18 +33,13 @@ EngineAlterTabletTask::EngineAlterTabletTask(const TAlterTabletReqV2& request, _process_name(process_name) { } OLAPStatus EngineAlterTabletTask::execute() { - LOG(INFO) << "begin to create new alter tablet. base_tablet_id=" << _alter_tablet_req.base_tablet_id - << ", base_schema_hash=" << _alter_tablet_req.base_schema_hash - << ", new_tablet_id=" << _alter_tablet_req.new_tablet_id - << ", new_schema_hash=" << _alter_tablet_req.new_schema_hash; - DorisMetrics::create_rollup_requests_total.increment(1); SchemaChangeHandler handler; OLAPStatus res = handler.process_alter_tablet_v2(_alter_tablet_req); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to do rollup. res=" << res + LOG(WARNING) << "failed to do alter task. 
res=" << res << " base_tablet_id=" << _alter_tablet_req.base_tablet_id << ", base_schema_hash=" << _alter_tablet_req.base_schema_hash << ", new_tablet_id=" << _alter_tablet_req.new_tablet_id diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index d7eb37adf46020..d949ac909e2dc5 100755 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -201,7 +201,8 @@ OLAPStatus TxnManager::commit_txn( << " partition_id: " << key.first << ", transaction_id: " << key.second << ", tablet: " << tablet_info.to_string() - << ", rowsetid: " << rowset_ptr->rowset_id(); + << ", rowsetid: " << rowset_ptr->rowset_id() + << ", version: " << rowset_ptr->version().first; } return OLAP_SUCCESS; } diff --git a/be/src/runtime/tablet_writer_mgr.cpp b/be/src/runtime/tablet_writer_mgr.cpp index 8b9bf6487fdcb8..0d30dbbab64fe7 100644 --- a/be/src/runtime/tablet_writer_mgr.cpp +++ b/be/src/runtime/tablet_writer_mgr.cpp @@ -207,16 +207,16 @@ Status TabletsChannel::close(int sender_id, bool* finished, } Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& params) { - std::vector* columns = nullptr; + std::vector* index_slots = nullptr; int32_t schema_hash = 0; for (auto& index : _schema->indexes()) { if (index->index_id == _index_id) { - columns = &index->slots; + index_slots = &index->slots; schema_hash = index->schema_hash; break; } } - if (columns == nullptr) { + if (index_slots == nullptr) { std::stringstream ss; ss << "unknown index id, key=" << _key; return Status::InternalError(ss.str()); @@ -231,6 +231,7 @@ Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& params) request.load_id = params.id(); request.need_gen_rollup = params.need_gen_rollup(); request.tuple_desc = _tuple_desc; + request.slots = index_slots; DeltaWriter* writer = nullptr; auto st = DeltaWriter::open(&request, &writer); diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp index 435619e1bcf5b7..ab4e0aba36e0b3 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -323,17 +323,18 @@ TEST_F(TestDeltaWriter, write) { DescriptorTbl* desc_tbl = nullptr; DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); + const std::vector& slots = tuple_desc->slots(); PUniqueId load_id; load_id.set_hi(0); load_id.set_lo(0); WriteRequest write_req = {10004, 270068376, WriteType::LOAD, - 20002, 30002, load_id, false, tuple_desc}; + 20002, 30002, load_id, false, tuple_desc, + &(tuple_desc->slots())}; DeltaWriter* delta_writer = nullptr; DeltaWriter::open(&write_req, &delta_writer); ASSERT_NE(delta_writer, nullptr); - const std::vector& slots = tuple_desc->slots(); Arena arena; // Tuple 1 { diff --git a/docs/documentation/cn/administrator-guide/operation/tablet-meta-tool.md b/docs/documentation/cn/administrator-guide/operation/tablet-meta-tool.md index 87a6230b5f7603..5846d3b40ba02b 100644 --- a/docs/documentation/cn/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/documentation/cn/administrator-guide/operation/tablet-meta-tool.md @@ -72,7 +72,7 @@ api: 命令: ``` -./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=xxx --schema_hash=xxx` +./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=xxx --schema_hash=xxx ``` ### 展示 pb 格式的 TabletMeta diff --git a/docs/documentation/cn/administrator-guide/time-zone.md b/docs/documentation/cn/administrator-guide/time-zone.md index 
a4e42b0365bbea..2a6d727a96441c 100644 --- a/docs/documentation/cn/administrator-guide/time-zone.md +++ b/docs/documentation/cn/administrator-guide/time-zone.md @@ -37,7 +37,15 @@ Doris 内部存在多个时区相关参数 包括NOW()或CURTIME()等时间函数显示的值,也包括show load, show backends中的时间值。 -但不会影响create table 中时间类型分区列的less than值,也不会影响存储为date/datetime类型的值的显示。 +但不会影响 create table 中时间类型分区列的 less than 值,也不会影响存储为 date/datetime 类型的值的显示。 + +受时区影响的函数: + +* `FROM_UNIXTIME`:给定一个 UTC 时间戳,返回指定时区的日期时间:如 `FROM_UNIXTIME(0)`, 返回 CST 时区:`1970-01-01 08:00:00`。 +* `UNIX_TIMESTAMP`:给定一个指定时区日期时间,返回 UTC 时间戳:如 CST 时区 `UNIX_TIMESTAMP('1970-01-01 08:00:00')`,返回 `0`。 +* `CURTIME`:返回指定时区时间。 +* `NOW`:返回指定时区的日期时间。 +* `CONVERT_TZ`:将一个日期时间从一个指定时区转换到另一个指定时区。 ## 使用限制 @@ -51,3 +59,7 @@ Doris 内部存在多个时区相关参数 * 为了兼容Doris,支持CST缩写时区,内部会将CST转移为"Asia/Shanghai"的中国标准时区 +## 时区格式列表 + +[List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md index 977320a0d03c4a..aca925de42e4d1 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md @@ -133,7 +133,8 @@ max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 exec_mem_limit: 设置导入使用的内存上限。默认为2G,单位字节。这里是指单个 BE 节点的内存上限。 一个导入可能分布于多个BE。我们假设 1GB 数据在单个节点处理需要最大5GB内存。那么假设1GB文件分布在2个节点处理,那么理论上,每个节点需要内存为2.5GB。则该参数可以设置为 2684354560,即2.5GB - strict mode: 是否对数据进行严格限制。默认为true。 + strict mode: 是否对数据进行严格限制。默认为true。 + timezone: 指定某些受时区影响的函数的时区,如 strftime/alignment_timestamp/from_unixtime 等等,具体请查阅 [时区] 文档。如果不指定,则使用 "Asia/Shanghai" 时区。 5. 导入数据格式样例 diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md index f618e824ad9b9d..20315ad1080983 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md @@ -258,5 +258,6 @@ ); ## keyword + CREATE,ROUTINE,LOAD diff --git a/docs/documentation/en/administrator-guide/time-zone_EN.md b/docs/documentation/en/administrator-guide/time-zone_EN.md new file mode 100644 index 00000000000000..1ba66682d5105a --- /dev/null +++ b/docs/documentation/en/administrator-guide/time-zone_EN.md @@ -0,0 +1,72 @@ +# Time zone + +Doris supports multiple time zone settings + +## Noun Interpretation + +* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. +* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. + +## Basic concepts + +There are multiple time zone related parameters in Doris + +* `system_time_zone`: + +When the server starts, it will be set automatically according to the time zone set by the machine, which cannot be modified after setting. + +* `time_zone`: + +The current time zone of the server. It can be set at session level or global level. + +## Specific operations + +1. `SHOW VARIABLES LIKE '%time_zone%'` + + View the current time zone related configuration + +2. `SET time_zone = 'Asia/Shanghai'` + + This command can set the session level time zone, which will no longer take effect after disconnection. + +3. `SET global time_zone = 'Asia/Shanghai'` + + This command can set time zone parameters at the global level.
The FE will persist the parameters, and they will still take effect after the connection is disconnected. + +### Impact of time zone + +Time zone setting affects the display and storage of time zone sensitive values. + +It includes the values displayed by time functions such as `NOW()` or `CURTIME()`, as well as the time values in `SHOW LOAD` and `SHOW BACKENDS` statements. + +However, it does not affect the `LESS THAN VALUE` of the time-type partition column in the `CREATE TABLE` statement, nor does it affect the display of values stored as `DATE/DATETIME` type. + +Functions affected by time zone: + +* `FROM_UNIXTIME`: Given a UTC timestamp, return the date and time of the specified time zone, such as `FROM_UNIXTIME(0)`, which returns `1970-01-01 08:00:00` in the CST time zone. + +* `UNIX_TIMESTAMP`: Given a date and time of the specified time zone, return the UTC timestamp, such as `UNIX_TIMESTAMP('1970-01-01 08:00:00')` in the CST time zone, which returns `0`. + +* `CURTIME`: Returns the current time of the specified time zone. + +* `NOW`: Returns the current date and time of the specified time zone. + +* `CONVERT_TZ`: Converts a date and time from one specified time zone to another. + +## Restrictions + +Time zone values can be given in several formats, case-insensitive: + +* A string representing UTC offset, such as '+10:00' or '-6:00'. + +* Standard time zone formats, such as "Asia/Shanghai", "America/Los_Angeles" + +* Abbreviated time zone formats such as MET and CTT are not supported. Because the abbreviated time zone is ambiguous in different scenarios, it is not recommended to use it. + +* In order to be compatible with Doris and support CST abbreviated time zone, CST will be internally converted to "Asia/Shanghai", which is the Chinese standard time zone. + +## Time zone format list + +[List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + +[Edit on GitHub](https://github.com/apache/incubator-doris/blob/master/docs/documentation/en/administrator-guide/time-zone_EN.md) \ No newline at end of file diff --git a/fe/src/main/java/org/apache/doris/alter/Alter.java b/fe/src/main/java/org/apache/doris/alter/Alter.java index 2a3e7278e7f864..9c2ad2e0e7d97c 100644 --- a/fe/src/main/java/org/apache/doris/alter/Alter.java +++ b/fe/src/main/java/org/apache/doris/alter/Alter.java @@ -45,6 +45,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.UserException; import com.google.common.base.Preconditions; @@ -73,7 +74,7 @@ public void start() { clusterHandler.start(); } - public void processAlterTable(AlterTableStmt stmt) throws DdlException { + public void processAlterTable(AlterTableStmt stmt) throws UserException { TableName dbTableName = stmt.getTbl(); String dbName = dbTableName.getDb(); final String clusterName = stmt.getClusterName(); @@ -88,8 +89,9 @@ public void processAlterTable(AlterTableStmt stmt) throws DdlException { // schema change ops can appear several in one alter stmt without other alter ops entry boolean hasSchemaChange = false; - // rollup ops, if has, should appear one and only one entry - boolean hasRollup = false; + // rollup ops, if has, should appear one and only one add or drop rollup entry + boolean hasAddRollup = false; + boolean hasDropRollup = false; // partition ops, if has, should appear one and only one entry boolean hasPartition = false; // rename ops, if has, should appear one and only one entry @@ -99,18 +101,19 @@ public void processAlterTable(AlterTableStmt stmt) throws
DdlException { // check conflict alter ops first List alterClauses = stmt.getOps(); - // check conflict alter ops first - // if all alter clauses are DropPartitionClause, no need to call checkQuota. - boolean allDropPartitionClause = true; - + // check conflict alter ops first + + // if all alter clauses are DropPartitionClause or DropRollupClause, no need to check quota. + boolean allIsDropOps = true; for (AlterClause alterClause : alterClauses) { - if (!(alterClause instanceof DropPartitionClause)) { - allDropPartitionClause = false; + if (!(alterClause instanceof DropPartitionClause) + && !(alterClause instanceof DropRollupClause)) { + allIsDropOps = false; break; } } - if (!allDropPartitionClause) { + if (!allIsDropOps) { // check db quota db.checkQuota(); } @@ -122,30 +125,30 @@ public void processAlterTable(AlterTableStmt stmt) throws DdlException { || alterClause instanceof ModifyColumnClause || alterClause instanceof ReorderColumnsClause || alterClause instanceof ModifyTablePropertiesClause) - && !hasRollup && !hasPartition && !hasRename) { + && !hasAddRollup && !hasDropRollup && !hasPartition && !hasRename) { hasSchemaChange = true; - } else if (alterClause instanceof AddRollupClause && !hasSchemaChange && !hasRollup && !hasPartition - && !hasRename && !hasModifyProp) { - hasRollup = true; - } else if (alterClause instanceof DropRollupClause && !hasSchemaChange && !hasRollup && !hasPartition - && !hasRename && !hasModifyProp) { - hasRollup = true; - } else if (alterClause instanceof AddPartitionClause && !hasSchemaChange && !hasRollup && !hasPartition - && !hasRename && !hasModifyProp) { - hasPartition = true; - } else if (alterClause instanceof DropPartitionClause && !hasSchemaChange && !hasRollup && !hasPartition - && !hasRename && !hasModifyProp) { + } else if (alterClause instanceof AddRollupClause && !hasSchemaChange && !hasAddRollup && !hasDropRollup + && !hasPartition && !hasRename && !hasModifyProp) { + hasAddRollup = true; + } else if (alterClause instanceof DropRollupClause && !hasSchemaChange && !hasAddRollup && !hasDropRollup + && !hasPartition && !hasRename && !hasModifyProp) { + hasDropRollup = true; + } else if (alterClause instanceof AddPartitionClause && !hasSchemaChange && !hasAddRollup && !hasDropRollup + && !hasPartition && !hasRename && !hasModifyProp) { hasPartition = true; - } else if (alterClause instanceof ModifyPartitionClause && !hasSchemaChange && !hasRollup + } else if (alterClause instanceof DropPartitionClause && !hasSchemaChange && !hasAddRollup && !hasDropRollup && !hasPartition && !hasRename && !hasModifyProp) { hasPartition = true; + } else if (alterClause instanceof ModifyPartitionClause && !hasSchemaChange && !hasAddRollup + && !hasDropRollup && !hasPartition && !hasRename && !hasModifyProp) { + hasPartition = true; } else if ((alterClause instanceof TableRenameClause || alterClause instanceof RollupRenameClause || alterClause instanceof PartitionRenameClause || alterClause instanceof ColumnRenameClause) - && !hasSchemaChange && !hasRollup && !hasPartition && !hasRename && !hasModifyProp) { + && !hasSchemaChange && !hasAddRollup && !hasDropRollup && !hasPartition && !hasRename + && !hasModifyProp) { hasRename = true; - } else if (alterClause instanceof ModifyTablePropertiesClause && !hasSchemaChange && !hasRollup - && !hasPartition - && !hasRename && !hasModifyProp) { + } else if (alterClause instanceof ModifyTablePropertiesClause && !hasSchemaChange && !hasAddRollup + && !hasDropRollup && !hasPartition && !hasRename && !hasModifyProp) { 
hasModifyProp = true; } else { throw new DdlException("Conflicting alter clauses. see help for more information"); @@ -171,16 +174,11 @@ public void processAlterTable(AlterTableStmt stmt) throws DdlException { throw new DdlException("table with empty parition cannot do schema change. [" + tableName + "]"); } - if (olapTable.getState() == OlapTableState.SCHEMA_CHANGE - || olapTable.getState() == OlapTableState.RESTORE) { - throw new DdlException("Table[" + table.getName() + "]'s state[" + olapTable.getState() - + "] does not allow doing ALTER ops"); - // here we pass NORMAL and ROLLUP - // NORMAL: ok to do any alter ops - // ROLLUP: we allow user DROP a rollup index when it's under ROLLUP + if (olapTable.getState() != OlapTableState.NORMAL) { + throw new DdlException("Table[" + table.getName() + "]'s state is not NORMAL. Do not allow doing ALTER ops"); } - if (hasSchemaChange || hasModifyProp || hasRollup) { + if (hasSchemaChange || hasModifyProp || hasAddRollup) { // check if all tablets are healthy, and no tablet is in tablet scheduler boolean isStable = olapTable.isStable(Catalog.getCurrentSystemInfo(), Catalog.getCurrentCatalog().getTabletScheduler(), @@ -195,7 +193,7 @@ public void processAlterTable(AlterTableStmt stmt) throws DdlException { if (hasSchemaChange || hasModifyProp) { schemaChangeHandler.process(alterClauses, clusterName, db, olapTable); - } else if (hasRollup) { + } else if (hasAddRollup || hasDropRollup) { rollupHandler.process(alterClauses, clusterName, db, olapTable); } else if (hasPartition) { Preconditions.checkState(alterClauses.size() == 1); diff --git a/fe/src/main/java/org/apache/doris/alter/AlterHandler.java b/fe/src/main/java/org/apache/doris/alter/AlterHandler.java index 06849f41e6ef4b..f1fff2d57bf1b8 100644 --- a/fe/src/main/java/org/apache/doris/alter/AlterHandler.java +++ b/fe/src/main/java/org/apache/doris/alter/AlterHandler.java @@ -22,15 +22,25 @@ import org.apache.doris.analysis.CancelStmt; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Tablet; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.common.UserException; import org.apache.doris.common.util.Daemon; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.persist.ReplicaPersistInfo; import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AlterReplicaTask; import org.apache.doris.thrift.TTabletInfo; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -39,16 +49,21 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantLock; public abstract class AlterHandler extends Daemon { private static final Logger LOG = LogManager.getLogger(AlterHandler.class); // tableId -> AlterJob + @Deprecated protected ConcurrentHashMap alterJobs = new ConcurrentHashMap(); - + @Deprecated protected ConcurrentLinkedQueue finishedOrCancelledAlterJobs = new ConcurrentLinkedQueue(); + // queue of alter job v2 + protected ConcurrentMap alterJobsV2 = 
Maps.newConcurrentMap(); + /* * lock to perform atomic operations. * eg. @@ -70,19 +85,43 @@ public AlterHandler(String name) { super(name, 10000); } + protected void addAlterJobV2(AlterJobV2 alterJob) { + this.alterJobsV2.put(alterJob.getJobId(), alterJob); + LOG.info("add {} job {}", alterJob.getType(), alterJob.getJobId()); + } + + public AlterJobV2 getUnfinishedAlterJobV2(long tblId) { + for (AlterJobV2 alterJob : alterJobsV2.values()) { + if (alterJob.getTableId() == tblId + && alterJob.getJobState() != AlterJobV2.JobState.FINISHED + && alterJob.getJobState() != AlterJobV2.JobState.CANCELLED) { + return alterJob; + } + } + return null; + } + + public Map getAlterJobsV2() { + return this.alterJobsV2; + } + + @Deprecated protected void addAlterJob(AlterJob alterJob) { this.alterJobs.put(alterJob.getTableId(), alterJob); LOG.info("add {} job[{}]", alterJob.getType(), alterJob.getTableId()); } + @Deprecated public AlterJob getAlterJob(long tableId) { return this.alterJobs.get(tableId); } + @Deprecated public boolean hasUnfinishedAlterJob(long tableId) { return this.alterJobs.containsKey(tableId); } + @Deprecated public int getAlterJobNum(JobState state, long dbId) { int jobNum = 0; if (state == JobState.PENDING || state == JobState.RUNNING || state == JobState.FINISHING) { @@ -121,24 +160,33 @@ public int getAlterJobNum(JobState state, long dbId) { return jobNum; } + public Long getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState state, long dbId) { + return alterJobsV2.values().stream().filter(e -> e.getJobState() == state && e.getDbId() == dbId).count(); + } + + @Deprecated public Map unprotectedGetAlterJobs() { return this.alterJobs; } + @Deprecated public ConcurrentLinkedQueue unprotectedGetFinishedOrCancelledAlterJobs() { return this.finishedOrCancelledAlterJobs; } + @Deprecated public void addFinishedOrCancelledAlterJob(AlterJob alterJob) { alterJob.clear(); LOG.info("add {} job[{}] to finished or cancel list", alterJob.getType(), alterJob.getTableId()); this.finishedOrCancelledAlterJobs.add(alterJob); } + @Deprecated protected AlterJob removeAlterJob(long tableId) { return this.alterJobs.remove(tableId); } + @Deprecated public void removeDbAlterJob(long dbId) { Iterator> iterator = alterJobs.entrySet().iterator(); while (iterator.hasNext()) { @@ -154,6 +202,7 @@ public void removeDbAlterJob(long dbId) { * handle task report * reportVersion is used in schema change job. */ + @Deprecated public void handleFinishedReplica(AgentTask task, TTabletInfo finishTabletInfo, long reportVersion) throws MetaNotFoundException { long tableId = task.getTableId(); @@ -282,13 +331,14 @@ public void start() { * entry function. handle alter ops */ public abstract void process(List alterClauses, String clusterName, Database db, OlapTable olapTable) - throws DdlException; + throws UserException; /* * cancel alter ops */ public abstract void cancel(CancelStmt stmt) throws DdlException; + @Deprecated public Integer getAlterJobNumByState(JobState state) { int jobNum = 0; for (AlterJob alterJob : alterJobs.values()) { @@ -298,4 +348,95 @@ public Integer getAlterJobNumByState(JobState state) { } return jobNum; } + + /* + * Handle the finish report of alter task. + * If task is success, which means the history data before specified version has been transformed successfully. + * So here we should modify the replica's version. + * We assume that the specified version is X. + * Case 1: + * After alter table process starts, there is no new load job being submitted. 
So the new replica + * should be with version (1-0). So we just modify the replica's version to partition's visible version, which is X. + * Case 2: + * After alter table process starts, there are some load job being processed. + * Case 2.1: + * Only one new load job, and it failed on this replica. so the replica's last failed version should be X + 1 + * and version is still 1. We should modify the replica's version to (last failed version - 1) + * Case 2.2 + * There are new load jobs after alter task, and at least one of them is succeed on this replica. + * So the replica's version should be larger than X. So we don't need to modify the replica version + * because its already looks like normal. + */ + public void handleFinishAlterTask(AlterReplicaTask task) throws MetaNotFoundException { + Database db = Catalog.getCurrentCatalog().getDb(task.getDbId()); + if (db == null) { + throw new MetaNotFoundException("database " + task.getDbId() + " does not exist"); + } + + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(task.getTableId()); + if (tbl == null) { + throw new MetaNotFoundException("tbl " + task.getTableId() + " does not exist"); + } + Partition partition = tbl.getPartition(task.getPartitionId()); + if (partition == null) { + throw new MetaNotFoundException("partition " + task.getPartitionId() + " does not exist"); + } + MaterializedIndex index = partition.getIndex(task.getIndexId()); + if (index == null) { + throw new MetaNotFoundException("index " + task.getIndexId() + " does not exist"); + } + Tablet tablet = index.getTablet(task.getTabletId()); + Preconditions.checkNotNull(tablet, task.getTabletId()); + Replica replica = tablet.getReplicaById(task.getNewReplicaId()); + if (replica == null) { + throw new MetaNotFoundException("replica " + task.getNewReplicaId() + " does not exist"); + } + + LOG.info("before handle alter task tablet {}, replica: {}, task version: {}-{}", + task.getSignature(), replica, task.getVersion(), task.getVersionHash()); + boolean versionChanged = false; + if (replica.getVersion() > task.getVersion()) { + // Case 2.2, do nothing + } else { + if (replica.getLastFailedVersion() > task.getVersion()) { + // Case 2.1 + replica.updateVersionInfo(task.getVersion(), task.getVersionHash(), replica.getDataSize(), replica.getRowCount()); + versionChanged = true; + } else { + // Case 1 + Preconditions.checkState(replica.getLastFailedVersion() == -1, replica.getLastFailedVersion()); + replica.updateVersionInfo(task.getVersion(), task.getVersionHash(), replica.getDataSize(), replica.getRowCount()); + versionChanged = true; + } + } + + if (versionChanged) { + ReplicaPersistInfo info = ReplicaPersistInfo.createForClone(task.getDbId(), task.getTableId(), + task.getPartitionId(), task.getIndexId(), task.getTabletId(), task.getBackendId(), + replica.getId(), replica.getVersion(), replica.getVersionHash(), -1, + replica.getDataSize(), replica.getRowCount(), + replica.getLastFailedVersion(), replica.getLastFailedVersionHash(), + replica.getLastSuccessVersion(), replica.getLastSuccessVersionHash()); + Catalog.getInstance().getEditLog().logUpdateReplica(info); + } + + LOG.info("after handle alter task tablet: {}, replica: {}", task.getSignature(), replica); + } finally { + db.writeUnlock(); + } + } + + // replay the alter job v2 + public void replayAlterJobV2(AlterJobV2 alterJob) { + AlterJobV2 existingJob = alterJobsV2.get(alterJob.getJobId()); + if (existingJob == null) { + // This is the first time to replay the alter job, so just using the replayed alterJob to 
call replay(); + alterJob.replay(alterJob); + alterJobsV2.put(alterJob.getJobId(), alterJob); + } else { + existingJob.replay(alterJob); + } + } } diff --git a/fe/src/main/java/org/apache/doris/alter/AlterJobV2.java b/fe/src/main/java/org/apache/doris/alter/AlterJobV2.java new file mode 100644 index 00000000000000..8233f38c0540c5 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/alter/AlterJobV2.java @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.alter; + +import org.apache.doris.common.io.Text; +import org.apache.doris.common.io.Writable; + +import com.google.common.base.Preconditions; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; + +/* + * Version 2 of AlterJob, for replacing the old version of AlterJob. + * This base class of RollupJob and SchemaChangeJob + */ +public abstract class AlterJobV2 implements Writable { + private static final Logger LOG = LogManager.getLogger(AlterJobV2.class); + + public enum JobState { + PENDING, // Job is created + WAITING_TXN, // New replicas are created and Shadow catalog object is visible for incoming txns, + // waiting for previous txns to be finished + RUNNING, // alter tasks are sent to BE, and waiting for them finished. 
+ FINISHED, // job is done + CANCELLED; // job is cancelled(failed or be cancelled by user) + + public boolean isFinalState() { + return this == JobState.FINISHED || this == JobState.CANCELLED; + } + } + + public enum JobType { + ROLLUP, SCHEMA_CHANGE + } + + protected JobType type; + protected long jobId; + protected JobState jobState; + + protected long dbId; + protected long tableId; + protected String tableName; + + protected String errMsg = ""; + protected long createTimeMs = -1; + protected long finishedTimeMs = -1; + protected long timeoutMs = -1; + + public AlterJobV2(long jobId, JobType jobType, long dbId, long tableId, String tableName, long timeoutMs) { + this.jobId = jobId; + this.type = jobType; + this.dbId = dbId; + this.tableId = tableId; + this.tableName = tableName; + this.timeoutMs = timeoutMs; + + this.createTimeMs = System.currentTimeMillis(); + this.jobState = JobState.PENDING; + } + + protected AlterJobV2(JobType type) { + this.type = type; + } + + public long getJobId() { + return jobId; + } + + public JobState getJobState() { + return jobState; + } + + public JobType getType() { + return type; + } + + public long getDbId() { + return dbId; + } + + public long getTableId() { + return tableId; + } + + public String getTableName() { + return tableName; + } + + private boolean isTimeout() { + return System.currentTimeMillis() - createTimeMs > timeoutMs; + } + + public boolean isDone() { + return jobState.isFinalState(); + } + + /* + * The keyword 'synchronized' only protects 2 methods: + * run() and cancel() + * Only these 2 methods can be visited by different thread(internal working thread and user connection thread) + * So using 'synchronized' to make sure only one thread can run the job at one time. + * + * lock order: + * synchronized + * db lock + */ + public synchronized void run() { + if (isTimeout()) { + cancelImpl("Timeout"); + return; + } + + switch (jobState) { + case PENDING: + runPendingJob(); + break; + case WAITING_TXN: + runWaitingTxnJob(); + break; + case RUNNING: + runRunningJob(); + break; + default: + break; + } + } + + public final boolean cancel(String errMsg) { + synchronized (this) { + return cancelImpl(errMsg); + } + } + + protected abstract void runPendingJob(); + + protected abstract void runWaitingTxnJob(); + + protected abstract void runRunningJob(); + + protected abstract boolean cancelImpl(String errMsg); + + protected abstract void getInfo(List> infos); + + public abstract void replay(AlterJobV2 replayedJob); + + public static AlterJobV2 read(DataInput in) throws IOException { + JobType type = JobType.valueOf(Text.readString(in)); + switch (type) { + case ROLLUP: + return RollupJobV2.read(in); + case SCHEMA_CHANGE: + return SchemaChangeJobV2.read(in); + default: + Preconditions.checkState(false); + return null; + } + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, type.name()); + Text.writeString(out, jobState.name()); + + out.writeLong(jobId); + out.writeLong(dbId); + out.writeLong(tableId); + Text.writeString(out, tableName); + + Text.writeString(out, errMsg); + out.writeLong(createTimeMs); + out.writeLong(finishedTimeMs); + out.writeLong(timeoutMs); + } + + @Override + public void readFields(DataInput in) throws IOException { + // read common members as write in AlterJobV2.write(). 
+ // except 'type' member, which is read in AlterJobV2.read() + jobState = JobState.valueOf(Text.readString(in)); + + jobId = in.readLong(); + dbId = in.readLong(); + tableId = in.readLong(); + tableName = Text.readString(in); + + errMsg = Text.readString(in); + createTimeMs = in.readLong(); + finishedTimeMs = in.readLong(); + timeoutMs = in.readLong(); + } +} diff --git a/fe/src/main/java/org/apache/doris/alter/RollupHandler.java b/fe/src/main/java/org/apache/doris/alter/RollupHandler.java index ca4900da06daec..bd796ac874740d 100644 --- a/fe/src/main/java/org/apache/doris/alter/RollupHandler.java +++ b/fe/src/main/java/org/apache/doris/alter/RollupHandler.java @@ -33,30 +33,22 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; -import org.apache.doris.catalog.Partition.PartitionState; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; -import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.util.ListComparator; -import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.Util; +import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.persist.DropInfo; import org.apache.doris.persist.EditLog; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.task.AgentBatchTask; -import org.apache.doris.task.AgentTaskExecutor; -import org.apache.doris.task.DropReplicaTask; -import org.apache.doris.thrift.TKeysType; -import org.apache.doris.thrift.TResourceInfo; import org.apache.doris.thrift.TStorageMedium; -import org.apache.doris.thrift.TStorageType; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -68,11 +60,15 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; +/* + * RollupHandler is responsible for ADD/DROP rollup. + */ public class RollupHandler extends AlterHandler { private static final Logger LOG = LogManager.getLogger(RollupHandler.class); @@ -80,17 +76,23 @@ public RollupHandler() { super("rollup"); } - private void processAddRollup(AddRollupClause alterClause, Database db, OlapTable olapTable, boolean isRestore) + /* + * Handle the Add Rollup request. + * 3 main steps: + * 1. Validate the request. + * 2. Create RollupJob with rollup index + * All replicas of the rollup index will be created in meta and added to TabletInvertedIndex + * 3. Set table's state to ROLLUP. 
+ */ + private void processAddRollup(AddRollupClause alterClause, Database db, OlapTable olapTable) throws DdlException { - if (!isRestore) { - // table is under rollup or has a finishing alter job - if (olapTable.getState() == OlapTableState.ROLLUP || this.hasUnfinishedAlterJob(olapTable.getId())) { - throw new DdlException("Table[" + olapTable.getName() + "]'s is under ROLLUP"); - } - // up to here, table's state can only be NORMAL - Preconditions.checkState(olapTable.getState() == OlapTableState.NORMAL, olapTable.getState().name()); + // table is under rollup or has a finishing alter job + if (olapTable.getState() == OlapTableState.ROLLUP || this.hasUnfinishedAlterJob(olapTable.getId())) { + throw new DdlException("Table[" + olapTable.getName() + "]'s is under ROLLUP"); } + // up to here, table's state can only be NORMAL + Preconditions.checkState(olapTable.getState() == OlapTableState.NORMAL, olapTable.getState().name()); String rollupIndexName = alterClause.getRollupName(); String baseIndexName = alterClause.getBaseRollupName(); @@ -120,7 +122,7 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl Preconditions.checkState(baseIndex.getState() == IndexState.NORMAL, baseIndex.getState().name()); } - // 3 check if rollup columns are valid + // 3. check if rollup columns are valid // a. all columns should exist in base rollup schema // b. value after key // c. if rollup contains REPLACE column, all keys on base index should be included. @@ -184,7 +186,7 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl if (alterClause.getDupKeys() == null || alterClause.getDupKeys().isEmpty()) { // user does not specify duplicate key for rollup, // use base table's duplicate key. - // so we should check if rollup column contains all base table's duplicate key. + // so we should check if rollup columns contains all base table's duplicate key. List baseIdxCols = olapTable.getSchemaByIndexId(baseIndexId); Set baseIdxKeyColNames = Sets.newHashSet(); for (Column baseCol : baseIdxCols) { @@ -210,6 +212,7 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl } } + // check (a)(b) for (String columnName : rollupColumnNames) { Column oneColumn = olapTable.getColumn(columnName); if (oneColumn == null) { @@ -230,10 +233,10 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl throw new DdlException("No key column is found"); } } else { - // rollup have different dup keys with base table + // user specify the duplicate keys for rollup index List dupKeys = alterClause.getDupKeys(); if (dupKeys.size() > rollupColumnNames.size()) { - throw new DdlException("Duplicate key should be the prefix of rollup columns. Exceeded"); + throw new DdlException("Num of duplicate keys should less than or equal to num of rollup columns."); } for (int i = 0; i < rollupColumnNames.size(); i++) { @@ -242,7 +245,7 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl if (i < dupKeys.size()) { String dupKeyName = dupKeys.get(i); if (!rollupColName.equalsIgnoreCase(dupKeyName)) { - throw new DdlException("Duplicate key should be the prefix of rollup columns"); + throw new DdlException("Duplicate keys should be the prefix of rollup columns"); } isKey = true; } @@ -270,82 +273,46 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl } } - // 4. do create things - // 4.1 get storage type. 
default is COLUMN - - TKeysType rollupKeysType; - if (keysType == KeysType.DUP_KEYS) { - rollupKeysType = TKeysType.DUP_KEYS; - } else if (keysType == KeysType.UNIQUE_KEYS) { - rollupKeysType = TKeysType.UNIQUE_KEYS; - } else { - rollupKeysType = TKeysType.AGG_KEYS; - } + // assign rollup index's key type, same as base index's + KeysType rollupKeysType = keysType; - Map properties = alterClause.getProperties(); - TStorageType rollupStorageType = null; - try { - rollupStorageType = PropertyAnalyzer.analyzeStorageType(properties); - } catch (AnalysisException e) { - throw new DdlException(e.getMessage()); - } - // check storage type if has null column - boolean hasNullColumn = false; - for (Column column : rollupSchema) { - if (column.isAllowNull()) { - hasNullColumn = true; - break; - } - } - if (hasNullColumn && rollupStorageType != TStorageType.COLUMN) { - throw new DdlException("Only column rollup support null columns"); - } - - // 4.2 get rollup schema hash - int schemaVersion = 0; - try { - schemaVersion = PropertyAnalyzer.analyzeSchemaVersion(properties); - } catch (AnalysisException e) { - throw new DdlException(e.getMessage()); - } - int rollupSchemaHash = Util.schemaHash(schemaVersion, rollupSchema, olapTable.getCopiedBfColumns(), + // get rollup schema hash + int rollupSchemaHash = Util.schemaHash(0 /* init schema version */, rollupSchema, olapTable.getCopiedBfColumns(), olapTable.getBfFpp()); - // 4.3 get short key column count + // get short key column count + Map properties = alterClause.getProperties(); short rollupShortKeyColumnCount = Catalog.calcShortKeyColumnCount(rollupSchema, properties); + + // get timeout + long timeoutMs = alterClause.getTimeoutSecond() * 1000; - // 4.4 get user resource info - TResourceInfo resourceInfo = null; - if (ConnectContext.get() != null) { - resourceInfo = ConnectContext.get().toResourceCtx(); - } - - // 4.5 create rollup job + // 4. create rollup job long dbId = db.getId(); long tableId = olapTable.getId(); int baseSchemaHash = olapTable.getSchemaHashByIndexId(baseIndexId); - Catalog catalog = Catalog.getInstance(); + Catalog catalog = Catalog.getCurrentCatalog(); + long jobId = catalog.getNextId(); long rollupIndexId = catalog.getNextId(); - long transactionId = Catalog.getCurrentGlobalTransactionMgr().getTransactionIDGenerator().getNextTransactionId(); - RollupJob rollupJob = new RollupJob(dbId, tableId, baseIndexId, rollupIndexId, - baseIndexName, rollupIndexName, rollupSchema, - baseSchemaHash, rollupSchemaHash, rollupStorageType, - rollupShortKeyColumnCount, resourceInfo, rollupKeysType, transactionId); - + RollupJobV2 rollupJob = new RollupJobV2(jobId, dbId, tableId, olapTable.getName(), timeoutMs, + baseIndexId, rollupIndexId, baseIndexName, rollupIndexName, + rollupSchema, baseSchemaHash, rollupSchemaHash, + rollupKeysType, rollupShortKeyColumnCount); + + /* + * create all rollup indexes. and set state. 
+ * After setting, Tables' state will be ROLLUP + */ for (Partition partition : olapTable.getPartitions()) { long partitionId = partition.getId(); TStorageMedium medium = olapTable.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); - MaterializedIndex rollupIndex = new MaterializedIndex(rollupIndexId, IndexState.ROLLUP); - if (isRestore) { - rollupIndex.setState(IndexState.NORMAL); - } + // index state is SHADOW + MaterializedIndex rollupIndex = new MaterializedIndex(rollupIndexId, IndexState.SHADOW); MaterializedIndex baseIndex = partition.getIndex(baseIndexId); - TabletMeta rollupTabletMeta = new TabletMeta(dbId, tableId, partitionId, rollupIndexId, - rollupSchemaHash, medium); - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + TabletMeta rollupTabletMeta = new TabletMeta(dbId, tableId, partitionId, rollupIndexId, rollupSchemaHash, medium); for (Tablet baseTablet : baseIndex.getTablets()) { long baseTabletId = baseTablet.getId(); long rollupTabletId = catalog.getNextId(); @@ -353,10 +320,9 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl Tablet newTablet = new Tablet(rollupTabletId); rollupIndex.addTablet(newTablet, rollupTabletMeta); - rollupJob.setTabletIdMap(partitionId, rollupTabletId, baseTabletId); + rollupJob.addTabletIdMap(partitionId, rollupTabletId, baseTabletId); List baseReplicas = baseTablet.getReplicas(); - int replicaNum = 0; for (Replica baseReplica : baseReplicas) { long rollupReplicaId = catalog.getNextId(); long backendId = baseReplica.getBackendId(); @@ -367,68 +333,35 @@ private void processAddRollup(AddRollupClause alterClause, Database db, OlapTabl continue; } Preconditions.checkState(baseReplica.getState() == ReplicaState.NORMAL); - ++replicaNum; - // the new replica's init version is -1 until finished history rollup - Replica rollupReplica = new Replica(rollupReplicaId, backendId, rollupSchemaHash, - ReplicaState.ROLLUP); - // new replica's last failed version should be set to the partition's next version - 1, - // if all go well, the last failed version will be overwritten when rollup task finished and update - // replica version info. - // If not set, there is no other way to know that this replica has failed version. - rollupReplica.updateVersionInfo(rollupReplica.getVersion(), rollupReplica.getVersionHash(), - partition.getCommittedVersion(), partition.getCommittedVersionHash(), - rollupReplica.getLastSuccessVersion(), rollupReplica.getLastSuccessVersionHash()); - if (isRestore) { - rollupReplica.setState(ReplicaState.NORMAL); - } - // yiguolei: the rollup tablet's replica num maybe less than base tablet's replica num + // replica's init state is ALTER, so that tablet report process will ignore its report + Replica rollupReplica = new Replica(rollupReplicaId, backendId, ReplicaState.ALTER, + Partition.PARTITION_INIT_VERSION, Partition.PARTITION_INIT_VERSION_HASH, + rollupSchemaHash); newTablet.addReplica(rollupReplica); } // end for baseReplica - - if (replicaNum < replicationNum / 2 + 1) { - String errMsg = "Tablet[" + baseTabletId + "] does not have enough replica. 
[" - + replicaNum + "/" + replicationNum + "]"; - LOG.warn(errMsg); - throw new DdlException(errMsg); - } } // end for baseTablets - if (isRestore) { - partition.createRollupIndex(rollupIndex); - } else { - rollupJob.addRollupIndex(partitionId, rollupIndex); - } + rollupJob.addRollupIndex(partitionId, rollupIndex); - LOG.debug("create rollup index[{}] based on index[{}] in partition[{}], restore: {}", - rollupIndexId, baseIndexId, partitionId, isRestore); + LOG.debug("create rollup index {} based on index {} in partition {}", + rollupIndexId, baseIndexId, partitionId); } // end for partitions - if (isRestore) { - olapTable.setIndexSchemaInfo(rollupIndexId, rollupIndexName, rollupSchema, 0, - rollupSchemaHash, rollupShortKeyColumnCount); - olapTable.setStorageTypeToIndex(rollupIndexId, rollupStorageType); - } else { - // update partition and table state - for (Partition partition : olapTable.getPartitions()) { - partition.setState(PartitionState.ROLLUP); - } - olapTable.setState(OlapTableState.ROLLUP); + // update table state + olapTable.setState(OlapTableState.ROLLUP); - addAlterJob(rollupJob); + addAlterJobV2(rollupJob); - // log rollup operation - EditLog editLog = catalog.getEditLog(); - editLog.logStartRollup(rollupJob); - LOG.debug("sync start create rollup index[{}] in table[{}]", rollupIndexId, tableId); - } + // log rollup operation + catalog.getEditLog().logAlterJob(rollupJob); + LOG.info("finished to create rollup job: {}", rollupJob.getJobId()); } public void processDropRollup(DropRollupClause alterClause, Database db, OlapTable olapTable) throws DdlException { - // make sure we got db write lock here - // up to here, table's state can be NORMAL or ROLLUP - Preconditions.checkState(olapTable.getState() == OlapTableState.NORMAL - || olapTable.getState() == OlapTableState.ROLLUP, olapTable.getState().name()); + // make sure we got db write lock here. + // up to here, table's state can only be NORMAL. + Preconditions.checkState(olapTable.getState() == OlapTableState.NORMAL, olapTable.getState().name()); String rollupIndexName = alterClause.getRollupName(); if (rollupIndexName.equals(olapTable.getName())) { @@ -438,74 +371,37 @@ public void processDropRollup(DropRollupClause alterClause, Database db, OlapTab long dbId = db.getId(); long tableId = olapTable.getId(); if (!olapTable.hasMaterializedIndex(rollupIndexName)) { - // when rollup job is unfinished, rollup index is not added to the table - AlterJob alterJob = getAlterJob(tableId); - if (alterJob == null || !((RollupJob) alterJob).getRollupIndexName().equals(rollupIndexName)) { - throw new DdlException("Rollup index[" + rollupIndexName + "] does not exist in table[" - + olapTable.getName() + "]"); - } - - // cancel rollup job - cancelInternal(alterJob, olapTable, "rollup index is dropped"); - return; + throw new DdlException("Rollup index[" + rollupIndexName + "] does not exist in table[" + + olapTable.getName() + "]"); } - - // 1. 
check if any rollup job is based on this index - AlterJob alterJob = null; - if ((alterJob = checkIfAnyRollupBasedOn(tableId, rollupIndexName)) != null) { - throw new DdlException("Rollup index[" + ((RollupJob) alterJob).getRollupIndexName() - + "] is doing rollup based on this index[" + rollupIndexName + "] and not finished yet."); - } - - // if the index is a during rollup and in finishing state, then it could not be dropped - // because the finishing state could not be roll back, it is very difficult - alterJob = getAlterJob(tableId); - if (alterJob != null && ((RollupJob) alterJob).getRollupIndexName().equals(rollupIndexName) - && alterJob.getState() == JobState.FINISHING) { - throw new DdlException("Rollup index[" + rollupIndexName + "] in table[" - + olapTable.getName() + "] is in finishing state, waiting it to finish"); - } - // drop rollup for each partition long rollupIndexId = olapTable.getIndexIdByName(rollupIndexName); int rollupSchemaHash = olapTable.getSchemaHashByIndexId(rollupIndexId); Preconditions.checkState(rollupSchemaHash != -1); - Preconditions.checkState(olapTable.getState() == OlapTableState.NORMAL); + // drop rollup for each partition. + // also remove tablets from inverted index. TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); - AgentBatchTask batchTask = new AgentBatchTask(); for (Partition partition : olapTable.getPartitions()) { MaterializedIndex rollupIndex = partition.getIndex(rollupIndexId); Preconditions.checkNotNull(rollupIndex); - // 2. delete rollup index + // delete rollup index partition.deleteRollupIndex(rollupIndexId); - // 3. send DropReplicaTask + // remove tablets from inverted index for (Tablet tablet : rollupIndex.getTablets()) { long tabletId = tablet.getId(); - List replicas = tablet.getReplicas(); - for (Replica replica : replicas) { - long backendId = replica.getBackendId(); - DropReplicaTask dropTask = new DropReplicaTask(backendId, tabletId, rollupSchemaHash); - batchTask.addTask(dropTask); - } // end for replicas - - // remove from inverted index invertedIndex.deleteTablet(tabletId); - } // end for tablets - } // end for partitions + } + } olapTable.deleteIndexInfo(rollupIndexName); - AgentTaskExecutor.submit(batchTask); - // 5. 
log drop rollup operation + // log drop rollup operation EditLog editLog = Catalog.getInstance().getEditLog(); DropInfo dropInfo = new DropInfo(dbId, tableId, rollupIndexId); editLog.logDropRollup(dropInfo); - LOG.debug("log drop rollup index[{}] finished in table[{}]", dropInfo.getIndexId(), - dropInfo.getTableId()); - LOG.info("finished drop rollup index[{}] in table[{}]", rollupIndexName, olapTable.getName()); } @@ -534,42 +430,43 @@ public void replayDropRollup(DropInfo dropInfo, Catalog catalog) { } finally { db.writeUnlock(); } + LOG.info("replay drop rollup {}", dropInfo.getIndexId()); } - public void removeReplicaRelatedTask(long tableId, long partitionId, long indexId, long tabletId, long backendId) { - // make sure to get db writeLock - AlterJob alterJob = checkIfAnyRollupBasedOn(tableId, indexId); - if (alterJob != null) { - alterJob.removeReplicaRelatedTask(partitionId, tabletId, -1L, backendId); - } - } - - // this is for handle delete replica op - private AlterJob checkIfAnyRollupBasedOn(long tableId, long baseIndexId) { - AlterJob alterJob = this.alterJobs.get(tableId); - if (alterJob != null && ((RollupJob) alterJob).getBaseIndexId() == baseIndexId) { - return alterJob; - } - return null; + @Override + protected void runOneCycle() { + super.runOneCycle(); + runOldAlterJob(); + runAlterJobV2(); } - // this is for drop rollup op - private AlterJob checkIfAnyRollupBasedOn(long tableId, String baseIndexName) { - AlterJob alterJob = this.alterJobs.get(tableId); - if (alterJob != null && ((RollupJob) alterJob).getBaseIndexName().equals(baseIndexName)) { - return alterJob; + private void runAlterJobV2() { + Iterator> iter = alterJobsV2.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + AlterJobV2 alterJob = entry.getValue(); + if (alterJob.isDone()) { + continue; + } + alterJob.run(); } - return null; } - @Override - protected void runOneCycle() { - super.runOneCycle(); + @Deprecated + private void runOldAlterJob() { List cancelledJobs = Lists.newArrayList(); List finishedJobs = Lists.newArrayList(); for (AlterJob alterJob : alterJobs.values()) { RollupJob rollupJob = (RollupJob) alterJob; + if (rollupJob.getState() != JobState.FINISHING + && rollupJob.getState() != JobState.FINISHED + && rollupJob.getState() != JobState.CANCELLED) { + // cancel the old alter table job + cancelledJobs.add(rollupJob); + continue; + } + if (rollupJob.getTransactionId() < 0) { // it means this is an old type job and current version is real time load version // then kill this job @@ -674,8 +571,36 @@ protected void runOneCycle() { @Override public List> getAlterJobInfosByDb(Database db) { List> rollupJobInfos = new LinkedList>(); - List jobs = Lists.newArrayList(); + getOldAlterJobInfos(db, rollupJobInfos); + getAlterJobV2Infos(db, rollupJobInfos); + + // sort by + // "JobId", "TableName", "CreateTime", "FinishedTime", "BaseIndexName", "RollupIndexName" + ListComparator> comparator = new ListComparator>(0, 1, 2, 3, 4, 5); + Collections.sort(rollupJobInfos, comparator); + + return rollupJobInfos; + } + + private void getAlterJobV2Infos(Database db, List> rollupJobInfos) { + ConnectContext ctx = ConnectContext.get(); + for (AlterJobV2 alterJob : alterJobsV2.values()) { + if (alterJob.getDbId() != db.getId()) { + continue; + } + if (ctx != null) { + if (!Catalog.getCurrentCatalog().getAuth().checkTblPriv(ctx, db.getFullName(), alterJob.getTableName(), PrivPredicate.ALTER)) { + continue; + } + } + alterJob.getInfo(rollupJobInfos); + } + } + + @Deprecated + private void 
getOldAlterJobInfos(Database db, List> rollupJobInfos) { + List jobs = Lists.newArrayList(); // lock to perform atomically lock(); try { @@ -707,26 +632,14 @@ public List> getAlterJobInfosByDb(Database db) { } finally { db.readUnlock(); } - - // sort by - // "JobId", "TableName", "CreateTime", "FinishedTime", "BaseIndexName", "RollupIndexName" - ListComparator> comparator = new ListComparator>(0, 1, 2, 3, 4, 5); - Collections.sort(rollupJobInfos, comparator); - - return rollupJobInfos; } @Override public void process(List alterClauses, String clusterName, Database db, OlapTable olapTable) throws DdlException { - process(alterClauses, db, olapTable, false); - } - - public void process(List alterClauses, Database db, OlapTable olapTable, boolean isRestore) - throws DdlException { for (AlterClause alterClause : alterClauses) { if (alterClause instanceof AddRollupClause) { - processAddRollup((AddRollupClause) alterClause, db, olapTable, isRestore); + processAddRollup((AddRollupClause) alterClause, db, olapTable); } else if (alterClause instanceof DropRollupClause) { processDropRollup((DropRollupClause) alterClause, db, olapTable); } else { @@ -750,6 +663,7 @@ public void cancel(CancelStmt stmt) throws DdlException { } AlterJob rollupJob = null; + AlterJobV2 rollupJobV2 = null; db.writeLock(); try { Table table = db.getTable(tableName); @@ -765,18 +679,33 @@ public void cancel(CancelStmt stmt) throws DdlException { + "Use 'ALTER TABLE DROP ROLLUP' if you want to."); } - rollupJob = getAlterJob(olapTable.getId()); - Preconditions.checkNotNull(rollupJob); - - if (rollupJob.getState() == JobState.FINISHED || rollupJob.getState() == JobState.CANCELLED) { - throw new DdlException("job is already " + rollupJob.getState().name() + ", can not cancel it"); + // find from new alter jobs first + rollupJobV2 = getUnfinishedAlterJobV2(olapTable.getId()); + if (rollupJobV2 == null) { + rollupJob = getAlterJob(olapTable.getId()); + Preconditions.checkNotNull(rollupJob, olapTable.getId()); + if (rollupJob.getState() == JobState.FINISHED + || rollupJob.getState() == JobState.FINISHING + || rollupJob.getState() == JobState.CANCELLED) { + throw new DdlException("job is already " + rollupJob.getState().name() + ", can not cancel it"); + } + rollupJob.cancel(olapTable, "user cancelled"); } - - rollupJob.cancel(olapTable, "user cancelled"); } finally { db.writeUnlock(); } - jobDone(rollupJob); + // alter job v2's cancel must be called outside the database lock + if (rollupJobV2 != null) { + if (!rollupJobV2.cancel("user cancelled")) { + throw new DdlException("Job can not be cancelled. 
State: " + rollupJobV2.getJobState()); + } + return; + } + + // handle old alter job + if (rollupJob != null && rollupJob.getState() == JobState.CANCELLED) { + jobDone(rollupJob); + } } } diff --git a/fe/src/main/java/org/apache/doris/alter/RollupJob.java b/fe/src/main/java/org/apache/doris/alter/RollupJob.java index b135eea2ebe226..9b296e8084e464 100644 --- a/fe/src/main/java/org/apache/doris/alter/RollupJob.java +++ b/fe/src/main/java/org/apache/doris/alter/RollupJob.java @@ -31,6 +31,7 @@ import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; +import org.apache.doris.common.Config; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.io.Text; @@ -990,7 +991,6 @@ public void getJobInfo(List> jobInfos, OlapTable tbl) { // transaction id jobInfo.add(transactionId); - // job state jobInfo.add(state.name()); @@ -1006,6 +1006,7 @@ public void getJobInfo(List> jobInfos, OlapTable tbl) { } else { jobInfo.add("N/A"); } + jobInfo.add(Config.alter_table_timeout_second); jobInfos.add(jobInfo); } diff --git a/fe/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/src/main/java/org/apache/doris/alter/RollupJobV2.java new file mode 100644 index 00000000000000..838cc4845fa220 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -0,0 +1,759 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.alter; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexState; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.catalog.TabletMeta; +import org.apache.doris.common.Config; +import org.apache.doris.common.FeConstants; +import org.apache.doris.common.MarkedCountDownLatch; +import org.apache.doris.common.io.Text; +import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.AlterReplicaTask; +import org.apache.doris.task.CreateReplicaTask; +import org.apache.doris.thrift.TStorageMedium; +import org.apache.doris.thrift.TStorageType; +import org.apache.doris.thrift.TTaskType; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; + +/* + * Author: Chenmingyu + * Date: Jul 8, 2019 + */ + +/* + * Version 2 of RollupJob. + * This is for replacing the old RollupJob + * https://github.com/apache/incubator-doris/issues/1429 + */ +public class RollupJobV2 extends AlterJobV2 { + private static final Logger LOG = LogManager.getLogger(RollupJobV2.class); + + // partition id -> (rollup tablet id -> base tablet id) + private Map> partitionIdToBaseRollupTabletIdMap = Maps.newHashMap(); + private Map partitionIdToRollupIndex = Maps.newHashMap(); + + // rollup and base schema info + private long baseIndexId; + private long rollupIndexId; + private String baseIndexName; + private String rollupIndexName; + + private List rollupSchema = Lists.newArrayList(); + private int baseSchemaHash; + private int rollupSchemaHash; + + private KeysType rollupKeysType; + private short rollupShortKeyColumnCount; + + // The rollup job will wait all transactions before this txn id finished, then send the rollup tasks. 
+ protected long watershedTxnId = -1; + + // save all create rollup tasks + private AgentBatchTask rollupBatchTask = new AgentBatchTask(); + + public RollupJobV2(long jobId, long dbId, long tableId, String tableName, long timeoutMs, + long baseIndexId, long rollupIndexId, String baseIndexName, String rollupIndexName, + List rollupSchema, int baseSchemaHash, int rollupSchemaHash, + KeysType rollupKeysType, short rollupShortKeyColumnCount) { + super(jobId, JobType.ROLLUP, dbId, tableId, tableName, timeoutMs); + + this.baseIndexId = baseIndexId; + this.rollupIndexId = rollupIndexId; + this.baseIndexName = baseIndexName; + this.rollupIndexName = rollupIndexName; + + this.rollupSchema = rollupSchema; + this.baseSchemaHash = baseSchemaHash; + this.rollupSchemaHash = rollupSchemaHash; + this.rollupKeysType = rollupKeysType; + this.rollupShortKeyColumnCount = rollupShortKeyColumnCount; + } + + private RollupJobV2() { + super(JobType.ROLLUP); + } + + public void addTabletIdMap(long partitionId, long rollupTabletId, long baseTabletId) { + Map tabletIdMap = partitionIdToBaseRollupTabletIdMap.get(partitionId); + if (tabletIdMap == null) { + tabletIdMap = Maps.newHashMap(); + partitionIdToBaseRollupTabletIdMap.put(partitionId, tabletIdMap); + } + tabletIdMap.put(rollupTabletId, baseTabletId); + } + + public void addRollupIndex(long partitionId, MaterializedIndex rollupIndex) { + this.partitionIdToRollupIndex.put(partitionId, rollupIndex); + } + + /* + * runPendingJob(): + * 1. Create all rollup replicas and wait them finished. + * 2. After creating done, add this shadow rollup index to catalog, user can not see this + * rollup, but internal load process will generate data for this rollup index. + * 3. Get a new transaction id, then set job's state to WAITING_TXN + */ + @Override + protected void runPendingJob() { + Preconditions.checkState(jobState == JobState.PENDING, jobState); + + LOG.info("begin to send create rollup replica tasks. job: {}", jobId); + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + cancelImpl("Database " + dbId + " does not exist"); + return; + } + + // 1. 
create rollup replicas + AgentBatchTask batchTask = new AgentBatchTask(); + // count total replica num + int totalReplicaNum = 0; + for (MaterializedIndex rollupIdx : partitionIdToRollupIndex.values()) { + for (Tablet tablet : rollupIdx.getTablets()) { + totalReplicaNum += tablet.getReplicas().size(); + } + } + MarkedCountDownLatch countDownLatch = new MarkedCountDownLatch(totalReplicaNum); + db.readLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.ROLLUP); + + for (Map.Entry entry : this.partitionIdToRollupIndex.entrySet()) { + long partitionId = entry.getKey(); + Partition partition = tbl.getPartition(partitionId); + if (partition == null) { + continue; + } + TStorageMedium storageMedium = tbl.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); + MaterializedIndex rollupIndex = entry.getValue(); + + Map tabletIdMap = this.partitionIdToBaseRollupTabletIdMap.get(partitionId); + for (Tablet rollupTablet : rollupIndex.getTablets()) { + long rollupTabletId = rollupTablet.getId(); + List rollupReplicas = rollupTablet.getReplicas(); + for (Replica rollupReplica : rollupReplicas) { + long backendId = rollupReplica.getBackendId(); + Preconditions.checkNotNull(tabletIdMap.get(rollupTabletId)); // baseTabletId + countDownLatch.addMark(backendId, rollupTabletId); + // create replica with version 1. + // version will be updated by following load process, or when rollup task finished. + CreateReplicaTask createReplicaTask = new CreateReplicaTask( + backendId, dbId, tableId, partitionId, rollupIndexId, rollupTabletId, + rollupShortKeyColumnCount, rollupSchemaHash, + Partition.PARTITION_INIT_VERSION, Partition.PARTITION_INIT_VERSION_HASH, + rollupKeysType, TStorageType.COLUMN, storageMedium, + rollupSchema, tbl.getCopiedBfColumns(), tbl.getBfFpp(), countDownLatch); + createReplicaTask.setBaseTablet(tabletIdMap.get(rollupTabletId), baseSchemaHash); + + batchTask.addTask(createReplicaTask); + } // end for rollupReplicas + } // end for rollupTablets + } + } finally { + db.readUnlock(); + } + + if (!FeConstants.runningUnitTest) { + // send all tasks and wait them finished + AgentTaskQueue.addBatchTask(batchTask); + AgentTaskExecutor.submit(batchTask); + long timeout = Math.min(Config.tablet_create_timeout_second * 1000L * totalReplicaNum, + Config.max_create_table_timeout_second * 1000L); + boolean ok = false; + try { + ok = countDownLatch.await(timeout, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + LOG.warn("InterruptedException: ", e); + ok = false; + } + + if (!ok || !countDownLatch.getStatus().ok()) { + // create rollup replicas failed. just cancel the job + // clear tasks and show the failed replicas to user + AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); + String errMsg = null; + if (!countDownLatch.getStatus().ok()) { + errMsg = countDownLatch.getStatus().getErrorMsg(); + } else { + List> unfinishedMarks = countDownLatch.getLeftMarks(); + // only show at most 3 results + List> subList = unfinishedMarks.subList(0, Math.min(unfinishedMarks.size(), 3)); + errMsg = "Error replicas:" + Joiner.on(", ").join(subList); + } + LOG.warn("failed to create rollup replicas for job: {}, {}", jobId, errMsg); + cancelImpl("Create rollup replicas failed. Error: " + errMsg); + return; + } + } + + // create all rollup replicas success. 
+ // add rollup index to catalog + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.ROLLUP); + addRollupIndexToCatalog(tbl); + } finally { + db.writeUnlock(); + } + + this.watershedTxnId = Catalog.getCurrentGlobalTransactionMgr().getTransactionIDGenerator().getNextTransactionId(); + this.jobState = JobState.WAITING_TXN; + + // write edit log + Catalog.getCurrentCatalog().getEditLog().logAlterJob(this); + LOG.info("transfer rollup job {} state to {}, watershed txn id: {}", jobId, this.jobState, watershedTxnId); + } + + private void addRollupIndexToCatalog(OlapTable tbl) { + for (Partition partition : tbl.getPartitions()) { + long partitionId = partition.getId(); + MaterializedIndex rollupIndex = this.partitionIdToRollupIndex.get(partitionId); + Preconditions.checkNotNull(rollupIndex); + Preconditions.checkState(rollupIndex.getState() == IndexState.SHADOW, rollupIndex.getState()); + partition.createRollupIndex(rollupIndex); + } + + tbl.setIndexSchemaInfo(rollupIndexId, rollupIndexName, rollupSchema, 0 /* init schema version */, + rollupSchemaHash, rollupShortKeyColumnCount); + tbl.setStorageTypeToIndex(rollupIndexId, TStorageType.COLUMN); + } + + /* + * runWaitingTxnJob(): + * 1. Wait the transactions before the watershedTxnId to be finished. + * 2. If all previous transactions finished, send create rollup tasks to BE. + * 3. Change job state to RUNNING. + */ + @Override + protected void runWaitingTxnJob() { + Preconditions.checkState(jobState == JobState.WAITING_TXN, jobState); + + if (!isPreviousLoadFinished()) { + LOG.info("wait transactions before {} to be finished, rollup job: {}", watershedTxnId, jobId); + return; + } + + LOG.info("previous transactions are all finished, begin to send rollup tasks. job: {}", jobId); + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + cancelImpl("Databasee " + dbId + " does not exist"); + return; + } + + db.readLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.ROLLUP); + for (Map.Entry entry : this.partitionIdToRollupIndex.entrySet()) { + long partitionId = entry.getKey(); + Partition partition = tbl.getPartition(partitionId); + Preconditions.checkNotNull(partition, partitionId); + + // the rollup task will transform the data before visible version(included). 
+ long visibleVersion = partition.getVisibleVersion(); + long visibleVersionHash = partition.getVisibleVersionHash(); + + MaterializedIndex rollupIndex = entry.getValue(); + Map tabletIdMap = this.partitionIdToBaseRollupTabletIdMap.get(partitionId); + for (Tablet rollupTablet : rollupIndex.getTablets()) { + long rollupTabletId = rollupTablet.getId(); + long baseTabletId = tabletIdMap.get(rollupTabletId); + + List rollupReplicas = rollupTablet.getReplicas(); + for (Replica rollupReplica : rollupReplicas) { + AlterReplicaTask rollupTask = new AlterReplicaTask( + rollupReplica.getBackendId(), dbId, tableId, partitionId, + rollupIndexId, baseIndexId, + rollupTabletId, baseTabletId, rollupReplica.getId(), + rollupSchemaHash, baseSchemaHash, + visibleVersion, visibleVersionHash, jobId, JobType.ROLLUP); + rollupBatchTask.addTask(rollupTask); + } + } + } + } finally { + db.readUnlock(); + } + + AgentTaskQueue.addBatchTask(rollupBatchTask); + AgentTaskExecutor.submit(rollupBatchTask); + this.jobState = JobState.RUNNING; + + // DO NOT write edit log here, tasks will be sent again if FE restarts or the master changes. + LOG.info("transfer rollup job {} state to {}", jobId, this.jobState); + } + + /* + * runRunningJob() + * 1. Wait for all create rollup tasks to be finished. + * 2. Check the integrity of the newly created rollup index. + * 3. Set rollup index's state to NORMAL to make it visible to queries. + * 4. Set job's state to FINISHED. + */ + @Override + protected void runRunningJob() { + Preconditions.checkState(jobState == JobState.RUNNING, jobState); + // must check if db or table still exist first. + // otherwise, if the table is dropped, the tasks will never be finished, + // and the job will be in RUNNING state forever. + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + cancelImpl("Database " + dbId + " does not exist"); + return; + } + + db.readLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + } finally { + db.readUnlock(); + } + + if (!rollupBatchTask.isFinished()) { + LOG.info("rollup tasks not finished. job: {}", jobId); + return; + } + + /* + * all tasks are finished. check the integrity. + * we just check whether all rollup replicas are healthy.
+ */ + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.ROLLUP); + for (Map.Entry entry : this.partitionIdToRollupIndex.entrySet()) { + long partitionId = entry.getKey(); + Partition partition = tbl.getPartition(partitionId); + if (partition == null) { + continue; + } + + long visiableVersion = partition.getVisibleVersion(); + long visiableVersionHash = partition.getVisibleVersionHash(); + short expectReplicationNum = tbl.getPartitionInfo().getReplicationNum(partition.getId()); + + MaterializedIndex rollupIndex = entry.getValue(); + for (Tablet rollupTablet : rollupIndex.getTablets()) { + List replicas = rollupTablet.getReplicas(); + int healthyReplicaNum = 0; + for (Replica replica : replicas) { + if (replica.getLastFailedVersion() < 0 + && replica.checkVersionCatchUp(visiableVersion, visiableVersionHash, false)) { + healthyReplicaNum++; + } + } + + if (healthyReplicaNum < expectReplicationNum / 2 + 1) { + LOG.warn("rollup tablet {} has few healthy replicas: {}, rollup job: {}", + rollupTablet.getId(), replicas, jobId); + cancelImpl("rollup tablet " + rollupTablet.getId() + " has few healthy replicas"); + return; + } + } // end for tablets + } // end for partitions + + onFinished(tbl); + } finally { + db.writeUnlock(); + } + + this.jobState = JobState.FINISHED; + this.finishedTimeMs = System.currentTimeMillis(); + + Catalog.getCurrentCatalog().getEditLog().logAlterJob(this); + LOG.info("rollup job finished: {}", jobId); + } + + private void onFinished(OlapTable tbl) { + for (Partition partition : tbl.getPartitions()) { + MaterializedIndex rollupIndex = partition.getIndex(rollupIndexId); + Preconditions.checkNotNull(rollupIndex, rollupIndexId); + for (Tablet tablet : rollupIndex.getTablets()) { + for (Replica replica : tablet.getReplicas()) { + replica.setState(ReplicaState.NORMAL); + } + } + partition.visualiseShadowIndex(rollupIndexId, false); + } + tbl.setState(OlapTableState.NORMAL); + } + + /* + * cancelImpl() can be called any time any place. + * We need to clean any possible residual of this job. 
+ */ + @Override + protected boolean cancelImpl(String errMsg) { + if (jobState.isFinalState()) { + return false; + } + + cancelInternal(); + + jobState = JobState.CANCELLED; + this.errMsg = errMsg; + this.finishedTimeMs = System.currentTimeMillis(); + LOG.info("cancel {} job {}, err: {}", this.type, jobId, errMsg); + Catalog.getCurrentCatalog().getEditLog().logAlterJob(this); + return true; + } + + private void cancelInternal() { + // clear tasks if has + AgentTaskQueue.removeBatchTask(rollupBatchTask, TTaskType.ALTER); + // remove all rollup indexes, and set state to NORMAL + TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db != null) { + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl != null) { + for (Long partitionId : partitionIdToRollupIndex.keySet()) { + MaterializedIndex rollupIndex = partitionIdToRollupIndex.get(partitionId); + for (Tablet rollupTablet : rollupIndex.getTablets()) { + invertedIndex.deleteTablet(rollupTablet.getId()); + } + Partition partition = tbl.getPartition(partitionId); + partition.deleteRollupIndex(rollupIndexId); + } + tbl.deleteIndexInfo(rollupIndexName); + tbl.setState(OlapTableState.NORMAL); + } + } finally { + db.writeUnlock(); + } + } + } + + // Check whether transactions of the given database which txnId is less than 'watershedTxnId' are finished. + protected boolean isPreviousLoadFinished() { + return Catalog.getCurrentGlobalTransactionMgr().isPreviousTransactionsFinished(watershedTxnId, dbId); + } + + public static RollupJobV2 read(DataInput in) throws IOException { + RollupJobV2 rollupJob = new RollupJobV2(); + rollupJob.readFields(in); + return rollupJob; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + out.writeInt(partitionIdToRollupIndex.size()); + for (long partitionId : partitionIdToRollupIndex.keySet()) { + out.writeLong(partitionId); + + out.writeInt(partitionIdToBaseRollupTabletIdMap.get(partitionId).size()); + for (Map.Entry entry : partitionIdToBaseRollupTabletIdMap.get(partitionId).entrySet()) { + out.writeLong(entry.getKey()); + out.writeLong(entry.getValue()); + } + + MaterializedIndex rollupIndex = partitionIdToRollupIndex.get(partitionId); + rollupIndex.write(out); + } + + out.writeLong(baseIndexId); + out.writeLong(rollupIndexId); + Text.writeString(out, baseIndexName); + Text.writeString(out, rollupIndexName); + + // rollup schema + out.writeInt(rollupSchema.size()); + for (Column column : rollupSchema) { + column.write(out); + } + out.writeInt(baseSchemaHash); + out.writeInt(rollupSchemaHash); + + Text.writeString(out, rollupKeysType.name()); + out.writeShort(rollupShortKeyColumnCount); + + out.writeLong(watershedTxnId); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + int size = in.readInt(); + for (int i = 0; i < size; i++) { + long partitionId = in.readLong(); + int size2 = in.readInt(); + Map tabletIdMap = partitionIdToBaseRollupTabletIdMap.get(partitionId); + if (tabletIdMap == null) { + tabletIdMap = Maps.newHashMap(); + partitionIdToBaseRollupTabletIdMap.put(partitionId, tabletIdMap); + } + for (int j = 0; j < size2; j++) { + long rollupTabletId = in.readLong(); + long baseTabletId = in.readLong(); + tabletIdMap.put(rollupTabletId, baseTabletId); + } + + partitionIdToRollupIndex.put(partitionId, MaterializedIndex.read(in)); + } + + baseIndexId = in.readLong(); + rollupIndexId = in.readLong(); + 
baseIndexName = Text.readString(in); + rollupIndexName = Text.readString(in); + + size = in.readInt(); + for (int i = 0; i < size; i++) { + Column column = Column.read(in); + rollupSchema.add(column); + } + baseSchemaHash = in.readInt(); + rollupSchemaHash = in.readInt(); + + rollupKeysType = KeysType.valueOf(Text.readString(in)); + rollupShortKeyColumnCount = in.readShort(); + + watershedTxnId = in.readLong(); + } + + /* + * Replay job in PENDING state. + * Should replay all changes before this job's state transfers to PENDING. + * These changes should be the same as the changes in RollupHandler.processAddRollup() + */ + private void replayPending(RollupJobV2 replayedJob) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + // database may be dropped before replaying this log. just return + return; + } + + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + // table may be dropped before replaying this log. just return + return; + } + + // add all rollup replicas to tablet inverted index + TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); + for (Long partitionId : partitionIdToRollupIndex.keySet()) { + MaterializedIndex rollupIndex = partitionIdToRollupIndex.get(partitionId); + TStorageMedium medium = tbl.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); + TabletMeta rollupTabletMeta = new TabletMeta(dbId, tableId, partitionId, rollupIndexId, + rollupSchemaHash, medium); + + for (Tablet rollupTablet : rollupIndex.getTablets()) { + invertedIndex.addTablet(rollupTablet.getId(), rollupTabletMeta); + for (Replica rollupReplica : rollupTablet.getReplicas()) { + invertedIndex.addReplica(rollupTablet.getId(), rollupReplica); + } + } + } + tbl.setState(OlapTableState.ROLLUP); + } finally { + db.writeUnlock(); + } + + this.jobState = JobState.WAITING_TXN; + this.watershedTxnId = replayedJob.watershedTxnId; + + LOG.info("replay pending rollup job: {}", jobId); + } + + /* + * Replay job in WAITING_TXN state. + * Should replay all changes in runPendingJob() + */ + private void replayWaitingTxn(RollupJobV2 replayedJob) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + // database may be dropped before replaying this log. just return + return; + } + + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + // table may be dropped before replaying this log. just return + return; + } + addRollupIndexToCatalog(tbl); + } finally { + db.writeUnlock(); + } + + // should still be in WAITING_TXN state, so that the alter tasks will be resent + this.jobState = JobState.WAITING_TXN; + this.watershedTxnId = replayedJob.watershedTxnId; + + LOG.info("replay waiting txn rollup job: {}", jobId); + } + + /* + * Replay job in FINISHED state. + * Should replay all changes in runRunningJob() + */ + private void replayFinished(RollupJobV2 replayedJob) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db != null) { + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl != null) { + Preconditions.checkState(tbl.getState() == OlapTableState.ROLLUP); + onFinished(tbl); + } + } finally { + db.writeUnlock(); + } + } + + this.jobState = JobState.FINISHED; + this.finishedTimeMs = replayedJob.finishedTimeMs; + + LOG.info("replay finished rollup job: {}", jobId); + } + + /* + * Replay job in CANCELLED state.
+ */ + private void replayCancelled(RollupJobV2 replayedJob) { + cancelInternal(); + this.jobState = JobState.CANCELLED; + this.finishedTimeMs = replayedJob.finishedTimeMs; + this.errMsg = replayedJob.errMsg; + LOG.info("replay cancelled rollup job: {}", jobId); + } + + @Override + public void replay(AlterJobV2 replayedJob) { + RollupJobV2 replayedRollupJob = (RollupJobV2) replayedJob; + switch (replayedJob.jobState) { + case PENDING: + replayPending(replayedRollupJob); + break; + case WAITING_TXN: + replayWaitingTxn(replayedRollupJob); + break; + case FINISHED: + replayFinished(replayedRollupJob); + break; + case CANCELLED: + replayCancelled(replayedRollupJob); + break; + default: + break; + } + } + + @Override + protected void getInfo(List> infos) { + List info = Lists.newArrayList(); + info.add(jobId); + info.add(tableName); + info.add(TimeUtils.longToTimeString(createTimeMs)); + info.add(TimeUtils.longToTimeString(finishedTimeMs)); + info.add(baseIndexName); + info.add(rollupIndexName); + info.add(rollupIndexId); + info.add(watershedTxnId); + info.add(jobState.name()); + info.add(errMsg); + // progress + if (jobState == JobState.RUNNING && rollupBatchTask.getTaskNum() > 0) { + info.add(rollupBatchTask.getFinishedTaskNum() + "/" + rollupBatchTask.getTaskNum()); + } else { + info.add("N/A"); + } + info.add(timeoutMs / 1000); + infos.add(info); + } + + public List> getUnfinishedTasks(int limit) { + List> taskInfos = Lists.newArrayList(); + if (jobState == JobState.RUNNING) { + List tasks = rollupBatchTask.getUnfinishedTasks(limit); + for (AgentTask agentTask : tasks) { + AlterReplicaTask rollupTask = (AlterReplicaTask)agentTask; + List info = Lists.newArrayList(); + info.add(String.valueOf(rollupTask.getBackendId())); + info.add(String.valueOf(rollupTask.getBaseTabletId())); + info.add(String.valueOf(rollupTask.getSignature())); + taskInfos.add(info); + } + } + return taskInfos; + } +} diff --git a/fe/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 074ae8cd76f31c..35b16182c5b434 100644 --- a/fe/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -41,27 +41,32 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; -import org.apache.doris.catalog.Partition.PartitionState; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.RangePartitionInfo; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.TabletMeta; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.UserException; import org.apache.doris.common.util.ListComparator; import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.Util; +import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -import org.apache.doris.thrift.TResourceInfo; -import org.apache.doris.thrift.TStorageType; +import org.apache.doris.thrift.TStorageMedium; import 
com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.logging.log4j.LogManager; @@ -81,6 +86,9 @@ public class SchemaChangeHandler extends AlterHandler { private static final Logger LOG = LogManager.getLogger(SchemaChangeHandler.class); + // all shadow indexes should have this prefix in their name + public static final String SHADOW_NAME_PRFIX = "__doris_shadow_"; + public SchemaChangeHandler() { super("schema change"); } @@ -92,10 +100,6 @@ private void processAddColumn(AddColumnClause alterClause, OlapTable olapTable, String targetIndexName = alterClause.getRollupName(); checkIndexExists(olapTable, targetIndexName); - if (column.isKey()) { - checkKeyModificationIfInRandomDistributedTable(olapTable); - } - String baseIndexName = olapTable.getName(); checkAssignedTargetIndexName(baseIndexName, targetIndexName); @@ -118,9 +122,6 @@ private void processAddColumns(AddColumnsClause alterClause, OlapTable olapTable Set newColNameSet = Sets.newHashSet(); for (Column column : columns) { - if (column.isKey()) { - checkKeyModificationIfInRandomDistributedTable(olapTable); - } newColNameSet.add(column.getName()); } @@ -145,14 +146,15 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable String targetIndexName = alterClause.getRollupName(); checkIndexExists(olapTable, targetIndexName); - Column dropColumn = olapTable.getColumn(dropColName); - if (dropColumn != null && dropColumn.isKey()) { - checkKeyModificationIfInRandomDistributedTable(olapTable); - } - String baseIndexName = olapTable.getName(); checkAssignedTargetIndexName(baseIndexName, targetIndexName); + /* + * UNIQUE: + * Can not drop any key column. + * AGGREGATION: + * Can not drop any key column if it has a value column with the REPLACE aggregation method + */ if (KeysType.UNIQUE_KEYS == olapTable.getKeysType()) { long baseIndexId = olapTable.getBaseIndexId(); List baseSchema = indexSchemaMap.get(baseIndexId); @@ -165,7 +167,7 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable } if (isKey) { - throw new DdlException("key column of unique key table cannot be droped"); + throw new DdlException("Can not drop key column in Unique data model table"); } } else if (KeysType.AGG_KEYS == olapTable.getKeysType()) { @@ -183,10 +185,10 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable } } if (isKey && hasReplaceColumn) { - throw new DdlException("key column of table with replace aggregation method cannot be droped"); + throw new DdlException("Can not drop key column when table has value column with REPLACE aggregation method"); } } else { - // drop column in rollup and basetable + // drop column in rollup and base index long targetIndexId = olapTable.getIndexIdByName(targetIndexName); // find column List targetIndexSchema = indexSchemaMap.get(targetIndexId); @@ -200,14 +202,14 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable } } if (isKey && hasReplaceColumn) { - throw new DdlException("key column of table with replace aggregation method cannot be droped"); + throw new DdlException("Can not drop key column when rollup has value column with REPLACE aggregation method"); } } } long baseIndexId = olapTable.getBaseIndexId(); if (targetIndexName == null) { - // drop base index and all rollup indices's column + // if no rollup index is specified, the column should be dropped from both the base index and all rollup indexes.
List indexIds = new ArrayList(); indexIds.add(baseIndexId); for (long indexId : olapTable.getIndexIdToSchema().keySet()) { @@ -230,7 +232,7 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable } } if (!found) { - throw new DdlException("Column[" + dropColName + "] does not exists"); + throw new DdlException("Column does not exists: " + dropColName); } // remove column in rollup index if exists (i = 1 to skip base index) @@ -246,7 +248,7 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable } } // end for index names } else { - // only drop column from specified rollup index + // if specify rollup index, only drop column from specified rollup index long targetIndexId = olapTable.getIndexIdByName(targetIndexName); // find column List targetIndexSchema = indexSchemaMap.get(targetIndexId); @@ -261,33 +263,34 @@ private void processDropColumn(DropColumnClause alterClause, OlapTable olapTable } } if (!found) { - throw new DdlException("Column[" + dropColName + "] does not exists"); + throw new DdlException("Column does not exists: " + dropColName); } } } + // User can modify column type and column position private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapTable, Map> indexSchemaMap) throws DdlException { Column modColumn = alterClause.getColumn(); if (KeysType.AGG_KEYS == olapTable.getKeysType()) { if (modColumn.isKey() && null != modColumn.getAggregationType()) { - throw new DdlException("key column of aggregate key table cannot use aggregation method"); + throw new DdlException("Can not assign aggregation method on key column: " + modColumn.getName()); } else if (null == modColumn.getAggregationType()) { - // in aggregate key table, no aggreation method indicate key column + // in aggregate key table, no aggregation method indicate key column modColumn.setIsKey(true); } } else if (KeysType.UNIQUE_KEYS == olapTable.getKeysType()) { if (null != modColumn.getAggregationType()) { - throw new DdlException("column of unique key table cannot use aggregation method"); + throw new DdlException("Can not assign aggregation method on column in Unique data model table: " + modColumn.getName()); } if (false == modColumn.isKey()) { modColumn.setAggregationType(AggregateType.REPLACE, true); } } else { if (null != modColumn.getAggregationType()) { - throw new DdlException("column of duplicate key table cannot use aggregation method"); + throw new DdlException("Can not assign aggregation method on column in Duplicate data model table: " + modColumn.getName()); } - if (false == modColumn.isKey()) { + if (!modColumn.isKey()) { modColumn.setAggregationType(AggregateType.NONE, true); } } @@ -295,10 +298,6 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapT String targetIndexName = alterClause.getRollupName(); checkIndexExists(olapTable, targetIndexName); - if (modColumn.isKey()) { - checkKeyModificationIfInRandomDistributedTable(olapTable); - } - String baseIndexName = olapTable.getName(); checkAssignedTargetIndexName(baseIndexName, targetIndexName); @@ -318,6 +317,7 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapT String newColName = modColumn.getName(); boolean hasColPos = (columnPos != null && !columnPos.isFirst()); boolean found = false; + boolean typeChanged = false; int modColIndex = -1; int lastColIndex = -1; for (int i = 0; i < schemaForFinding.size(); i++) { @@ -325,6 +325,9 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable 
olapT if (col.getName().equalsIgnoreCase(newColName)) { modColIndex = i; found = true; + if (!col.equals(modColumn)) { + typeChanged = true; + } } if (hasColPos) { if (col.getName().equalsIgnoreCase(columnPos.getLastCol())) { @@ -372,8 +375,6 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapT } else { schemaForFinding.set(modColIndex, modColumn); } - int temp = modColIndex; - Column tempCol = schemaForFinding.get(temp); // check if column being mod if (!modColumn.equals(oriColumn)) { @@ -389,20 +390,19 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapT } List schema = entry.getValue(); for (Column column : schema) { - if (column.getName().equals(modColumn.getName())) { + if (column.getName().equalsIgnoreCase(modColumn.getName())) { otherIndexIds.add(entry.getKey()); break; } } } - if (KeysType.AGG_KEYS == olapTable.getKeysType() - || KeysType.UNIQUE_KEYS == olapTable.getKeysType()) { + if (KeysType.AGG_KEYS == olapTable.getKeysType() || KeysType.UNIQUE_KEYS == olapTable.getKeysType()) { for (Long otherIndexId : otherIndexIds) { List otherIndexSchema = indexSchemaMap.get(otherIndexId); modColIndex = -1; for (int i = 0; i < otherIndexSchema.size(); i++) { - if (otherIndexSchema.get(i).getName().equals(modColumn.getName())) { + if (otherIndexSchema.get(i).getName().equalsIgnoreCase(modColumn.getName())) { modColIndex = i; break; } @@ -412,11 +412,12 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapT otherIndexSchema.set(modColIndex, modColumn); } // end for other indices } else { + // the DUPLICATE data model is handled a little differently for (Long otherIndexId : otherIndexIds) { List otherIndexSchema = indexSchemaMap.get(otherIndexId); modColIndex = -1; for (int i = 0; i < otherIndexSchema.size(); i++) { - if (otherIndexSchema.get(i).getName().equals(modColumn.getName())) { + if (otherIndexSchema.get(i).getName().equalsIgnoreCase(modColumn.getName())) { modColIndex = i; break; } @@ -434,9 +435,24 @@ private void processModifyColumn(ModifyColumnClause alterClause, OlapTable olapT } otherIndexSchema.set(modColIndex, otherCol); } - tempCol = schemaForFinding.get(temp); } } // end for handling other indices + + if (typeChanged) { + /* + * In the new alter table process (AlterJobV2), any modified column is treated as a new column. + * But the modified column's name does not change. So in order to distinguish it, we add + * a prefix to the names of these modified columns. + * This prefix only exists during the schema change process. Once the schema change is finished, + * it will be removed. + * + * After adding this prefix, modifying a column is just the same as adding a column. + * + * If the column type is not changed, the same column name still maps to the same column type, + * so there is no need to add the prefix.
+ */ + modColumn.setName(SHADOW_NAME_PRFIX + modColumn.getName()); + } } private void processReorderColumn(ReorderColumnsClause alterClause, OlapTable olapTable, @@ -445,13 +461,6 @@ private void processReorderColumn(ReorderColumnsClause alterClause, OlapTable ol String targetIndexName = alterClause.getRollupName(); checkIndexExists(olapTable, targetIndexName); - for (String colName : orderedColNames) { - Column reorderdCol = olapTable.getColumn(colName); - if (reorderdCol != null && reorderdCol.isKey()) { - checkKeyModificationIfInRandomDistributedTable(olapTable); - } - } - String baseIndexName = olapTable.getName(); checkAssignedTargetIndexName(baseIndexName, targetIndexName); @@ -491,27 +500,34 @@ private void processReorderColumn(ReorderColumnsClause alterClause, OlapTable ol indexSchemaMap.put(targetIndexId, newSchema); } + /* + * Add 'newColumn' to specified index. + * Modified schema will be saved in 'indexSchemaMap' + */ private void addColumnInternal(OlapTable olapTable, Column newColumn, ColumnPosition columnPos, long targetIndexId, long baseIndexId, String baseIndexName, Map> indexSchemaMap, Set newColNameSet) throws DdlException { + String newColName = newColumn.getName(); + // check the validation of aggregation method on column. + // also fill the default aggregation method if not specified. if (KeysType.AGG_KEYS == olapTable.getKeysType()) { if (newColumn.isKey() && newColumn.getAggregationType() != null) { - throw new DdlException("key column of aggregate table cannot use aggregation method"); + throw new DdlException("Can not assign aggregation method on key column: " + newColName); } else if (null == newColumn.getAggregationType()) { newColumn.setIsKey(true); } } else if (KeysType.UNIQUE_KEYS == olapTable.getKeysType()) { if (newColumn.getAggregationType() != null) { - throw new DdlException("column of unique table cannot use aggregation method"); + throw new DdlException("Can not assign aggregation method on column in Unique data model table: " + newColName); } if (!newColumn.isKey()) { newColumn.setAggregationType(AggregateType.REPLACE, true); } } else { if (newColumn.getAggregationType() != null) { - throw new DdlException("column of duplicate table cannot use aggregation method"); + throw new DdlException("Can not assign aggregation method on column in Duplicate data model table: " + newColName); } if (!newColumn.isKey()) { newColumn.setAggregationType(AggregateType.NONE, true); @@ -520,15 +536,16 @@ private void addColumnInternal(OlapTable olapTable, Column newColumn, ColumnPosi // hll must be used in agg_keys if (newColumn.getType().isHllType() && KeysType.AGG_KEYS != olapTable.getKeysType()) { - throw new DdlException("HLL must be used in AGG_KEYS"); + throw new DdlException("HLL type column can only be in Aggregation data model table: " + newColName); } - if (newColumn.getAggregationType() == BITMAP_UNION && KeysType.AGG_KEYS != olapTable.getKeysType()) { + if (newColumn.getAggregationType() == BITMAP_UNION && KeysType.AGG_KEYS != olapTable.getKeysType()) { throw new DdlException("BITMAP_UNION must be used in AGG_KEYS"); } + // check if the new column already exist in base schema. + // do not support adding new column which already exist in base schema. 
List baseSchema = olapTable.getBaseSchema(); - String newColName = newColumn.getName(); boolean found = false; for (Column column : baseSchema) { if (column.getName().equalsIgnoreCase(newColName)) { @@ -537,17 +554,27 @@ private void addColumnInternal(OlapTable olapTable, Column newColumn, ColumnPosi } } if (found) { - throw new DdlException("Column[" + newColName + "] already exists in base index[" + baseIndexName + "]"); + throw new DdlException("Can not add column which already exists in base table: " + newColName); } + /* + * add new column to indexes. + * UNIQUE: + * 1. If new column is key, it should be added to all indexes. + * 2. Else, add the new column to base index and specified rollup index. + * DUPLICATE: + * 1. If not specify rollup index, just add it to base index. + * 2. Else, first add it to specify rollup index. Then if the new column is key, add it to base + * index, at the end of all other existing key columns. If new new column is value, add it to + * base index by user specified position. + * AGGREGATION: + * 1. Add it to base index, as well as specified rollup index. + */ if (KeysType.UNIQUE_KEYS == olapTable.getKeysType()) { - // check if has default value. this should be done in Analyze phase - // 1. add to base index first List modIndexSchema; if (newColumn.isKey()) { - // add key column to unique key table, should add to all rollups - // Column column = olapTable.getColumn(columnPos.getLastCol()); - // add to all table including base and rollup + // add key column to unique key table + // add to all indexes including base and rollup for (Map.Entry> entry : indexSchemaMap.entrySet()) { modIndexSchema = entry.getValue(); boolean isBaseIdex = entry.getKey() == baseIndexId; @@ -560,28 +587,26 @@ private void addColumnInternal(OlapTable olapTable, Column newColumn, ColumnPosi if (targetIndexId == -1L) { return; } - // 2. add to rollup modIndexSchema = indexSchemaMap.get(targetIndexId); checkAndAddColumn(modIndexSchema, newColumn, columnPos, newColNameSet, false); } } else if (KeysType.DUP_KEYS == olapTable.getKeysType()) { if (targetIndexId == -1L) { - // check if has default value. this should be done in Analyze phase - // 1. add to base index first + // add to base index List modIndexSchema = indexSchemaMap.get(baseIndexId); checkAndAddColumn(modIndexSchema, newColumn, columnPos, newColNameSet, true); // no specified target index. return return; } else { - // 2. add to rollup index + // add to rollup index List modIndexSchema = indexSchemaMap.get(targetIndexId); checkAndAddColumn(modIndexSchema, newColumn, columnPos, newColNameSet, false); if (newColumn.isKey()) { /* * if add column in rollup is key, - * then put the column in base table as end key + * then put the column in base table as the last key column */ modIndexSchema = indexSchemaMap.get(baseIndexId); checkAndAddColumn(modIndexSchema, newColumn, null, newColNameSet, true); @@ -607,6 +632,15 @@ private void addColumnInternal(OlapTable olapTable, Column newColumn, ColumnPosi } } + /* + * add new column to specified index schema('modIndexSchema'). + * if 'isBaseIndex' is true, which means 'modIndexSchema' is base index's schema. + * so we will not check repeat adding of column. + * For example, user want to add column k1 to both rollup1 and rollup2 in one alter stmt: + * ADD COLUMN k1 int to rollup1, + * ADD COLUMN k1 int to rollup2 + * So that k1 will be added to base index 'twice', and we just ignore this repeat adding. 
+ */ private void checkAndAddColumn(List modIndexSchema, Column newColumn, ColumnPosition columnPos, Set newColNameSet, boolean isBaseIndex) throws DdlException { int posIndex = -1; @@ -617,13 +651,13 @@ private void checkAndAddColumn(List modIndexSchema, Column newColumn, Co if (col.getName().equalsIgnoreCase(newColName)) { if (!isBaseIndex || !newColNameSet.contains(newColName)) { // if this is not a base index, we should check if user repeatedly add columns - throw new DdlException("Repeatedly add column[" + newColName + "]"); + throw new DdlException("Repeatedly add column: " + newColName); } // this is a base index, and the column we check here is added by previous 'add column clause' // in same ALTER stmt. // so here we will check if the 2 columns is exactly same. if not, throw exception if (!col.equals(newColumn)) { - throw new DdlException("Repeatedly add same column[" + newColName + "] with different definition"); + throw new DdlException("Repeatedly add same column with different definition: " + newColName); } // column already exist, return @@ -669,16 +703,7 @@ private void checkAndAddColumn(List modIndexSchema, Column newColumn, Co checkRowLength(modIndexSchema); } - private void checkKeyModificationIfInRandomDistributedTable(OlapTable olapTable) throws DdlException { - for (Partition partition : olapTable.getPartitions()) { - DistributionInfo distributionInfo = partition.getDistributionInfo(); - if (distributionInfo.getType() == DistributionInfoType.RANDOM) { - throw new DdlException("Cannot add/del/reorder/modify key column " - + "in table which is distributed by random"); - } - } - } - + // row length can not large than limit private void checkRowLength(List modIndexSchema) throws DdlException { int rowLengthBytes = 0; for (Column column : modIndexSchema) { @@ -691,8 +716,24 @@ private void checkRowLength(List modIndexSchema) throws DdlException { } } + private void checkIndexExists(OlapTable olapTable, String targetIndexName) throws DdlException { + if (targetIndexName != null && !olapTable.hasMaterializedIndex(targetIndexName)) { + throw new DdlException("Index[" + targetIndexName + "] does not exist in table[" + olapTable.getName() + + "]"); + } + } + + private void checkAssignedTargetIndexName(String baseIndexName, String targetIndexName) throws DdlException { + // user cannot assign base index to do schema change + if (targetIndexName != null) { + if (targetIndexName.equals(baseIndexName)) { + throw new DdlException("Do not need to assign base index[" + baseIndexName + "] to do schema change"); + } + } + } + private void createJob(long dbId, OlapTable olapTable, Map> indexSchemaMap, - Map propertyMap) throws DdlException { + Map propertyMap) throws UserException { if (olapTable.getState() == OlapTableState.ROLLUP) { throw new DdlException("Table[" + olapTable.getName() + "]'s is doing ROLLUP job"); } @@ -793,26 +834,20 @@ private void createJob(long dbId, OlapTable olapTable, Map indexIdToShortKeyColumnCount = new HashMap(); + Map indexIdToShortKeyColumnCount = Maps.newHashMap(); + Map> changedIndexIdToSchema = Maps.newHashMap(); for (Long alterIndexId : indexSchemaMap.keySet()) { List originSchema = olapTable.getSchemaByIndexId(alterIndexId); List alterSchema = indexSchemaMap.get(alterIndexId); @@ -831,7 +866,7 @@ private void createJob(long dbId, OlapTable olapTable, Map 0) { - // just skip it (replica cloned from old schema will be deleted) - continue; - } - ++replicaNum; - } // end for replicas - - if (replicaNum < replicationNum / 2 + 1) { - String errMsg = 
"Tablet[" + tablet.getId() + "] does not have enough replicas. [" - + replicaNum + "/" + replicationNum + "]"; - LOG.warn(errMsg); - throw new DdlException(errMsg); - } - } // end for tablets - } // end for partitions - - // 6. calc short key + // 5. calc short key short newShortKeyColumnCount = Catalog.calcShortKeyColumnCount(alterSchema, indexIdToProperties.get(alterIndexId)); LOG.debug("alter index[{}] short key column count: {}", alterIndexId, newShortKeyColumnCount); indexIdToShortKeyColumnCount.put(alterIndexId, newShortKeyColumnCount); - // 7. check storage type if has null column - TStorageType storageType = olapTable.getStorageTypeByIndexId(alterIndexId); - boolean hasNullColumn = false; - for (Column column : alterSchema) { - if (column.isAllowNull()) { - hasNullColumn = true; - break; - } - } - if (hasNullColumn && storageType != TStorageType.COLUMN) { - throw new DdlException("Only column rollup support null columns"); - } - - // 8. store the changed columns for edit log - schemaChangeJob.putToChangedIndexSchemaMap(alterIndexId, alterSchema); + // 6. store the changed columns for edit log + changedIndexIdToSchema.put(alterIndexId, alterSchema); LOG.debug("schema change[{}-{}-{}] check pass.", dbId, tableId, alterIndexId); } // end for indices - if (schemaChangeJob.getChangedIndexToSchema().isEmpty()) { + if (changedIndexIdToSchema.isEmpty()) { throw new DdlException("Nothing is changed. please check your alter stmt."); } - // from now on, storage type can only be column - schemaChangeJob.setNewStorageType(TStorageType.COLUMN); - // the following operations are done outside the 'for indices' loop // to avoid partial check success - // 1. create schema change job - int newSchemaHash = -1; - for (Partition onePartition : olapTable.getPartitions()) { - for (Map.Entry> entry : schemaChangeJob.getChangedIndexToSchema().entrySet()) { - long indexId = entry.getKey(); - MaterializedIndex alterIndex = onePartition.getIndex(indexId); - Preconditions.checkState(alterIndex.getState() == IndexState.NORMAL, alterIndex.getState()); - - // set new schema - int currentSchemaVersion = olapTable.getSchemaVersionByIndexId(indexId); - int newSchemaVersion = currentSchemaVersion + 1; - List alterColumns = entry.getValue(); - // int newSchemaHash = Util.schemaHash(newSchemaVersion, alterColumns, bfColumns, bfFpp); - // new schema hash should only be generate one time, or the schema hash will differenent from each other in different partitions - if (newSchemaHash == -1) { - newSchemaHash = Util.generateSchemaHash(); - int currentSchemaHash = olapTable.getSchemaHashByIndexId(indexId); - // has to generate a new schema hash not equal to current schema hash - while (currentSchemaHash == newSchemaHash) { - newSchemaHash = Util.generateSchemaHash(); + /* + * Create schema change job + * 1. For each index which has been changed, create a SHADOW index, and save the mapping of origin index to SHADOW index. + * 2. Create all tablets and replicas of all SHADOW index, add them to tablet inverted index. + * 3. Change table's state as SCHEMA_CHANGE + */ + for (Map.Entry> entry : changedIndexIdToSchema.entrySet()) { + long originIndexId = entry.getKey(); + // 1. 
get new schema version/schema version hash, short key column count + int currentSchemaVersion = olapTable.getSchemaVersionByIndexId(originIndexId); + int newSchemaVersion = currentSchemaVersion + 1; + // generate schema hash for new index has to generate a new schema hash not equal to current schema hash + int currentSchemaHash = olapTable.getSchemaHashByIndexId(originIndexId); + int newSchemaHash = Util.generateSchemaHash(); + while (currentSchemaHash == newSchemaHash) { + newSchemaHash = Util.generateSchemaHash(); + } + String newIndexName = SHADOW_NAME_PRFIX + olapTable.getIndexNameById(originIndexId); + short newShortKeyColumnCount = indexIdToShortKeyColumnCount.get(originIndexId); + long shadowIndexId = catalog.getNextId(); + + // create SHADOW index for each partition + for (Partition partition : olapTable.getPartitions()) { + long partitionId = partition.getId(); + TStorageMedium medium = olapTable.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); + // index state is SHADOW + MaterializedIndex shadowIndex = new MaterializedIndex(shadowIndexId, IndexState.SHADOW); + MaterializedIndex originIndex = partition.getIndex(originIndexId); + TabletMeta shadowTabletMeta = new TabletMeta(dbId, tableId, partitionId, shadowIndexId, newSchemaHash, medium); + for (Tablet originTablet : originIndex.getTablets()) { + long originTabletId = originTablet.getId(); + long shadowTabletId = catalog.getNextId(); + + Tablet shadowTablet = new Tablet(shadowTabletId); + shadowIndex.addTablet(shadowTablet, shadowTabletMeta); + + schemaChangeJob.addTabletIdMap(partitionId, shadowIndexId, shadowTabletId, originTabletId); + List originReplicas = originTablet.getReplicas(); + + for (Replica originReplica : originReplicas) { + long shadowReplicaId = catalog.getNextId(); + long backendId = originReplica.getBackendId(); + Preconditions.checkState(originReplica.getState() == ReplicaState.NORMAL); + Replica shadowReplica = new Replica(shadowReplicaId, backendId, ReplicaState.ALTER, + Partition.PARTITION_INIT_VERSION, Partition.PARTITION_INIT_VERSION_HASH, + newSchemaHash); + shadowTablet.addReplica(shadowReplica); } } - short newShortKeyColumnCount = indexIdToShortKeyColumnCount.get(indexId); - schemaChangeJob.setNewSchemaInfo(indexId, newSchemaVersion, newSchemaHash, newShortKeyColumnCount); - - // set replica state - for (Tablet tablet : alterIndex.getTablets()) { - for (Replica replica : tablet.getReplicas()) { - if (replica.getState() == ReplicaState.CLONE - || replica.getState() == ReplicaState.DECOMMISSION - || replica.getLastFailedVersion() > 0) { - // this should not happen, cause we only allow schema change when table is stable. 
- LOG.error("replica {} of tablet {} on backend {} is not NORMAL: {}", - replica.getId(), tablet.getId(), replica.getBackendId(), replica); - continue; - } - Preconditions.checkState(replica.getState() == ReplicaState.NORMAL, replica.getState()); - replica.setState(ReplicaState.SCHEMA_CHANGE); - } // end for replicas - } // end for tablets - - Catalog.getCurrentInvertedIndex().setNewSchemaHash(onePartition.getId(), indexId, newSchemaHash); - - alterIndex.setState(IndexState.SCHEMA_CHANGE); - } // end for indices - - onePartition.setState(PartitionState.SCHEMA_CHANGE); - } // end for partitions - + + schemaChangeJob.addPartitionShadowIndex(partitionId, shadowIndexId, shadowIndex); + } // end for partition + schemaChangeJob.addIndexSchema(shadowIndexId, originIndexId, newIndexName, newSchemaVersion, newSchemaHash, newShortKeyColumnCount, entry.getValue()); + } // end for index + + // set table state olapTable.setState(OlapTableState.SCHEMA_CHANGE); // 2. add schemaChangeJob - addAlterJob(schemaChangeJob); + addAlterJobV2(schemaChangeJob); - // 3. log schema change start operation - Catalog.getInstance().getEditLog().logStartSchemaChange(schemaChangeJob); - LOG.info("schema change job created. table[{}]", olapTable.getName()); + // 3. write edit log + Catalog.getInstance().getEditLog().logAlterJob(schemaChangeJob); + LOG.info("finished to create schema change job: {}", schemaChangeJob.getJobId()); } - private void checkIndexExists(OlapTable olapTable, String targetIndexName) throws DdlException { - if (targetIndexName != null && !olapTable.hasMaterializedIndex(targetIndexName)) { - throw new DdlException("Index[" + targetIndexName + "] does not exist in table[" + olapTable.getName() - + "]"); - } + @Override + protected void runOneCycle() { + super.runOneCycle(); + runOldAlterJob(); + runAlterJobV2(); } - private void checkAssignedTargetIndexName(String baseIndexName, String targetIndexName) throws DdlException { - // user cannot assign base index to do schema change - if (targetIndexName != null) { - if (targetIndexName.equals(baseIndexName)) { - throw new DdlException("Do not need to assign base index[" + baseIndexName + "] to do schema change"); + private void runAlterJobV2() { + Iterator> iter = alterJobsV2.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + AlterJobV2 alterJob = entry.getValue(); + if (alterJob.isDone()) { + continue; } + alterJob.run(); } } - public void removeReplicaRelatedTask(long tableId, long tabletId, long replicaId, long backendId) { - AlterJob job = getAlterJob(tableId); - if (job != null) { - job.removeReplicaRelatedTask(-1L, tabletId, replicaId, backendId); - } - } - - @Override - protected void runOneCycle() { - super.runOneCycle(); + @Deprecated + private void runOldAlterJob() { List cancelledJobs = Lists.newArrayList(); List finishedJobs = Lists.newArrayList(); for (AlterJob alterJob : alterJobs.values()) { SchemaChangeJob schemaChangeJob = (SchemaChangeJob) alterJob; + if (schemaChangeJob.getState() != JobState.FINISHING + && schemaChangeJob.getState() != JobState.FINISHED + && schemaChangeJob.getState() != JobState.CANCELLED) { + // cancel the old alter table job + cancelledJobs.add(schemaChangeJob); + continue; + } // it means this is an old type job and current version is real time load version // then kill this job if (alterJob.getTransactionId() < 0) { @@ -1224,6 +1218,32 @@ protected void runOneCycle() { @Override public List> getAlterJobInfosByDb(Database db) { List> schemaChangeJobInfos = new LinkedList>(); + 
getOldAlterJobInfos(db, schemaChangeJobInfos); + getAlterJobV2Infos(db, schemaChangeJobInfos); + + // sort by "JobId", "PartitionName", "CreateTime", "FinishTime", "IndexName", "IndexState" + ListComparator> comparator = new ListComparator>(0, 1, 2, 3, 4, 5); + Collections.sort(schemaChangeJobInfos, comparator); + return schemaChangeJobInfos; + } + + private void getAlterJobV2Infos(Database db, List> schemaChangeJobInfos) { + ConnectContext ctx = ConnectContext.get(); + for (AlterJobV2 alterJob : alterJobsV2.values()) { + if (alterJob.getDbId() != db.getId()) { + continue; + } + if (ctx != null) { + if (!Catalog.getCurrentCatalog().getAuth().checkTblPriv(ctx, db.getFullName(), alterJob.getTableName(), PrivPredicate.ALTER)) { + continue; + } + } + alterJob.getInfo(schemaChangeJobInfos); + } + } + + @Deprecated + private void getOldAlterJobInfos(Database db, List> schemaChangeJobInfos) { List selectedJobs = Lists.newArrayList(); lock(); @@ -1258,15 +1278,11 @@ public List> getAlterJobInfosByDb(Database db) { } finally { db.readUnlock(); } - - // sort by "JobId", "PartitionName", "CreateTime", "FinishTime", "IndexName", "IndexState" - ListComparator> comparator = new ListComparator>(0, 1, 2, 3, 4, 5); - Collections.sort(schemaChangeJobInfos, comparator); - return schemaChangeJobInfos; } @Override - public void process(List alterClauses, String clusterName, Database db, OlapTable olapTable) throws DdlException { + public void process(List alterClauses, String clusterName, Database db, OlapTable olapTable) + throws UserException { // index id -> index schema Map> indexSchemaMap = new HashMap>(); for (Map.Entry> entry : olapTable.getIndexIdToSchema().entrySet()) { @@ -1284,6 +1300,9 @@ public void process(List alterClauses, String clusterName, Database throw new DdlException("reduplicated PROPERTIES"); } + // modification of colocate property is handle alone. + // And because there should be only one colocate property modification clause in stmt, + // so just return after finished handling. if (properties.containsKey(PropertyAnalyzer.PROPERTIES_COLOCATE_WITH)) { String colocateGroup = properties.get(PropertyAnalyzer.PROPERTIES_COLOCATE_WITH); Catalog.getInstance().modifyTableColocate(db, olapTable, colocateGroup, false, null); @@ -1331,33 +1350,49 @@ public void cancel(CancelStmt stmt) throws DdlException { throw new DdlException("Database[" + dbName + "] does not exist"); } - AlterJob alterJob = null; + AlterJob schemaChangeJob = null; + AlterJobV2 schemaChangeJobV2 = null; db.writeLock(); try { - // 1. get table - OlapTable olapTable = (OlapTable) db.getTable(tableName); - if (olapTable == null) { - throw new DdlException("Table[" + tableName + "] does not exist"); + Table table = db.getTable(tableName); + if (table == null) { + ErrorReport.reportDdlException(ErrorCode.ERR_BAD_TABLE_ERROR, tableName); } - - // 2. 
find schema change job - alterJob = alterJobs.get(olapTable.getId()); - if (alterJob == null) { - throw new DdlException("Table[" + tableName + "] is not under SCHEMA CHANGE"); + if (!(table instanceof OlapTable)) { + ErrorReport.reportDdlException(ErrorCode.ERR_NOT_OLAP_TABLE, tableName); } - - if (alterJob.getState() == JobState.FINISHING || - alterJob.getState() == JobState.FINISHED || - alterJob.getState() == JobState.CANCELLED) { - throw new DdlException("job is already " + alterJob.getState().name() + ", can not cancel it"); + OlapTable olapTable = (OlapTable) table; + if (olapTable.getState() != OlapTableState.SCHEMA_CHANGE) { + throw new DdlException("Table[" + tableName + "] is not under SCHEMA_CHANGE."); } - // 3. cancel schema change job - alterJob.cancel(olapTable, "user cancelled"); + // find from new alter jobs first + schemaChangeJobV2 = getUnfinishedAlterJobV2(olapTable.getId()); + if (schemaChangeJobV2 == null) { + schemaChangeJob = getAlterJob(olapTable.getId()); + Preconditions.checkNotNull(schemaChangeJob, olapTable.getId()); + if (schemaChangeJob.getState() == JobState.FINISHING + || schemaChangeJob.getState() == JobState.FINISHED + || schemaChangeJob.getState() == JobState.CANCELLED) { + throw new DdlException("job is already " + schemaChangeJob.getState().name() + ", can not cancel it"); + } + schemaChangeJob.cancel(olapTable, "user cancelled"); + } } finally { db.writeUnlock(); } - jobDone(alterJob); + // alter job v2's cancel must be called outside the database lock + if (schemaChangeJobV2 != null) { + if (!schemaChangeJobV2.cancel("user cancelled")) { + throw new DdlException("Job can not be cancelled. State: " + schemaChangeJobV2.getJobState()); + } + return; + } + + // handle old alter job + if (schemaChangeJob != null && schemaChangeJob.getState() == JobState.CANCELLED) { + jobDone(schemaChangeJob); + } } } diff --git a/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java b/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java index b7b7152e89216f..726bd153cc999c 100644 --- a/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java +++ b/fe/src/main/java/org/apache/doris/alter/SchemaChangeJob.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; @@ -333,7 +334,7 @@ public int checkOrResendClearTasks() { OUTER_LOOP: for (Partition partition : olapTable.getPartitions()) { long partitionId = partition.getId(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { List replicas = tablet.getReplicas(); for (Replica replica : replicas) { @@ -853,7 +854,7 @@ public int tryFinishJob() { // 3. update base schema if changed if (this.changedIndexIdToSchema.containsKey(olapTable.getBaseIndexId())) { - table.setNewBaseSchema(this.changedIndexIdToSchema.get(olapTable.getBaseIndexId())); + table.setNewFullSchema(this.changedIndexIdToSchema.get(olapTable.getBaseIndexId())); } // 4. 
update table bloom filter columns @@ -1018,7 +1019,7 @@ public void replayFinishing(Database db) { olapTable.setIndexStorageType(indexId, newStorageType); } if (indexId == olapTable.getBaseIndexId()) { - olapTable.setNewBaseSchema(entry.getValue()); + olapTable.setNewFullSchema(entry.getValue()); } } @@ -1108,12 +1109,13 @@ public void getJobInfo(List> jobInfos, OlapTable tbl) { jobInfo.add(TimeUtils.longToTimeString(finishedTime)); jobInfo.add("N/A"); // index name jobInfo.add("N/A"); // index id + jobInfo.add("N/A"); // origin id jobInfo.add("N/A"); // schema version - jobInfo.add("N/A"); // index state jobInfo.add(-1); // transaction id jobInfo.add(state.name()); // job state - jobInfo.add("N/A"); // progress jobInfo.add(cancelMsg); + jobInfo.add("N/A"); // progress + jobInfo.add(Config.alter_table_timeout_second); // timeout jobInfos.add(jobInfo); return; } @@ -1173,19 +1175,18 @@ public void getJobInfo(List> jobInfos, OlapTable tbl) { jobInfo.add(TimeUtils.longToTimeString(finishedTime)); jobInfo.add(tbl.getIndexNameById(indexId) == null ? "N/A" : tbl.getIndexNameById(indexId)); // index name jobInfo.add(indexId); + jobInfo.add(indexId); // origin index id // index schema version and schema hash - jobInfo.add(changedIndexIdToSchemaVersion.get(indexId) + "-" + changedIndexIdToSchemaHash.get(indexId)); - jobInfo.add(indexState.get(indexId)); // index state + jobInfo.add(changedIndexIdToSchemaVersion.get(indexId) + ":" + changedIndexIdToSchemaHash.get(indexId)); jobInfo.add(transactionId); jobInfo.add(state.name()); // job state - + jobInfo.add(cancelMsg); if (state == JobState.RUNNING) { jobInfo.add(indexProgress.get(indexId) == null ? "N/A" : indexProgress.get(indexId)); // progress } else { jobInfo.add("N/A"); } - - jobInfo.add(cancelMsg); + jobInfo.add(Config.alter_table_timeout_second); jobInfos.add(jobInfo); } // end for indexIds diff --git a/fe/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java new file mode 100644 index 00000000000000..afabb99fe04e19 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -0,0 +1,903 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
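The new SchemaChangeJobV2 below drives the shadow-index schema change as a small state machine: runPendingJob() moves the job from PENDING to WAITING_TXN, runWaitingTxnJob() moves it to RUNNING, runRunningJob() moves it to FINISHED, and cancelImpl()/cancelInternal() end it in CANCELLED, while SchemaChangeHandler.runAlterJobV2() simply calls run() on every unfinished job each scheduling round. The compact sketch here shows only that dispatch shape; the run() loop is assumed to live in the AlterJobV2 base class (not part of this diff), and every name other than the job states is illustrative.

    // Minimal, self-contained sketch of the assumed state dispatch behind AlterJobV2.run().
    // State names follow the diff; the class and method bodies are placeholders.
    public class AlterJobStateMachineSketch {
        enum JobState { PENDING, WAITING_TXN, RUNNING, FINISHED, CANCELLED }

        private JobState jobState = JobState.PENDING;

        public synchronized void run() {
            switch (jobState) {
            case PENDING:     runPendingJob();    break; // create shadow replicas, pick watershed txn id
            case WAITING_TXN: runWaitingTxnJob(); break; // wait for older txns, then send alter tasks
            case RUNNING:     runRunningJob();    break; // wait for tasks, then swap shadow indexes in
            default:          break;                      // FINISHED / CANCELLED: nothing left to do
            }
        }

        private void runPendingJob()    { jobState = JobState.WAITING_TXN; }
        private void runWaitingTxnJob() { jobState = JobState.RUNNING; }
        private void runRunningJob()    { jobState = JobState.FINISHED; }
    }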
+ +package org.apache.doris.alter; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexState; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.catalog.TabletMeta; +import org.apache.doris.common.Config; +import org.apache.doris.common.FeConstants; +import org.apache.doris.common.MarkedCountDownLatch; +import org.apache.doris.common.Pair; +import org.apache.doris.common.io.Text; +import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.AlterReplicaTask; +import org.apache.doris.task.CreateReplicaTask; +import org.apache.doris.thrift.TStorageMedium; +import org.apache.doris.thrift.TStorageType; +import org.apache.doris.thrift.TTaskType; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.HashBasedTable; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.common.collect.Table; +import com.google.common.collect.Table.Cell; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/* + * Author: Chenmingyu + * Date: Jul 8, 2019 + */ + +/* + * Version 2 of SchemaChangeJob. + * This is for replacing the old SchemaChangeJob + * https://github.com/apache/incubator-doris/issues/1429 + */ +public class SchemaChangeJobV2 extends AlterJobV2 { + private static final Logger LOG = LogManager.getLogger(SchemaChangeJobV2.class); + + // partition id -> (shadow index id -> (shadow tablet id -> origin tablet id)) + private Table> partitionIndexTabletMap = HashBasedTable.create(); + // partition id -> (shadow index id -> shadow index)) + private Table partitionIndexMap = HashBasedTable.create(); + // shadow index id -> origin index id + private Map indexIdMap = Maps.newHashMap(); + // shadow index id -> shadow index name(__doris_shadow_xxx) + private Map indexIdToName = Maps.newHashMap(); + // shadow index id -> index schema + private Map> indexSchemaMap = Maps.newHashMap(); + // shadow index id -> (shadow index schema version : schema hash) + private Map> indexSchemaVersionAndHashMap = Maps.newHashMap(); + // shadow index id -> shadow index short key count + private Map indexShortKeyMap = Maps.newHashMap(); + + // bloom filter info + private boolean hasBfChange; + private Set bfColumns = null; + private double bfFpp = 0; + + // The schema change job will wait all transactions before this txn id finished, then send the schema change tasks. 
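In the diff, the watershed id is taken from the global transaction manager's id generator at the end of the pending phase, and isPreviousLoadFinished() later gates the WAITING_TXN phase on isPreviousTransactionsFinished(watershedTxnId, dbId). A minimal sketch of that gating idea follows; the TxnManager interface and its method names are placeholders for those calls, not the Doris API.

    // Sketch of watershed gating: record the next txn id once the shadow replicas exist,
    // and only send alter tasks after every load txn older than the watershed has finished.
    class WatershedGateSketch {
        interface TxnManager {
            long nextTransactionId();                         // monotonically increasing ids
            boolean allFinishedBefore(long txnId, long dbId); // txns with smaller ids all done?
        }

        private final TxnManager txnMgr;
        private long watershedTxnId = -1;

        WatershedGateSketch(TxnManager txnMgr) { this.txnMgr = txnMgr; }

        // end of the pending phase: shadow replicas are created and visible to new loads
        void markWatershed() { watershedTxnId = txnMgr.nextTransactionId(); }

        // polled each round in the WAITING_TXN phase
        boolean readyToSendAlterTasks(long dbId) {
            return watershedTxnId != -1 && txnMgr.allFinishedBefore(watershedTxnId, dbId);
        }
    }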
+ protected long watershedTxnId = -1; + + // save all schema change tasks + private AgentBatchTask schemaChangeBatchTask = new AgentBatchTask(); + + public SchemaChangeJobV2(long jobId, long dbId, long tableId, String tableName, long timeoutMs) { + super(jobId, JobType.SCHEMA_CHANGE, dbId, tableId, tableName, timeoutMs); + + } + + private SchemaChangeJobV2() { + super(JobType.SCHEMA_CHANGE); + } + + public void addTabletIdMap(long partitionId, long shadowIdxId, long shadowTabletId, long originTabletId) { + Map tabletMap = partitionIndexTabletMap.get(partitionId, shadowIdxId); + if (tabletMap == null) { + tabletMap = Maps.newHashMap(); + partitionIndexTabletMap.put(partitionId, shadowIdxId, tabletMap); + } + tabletMap.put(shadowTabletId, originTabletId); + } + + public void addPartitionShadowIndex(long partitionId, long shadowIdxId, MaterializedIndex shadowIdx) { + partitionIndexMap.put(partitionId, shadowIdxId, shadowIdx); + } + + public void addIndexSchema(long shadowIdxId, long originIdxId, + String shadowIndexName, int shadowSchemaVersion, int shadowSchemaHash, + short shadowIdxShortKeyCount, List shadowIdxSchema) { + indexIdMap.put(shadowIdxId, originIdxId); + indexIdToName.put(shadowIdxId, shadowIndexName); + indexSchemaVersionAndHashMap.put(shadowIdxId, Pair.create(shadowSchemaVersion, shadowSchemaHash)); + indexShortKeyMap.put(shadowIdxId, shadowIdxShortKeyCount); + indexSchemaMap.put(shadowIdxId, shadowIdxSchema); + } + + public void setBloomFilterInfo(boolean hasBfChange, Set bfColumns, double bfFpp) { + this.hasBfChange = hasBfChange; + this.bfColumns = bfColumns; + this.bfFpp = bfFpp; + } + + /* + * runPendingJob(): + * 1. Create all replicas of all shadow indexes and wait them finished. + * 2. After creating done, add the shadow indexes to catalog, user can not see this + * shadow index, but internal load process will generate data for these indexes. + * 3. Get a new transaction id, then set job's state to WAITING_TXN + */ + @Override + protected void runPendingJob() { + Preconditions.checkState(jobState == JobState.PENDING, jobState); + + LOG.info("begin to send create replica tasks. job: {}", jobId); + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + cancelImpl("Databasee " + dbId + " does not exist"); + return; + } + + // 1. 
create replicas + AgentBatchTask batchTask = new AgentBatchTask(); + // count total replica num + int totalReplicaNum = 0; + for (MaterializedIndex shadowIdx : partitionIndexMap.values()) { + for (Tablet tablet : shadowIdx.getTablets()) { + totalReplicaNum += tablet.getReplicas().size(); + } + } + MarkedCountDownLatch countDownLatch = new MarkedCountDownLatch(totalReplicaNum); + db.readLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.SCHEMA_CHANGE); + + for (long partitionId : partitionIndexMap.rowKeySet()) { + Partition partition = tbl.getPartition(partitionId); + if (partition == null) { + continue; + } + TStorageMedium storageMedium = tbl.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); + + Map shadowIndexMap = partitionIndexMap.row(partitionId); + for (Map.Entry entry : shadowIndexMap.entrySet()) { + long shadowIdxId = entry.getKey(); + MaterializedIndex shadowIdx = entry.getValue(); + + short shadowShortKeyColumnCount = indexShortKeyMap.get(shadowIdxId); + List shadowSchema = indexSchemaMap.get(shadowIdxId); + int shadowSchemaHash = indexSchemaVersionAndHashMap.get(shadowIdxId).second; + int originSchemaHash = tbl.getSchemaHashByIndexId(indexIdMap.get(shadowIdxId)); + + for (Tablet shadowTablet : shadowIdx.getTablets()) { + long shadowTabletId = shadowTablet.getId(); + List shadowReplicas = shadowTablet.getReplicas(); + for (Replica shadowReplica : shadowReplicas) { + long backendId = shadowReplica.getBackendId(); + countDownLatch.addMark(backendId, shadowTabletId); + CreateReplicaTask createReplicaTask = new CreateReplicaTask( + backendId, dbId, tableId, partitionId, shadowIdxId, shadowTabletId, + shadowShortKeyColumnCount, shadowSchemaHash, + Partition.PARTITION_INIT_VERSION, Partition.PARTITION_INIT_VERSION_HASH, + tbl.getKeysType(), TStorageType.COLUMN, storageMedium, + shadowSchema, bfColumns, bfFpp, countDownLatch); + createReplicaTask.setBaseTablet(partitionIndexTabletMap.get(partitionId, shadowIdxId).get(shadowTabletId), originSchemaHash); + + batchTask.addTask(createReplicaTask); + } // end for rollupReplicas + } // end for rollupTablets + } + } + } finally { + db.readUnlock(); + } + + if (!FeConstants.runningUnitTest) { + // send all tasks and wait them finished + AgentTaskQueue.addBatchTask(batchTask); + AgentTaskExecutor.submit(batchTask); + // max timeout is 1 min + long timeout = Math.min(Config.tablet_create_timeout_second * 1000L * totalReplicaNum, 60000); + boolean ok = false; + try { + ok = countDownLatch.await(timeout, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + LOG.warn("InterruptedException: ", e); + ok = false; + } + + if (!ok) { + // create replicas failed. just cancel the job + // clear tasks and show the failed replicas to user + AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); + String errMsg = null; + if (!countDownLatch.getStatus().ok()) { + errMsg = countDownLatch.getStatus().getErrorMsg(); + } else { + List> unfinishedMarks = countDownLatch.getLeftMarks(); + // only show at most 3 results + List> subList = unfinishedMarks.subList(0, Math.min(unfinishedMarks.size(), 3)); + errMsg = "Error replicas:" + Joiner.on(", ").join(subList); + } + LOG.warn("failed to create replicas for job: {}, {}", jobId, errMsg); + cancelImpl("Create replicas failed. Error: " + errMsg); + return; + } + } + + // create all replicas success. 
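The create-replica step above batches one task per shadow replica and then blocks on a MarkedCountDownLatch with a bounded timeout, listing a few of the unfinished (backend, tablet) marks if the wait fails. Below is a simplified, self-contained stand-in for that pattern; the real MarkedCountDownLatch also carries a Status, and all names here are invented for illustration.

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;

    // One count per expected replica plus a mark, so stragglers can be listed on timeout.
    class MarkedLatchSketch<M> {
        private final CountDownLatch latch;
        private final Set<M> pending = ConcurrentHashMap.newKeySet();

        MarkedLatchSketch(int expectedCount) { this.latch = new CountDownLatch(expectedCount); }

        void addMark(M mark) { pending.add(mark); }   // register before sending the task

        void markedCountDown(M mark) {                // called when a task reports success
            pending.remove(mark);
            latch.countDown();
        }

        // returns the unfinished marks if the bounded wait times out, or an empty set on success
        Set<M> awaitOrLeftMarks(long timeoutMs) throws InterruptedException {
            if (latch.await(timeoutMs, TimeUnit.MILLISECONDS)) {
                return Collections.emptySet();
            }
            return new HashSet<>(pending);
        }
    }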
+ // add all shadow indexes to catalog + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.SCHEMA_CHANGE); + addShadowIndexToCatalog(tbl); + } finally { + db.writeUnlock(); + } + + this.watershedTxnId = Catalog.getCurrentGlobalTransactionMgr().getTransactionIDGenerator().getNextTransactionId(); + this.jobState = JobState.WAITING_TXN; + + // write edit log + Catalog.getCurrentCatalog().getEditLog().logAlterJob(this); + LOG.info("transfer schema change job {} state to {}, watershed txn id: {}", jobId, this.jobState, watershedTxnId); + } + + private void addShadowIndexToCatalog(OlapTable tbl) { + for (long partitionId : partitionIndexMap.rowKeySet()) { + Partition partition = tbl.getPartition(partitionId); + if (partition == null) { + continue; + } + Map shadowIndexMap = partitionIndexMap.row(partitionId); + for (MaterializedIndex shadowIndex : shadowIndexMap.values()) { + Preconditions.checkState(shadowIndex.getState() == IndexState.SHADOW, shadowIndex.getState()); + partition.createRollupIndex(shadowIndex); + } + } + + for (long shadowIdxId : indexIdMap.keySet()) { + tbl.setIndexSchemaInfo(shadowIdxId, indexIdToName.get(shadowIdxId), indexSchemaMap.get(shadowIdxId), + indexSchemaVersionAndHashMap.get(shadowIdxId).first, + indexSchemaVersionAndHashMap.get(shadowIdxId).second, + indexShortKeyMap.get(shadowIdxId)); + tbl.setStorageTypeToIndex(shadowIdxId, TStorageType.COLUMN); + } + + tbl.rebuildFullSchema(); + } + + /* + * runWaitingTxnJob(): + * 1. Wait the transactions before the watershedTxnId to be finished. + * 2. If all previous transactions finished, send schema change tasks to BE. + * 3. Change job state to RUNNING. + */ + @Override + protected void runWaitingTxnJob() { + Preconditions.checkState(jobState == JobState.WAITING_TXN, jobState); + + if (!isPreviousLoadFinished()) { + LOG.info("wait transactions before {} to be finished, schema change job: {}", watershedTxnId, jobId); + return; + } + + LOG.info("previous transactions are all finished, begin to send schema change tasks. job: {}", jobId); + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + cancelImpl("Databasee " + dbId + " does not exist"); + return; + } + + db.readLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.SCHEMA_CHANGE); + + for (long partitionId : partitionIndexMap.rowKeySet()) { + Partition partition = tbl.getPartition(partitionId); + Preconditions.checkNotNull(partition, partitionId); + + // the schema change task will transform the data before visible version(included). 
+ long visibleVersion = partition.getVisibleVersion(); + long visibleVersionHash = partition.getVisibleVersionHash(); + + Map shadowIndexMap = partitionIndexMap.row(partitionId); + for (Map.Entry entry : shadowIndexMap.entrySet()) { + long shadowIdxId = entry.getKey(); + MaterializedIndex shadowIdx = entry.getValue(); + + long originIdxId = indexIdMap.get(shadowIdxId); + int shadowSchemaHash = indexSchemaVersionAndHashMap.get(shadowIdxId).second; + int originSchemaHash = tbl.getSchemaHashByIndexId(indexIdMap.get(shadowIdxId)); + + for (Tablet shadowTablet : shadowIdx.getTablets()) { + long shadowTabletId = shadowTablet.getId(); + long originTabletId = partitionIndexTabletMap.get(partitionId, shadowIdxId).get(shadowTabletId); + List shadowReplicas = shadowTablet.getReplicas(); + for (Replica shadowReplica : shadowReplicas) { + AlterReplicaTask rollupTask = new AlterReplicaTask( + shadowReplica.getBackendId(), dbId, tableId, partitionId, + shadowIdxId, originIdxId, + shadowTabletId, originTabletId, shadowReplica.getId(), + shadowSchemaHash, originSchemaHash, + visibleVersion, visibleVersionHash, jobId, JobType.SCHEMA_CHANGE); + schemaChangeBatchTask.addTask(rollupTask); + } + } + } + } // end for partitions + } finally { + db.readUnlock(); + } + + AgentTaskQueue.addBatchTask(schemaChangeBatchTask); + AgentTaskExecutor.submit(schemaChangeBatchTask); + + this.jobState = JobState.RUNNING; + + // DO NOT write edit log here, tasks will be send again if FE restart or master changed. + LOG.info("transfer schema change job {} state to {}", jobId, this.jobState); + } + + /* + * runRunningJob() + * 1. Wait all schema change tasks to be finished. + * 2. Check the integrity of the newly created shadow indexes. + * 3. Replace the origin index with shadow index, and set shadow index's state as NORMAL to be visible to user. + * 4. Set job'state as FINISHED. + */ + @Override + protected void runRunningJob() { + Preconditions.checkState(jobState == JobState.RUNNING, jobState); + // must check if db or table still exist first. + // or if table is dropped, the tasks will never be finished, + // and the job will be in RUNNING state forever. + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + cancelImpl("Databasee " + dbId + " does not exist"); + return; + } + + db.readLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + } finally { + db.readUnlock(); + } + + if (!schemaChangeBatchTask.isFinished()) { + LOG.info("schema change tasks not finished. job: {}", jobId); + return; + } + + /* + * all tasks are finished. check the integrity. + * we just check whether all new replicas are healthy. 
+ */ + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + cancelImpl("Table " + tableId + " does not exist"); + return; + } + Preconditions.checkState(tbl.getState() == OlapTableState.SCHEMA_CHANGE); + + for (long partitionId : partitionIndexMap.rowKeySet()) { + Partition partition = tbl.getPartition(partitionId); + Preconditions.checkNotNull(partition, partitionId); + + long visiableVersion = partition.getVisibleVersion(); + long visiableVersionHash = partition.getVisibleVersionHash(); + short expectReplicationNum = tbl.getPartitionInfo().getReplicationNum(partition.getId()); + + Map shadowIndexMap = partitionIndexMap.row(partitionId); + for (Map.Entry entry : shadowIndexMap.entrySet()) { + MaterializedIndex shadowIdx = entry.getValue(); + + for (Tablet shadowTablet : shadowIdx.getTablets()) { + List replicas = shadowTablet.getReplicas(); + int healthyReplicaNum = 0; + for (Replica replica : replicas) { + if (replica.getLastFailedVersion() < 0 + && replica.checkVersionCatchUp(visiableVersion, visiableVersionHash, false)) { + healthyReplicaNum++; + } + } + + if (healthyReplicaNum < expectReplicationNum / 2 + 1) { + LOG.warn("shadow tablet {} has few healthy replicas: {}, schema change job: {}", + shadowTablet.getId(), replicas, jobId); + cancelImpl("shadow tablet " + shadowTablet.getId() + " has few healthy replicas"); + return; + } + } // end for tablets + } + } // end for partitions + + // all partitions are good + onFinished(tbl); + } finally { + db.writeUnlock(); + } + + this.jobState = JobState.FINISHED; + this.finishedTimeMs = System.currentTimeMillis(); + + Catalog.getCurrentCatalog().getEditLog().logAlterJob(this); + LOG.info("schema change job finished: {}", jobId); + } + + private void onFinished(OlapTable tbl) { + // replace the origin index with shadow index, set index state as NORMAL + for (Partition partition : tbl.getPartitions()) { + // drop the origin index from partitions + for (Map.Entry entry : indexIdMap.entrySet()) { + long shadowIdxId = entry.getKey(); + long originIdxId = entry.getValue(); + // get index from catalog, not from 'partitionIdToRollupIndex'. + // because if this alter job is recovered from edit log, index in 'partitionIndexMap' + // is not the same object in catalog. So modification on that index can not reflect to the index + // in catalog. + MaterializedIndex shadowIdx = partition.getIndex(shadowIdxId); + Preconditions.checkNotNull(shadowIdx, shadowIdxId); + MaterializedIndex droppedIdx = null; + if (originIdxId == partition.getBaseIndex().getId()) { + droppedIdx = partition.getBaseIndex(); + } else { + droppedIdx = partition.deleteRollupIndex(originIdxId); + } + Preconditions.checkNotNull(droppedIdx, originIdxId + " vs. 
" + shadowIdxId); + + // set replica state + for (Tablet tablet : shadowIdx.getTablets()) { + for (Replica replica : tablet.getReplicas()) { + replica.setState(ReplicaState.NORMAL); + } + } + + partition.visualiseShadowIndex(shadowIdxId, originIdxId == partition.getBaseIndex().getId()); + + // delete origin replicas + for (Tablet originTablet : droppedIdx.getTablets()) { + Catalog.getCurrentInvertedIndex().deleteTablet(originTablet.getId()); + } + } + } + + // update index schema info of each index + for (Map.Entry entry : indexIdMap.entrySet()) { + long shadowIdxId = entry.getKey(); + long originIdxId = entry.getValue(); + String shadowIdxName = tbl.getIndexNameById(shadowIdxId); + String originIdxName = tbl.getIndexNameById(originIdxId); + tbl.deleteIndexInfo(originIdxName); + // the shadow index name is '__doris_shadow_xxx', rename it to origin name 'xxx' + // this will also remove the prefix of columns + tbl.renameIndexForSchemaChange(shadowIdxName, originIdxName); + tbl.renameColumnNamePrefix(shadowIdxId); + + if (originIdxId == tbl.getBaseIndexId()) { + // set base index + tbl.setBaseIndexId(shadowIdxId); + } + } + // rebuild table's full schema + tbl.rebuildFullSchema(); + + // update bloom filter + if (hasBfChange) { + tbl.setBloomFilterInfo(bfColumns, bfFpp); + } + + tbl.setState(OlapTableState.NORMAL); + } + + /* + * cancelImpl() can be called any time any place. + * We need to clean any possible residual of this job. + */ + @Override + protected synchronized boolean cancelImpl(String errMsg) { + if (jobState.isFinalState()) { + return false; + } + + cancelInternal(); + + this.errMsg = errMsg; + this.finishedTimeMs = System.currentTimeMillis(); + LOG.info("cancel {} job {}, err: {}", this.type, jobId, errMsg); + Catalog.getCurrentCatalog().getEditLog().logAlterJob(this); + return true; + } + + private void cancelInternal() { + // clear tasks if has + AgentTaskQueue.removeBatchTask(schemaChangeBatchTask, TTaskType.ALTER); + // remove all shadow indexes, and set state to NORMAL + TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db != null) { + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl != null) { + for (long partitionId : partitionIndexMap.rowKeySet()) { + Partition partition = tbl.getPartition(partitionId); + Preconditions.checkNotNull(partition, partitionId); + + Map shadowIndexMap = partitionIndexMap.row(partitionId); + for (Map.Entry entry : shadowIndexMap.entrySet()) { + MaterializedIndex shadowIdx = entry.getValue(); + for (Tablet shadowTablet : shadowIdx.getTablets()) { + invertedIndex.deleteTablet(shadowTablet.getId()); + } + partition.deleteRollupIndex(shadowIdx.getId()); + } + } + for (String shadowIndexName : indexIdToName.values()) { + tbl.deleteIndexInfo(shadowIndexName); + } + tbl.setState(OlapTableState.NORMAL); + } + } finally { + db.writeUnlock(); + } + } + + jobState = JobState.CANCELLED; + } + + // Check whether transactions of the given database which txnId is less than 'watershedTxnId' are finished. + protected boolean isPreviousLoadFinished() { + return Catalog.getCurrentGlobalTransactionMgr().isPreviousTransactionsFinished(watershedTxnId, dbId); + } + + public static SchemaChangeJobV2 read(DataInput in) throws IOException { + SchemaChangeJobV2 schemaChangeJob = new SchemaChangeJobV2(); + schemaChangeJob.readFields(in); + return schemaChangeJob; + } + + /* + * Replay job in PENDING state. 
+ * Should replay all changes before this job's state transfer to PENDING. + * These changes should be same as changes in SchemaChangeHandler.createJob() + */ + private void replayPending(SchemaChangeJobV2 replayedJob) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + // database may be dropped before replaying this log. just return + return; + } + + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + // table may be dropped before replaying this log. just return + return; + } + + TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); + for (Cell cell : partitionIndexMap.cellSet()) { + long partitionId = cell.getRowKey(); + long shadowIndexId = cell.getColumnKey(); + MaterializedIndex shadowIndex = cell.getValue(); + + TStorageMedium medium = tbl.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); + TabletMeta shadowTabletMeta = new TabletMeta(dbId, tableId, partitionId, shadowIndexId, + indexSchemaVersionAndHashMap.get(shadowIndexId).second, medium); + + for (Tablet shadownTablet : shadowIndex.getTablets()) { + invertedIndex.addTablet(shadownTablet.getId(), shadowTabletMeta); + for (Replica shadowReplica : shadownTablet.getReplicas()) { + invertedIndex.addReplica(shadownTablet.getId(), shadowReplica); + } + } + } + + // set table state + tbl.setState(OlapTableState.SCHEMA_CHANGE); + } finally { + db.writeUnlock(); + } + + this.watershedTxnId = replayedJob.watershedTxnId; + jobState = JobState.WAITING_TXN; + LOG.info("replay pending schema change job: {}", jobId); + } + + /* + * Replay job in WAITING_TXN state. + * Should replay all changes in runPendingJob() + */ + private void replayWaitingTxn(SchemaChangeJobV2 replayedJob) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db == null) { + // database may be dropped before replaying this log. just return + return; + } + + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl == null) { + // table may be dropped before replaying this log. just return + return; + } + addShadowIndexToCatalog(tbl); + } finally { + db.writeUnlock(); + } + + // should still be in WAITING_TXN state, so that the alter tasks will be resend again + this.jobState = JobState.WAITING_TXN; + this.watershedTxnId = replayedJob.watershedTxnId; + LOG.info("replay waiting txn schema change job: {}", jobId); + } + + /* + * Replay job in FINISHED state. + * Should replay all changes in runRuningJob() + */ + private void replayFinished(SchemaChangeJobV2 replayedJob) { + Database db = Catalog.getCurrentCatalog().getDb(dbId); + if (db != null) { + db.writeLock(); + try { + OlapTable tbl = (OlapTable) db.getTable(tableId); + if (tbl != null) { + onFinished(tbl); + } + } finally { + db.writeUnlock(); + } + } + jobState = JobState.FINISHED; + this.finishedTimeMs = replayedJob.finishedTimeMs; + LOG.info("replay finished schema change job: {}", jobId); + } + + /* + * Replay job in CANCELLED state. 
+ */ + private void replayCancelled(SchemaChangeJobV2 replayedJob) { + cancelInternal(); + this.jobState = JobState.CANCELLED; + this.finishedTimeMs = replayedJob.finishedTimeMs; + this.errMsg = replayedJob.errMsg; + LOG.info("replay cancelled schema change job: {}", jobId); + } + + @Override + public void replay(AlterJobV2 replayedJob) { + SchemaChangeJobV2 replayedSchemaChangeJob = (SchemaChangeJobV2) replayedJob; + switch (replayedJob.jobState) { + case PENDING: + replayPending(replayedSchemaChangeJob); + break; + case WAITING_TXN: + replayWaitingTxn(replayedSchemaChangeJob); + break; + case FINISHED: + replayFinished(replayedSchemaChangeJob); + break; + case CANCELLED: + replayCancelled(replayedSchemaChangeJob); + break; + default: + break; + } + } + + @Override + protected void getInfo(List> infos) { + // calc progress first. all index share the same process + String progress = "N/A"; + if (jobState == JobState.RUNNING && schemaChangeBatchTask.getTaskNum() > 0) { + progress = schemaChangeBatchTask.getFinishedTaskNum() + "/" + schemaChangeBatchTask.getTaskNum(); + } + + // one line for one shadow index + for (Map.Entry entry : indexIdMap.entrySet()) { + long shadowIndexId = entry.getKey(); + List info = Lists.newArrayList(); + info.add(jobId); + info.add(tableName); + info.add(TimeUtils.longToTimeString(createTimeMs)); + info.add(TimeUtils.longToTimeString(finishedTimeMs)); + // only show the origin index name + info.add(indexIdToName.get(shadowIndexId).substring(SchemaChangeHandler.SHADOW_NAME_PRFIX.length())); + info.add(shadowIndexId); + info.add(entry.getValue()); + info.add(indexSchemaVersionAndHashMap.get(shadowIndexId).toString()); + info.add(watershedTxnId); + info.add(jobState.name()); + info.add(errMsg); + info.add(progress); + info.add(timeoutMs / 1000); + infos.add(info); + } + } + + public List> getUnfinishedTasks(int limit) { + List> taskInfos = Lists.newArrayList(); + if (jobState == JobState.RUNNING) { + List tasks = schemaChangeBatchTask.getUnfinishedTasks(limit); + for (AgentTask agentTask : tasks) { + AlterReplicaTask alterTask = (AlterReplicaTask) agentTask; + List info = Lists.newArrayList(); + info.add(String.valueOf(alterTask.getBackendId())); + info.add(String.valueOf(alterTask.getBaseTabletId())); + info.add(String.valueOf(alterTask.getSignature())); + taskInfos.add(info); + } + } + return taskInfos; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + out.writeInt(partitionIndexTabletMap.rowKeySet().size()); + for (Long partitionId : partitionIndexTabletMap.rowKeySet()) { + out.writeLong(partitionId); + Map> indexTabletMap = partitionIndexTabletMap.row(partitionId); + out.writeInt(indexTabletMap.size()); + for (Long shadowIndexId : indexTabletMap.keySet()) { + out.writeLong(shadowIndexId); + // tablet id map + Map tabletMap = indexTabletMap.get(shadowIndexId); + out.writeInt(tabletMap.size()); + for (Map.Entry entry : tabletMap.entrySet()) { + out.writeLong(entry.getKey()); + out.writeLong(entry.getValue()); + } + // shadow index + MaterializedIndex shadowIndex = partitionIndexMap.get(partitionId, shadowIndexId); + shadowIndex.write(out); + } + } + + // shadow index info + out.writeInt(indexIdMap.size()); + for (Map.Entry entry : indexIdMap.entrySet()) { + long shadowIndexId = entry.getKey(); + out.writeLong(shadowIndexId); + // index id map + out.writeLong(entry.getValue()); + // index name + Text.writeString(out, indexIdToName.get(shadowIndexId)); + // index schema + 
out.writeInt(indexSchemaMap.get(shadowIndexId).size()); + for (Column column : indexSchemaMap.get(shadowIndexId)) { + column.write(out); + } + // index schema version and hash + out.writeInt(indexSchemaVersionAndHashMap.get(shadowIndexId).first); + out.writeInt(indexSchemaVersionAndHashMap.get(shadowIndexId).second); + // short key count + out.writeShort(indexShortKeyMap.get(shadowIndexId)); + } + + // bloom filter + out.writeBoolean(hasBfChange); + if (hasBfChange) { + out.writeInt(bfColumns.size()); + for (String bfCol : bfColumns) { + Text.writeString(out, bfCol); + } + out.writeDouble(bfFpp); + } + + out.writeLong(watershedTxnId); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + int partitionNum = in.readInt(); + for (int i = 0; i < partitionNum; i++) { + long partitionId = in.readLong(); + int indexNum = in.readInt(); + for (int j = 0; j < indexNum; j++) { + long shadowIndexId = in.readLong(); + int tabletNum = in.readInt(); + Map tabletMap = Maps.newHashMapWithExpectedSize(tabletNum); + for (int k = 0; k < tabletNum; k++) { + long shadowTabletId = in.readLong(); + long originTabletId = in.readLong(); + tabletMap.put(shadowTabletId, originTabletId); + } + partitionIndexTabletMap.put(partitionId, shadowIndexId, tabletMap); + // shadow index + MaterializedIndex shadowIndex = MaterializedIndex.read(in); + partitionIndexMap.put(partitionId, shadowIndexId, shadowIndex); + } + } + + // shadow index info + int indexNum = in.readInt(); + for (int i = 0; i < indexNum; i++) { + long shadowIndexId = in.readLong(); + long originIndexId = in.readLong(); + String indexName = Text.readString(in); + // index schema + int colNum = in.readInt(); + List schema = Lists.newArrayListWithCapacity(colNum); + for (int j = 0; j < colNum; j++) { + schema.add(Column.read(in)); + } + int schemaVersion = in.readInt(); + int schemaVersionHash = in.readInt(); + Pair schemaVersionAndHash = Pair.create(schemaVersion, schemaVersionHash); + short shortKeyCount = in.readShort(); + + indexIdMap.put(shadowIndexId, originIndexId); + indexIdToName.put(shadowIndexId, indexName); + indexSchemaMap.put(shadowIndexId, schema); + indexSchemaVersionAndHashMap.put(shadowIndexId, schemaVersionAndHash); + indexShortKeyMap.put(shadowIndexId, shortKeyCount); + } + + // bloom filter + hasBfChange = in.readBoolean(); + if (hasBfChange) { + int bfNum = in.readInt(); + bfColumns = Sets.newHashSetWithExpectedSize(bfNum); + for (int i = 0; i < bfNum; i++) { + bfColumns.add(Text.readString(in)); + } + bfFpp = in.readDouble(); + } + + watershedTxnId = in.readLong(); + } +} diff --git a/fe/src/main/java/org/apache/doris/alter/SystemHandler.java b/fe/src/main/java/org/apache/doris/alter/SystemHandler.java index 470488e628a79b..ba237ec510c4a7 100644 --- a/fe/src/main/java/org/apache/doris/alter/SystemHandler.java +++ b/fe/src/main/java/org/apache/doris/alter/SystemHandler.java @@ -38,8 +38,8 @@ import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; -import org.apache.doris.common.Config; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.Pair; diff --git a/fe/src/main/java/org/apache/doris/analysis/AddRollupClause.java b/fe/src/main/java/org/apache/doris/analysis/AddRollupClause.java index e674d32df0c4bb..454cf31cb72d1d 100644 --- 
a/fe/src/main/java/org/apache/doris/analysis/AddRollupClause.java +++ b/fe/src/main/java/org/apache/doris/analysis/AddRollupClause.java @@ -18,10 +18,12 @@ package org.apache.doris.analysis; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.io.Text; +import org.apache.doris.common.util.PropertyAnalyzer; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -43,6 +45,8 @@ public class AddRollupClause extends AlterClause { private List columnNames; private String baseRollupName; private List dupKeys; + private long timeoutSecond; + private Map properties; public AddRollupClause() { @@ -66,6 +70,10 @@ public String getBaseRollupName() { return baseRollupName; } + public long getTimeoutSecond() { + return timeoutSecond; + } + public AddRollupClause(String rollupName, List columnNames, List dupKeys, String baseRollupName, Map properties) { @@ -93,6 +101,8 @@ public void analyze(Analyzer analyzer) throws AnalysisException { } } baseRollupName = Strings.emptyToNull(baseRollupName); + + timeoutSecond = PropertyAnalyzer.analyzeTimeout(properties, Config.alter_table_timeout_second); } @Override diff --git a/fe/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/src/main/java/org/apache/doris/analysis/Analyzer.java index 5d3ec0d05a4894..2b3bae98e8bed0 100644 --- a/fe/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -32,6 +32,7 @@ import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.IdGenerator; +import org.apache.doris.common.util.TimeUtils; import org.apache.doris.planner.PlanNode; import org.apache.doris.qe.ConnectContext; import org.apache.doris.rewrite.BetweenToCompoundRule; @@ -138,6 +139,8 @@ public class Analyzer { private TupleId visibleSemiJoinedTupleId_ = null; // for some situation that udf is not allowed. 
private boolean isUDFAllowed = true; + // timezone specified for some operation, such as broker load + private String timezone = TimeUtils.DEFAULT_TIME_ZONE; public void setIsSubquery() { isSubquery = true; @@ -150,6 +153,8 @@ public void setIsSubquery() { public void setUDFAllowed(boolean val) { this.isUDFAllowed = val; } public boolean isUDFAllowed() { return this.isUDFAllowed; } + public void setTimezone(String timezone) { this.timezone = timezone; } + public String getTimezone() { return timezone; } // state shared between all objects of an Analyzer tree // TODO: Many maps here contain properties about tuples, e.g., whether diff --git a/fe/src/main/java/org/apache/doris/analysis/DataDescription.java b/fe/src/main/java/org/apache/doris/analysis/DataDescription.java index cb6c9d1dff0423..ff8617706017c8 100644 --- a/fe/src/main/java/org/apache/doris/analysis/DataDescription.java +++ b/fe/src/main/java/org/apache/doris/analysis/DataDescription.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.Pair; @@ -35,6 +36,7 @@ import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -65,34 +67,44 @@ public class DataDescription { private static final Logger LOG = LogManager.getLogger(DataDescription.class); public static String FUNCTION_HASH_HLL = "hll_hash"; - private static final List hadoopSupportFunctionName = Arrays.asList("strftime", "time_format", + private static final List HADOOP_SUPPORT_FUNCTION_NAMES = Arrays.asList( + "strftime", + "time_format", "alignment_timestamp", - "default_value", "md5sum", - "replace_value", "now", - "hll_hash"); + "default_value", + "md5sum", + "replace_value", + "now", + "hll_hash", + "substitute"); + private final String tableName; private final List partitionNames; private final List filePaths; - // the column name list of data desc - private final List columns; private final ColumnSeparator columnSeparator; private final String fileFormat; - private final List columnsFromPath; private final boolean isNegative; + + // column names of source files + private List fileFieldNames; + // column names in the path + private final List columnsFromPath; + // save column mapping in SET(xxx = xxx) clause private final List columnMappingList; // Used for mini load private TNetworkAddress beAddr; private String lineDelimiter; - // This param only include the hadoop function which need to be checked in the future. - // For hadoop load, this param is also used to persistence. - private Map>> columnToHadoopFunction; - /** - * Merged from columns and columnMappingList - * ImportColumnDesc: column name to expr or null - **/ + // Merged from fileFieldNames, columnsFromPath and columnMappingList + // ImportColumnDesc: column name to (expr or null) private List parsedColumnExprList = Lists.newArrayList(); + /* + * This param only include the hadoop function which need to be checked in the future. + * For hadoop load, this param is also used to persistence. 
+ * The function in this param is copied from 'parsedColumnExprList' + */ + private Map>> columnToHadoopFunction = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); private boolean isHadoopLoad = false; @@ -119,7 +131,7 @@ public DataDescription(String tableName, this.tableName = tableName; this.partitionNames = partitionNames; this.filePaths = filePaths; - this.columns = columns; + this.fileFieldNames = columns; this.columnSeparator = columnSeparator; this.fileFormat = fileFormat; this.columnsFromPath = columnsFromPath; @@ -139,12 +151,11 @@ public List getFilePaths() { return filePaths; } - // only return the column names of SlotRef in columns - public List getColumnNames() { - if (columns == null || columns.isEmpty()) { + public List getFileFieldNames() { + if (fileFieldNames == null || fileFieldNames.isEmpty()) { return null; } - return columns; + return fileFieldNames; } public String getFileFormat() { @@ -182,14 +193,11 @@ public void setLineDelimiter(String lineDelimiter) { this.lineDelimiter = lineDelimiter; } + @Deprecated public void addColumnMapping(String functionName, Pair> pair) { - if (Strings.isNullOrEmpty(functionName) || pair == null) { return; } - if (columnToHadoopFunction == null) { - columnToHadoopFunction = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - } columnToHadoopFunction.put(functionName, pair); } @@ -209,49 +217,63 @@ public boolean isHadoopLoad() { return isHadoopLoad; } - /** - * Analyze parsedExprMap and columnToHadoopFunction from columns and columnMappingList - * Example: columns (col1, tmp_col2, tmp_col3) set (col2=tmp_col2+1, col3=strftime("%Y-%m-%d %H:%M:%S", tmp_col3)) - * Result: parsedExprMap = {"col1": null, "tmp_col2": null, "tmp_col3": null, - * "col2": "tmp_col2+1", "col3": "strftime("%Y-%m-%d %H:%M:%S", tmp_col3)"} + /* + * Analyze parsedExprMap and columnToHadoopFunction from columns, columns from path and columnMappingList + * Example: + * columns (col1, tmp_col2, tmp_col3) + * columns from path as (col4, col5) + * set (col2=tmp_col2+1, col3=strftime("%Y-%m-%d %H:%M:%S", tmp_col3)) + * + * Result: + * parsedExprMap = {"col1": null, "tmp_col2": null, "tmp_col3": null, "col4": null, "col5": null, + * "col2": "tmp_col2+1", "col3": "strftime("%Y-%m-%d %H:%M:%S", tmp_col3)"} + * columnToHadoopFunction = {"col3": "strftime("%Y-%m-%d %H:%M:%S", tmp_col3)"} */ private void analyzeColumns() throws AnalysisException { - if (columns == null && columnsFromPath != null) { + if ((fileFieldNames == null || fileFieldNames.isEmpty()) && (columnsFromPath != null && !columnsFromPath.isEmpty())) { throw new AnalysisException("Can not specify columns_from_path without column_list"); } - List columnList = Lists.newArrayList(); - if (columns != null) { - columnList.addAll(columns); - if (columnsFromPath != null) { - columnList.addAll(columnsFromPath); - } - } - if (columnList.isEmpty()) { - return; - } - // merge columns exprs from columns and columnMappingList - // used to check duplicated column name + + // used to check duplicated column name in COLUMNS and COLUMNS FROM PATH Set columnNames = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - // Order of parsedColumnExprList: columns(fileFieldNames) + columnsFromPath - // Step1: analyze columns - for (String columnName : columnList) { - if (!columnNames.add(columnName)) { - throw new AnalysisException("Duplicate column : " + columnName); + + // merge columns exprs from columns, columns from path and columnMappingList + // 1. 
analyze columns + if (fileFieldNames != null && !fileFieldNames.isEmpty()) { + for (String columnName : fileFieldNames) { + if (!columnNames.add(columnName)) { + throw new AnalysisException("Duplicate column: " + columnName); + } + ImportColumnDesc importColumnDesc = new ImportColumnDesc(columnName, null); + parsedColumnExprList.add(importColumnDesc); } - ImportColumnDesc importColumnDesc = new ImportColumnDesc(columnName, null); - parsedColumnExprList.add(importColumnDesc); } + // 2. analyze columns from path + if (columnsFromPath != null && !columnsFromPath.isEmpty()) { + if (isHadoopLoad) { + throw new AnalysisException("Hadoop load does not support specifying columns from path"); + } + for (String columnName : columnsFromPath) { + if (!columnNames.add(columnName)) { + throw new AnalysisException("Duplicate column: " + columnName); + } + ImportColumnDesc importColumnDesc = new ImportColumnDesc(columnName, null); + parsedColumnExprList.add(importColumnDesc); + } + } + // 3: analyze column mapping if (columnMappingList == null || columnMappingList.isEmpty()) { return; } + + // used to check duplicated column name in SET clause + Set columnMappingNames = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); // Step2: analyze column mapping // the column expr only support the SlotRef or eq binary predicate which's child(0) must be a SloRef. // the duplicate column name of SloRef is forbidden. - columnToHadoopFunction = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); for (Expr columnExpr : columnMappingList) { - if (!(columnExpr instanceof BinaryPredicate)) { throw new AnalysisException("Mapping function expr only support the column or eq binary predicate. " + "Expr: " + columnExpr.toSql()); @@ -267,7 +289,7 @@ private void analyzeColumns() throws AnalysisException { + "The mapping column error. 
column: " + child0.toSql()); } String column = ((SlotRef) child0).getColumnName(); - if (!columnNames.add(column)) { + if (!columnMappingNames.add(column)) { throw new AnalysisException("Duplicate column mapping: " + column); } // hadoop load only supports the FunctionCallExpr @@ -288,11 +310,12 @@ private void analyzeColumnToHadoopFunction(String columnName, Expr child1) throw Preconditions.checkState(child1 instanceof FunctionCallExpr); FunctionCallExpr functionCallExpr = (FunctionCallExpr) child1; String functionName = functionCallExpr.getFnName().getFunction(); - if (!hadoopSupportFunctionName.contains(functionName.toLowerCase())) { + if (!HADOOP_SUPPORT_FUNCTION_NAMES.contains(functionName.toLowerCase())) { return; } List paramExprs = functionCallExpr.getParams().exprs(); List args = Lists.newArrayList(); + for (Expr paramExpr : paramExprs) { if (paramExpr instanceof SlotRef) { SlotRef slot = (SlotRef) paramExpr; @@ -303,10 +326,8 @@ private void analyzeColumnToHadoopFunction(String columnName, Expr child1) throw } else if (paramExpr instanceof NullLiteral) { args.add(null); } else { - if (isHadoopLoad) { - throw new AnalysisException("Mapping function args error, arg: " + paramExpr.toSql()); - } - continue; + // hadoop function only support slot, string and null parameters + throw new AnalysisException("Mapping function args error, arg: " + paramExpr.toSql()); } } @@ -333,6 +354,8 @@ public static void validateMappingFunction(String functionName, List arg validateHllHash(args, columnNameMap); } else if (functionName.equalsIgnoreCase("now")) { validateNowFunction(mappingColumn); + } else if (functionName.equalsIgnoreCase("substitute")) { + validateSubstituteFunction(args, columnNameMap); } else { if (isHadoopLoad) { throw new AnalysisException("Unknown function: " + functionName); @@ -340,6 +363,22 @@ public static void validateMappingFunction(String functionName, List arg } } + // eg: k2 = substitute(k1) + // this is used for creating derivative column from existing column + private static void validateSubstituteFunction(List args, Map columnNameMap) + throws AnalysisException { + if (args.size() != 1) { + throw new AnalysisException("Should has only one argument: " + args); + } + + String argColumn = args.get(0); + if (!columnNameMap.containsKey(argColumn)) { + throw new AnalysisException("Column is not in sources, column: " + argColumn); + } + + args.set(0, columnNameMap.get(argColumn)); + } + private static void validateAlignmentTimestamp(List args, Map columnNameMap) throws AnalysisException { if (args.size() != 2) { @@ -503,6 +542,49 @@ public void analyzeWithoutCheckPriv() throws AnalysisException { analyzeColumns(); } + /* + * If user does not specify COLUMNS in load stmt, we fill it here. + * eg1: + * both COLUMNS and SET clause is empty. 
after fill: + * (k1,k2,k3) + * + * eg2: + * COLUMNS is empty, SET is not empty + * SET ( k2 = default_value("2") ) + * after fill: + * (k1, k2, k3) + * SET ( k2 = default_value("2") ) + * + * eg3: + * COLUMNS is empty, SET is not empty + * SET (k2 = strftime("%Y-%m-%d %H:%M:%S", k2) + * after fill: + * (k1,k2,k3) + * SET (k2 = strftime("%Y-%m-%d %H:%M:%S", k2) + * + */ + public void fillColumnInfoIfNotSpecified(List baseSchema) throws DdlException { + if (fileFieldNames != null && !fileFieldNames.isEmpty()) { + return; + } + + fileFieldNames = Lists.newArrayList(); + + Set mappingColNames = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); + for (ImportColumnDesc importColumnDesc : parsedColumnExprList) { + mappingColNames.add(importColumnDesc.getColumnName()); + } + + for (Column column : baseSchema) { + if (!mappingColNames.contains(column.getName())) { + parsedColumnExprList.add(new ImportColumnDesc(column.getName(), null)); + } + fileFieldNames.add(column.getName()); + } + + LOG.debug("after fill column info. columns: {}, parsed column exprs: {}", fileFieldNames, parsedColumnExprList); + } + public String toSql() { StringBuilder sb = new StringBuilder(); sb.append("DATA INFILE ("); @@ -527,9 +609,9 @@ public String apply(String s) { sb.append(" COLUMNS FROM PATH AS ("); Joiner.on(", ").appendTo(sb, columnsFromPath).append(")"); } - if (columns != null && !columns.isEmpty()) { + if (fileFieldNames != null && !fileFieldNames.isEmpty()) { sb.append(" ("); - Joiner.on(", ").appendTo(sb, columns).append(")"); + Joiner.on(", ").appendTo(sb, fileFieldNames).append(")"); } if (columnMappingList != null && !columnMappingList.isEmpty()) { sb.append(" SET ("); diff --git a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java index ff942473991f5f..9f3e4f0b31ae3a 100644 --- a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java @@ -17,6 +17,7 @@ package org.apache.doris.analysis; +import org.apache.doris.alter.SchemaChangeHandler; import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.BrokerTable; import org.apache.doris.catalog.Catalog; @@ -30,6 +31,7 @@ import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.UserException; @@ -42,6 +44,7 @@ import org.apache.doris.rewrite.ExprRewriter; import org.apache.doris.service.FrontendOptions; import org.apache.doris.thrift.TUniqueId; +import org.apache.doris.transaction.TransactionState; import org.apache.doris.transaction.TransactionState.LoadJobSourceType; import com.google.common.base.Joiner; @@ -308,7 +311,7 @@ private void analyzeTargetTable(Analyzer analyzer) throws AnalysisException { // need a descriptor DescriptorTable descTable = analyzer.getDescTbl(); olapTuple = descTable.createTupleDescriptor(); - for (Column col : olapTable.getBaseSchema()) { + for (Column col : olapTable.getFullSchema()) { SlotDescriptor slotDesc = descTable.addSlotDescriptor(olapTuple); slotDesc.setIsMaterialized(true); slotDesc.setType(col.getType()); @@ -356,10 +359,12 @@ private void checkColumnCoverage(Set mentionedCols, List baseCol } } - public void analyzeSubquery(Analyzer analyzer) throws UserException { + private void analyzeSubquery(Analyzer analyzer) throws UserException { // 
Analyze columns mentioned in the statement. Set mentionedColumns = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); if (targetColumnNames == null) { + // the mentioned columns are columns which are visible to user, so here we use + // getBaseSchema(), not getFullSchema() for (Column col : targetTable.getBaseSchema()) { mentionedColumns.add(col.getName()); targetColumns.add(col); @@ -377,6 +382,34 @@ public void analyzeSubquery(Analyzer analyzer) throws UserException { } } + /* + * When doing schema change, there may be some shadow columns. we should add + * them to the end of targetColumns. And use 'origColIdxsForShadowCols' to save + * the index of column in 'targetColumns' which the shadow column related to. + * eg: origin targetColumns: (A,B,C), shadow column: __doris_shadow_B after + * processing, targetColumns: (A, B, C, __doris_shadow_B), and + * origColIdxsForShadowCols has 1 element: "1", which is the index of column B + * in targetColumns. + * + * Rule A: If the column which the shadow column related to is not mentioned, + * then do not add the shadow column to targetColumns. They will be filled by + * null or default value when loading. + */ + List origColIdxsForShadowCols = Lists.newArrayList(); + for (Column column : targetTable.getFullSchema()) { + if (column.isNameWithPrefix(SchemaChangeHandler.SHADOW_NAME_PRFIX)) { + String origName = Column.removeNamePrefix(column.getName()); + for (int i = 0; i < targetColumns.size(); i++) { + if (targetColumns.get(i).nameEquals(origName, false)) { + // Rule A + origColIdxsForShadowCols.add(i); + targetColumns.add(column); + break; + } + } + } + } + // parse query statement queryStmt.setFromInsert(true); queryStmt.analyze(analyzer); @@ -388,22 +421,46 @@ public void analyzeSubquery(Analyzer analyzer) throws UserException { // Check if all columns mentioned is enough checkColumnCoverage(mentionedColumns, targetTable.getBaseSchema()) ; + + // handle VALUES() or SELECT constant list if (queryStmt instanceof SelectStmt && ((SelectStmt) queryStmt).getTableRefs().isEmpty()) { SelectStmt selectStmt = (SelectStmt) queryStmt; if (selectStmt.getValueList() != null) { + // INSERT INTO VALUES(...) List> rows = selectStmt.getValueList().getRows(); for (int rowIdx = 0; rowIdx < rows.size(); ++rowIdx) { - analyzeRow(analyzer, targetColumns, rows.get(rowIdx), rowIdx + 1); + analyzeRow(analyzer, targetColumns, rows, rowIdx, origColIdxsForShadowCols); } - for (int i = 0; i < selectStmt.getResultExprs().size(); ++i) { - selectStmt.getResultExprs().set(i, selectStmt.getValueList().getFirstRow().get(i)); - selectStmt.getBaseTblResultExprs().set(i, selectStmt.getValueList().getFirstRow().get(i)); + + // clear these 2 structures, rebuild them using VALUES exprs + selectStmt.getResultExprs().clear(); + selectStmt.getBaseTblResultExprs().clear(); + + for (int i = 0; i < selectStmt.getValueList().getFirstRow().size(); ++i) { + selectStmt.getResultExprs().add(selectStmt.getValueList().getFirstRow().get(i)); + selectStmt.getBaseTblResultExprs().add(selectStmt.getValueList().getFirstRow().get(i)); } } else { - analyzeRow(analyzer, targetColumns, selectStmt.getResultExprs(), 1); + // INSERT INTO SELECT 1,2,3 ... 
+ List> rows = Lists.newArrayList(); + rows.add(selectStmt.getResultExprs()); + analyzeRow(analyzer, targetColumns, rows, 0, origColIdxsForShadowCols); + // rows may be changed in analyzeRow(), so rebuild the result exprs + selectStmt.getResultExprs().clear(); + for (Expr expr : rows.get(0)) { + selectStmt.getResultExprs().add(expr); + } } isStreaming = true; } else { + // INSERT INTO SELECT ... FROM tbl + if (!origColIdxsForShadowCols.isEmpty()) { + // extend the result expr by duplicating the related exprs + for (Integer idx : origColIdxsForShadowCols) { + queryStmt.getResultExprs().add(queryStmt.getResultExprs().get(idx)); + } + } + // check compatibility for (int i = 0; i < targetColumns.size(); ++i) { Column column = targetColumns.get(i); if (column.getType().isHllType()) { @@ -417,14 +474,66 @@ public void analyzeSubquery(Analyzer analyzer) throws UserException { } } } + + // expand baseTblResultExprs and colLabels in QueryStmt + if (!origColIdxsForShadowCols.isEmpty()) { + if (queryStmt.getResultExprs().size() != queryStmt.getBaseTblResultExprs().size()) { + for (Integer idx : origColIdxsForShadowCols) { + queryStmt.getBaseTblResultExprs().add(queryStmt.getBaseTblResultExprs().get(idx)); + } + } + + if (queryStmt.getResultExprs().size() != queryStmt.getColLabels().size()) { + for (Integer idx : origColIdxsForShadowCols) { + queryStmt.getColLabels().add(queryStmt.getColLabels().get(idx)); + } + } + } + + if (LOG.isDebugEnabled()) { + for (Expr expr : queryStmt.getResultExprs()) { + LOG.debug("final result expr: {}, {}", expr, System.identityHashCode(expr)); + } + for (Expr expr : queryStmt.getBaseTblResultExprs()) { + LOG.debug("final base table result expr: {}, {}", expr, System.identityHashCode(expr)); + } + for (String colLabel : queryStmt.getColLabels()) { + LOG.debug("final col label: {}", colLabel); + } + } } - private void analyzeRow(Analyzer analyzer, List targetColumns, ArrayList row, int rowIdx) - throws AnalysisException { + private void analyzeRow(Analyzer analyzer, List targetColumns, List> rows, + int rowIdx, List origColIdxsForShadowCols) throws AnalysisException { // 1. check number of fields if equal with first row - if (row.size() != targetColumns.size()) { - throw new AnalysisException("Column count doesn't match value count at row " + rowIdx); + // targetColumns contains some shadow columns, which is added by system, + // so we should minus this + if (rows.get(rowIdx).size() != targetColumns.size() - origColIdxsForShadowCols.size()) { + throw new AnalysisException("Column count doesn't match value count at row " + (rowIdx + 1)); } + + ArrayList row = rows.get(rowIdx); + if (!origColIdxsForShadowCols.isEmpty()) { + /* + * we should extends the row for shadow columns. 
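A tiny, runnable sketch of the row extension this comment describes: for every index recorded in origColIdxsForShadowCols, the expression at that index is appended again so the row lines up with the extended target column list. extendRow and ShadowColumnRowSketch are illustrative names, not part of the patch:

import java.util.*;

public class ShadowColumnRowSketch {
    // targetColumns (A, B, C, __doris_shadow_B) with shadow index list [1]:
    // (expr1, expr2, expr3) becomes (expr1, expr2, expr3, expr2).
    static <T> List<T> extendRow(List<T> row, List<Integer> origColIdxsForShadowCols) {
        List<T> extended = new ArrayList<>(row);
        for (Integer idx : origColIdxsForShadowCols) {
            extended.add(extended.get(idx));   // reuse the expr of the original column
        }
        return extended;
    }

    public static void main(String[] args) {
        System.out.println(extendRow(Arrays.asList("expr1", "expr2", "expr3"),
                                     Collections.singletonList(1)));
        // [expr1, expr2, expr3, expr2]
    }
}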
+ * eg: + * the origin row has exprs: (expr1, expr2, expr3), and targetColumns is (A, B, C, __doris_shadow_b) + * after processing, extentedRow is (expr1, expr2, expr3, expr2) + */ + ArrayList extentedRow = Lists.newArrayList(); + for (Expr expr : row) { + extentedRow.add(expr); + } + + for (Integer idx : origColIdxsForShadowCols) { + extentedRow.add(extentedRow.get(idx)); + } + + row = extentedRow; + rows.set(rowIdx, row); + } + + // check the compatibility of expr in row and column in targetColumns for (int i = 0; i < row.size(); ++i) { Expr expr = row.get(i); Column col = targetColumns.get(i); @@ -545,7 +654,7 @@ public void prepareExpressions() throws UserException { exprByName.put(col.getName(), expr); } // reorder resultExprs in table column order - for (Column col : targetTable.getBaseSchema()) { + for (Column col : targetTable.getFullSchema()) { if (exprByName.containsKey(col.getName())) { resultExprs.add(exprByName.get(col.getName())); } else { @@ -595,6 +704,12 @@ public DataSink createDataSink() throws AnalysisException { public void finalize() throws UserException { if (targetTable instanceof OlapTable) { ((OlapTableSink) dataSink).finalize(); + // add table indexes to transaction state + TransactionState txnState = Catalog.getCurrentGlobalTransactionMgr().getTransactionState(transactionId); + if (txnState == null) { + throw new DdlException("txn does not exist: " + transactionId); + } + txnState.addTableIndexes((OlapTable) targetTable); } } diff --git a/fe/src/main/java/org/apache/doris/analysis/LoadStmt.java b/fe/src/main/java/org/apache/doris/analysis/LoadStmt.java index 99a2923549bc1a..957e0dfff87661 100644 --- a/fe/src/main/java/org/apache/doris/analysis/LoadStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/LoadStmt.java @@ -21,6 +21,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.UserException; import org.apache.doris.common.util.PrintableMap; +import org.apache.doris.common.util.TimeUtils; import org.apache.doris.load.Load; import org.apache.doris.qe.ConnectContext; @@ -60,6 +61,7 @@ public class LoadStmt extends DdlStmt { public static final String CLUSTER_PROPERTY = "cluster"; private static final String VERSION = "version"; public static final String STRICT_MODE = "strict_mode"; + public static final String TIMEZONE = "timezone"; // for load data from Baidu Object Store(BOS) public static final String BOS_ENDPOINT = "bos_endpoint"; @@ -92,6 +94,7 @@ public class LoadStmt extends DdlStmt { .add(CLUSTER_PROPERTY) .add(STRICT_MODE) .add(VERSION) + .add(TIMEZONE) .build(); public LoadStmt(LabelName label, List dataDescriptions, @@ -195,15 +198,20 @@ public static void checkProperties(Map properties) throws DdlExc } } + // time zone + final String timezone = properties.get(TIMEZONE); + if (timezone != null) { + TimeUtils.checkTimeZoneValid(timezone); + } } - private void analyzeVersion() { + private void analyzeVersion() throws AnalysisException { if (properties == null) { return; } final String versionProperty = properties.get(VERSION); if (versionProperty != null) { - version = Load.VERSION; + throw new AnalysisException("Do not support VERSION property"); } } diff --git a/fe/src/main/java/org/apache/doris/analysis/ShowAlterStmt.java b/fe/src/main/java/org/apache/doris/analysis/ShowAlterStmt.java index 1bba28f30e2a6c..c7fe7880716bbc 100644 --- a/fe/src/main/java/org/apache/doris/analysis/ShowAlterStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/ShowAlterStmt.java @@ -87,7 +87,6 @@ public void analyze(Analyzer analyzer) 
throws AnalysisException, UserException { Preconditions.checkNotNull(type); // check auth when get job info - handleShowAlterTable(analyzer); } diff --git a/fe/src/main/java/org/apache/doris/analysis/ShowTabletStmt.java b/fe/src/main/java/org/apache/doris/analysis/ShowTabletStmt.java index 37b2d6c7179be6..c3cbd8d7f14f39 100644 --- a/fe/src/main/java/org/apache/doris/analysis/ShowTabletStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/ShowTabletStmt.java @@ -63,10 +63,12 @@ public ShowTabletStmt(TableName dbTableName, long tabletId, List partiti this.dbName = null; this.tableName = null; this.isShowSingleTablet = true; + this.indexName = null; } else { this.dbName = dbTableName.getDb(); this.tableName = dbTableName.getTbl(); this.isShowSingleTablet = false; + this.indexName = Strings.emptyToNull(indexName); } this.tabletId = tabletId; this.partitionNames = partitionNames; @@ -251,7 +253,7 @@ public String toSql() { if (isShowSingleTablet) { sb.append(tabletId); } else { - sb.append(" from ").append("`").append(dbName).append("`.`").append(tableName).append("`"); + sb.append(" FROM ").append("`").append(dbName).append("`.`").append(tableName).append("`"); } if (limitElement != null) { if (limitElement.hasOffset() && limitElement.hasLimit()) { diff --git a/fe/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/doris/analysis/SlotDescriptor.java index 96fd0de5c158e7..1d2feb5e9374ec 100644 --- a/fe/src/main/java/org/apache/doris/analysis/SlotDescriptor.java +++ b/fe/src/main/java/org/apache/doris/analysis/SlotDescriptor.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.ColumnStats; import org.apache.doris.catalog.Type; import org.apache.doris.thrift.TSlotDescriptor; + import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -263,4 +264,9 @@ public String debugString() { .add("nullIndicatorBit", nullIndicatorBit) .add("slotIdx", slotIdx).toString(); } + + @Override + public String toString() { + return debugString(); + } } diff --git a/fe/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/src/main/java/org/apache/doris/backup/BackupHandler.java index 9014d6c4782d9b..24d8a0f0896d5d 100644 --- a/fe/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -30,6 +30,7 @@ import org.apache.doris.backup.BackupJobInfo.BackupTableInfo; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Table; @@ -298,7 +299,7 @@ private void backup(Repository repository, Database db, BackupStmt stmt) throws } // copy a table with selected partitions for calculating the signature - OlapTable copiedTbl = olapTbl.selectiveCopy(tblRef.getPartitions(), true); + OlapTable copiedTbl = olapTbl.selectiveCopy(tblRef.getPartitions(), true, IndexExtState.VISIBLE); if (copiedTbl == null) { ErrorReport.reportDdlException(ErrorCode.ERR_COMMON_ERROR, "Failed to copy table " + tblName + " with selected partitions"); diff --git a/fe/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/src/main/java/org/apache/doris/backup/BackupJob.java index 13b656ae269448..9c411a81e27162 100644 --- a/fe/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/src/main/java/org/apache/doris/backup/BackupJob.java @@ -23,6 +23,7 @@ 
import org.apache.doris.catalog.Database; import org.apache.doris.catalog.FsBroker; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -386,7 +387,7 @@ private void prepareAndSendSnapshotTask() { for (Partition partition : partitions) { long visibleVersion = partition.getVisibleVersion(); long visibleVersionHash = partition.getVisibleVersionHash(); - List indexes = partition.getMaterializedIndices(); + List indexes = partition.getMaterializedIndices(IndexExtState.VISIBLE); for (MaterializedIndex index : indexes) { int schemaHash = tbl.getSchemaHashByIndexId(index.getId()); List tablets = index.getTablets(); @@ -419,7 +420,8 @@ private void prepareAndSendSnapshotTask() { for (TableRef tableRef : tableRefs) { String tblName = tableRef.getName().getTbl(); OlapTable tbl = (OlapTable) db.getTable(tblName); - OlapTable copiedTbl = tbl.selectiveCopy(tableRef.getPartitions(), true); + // only copy visible indexes + OlapTable copiedTbl = tbl.selectiveCopy(tableRef.getPartitions(), true, IndexExtState.VISIBLE); if (copiedTbl == null) { status = new Status(ErrCode.COMMON_ERROR, "faild to copy table: " + tblName); return; diff --git a/fe/src/main/java/org/apache/doris/backup/BackupJobInfo.java b/fe/src/main/java/org/apache/doris/backup/BackupJobInfo.java index c788659e0177aa..9f07fed710da41 100644 --- a/fe/src/main/java/org/apache/doris/backup/BackupJobInfo.java +++ b/fe/src/main/java/org/apache/doris/backup/BackupJobInfo.java @@ -19,6 +19,7 @@ import org.apache.doris.backup.RestoreFileMapping.IdChain; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Table; @@ -251,7 +252,7 @@ public static BackupJobInfo fromCatalog(long backupTime, String label, String db partitionInfo.versionHash = partition.getVisibleVersionHash(); tableInfo.partitions.put(partitionInfo.name, partitionInfo); // indexes - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { BackupIndexInfo idxInfo = new BackupIndexInfo(); idxInfo.id = index.getId(); idxInfo.name = olapTbl.getIndexNameById(index.getId()); diff --git a/fe/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/src/main/java/org/apache/doris/backup/RestoreJob.java index 4c545a7e08699b..0910ded5d72447 100644 --- a/fe/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -30,6 +30,7 @@ import org.apache.doris.catalog.FsBroker; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; @@ -613,7 +614,7 @@ private void checkAndPrepareMeta() { Set bfColumns = localTbl.getCopiedBfColumns(); double bfFpp = localTbl.getBfFpp(); - for (MaterializedIndex restoredIdx : restorePart.getMaterializedIndices()) { + for (MaterializedIndex restoredIdx : restorePart.getMaterializedIndices(IndexExtState.VISIBLE)) { short shortKeyColumnCount = 
localTbl.getShortKeyColumnCountByIndexId(restoredIdx.getId()); int schemaHash = localTbl.getSchemaHashByIndexId(restoredIdx.getId()); KeysType keysType = localTbl.getKeysType(); @@ -646,7 +647,7 @@ private void checkAndPrepareMeta() { for (Partition restorePart : restoreTbl.getPartitions()) { Set bfColumns = restoreTbl.getCopiedBfColumns(); double bfFpp = restoreTbl.getBfFpp(); - for (MaterializedIndex index : restorePart.getMaterializedIndices()) { + for (MaterializedIndex index : restorePart.getMaterializedIndices(IndexExtState.VISIBLE)) { short shortKeyColumnCount = restoreTbl.getShortKeyColumnCountByIndexId(index.getId()); int schemaHash = restoreTbl.getSchemaHashByIndexId(index.getId()); KeysType keysType = restoreTbl.getKeysType(); @@ -838,7 +839,7 @@ private Partition resetPartitionForRestore(OlapTable localTbl, OlapTable remoteT long visibleVersionHash = remotePart.getVisibleVersionHash(); // tablets - for (MaterializedIndex remoteIdx : remotePart.getMaterializedIndices()) { + for (MaterializedIndex remoteIdx : remotePart.getMaterializedIndices(IndexExtState.VISIBLE)) { int schemaHash = remoteTbl.getSchemaHashByIndexId(remoteIdx.getId()); int remotetabletSize = remoteIdx.getTablets().size(); remoteIdx.clearTabletsForRestore(); @@ -872,9 +873,9 @@ private Partition resetPartitionForRestore(OlapTable localTbl, OlapTable remoteT // files in repo to files in local private void genFileMapping(OlapTable localTbl, Partition localPartition, Long remoteTblId, BackupPartitionInfo backupPartInfo, boolean overwrite) { - for (MaterializedIndex localIdx : localPartition.getMaterializedIndices()) { + for (MaterializedIndex localIdx : localPartition.getMaterializedIndices(IndexExtState.VISIBLE)) { LOG.debug("get index id: {}, index name: {}", localIdx.getId(), - localTbl.getIndexNameById(localIdx.getId())); + localTbl.getIndexNameById(localIdx.getId())); BackupIndexInfo backupIdxInfo = backupPartInfo.getIdx(localTbl.getIndexNameById(localIdx.getId())); Preconditions.checkState(backupIdxInfo.tablets.size() == localIdx.getTablets().size()); for (int i = 0; i < localIdx.getTablets().size(); i++) { @@ -935,7 +936,7 @@ private void replayCheckAndPrepareMeta() { localTbl.addPartition(restorePart); // modify tablet inverted index - for (MaterializedIndex restoreIdx : restorePart.getMaterializedIndices()) { + for (MaterializedIndex restoreIdx : restorePart.getMaterializedIndices(IndexExtState.VISIBLE)) { int schemaHash = localTbl.getSchemaHashByIndexId(restoreIdx.getId()); TabletMeta tabletMeta = new TabletMeta(db.getId(), localTbl.getId(), restorePart.getId(), restoreIdx.getId(), schemaHash, TStorageMedium.HDD); @@ -953,7 +954,7 @@ private void replayCheckAndPrepareMeta() { db.createTable(restoreTbl); // modify tablet inverted index for (Partition restorePart : restoreTbl.getPartitions()) { - for (MaterializedIndex restoreIdx : restorePart.getMaterializedIndices()) { + for (MaterializedIndex restoreIdx : restorePart.getMaterializedIndices(IndexExtState.VISIBLE)) { int schemaHash = restoreTbl.getSchemaHashByIndexId(restoreIdx.getId()); TabletMeta tabletMeta = new TabletMeta(db.getId(), restoreTbl.getId(), restorePart.getId(), restoreIdx.getId(), schemaHash, TStorageMedium.HDD); @@ -1219,11 +1220,11 @@ private Status allTabletCommitted(boolean isReplay) { part.updateVersionForRestore(entry.getValue().first, entry.getValue().second); // we also need to update the replica version of these overwritten restored partitions - for (MaterializedIndex idx : part.getMaterializedIndices()) { + for (MaterializedIndex 
idx : part.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : idx.getTablets()) { for (Replica replica : tablet.getReplicas()) { if (!replica.checkVersionCatchUp(part.getVisibleVersion(), - part.getVisibleVersionHash())) { + part.getVisibleVersionHash(), false)) { replica.updateVersionInfo(part.getVisibleVersion(), part.getVisibleVersionHash(), replica.getDataSize(), replica.getRowCount()); } @@ -1369,7 +1370,7 @@ public void cancelInternal(boolean isReplay) { for (OlapTable restoreTbl : restoredTbls) { LOG.info("remove restored table when cancelled: {}", restoreTbl.getName()); for (Partition part : restoreTbl.getPartitions()) { - for (MaterializedIndex idx : part.getMaterializedIndices()) { + for (MaterializedIndex idx : part.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : idx.getTablets()) { Catalog.getCurrentInvertedIndex().deleteTablet(tablet.getId()); } @@ -1386,7 +1387,7 @@ public void cancelInternal(boolean isReplay) { } LOG.info("remove restored partition in table {} when cancelled: {}", restoreTbl.getName(), entry.second.getName()); - for (MaterializedIndex idx : entry.second.getMaterializedIndices()) { + for (MaterializedIndex idx : entry.second.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : idx.getTablets()) { Catalog.getCurrentInvertedIndex().deleteTablet(tablet.getId()); } diff --git a/fe/src/main/java/org/apache/doris/catalog/BrokerTable.java b/fe/src/main/java/org/apache/doris/catalog/BrokerTable.java index d3251ee81b5771..0e4d8852a5b8fc 100644 --- a/fe/src/main/java/org/apache/doris/catalog/BrokerTable.java +++ b/fe/src/main/java/org/apache/doris/catalog/BrokerTable.java @@ -202,7 +202,7 @@ private void validate(Map properties) throws DdlException { public TTableDescriptor toThrift() { TBrokerTable tBrokerTable = new TBrokerTable(); TTableDescriptor tTableDescriptor = new TTableDescriptor(getId(), TTableType.BROKER_TABLE, - baseSchema.size(), 0, getName(), ""); + fullSchema.size(), 0, getName(), ""); tTableDescriptor.setBrokerTable(tBrokerTable); return tTableDescriptor; } diff --git a/fe/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/src/main/java/org/apache/doris/catalog/Catalog.java index 26330385e36b6f..49ea7535907405 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/doris/catalog/Catalog.java @@ -20,6 +20,7 @@ import org.apache.doris.alter.Alter; import org.apache.doris.alter.AlterJob; import org.apache.doris.alter.AlterJob.JobType; +import org.apache.doris.alter.AlterJobV2; import org.apache.doris.alter.DecommissionBackendJob.DecommissionType; import org.apache.doris.alter.RollupHandler; import org.apache.doris.alter.SchemaChangeHandler; @@ -76,6 +77,7 @@ import org.apache.doris.catalog.Database.DbState; import org.apache.doris.catalog.DistributionInfo.DistributionInfoType; import org.apache.doris.catalog.KuduPartition.KuduRange; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Replica.ReplicaState; @@ -171,7 +173,6 @@ import org.apache.doris.system.HeartbeatMgr; import org.apache.doris.system.SystemInfoService; import org.apache.doris.task.AgentBatchTask; -import org.apache.doris.task.AgentTask; import org.apache.doris.task.AgentTaskExecutor; import org.apache.doris.task.AgentTaskQueue; import org.apache.doris.task.CreateReplicaTask; @@ -1356,7 +1357,7 @@ private void 
recreateTabletInvertIndex() { long partitionId = partition.getId(); TStorageMedium medium = olapTable.getPartitionInfo().getDataProperty( partitionId).getStorageMedium(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { long indexId = index.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); TabletMeta tabletMeta = new TabletMeta(dbId, tableId, partitionId, indexId, schemaHash, medium); @@ -1566,18 +1567,20 @@ public long loadAlterJob(DataInputStream dis, long checksum) throws IOException public long loadAlterJob(DataInputStream dis, long checksum, JobType type) throws IOException { Map alterJobs = null; ConcurrentLinkedQueue finishedOrCancelledAlterJobs = null; + Map alterJobsV2 = Maps.newHashMap(); if (type == JobType.ROLLUP) { alterJobs = this.getRollupHandler().unprotectedGetAlterJobs(); finishedOrCancelledAlterJobs = this.getRollupHandler().unprotectedGetFinishedOrCancelledAlterJobs(); + alterJobsV2 = this.getRollupHandler().getAlterJobsV2(); } else if (type == JobType.SCHEMA_CHANGE) { alterJobs = this.getSchemaChangeHandler().unprotectedGetAlterJobs(); finishedOrCancelledAlterJobs = this.getSchemaChangeHandler().unprotectedGetFinishedOrCancelledAlterJobs(); + alterJobsV2 = this.getSchemaChangeHandler().getAlterJobsV2(); } else if (type == JobType.DECOMMISSION_BACKEND) { alterJobs = this.getClusterHandler().unprotectedGetAlterJobs(); finishedOrCancelledAlterJobs = this.getClusterHandler().unprotectedGetFinishedOrCancelledAlterJobs(); } - // alter jobs int size = dis.readInt(); long newChecksum = checksum ^ size; @@ -1612,6 +1615,16 @@ public long loadAlterJob(DataInputStream dis, long checksum, JobType type) throw } } + // alter job v2 + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_61) { + size = dis.readInt(); + newChecksum ^= size; + for (int i = 0; i < size; i++) { + AlterJobV2 alterJobV2 = AlterJobV2.read(dis); + alterJobsV2.put(alterJobV2.getJobId(), alterJobV2); + } + } + return newChecksum; } @@ -1918,12 +1931,15 @@ public long saveAlterJob(DataOutputStream dos, long checksum) throws IOException public long saveAlterJob(DataOutputStream dos, long checksum, JobType type) throws IOException { Map alterJobs = null; ConcurrentLinkedQueue finishedOrCancelledAlterJobs = null; + Map alterJobsV2 = Maps.newHashMap(); if (type == JobType.ROLLUP) { alterJobs = this.getRollupHandler().unprotectedGetAlterJobs(); finishedOrCancelledAlterJobs = this.getRollupHandler().unprotectedGetFinishedOrCancelledAlterJobs(); + alterJobsV2 = this.getRollupHandler().getAlterJobsV2(); } else if (type == JobType.SCHEMA_CHANGE) { alterJobs = this.getSchemaChangeHandler().unprotectedGetAlterJobs(); finishedOrCancelledAlterJobs = this.getSchemaChangeHandler().unprotectedGetFinishedOrCancelledAlterJobs(); + alterJobsV2 = this.getSchemaChangeHandler().getAlterJobsV2(); } else if (type == JobType.DECOMMISSION_BACKEND) { alterJobs = this.getClusterHandler().unprotectedGetAlterJobs(); finishedOrCancelledAlterJobs = this.getClusterHandler().unprotectedGetFinishedOrCancelledAlterJobs(); @@ -1951,6 +1967,14 @@ public long saveAlterJob(DataOutputStream dos, long checksum, JobType type) thro alterJob.write(dos); } + // alter job v2 + size = alterJobsV2.size(); + checksum ^= size; + dos.writeInt(size); + for (AlterJobV2 alterJobV2 : alterJobsV2.values()) { + alterJobV2.write(dos); + } + return checksum; } @@ -3006,7 +3030,7 @@ public void 
replayAddPartition(PartitionPersistInfo info) throws DdlException { if (!isCheckpointThread()) { // add to inverted index TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { long indexId = index.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); TabletMeta tabletMeta = new TabletMeta(info.getDbId(), info.getTableId(), partition.getId(), @@ -3265,6 +3289,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long // estimate timeout long timeout = Config.tablet_create_timeout_second * 1000L * totalTaskNum; + timeout = Math.min(timeout, Config.max_create_table_timeout_second * 1000); try { ok = countDownLatch.await(timeout, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { @@ -3272,19 +3297,22 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long ok = false; } - if (!ok) { - errMsg = "Failed to create partition[" + partitionName + "]. Timeout"; + if (!ok || !countDownLatch.getStatus().ok()) { + errMsg = "Failed to create partition[" + partitionName + "]. Timeout."; // clear tasks - List tasks = batchTask.getAllTasks(); - for (AgentTask task : tasks) { - AgentTaskQueue.removeTask(task.getBackendId(), TTaskType.CREATE, task.getSignature()); - } + AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); - List> unfinishedMarks = countDownLatch.getLeftMarks(); - // only show at most 10 results - List> subList = unfinishedMarks.subList(0, Math.min(unfinishedMarks.size(), 10)); - String idStr = Joiner.on(", ").join(subList); - LOG.warn("{}. unfinished marks: {}", errMsg, idStr); + if (!countDownLatch.getStatus().ok()) { + errMsg += " Error: " + countDownLatch.getStatus().getErrorMsg(); + } else { + List> unfinishedMarks = countDownLatch.getLeftMarks(); + // only show at most 3 results + List> subList = unfinishedMarks.subList(0, Math.min(unfinishedMarks.size(), 3)); + if (!subList.isEmpty()) { + errMsg += " Unfinished mark: " + Joiner.on(", ").join(subList); + } + } + LOG.warn(errMsg); throw new DdlException(errMsg); } } else { @@ -3983,7 +4011,7 @@ public void replayCreateTable(String dbName, Table table) { long partitionId = partition.getId(); TStorageMedium medium = olapTable.getPartitionInfo().getDataProperty( partitionId).getStorageMedium(); - for (MaterializedIndex mIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { long indexId = mIndex.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); TabletMeta tabletMeta = new TabletMeta(dbId, tableId, partitionId, indexId, schemaHash, medium); @@ -4181,18 +4209,6 @@ public void replayRecoverTable(RecoverInfo info) { } } - public void handleJobsWhenDeleteReplica(long tableId, long partitionId, long indexId, long tabletId, long replicaId, - long backendId) { - // rollup - getRollupHandler().removeReplicaRelatedTask(tableId, partitionId, indexId, tabletId, backendId); - - // schema change - getSchemaChangeHandler().removeReplicaRelatedTask(tableId, tabletId, replicaId, backendId); - - // task - AgentTaskQueue.removeReplicaRelatedTasks(backendId, tabletId); - } - private void unprotectAddReplica(ReplicaPersistInfo info) { LOG.debug("replay add a replica {}", info); Database db = getDb(info.getDbId()); @@ -5644,7 +5660,7 @@ public Set getMigrations() { for (Partition partition : 
olapTable.getPartitions()) { final short replicationNum = olapTable.getPartitionInfo() .getReplicationNum(partition.getId()); - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { if (materializedIndex.getState() != IndexState.NORMAL) { continue; } @@ -5919,7 +5935,7 @@ public void truncateTable(TruncateTableStmt truncateTableStmt) throws DdlExcepti } } - copiedTbl = olapTable.selectiveCopy(origPartitions.keySet(), true); + copiedTbl = olapTable.selectiveCopy(origPartitions.keySet(), true, IndexExtState.VISIBLE); } finally { db.readUnlock(); @@ -6032,7 +6048,7 @@ private void truncateTableInternal(OlapTable olapTable, List newParti for (Partition newPartition : newPartitions) { Partition oldPartition = olapTable.replacePartition(newPartition); // save old tablets to be removed - for (MaterializedIndex index : oldPartition.getMaterializedIndices()) { + for (MaterializedIndex index : oldPartition.getMaterializedIndices(IndexExtState.ALL)) { index.getTablets().stream().forEach(t -> { oldTabletIds.add(t.getId()); }); @@ -6059,7 +6075,7 @@ public void replayTruncateTable(TruncateTableInfo info) { long partitionId = partition.getId(); TStorageMedium medium = olapTable.getPartitionInfo().getDataProperty( partitionId).getStorageMedium(); - for (MaterializedIndex mIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { long indexId = mIndex.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); TabletMeta tabletMeta = new TabletMeta(db.getId(), olapTable.getId(), diff --git a/fe/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java b/fe/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java index d14dfea83f413f..08dfc355608d23 100644 --- a/fe/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java +++ b/fe/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java @@ -17,6 +17,7 @@ package org.apache.doris.catalog; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; @@ -208,7 +209,7 @@ private void onEraseOlapTable(OlapTable olapTable) { // inverted index TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); for (Partition partition : olapTable.getPartitions()) { - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { invertedIndex.deleteTablet(tablet.getId()); } @@ -218,7 +219,7 @@ private void onEraseOlapTable(OlapTable olapTable) { // drop all replicas AgentBatchTask batchTask = new AgentBatchTask(); for (Partition partition : olapTable.getPartitions()) { - List allIndices = partition.getMaterializedIndices(); + List allIndices = partition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex materializedIndex : allIndices) { long indexId = materializedIndex.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); @@ -272,7 +273,7 @@ public synchronized void replayEraseTable(long tableId) { // remove tablet from inverted index TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); for (Partition partition : olapTable.getPartitions()) { - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for 
(MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { invertedIndex.deleteTablet(tablet.getId()); } @@ -297,7 +298,7 @@ private synchronized void erasePartition(long currentTimeMs) { if (isExpire(partitionId, currentTimeMs)) { // remove tablet in inverted index TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { invertedIndex.deleteTablet(tablet.getId()); } @@ -327,7 +328,7 @@ private synchronized void erasePartitionWithSameName(long dbId, long tableId, St if (partition.getName().equals(partitionName)) { // remove tablet in inverted index TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { invertedIndex.deleteTablet(tablet.getId()); } @@ -349,7 +350,7 @@ public synchronized void replayErasePartition(long partitionId) { if (!Catalog.isCheckpointThread()) { // remove tablet from inverted index TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { invertedIndex.deleteTablet(tablet.getId()); } @@ -581,7 +582,7 @@ public void addTabletToInvertedIndex() { for (Partition partition : olapTable.getPartitions()) { long partitionId = partition.getId(); TStorageMedium medium = olapTable.getPartitionInfo().getDataProperty(partitionId).getStorageMedium(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { long indexId = index.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); TabletMeta tabletMeta = new TabletMeta(dbId, tableId, partitionId, indexId, schemaHash, medium); @@ -633,7 +634,7 @@ public void addTabletToInvertedIndex() { // storage medium should be got from RecyclePartitionInfo, not from olap table. 
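A recurring pattern in this patch is that every caller of Partition.getMaterializedIndices() must now say which kind of indexes it wants: VISIBLE for query and backup paths, ALL for bookkeeping such as the recycle bin and inverted-index maintenance. The snippet below is a simplified, self-contained mirror of that selection; Index, IndexExtStateSketch and the flattened method signature are illustrative, not the actual Doris classes:

import java.util.*;

public class IndexExtStateSketch {
    enum IndexState { NORMAL, SHADOW }
    enum IndexExtState { ALL, VISIBLE, SHADOW }

    static final class Index {
        final long id; final IndexState state;
        Index(long id, IndexState state) { this.id = id; this.state = state; }
        @Override
        public String toString() { return id + "/" + state; }
    }

    // VISIBLE: base index plus visible rollups; SHADOW: in-progress alter indexes only; ALL: everything.
    static List<Index> getMaterializedIndices(Index base, List<Index> visibleRollups,
                                              List<Index> shadows, IndexExtState extState) {
        List<Index> result = new ArrayList<>();
        switch (extState) {
            case ALL:
                result.add(base);
                result.addAll(visibleRollups);
                result.addAll(shadows);
                break;
            case VISIBLE:
                result.add(base);
                result.addAll(visibleRollups);
                break;
            case SHADOW:
                result.addAll(shadows);
                break;
        }
        return result;
    }

    public static void main(String[] args) {
        Index base = new Index(1, IndexState.NORMAL);
        List<Index> rollups = Collections.singletonList(new Index(2, IndexState.NORMAL));
        List<Index> shadows = Collections.singletonList(new Index(3, IndexState.SHADOW));
        System.out.println(getMaterializedIndices(base, rollups, shadows, IndexExtState.VISIBLE)); // [1/NORMAL, 2/NORMAL]
        System.out.println(getMaterializedIndices(base, rollups, shadows, IndexExtState.ALL));     // [1/NORMAL, 2/NORMAL, 3/SHADOW]
    }
}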
because olap table // does not have this partition any more TStorageMedium medium = partitionInfo.getDataProperty().getStorageMedium(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { long indexId = index.getId(); int schemaHash = olapTable.getSchemaHashByIndexId(indexId); TabletMeta tabletMeta = new TabletMeta(dbId, tableId, partitionId, indexId, schemaHash, medium); diff --git a/fe/src/main/java/org/apache/doris/catalog/Column.java b/fe/src/main/java/org/apache/doris/catalog/Column.java index 52067b9434fded..7fc66369ec5da8 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/src/main/java/org/apache/doris/catalog/Column.java @@ -17,6 +17,8 @@ package org.apache.doris.catalog; +import org.apache.doris.alter.SchemaChangeHandler; +import org.apache.doris.common.CaseSensibility; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Text; @@ -118,6 +120,17 @@ public String getName() { return this.name; } + public String getNameWithoutPrefix(String prefix) { + if (isNameWithPrefix(prefix)) { + return name.substring(prefix.length()); + } + return name; + } + + public boolean isNameWithPrefix(String prefix) { + return this.name.startsWith(prefix); + } + public void setIsKey(boolean isKey) { this.isKey = isKey; } @@ -214,24 +227,24 @@ public void checkSchemaChangeAllowed(Column other) throws DdlException { } if (!ColumnType.isSchemaChangeAllowed(type, other.type)) { - throw new DdlException("Cannot change " + getDataType() + " to " + other.getDataType()); + throw new DdlException("Can not change " + getDataType() + " to " + other.getDataType()); } if (this.aggregationType != other.aggregationType) { - throw new DdlException("Cannot change aggregation type"); + throw new DdlException("Can not change aggregation type"); } if (this.isAllowNull && !other.isAllowNull) { - throw new DdlException("Cannot change from null to not null"); + throw new DdlException("Can not change from nullable to non-nullable"); } if (this.getDefaultValue() == null) { if (other.getDefaultValue() != null) { - throw new DdlException("Cannot change default value"); + throw new DdlException("Can not change default value"); } } else { if (!this.getDefaultValue().equals(other.getDefaultValue())) { - throw new DdlException("Cannot change default value"); + throw new DdlException("Can not change default value"); } } @@ -252,6 +265,29 @@ public void checkSchemaChangeAllowed(Column other) throws DdlException { } } + public boolean nameEquals(String otherColName, boolean ignorePrefix) { + if (CaseSensibility.COLUMN.getCaseSensibility()) { + if (!ignorePrefix) { + return name.equals(otherColName); + } else { + return removeNamePrefix(name).equals(removeNamePrefix(otherColName)); + } + } else { + if (!ignorePrefix) { + return name.equalsIgnoreCase(otherColName); + } else { + return removeNamePrefix(name).equalsIgnoreCase(removeNamePrefix(otherColName)); + } + } + } + + public static String removeNamePrefix(String colName) { + if (colName.startsWith(SchemaChangeHandler.SHADOW_NAME_PRFIX)) { + return colName.substring(SchemaChangeHandler.SHADOW_NAME_PRFIX.length()); + } + return colName; + } + public String toSql() { StringBuilder sb = new StringBuilder(); sb.append("`").append(name).append("` "); diff --git a/fe/src/main/java/org/apache/doris/catalog/Database.java b/fe/src/main/java/org/apache/doris/catalog/Database.java index 
ea206094aad0ea..1b43ce11d16962 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Database.java +++ b/fe/src/main/java/org/apache/doris/catalog/Database.java @@ -17,7 +17,7 @@ package org.apache.doris.catalog; -import com.google.common.collect.Lists; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.catalog.Table.TableType; @@ -35,6 +35,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.logging.log4j.LogManager; @@ -206,7 +207,7 @@ public long getDataQuotaLeftWithLock() { OlapTable olapTable = (OlapTable) table; for (Partition partition : olapTable.getPartitions()) { - for (MaterializedIndex mIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { // skip ROLLUP index if (mIndex.getState() == IndexState.ROLLUP) { continue; diff --git a/fe/src/main/java/org/apache/doris/catalog/EsTable.java b/fe/src/main/java/org/apache/doris/catalog/EsTable.java index 8713ac6ce1271b..bf5c420f804c55 100644 --- a/fe/src/main/java/org/apache/doris/catalog/EsTable.java +++ b/fe/src/main/java/org/apache/doris/catalog/EsTable.java @@ -121,7 +121,7 @@ private void validate(Map properties) throws DdlException { public TTableDescriptor toThrift() { TEsTable tEsTable = new TEsTable(); TTableDescriptor tTableDescriptor = new TTableDescriptor(getId(), TTableType.ES_TABLE, - baseSchema.size(), 0, getName(), ""); + fullSchema.size(), 0, getName(), ""); tTableDescriptor.setEsTable(tEsTable); return tTableDescriptor; } diff --git a/fe/src/main/java/org/apache/doris/catalog/MaterializedIndex.java b/fe/src/main/java/org/apache/doris/catalog/MaterializedIndex.java index 61234be1593890..ebcbe4a72392d1 100644 --- a/fe/src/main/java/org/apache/doris/catalog/MaterializedIndex.java +++ b/fe/src/main/java/org/apache/doris/catalog/MaterializedIndex.java @@ -37,8 +37,21 @@ public class MaterializedIndex extends MetaObject implements Writable { public enum IndexState { NORMAL, + @Deprecated ROLLUP, - SCHEMA_CHANGE + @Deprecated + SCHEMA_CHANGE, + SHADOW; // index in SHADOW state is visible to load process, but invisible to query + + public boolean isVisible() { + return this == IndexState.NORMAL || this == IndexState.SCHEMA_CHANGE; + } + } + + public enum IndexExtState { + ALL, + VISIBLE, // index state in NORMAL and SCHEMA_CHANGE + SHADOW // index state in SHADOW } private long id; diff --git a/fe/src/main/java/org/apache/doris/catalog/MetadataViewer.java b/fe/src/main/java/org/apache/doris/catalog/MetadataViewer.java index 05e56e85603e5e..55a1657b27b4a2 100644 --- a/fe/src/main/java/org/apache/doris/catalog/MetadataViewer.java +++ b/fe/src/main/java/org/apache/doris/catalog/MetadataViewer.java @@ -20,6 +20,7 @@ import org.apache.doris.analysis.AdminShowReplicaDistributionStmt; import org.apache.doris.analysis.AdminShowReplicaStatusStmt; import org.apache.doris.analysis.BinaryPredicate.Operator; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.Replica.ReplicaStatus; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.common.DdlException; @@ -79,7 +80,7 @@ private static List> getTabletStatus(String dbName, String tblName, long visibleVersion = partition.getVisibleVersion(); short 
replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { int schemaHash = olapTable.getSchemaHashByIndexId(index.getId()); for (Tablet tablet : index.getTablets()) { long tabletId = tablet.getId(); @@ -210,7 +211,7 @@ private static List> getTabletDistribution(String dbName, String tb int totalReplicaNum = 0; for (String partName : partitions) { Partition partition = olapTable.getPartition(partName); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : index.getTablets()) { for (Replica replica : tablet.getReplicas()) { if (!countMap.containsKey(replica.getBackendId())) { diff --git a/fe/src/main/java/org/apache/doris/catalog/MysqlTable.java b/fe/src/main/java/org/apache/doris/catalog/MysqlTable.java index e60a8749697c19..a31872faf4c89f 100644 --- a/fe/src/main/java/org/apache/doris/catalog/MysqlTable.java +++ b/fe/src/main/java/org/apache/doris/catalog/MysqlTable.java @@ -144,7 +144,7 @@ public TTableDescriptor toThrift() { TMySQLTable tMySQLTable = new TMySQLTable(host, port, userName, passwd, mysqlDatabaseName, mysqlTableName); TTableDescriptor tTableDescriptor = new TTableDescriptor(getId(), TTableType.MYSQL_TABLE, - baseSchema.size(), 0, getName(), ""); + fullSchema.size(), 0, getName(), ""); tTableDescriptor.setMysqlTable(tMySQLTable); return tTableDescriptor; } diff --git a/fe/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/src/main/java/org/apache/doris/catalog/OlapTable.java index 3467f0984eff30..9ae4dbaf0bb646 100644 --- a/fe/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -17,8 +17,6 @@ package org.apache.doris.catalog; -import org.apache.doris.alter.AlterJob.JobState; -import org.apache.doris.alter.RollupJob; import org.apache.doris.analysis.AddPartitionClause; import org.apache.doris.analysis.AddRollupClause; import org.apache.doris.analysis.AlterClause; @@ -30,6 +28,7 @@ import org.apache.doris.backup.Status; import org.apache.doris.backup.Status.ErrCode; import org.apache.doris.catalog.DistributionInfo.DistributionInfoType; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.Partition.PartitionState; import org.apache.doris.catalog.Replica.ReplicaState; @@ -214,6 +213,9 @@ public boolean hasMaterializedIndex(String indexName) { return indexNameToId.containsKey(indexName); } + /* + * Set index schema info for specified index. 
+ */ public void setIndexSchemaInfo(Long indexId, String indexName, List schema, int schemaVersion, int schemaHash, short shortKeyColumnCount) { if (indexName == null) { @@ -226,19 +228,40 @@ public void setIndexSchemaInfo(Long indexId, String indexName, List sche indexIdToSchemaHash.put(indexId, schemaHash); indexIdToShortKeyColumnCount.put(indexId, shortKeyColumnCount); } + public void setIndexStorageType(Long indexId, TStorageType newStorageType) { Preconditions.checkState(newStorageType == TStorageType.COLUMN); indexIdToStorageType.put(indexId, newStorageType); } - public void deleteIndexInfo(String indexName) { - long indexId = this.indexNameToId.remove(indexName); + // rebuild the full schema of table + // the order of columns in fullSchema is meaningless + public void rebuildFullSchema() { + fullSchema.clear(); + nameToColumn.clear(); + for (List columns : indexIdToSchema.values()) { + for (Column column : columns) { + if (!nameToColumn.containsKey(column.getName())) { + fullSchema.add(column); + nameToColumn.put(column.getName(), column); + } + } + } + LOG.debug("after rebuild full schema. table {}, schema: {}", id, fullSchema); + } + public boolean deleteIndexInfo(String indexName) { + if (!indexNameToId.containsKey(indexName)) { + return false; + } + + long indexId = this.indexNameToId.remove(indexName); indexIdToSchema.remove(indexId); indexIdToSchemaVersion.remove(indexId); indexIdToSchemaHash.remove(indexId); indexIdToShortKeyColumnCount.remove(indexId); indexIdToStorageType.remove(indexId); + return true; } public Map getIndexNameToId() { @@ -258,6 +281,19 @@ public String getIndexNameById(long indexId) { return null; } + // this is only for schema change. + public void renameIndexForSchemaChange(String name, String newName) { + long idxId = indexNameToId.remove(name); + indexNameToId.put(newName, idxId); + } + + public void renameColumnNamePrefix(long idxId) { + List columns = indexIdToSchema.get(idxId); + for (Column column : columns) { + column.setName(Column.removeNamePrefix(column.getName())); + } + } + public Status resetIdsForRestore(Catalog catalog, Database db, int restoreReplicationNum) { // table id id = catalog.getNextId(); @@ -562,17 +598,13 @@ public void setColocateGroup(String colocateGroup) { // it is used for stream load // the caller should get db lock when call this method public boolean shouldLoadToNewRollup() { - RollupJob rollupJob = (RollupJob) Catalog.getInstance().getRollupHandler().getAlterJob(id); - if (rollupJob != null && rollupJob.getState() == JobState.FINISHING) { - return false; - } - return true; + return false; } public TTableDescriptor toThrift() { TOlapTable tOlapTable = new TOlapTable(getName()); TTableDescriptor tTableDescriptor = new TTableDescriptor(id, TTableType.OLAP_TABLE, - baseSchema.size(), 0, getName(), ""); + fullSchema.size(), 0, getName(), ""); tTableDescriptor.setOlapTable(tOlapTable); return tTableDescriptor; } @@ -920,7 +952,7 @@ public boolean equals(Table table) { return true; } - public OlapTable selectiveCopy(Collection reservedPartNames, boolean resetState) { + public OlapTable selectiveCopy(Collection reservedPartNames, boolean resetState, IndexExtState extState) { OlapTable copied = new OlapTable(); if (!DeepCopy.copy(this, copied)) { LOG.warn("failed to copy olap table: " + getName()); @@ -932,7 +964,7 @@ public OlapTable selectiveCopy(Collection reservedPartNames, boolean res for (Partition partition : copied.getPartitions()) { partition.setState(PartitionState.NORMAL); 
copied.getPartitionInfo().setDataProperty(partition.getId(), new DataProperty(TStorageMedium.HDD)); - for (MaterializedIndex idx : partition.getMaterializedIndices()) { + for (MaterializedIndex idx : partition.getMaterializedIndices(extState)) { idx.setState(IndexState.NORMAL); for (Tablet tablet : idx.getTablets()) { for (Replica replica : tablet.getReplicas()) { @@ -1003,7 +1035,7 @@ public boolean isStable(SystemInfoService infoService, TabletScheduler tabletSch long visibleVersion = partition.getVisibleVersion(); long visibleVersionHash = partition.getVisibleVersionHash(); short replicationNum = partitionInfo.getReplicationNum(partition.getId()); - for (MaterializedIndex mIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : mIndex.getTablets()) { if (tabletScheduler.containsTablet(tablet.getId())) { return false; @@ -1053,11 +1085,11 @@ public long proximateRowCount() { for (Partition partition : getPartitions()) { long version = partition.getVisibleVersion(); long versionHash = partition.getVisibleVersionHash(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : index.getTablets()) { long tabletRowCount = 0L; for (Replica replica : tablet.getReplicas()) { - if (replica.checkVersionCatchUp(version, versionHash) + if (replica.checkVersionCatchUp(version, versionHash, false) && replica.getRowCount() > tabletRowCount) { tabletRowCount = replica.getRowCount(); } @@ -1068,4 +1100,19 @@ public long proximateRowCount() { } return totalCount; } + + @Override + public List getBaseSchema() { + return indexIdToSchema.get(baseIndexId); + } + + public int getKeysNum() { + int keysNum = 0; + for (Column column : getBaseSchema()) { + if (column.isKey()) { + keysNum += 1; + } + } + return keysNum; + } } diff --git a/fe/src/main/java/org/apache/doris/catalog/Partition.java b/fe/src/main/java/org/apache/doris/catalog/Partition.java index 4dec9661808b64..286f7fdc28c77c 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Partition.java +++ b/fe/src/main/java/org/apache/doris/catalog/Partition.java @@ -18,20 +18,24 @@ package org.apache.doris.catalog; import org.apache.doris.catalog.DistributionInfo.DistributionInfoType; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; +import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.Util; import org.apache.doris.meta.MetaContext; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +import org.apache.kudu.client.shaded.com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -47,7 +51,9 @@ public class Partition extends MetaObject implements Writable { public enum PartitionState { NORMAL, + @Deprecated ROLLUP, + @Deprecated SCHEMA_CHANGE } @@ -56,7 +62,17 @@ public enum PartitionState { private PartitionState state; private MaterializedIndex baseIndex; - private Map idToRollupIndex; + /* + * Visible rollup indexes are indexes which are 
visible to user. + * Users can query them, and they are shown in the related 'show' stmts. + */ + private Map idToVisibleRollupIndex = Maps.newHashMap(); + /* + * Shadow indexes are indexes which are not visible to user. + * Query will not run on these shadow indexes, and users can not see them either. + * But load process will load data into these shadow indexes. + */ + private Map idToShadowIndex = Maps.newHashMap(); /* * committed version(hash): after txn is committed, set committed version(hash) @@ -73,8 +89,7 @@ public enum PartitionState { private DistributionInfo distributionInfo; - public Partition() { - this.idToRollupIndex = new HashMap(); + private Partition() { } public Partition(long id, String name, @@ -84,7 +99,6 @@ public Partition(long id, String name, this.state = PartitionState.NORMAL; this.baseIndex = baseIndex; - this.idToRollupIndex = new HashMap(); this.visibleVersion = PARTITION_INIT_VERSION; this.visibleVersionHash = PARTITION_INIT_VERSION_HASH; @@ -168,11 +182,19 @@ public DistributionInfo getDistributionInfo() { } public void createRollupIndex(MaterializedIndex mIndex) { - this.idToRollupIndex.put(mIndex.getId(), mIndex); + if (mIndex.getState().isVisible()) { + this.idToVisibleRollupIndex.put(mIndex.getId(), mIndex); + } else { + this.idToShadowIndex.put(mIndex.getId(), mIndex); + } } public MaterializedIndex deleteRollupIndex(long indexId) { - return this.idToRollupIndex.remove(indexId); + if (this.idToVisibleRollupIndex.containsKey(indexId)) { + return idToVisibleRollupIndex.remove(indexId); + } else { + return idToShadowIndex.remove(indexId); + } } public MaterializedIndex getBaseIndex() { @@ -204,36 +226,40 @@ public long getCommittedVersionHash() { return committedVersionHash; } - public List getRollupIndices() { - List rollupIndices = new ArrayList(idToRollupIndex.size()); - for (Map.Entry entry : idToRollupIndex.entrySet()) { - rollupIndices.add(entry.getValue()); - } - return rollupIndices; - } - public MaterializedIndex getIndex(long indexId) { if (baseIndex.getId() == indexId) { return baseIndex; } - if (idToRollupIndex.containsKey(indexId)) { - return idToRollupIndex.get(indexId); + if (idToVisibleRollupIndex.containsKey(indexId)) { + return idToVisibleRollupIndex.get(indexId); + } else { + return idToShadowIndex.get(indexId); } - return null; } - public List getMaterializedIndices() { - List indices = new ArrayList(); - indices.add(baseIndex); - for (MaterializedIndex rollupIndex : idToRollupIndex.values()) { - indices.add(rollupIndex); + public List getMaterializedIndices(IndexExtState extState) { + List indices = Lists.newArrayList(); + switch (extState) { + case ALL: + indices.add(baseIndex); + indices.addAll(idToVisibleRollupIndex.values()); + indices.addAll(idToShadowIndex.values()); + break; + case VISIBLE: + indices.add(baseIndex); + indices.addAll(idToVisibleRollupIndex.values()); + break; + case SHADOW: + indices.addAll(idToShadowIndex.values()); + default: + break; } return indices; } public long getDataSize() { long dataSize = 0; - for (MaterializedIndex mIndex : getMaterializedIndices()) { + for (MaterializedIndex mIndex : getMaterializedIndices(IndexExtState.VISIBLE)) { dataSize += mIndex.getDataSize(); } return dataSize; @@ -243,6 +269,26 @@ public boolean hasData() { return !(visibleVersion == PARTITION_INIT_VERSION && visibleVersionHash == PARTITION_INIT_VERSION_HASH); } + /* + * Change the index's state from SHADOW to NORMAL. + * Also move it to idToVisibleRollupIndex if it is not the base index.
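 * (Editor's illustrative note, not part of the original patch: a caller that publishes a finished shadow index would do something like partition.visualiseShadowIndex(shadowIndexId, false) for a shadow rollup index, or pass true for isBaseIndex when the shadow index is meant to replace the base index.)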
+ */ + public boolean visualiseShadowIndex(long shadowIndexId, boolean isBaseIndex) { + MaterializedIndex shadowIdx = idToShadowIndex.remove(shadowIndexId); + if (shadowIdx == null) { + return false; + } + Preconditions.checkState(!idToVisibleRollupIndex.containsKey(shadowIndexId), shadowIndexId); + shadowIdx.setState(IndexState.NORMAL); + if (isBaseIndex) { + baseIndex = shadowIdx; + } else { + idToVisibleRollupIndex.put(shadowIndexId, shadowIdx); + } + LOG.info("visualise the shadow index: {}", shadowIndexId); + return true; + } + public static Partition read(DataInput in) throws IOException { Partition partition = new Partition(); partition.readFields(in); @@ -259,14 +305,19 @@ public void write(DataOutput out) throws IOException { baseIndex.write(out); - int rollupCount = (idToRollupIndex != null) ? idToRollupIndex.size() : 0; + int rollupCount = (idToVisibleRollupIndex != null) ? idToVisibleRollupIndex.size() : 0; out.writeInt(rollupCount); - if (idToRollupIndex != null) { - for (Map.Entry entry : idToRollupIndex.entrySet()) { + if (idToVisibleRollupIndex != null) { + for (Map.Entry entry : idToVisibleRollupIndex.entrySet()) { entry.getValue().write(out); } } + out.writeInt(idToShadowIndex.size()); + for (MaterializedIndex shadowIndex : idToShadowIndex.values()) { + shadowIndex.write(out); + } + out.writeLong(visibleVersion); out.writeLong(visibleVersionHash); @@ -291,7 +342,15 @@ public void readFields(DataInput in) throws IOException { int rollupCount = in.readInt(); for (int i = 0; i < rollupCount; ++i) { MaterializedIndex rollupTable = MaterializedIndex.read(in); - idToRollupIndex.put(rollupTable.getId(), rollupTable); + idToVisibleRollupIndex.put(rollupTable.getId(), rollupTable); + } + + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_61) { + int shadowIndexCount = in.readInt(); + for (int i = 0; i < shadowIndexCount; i++) { + MaterializedIndex shadowIndex = MaterializedIndex.read(in); + idToShadowIndex.put(shadowIndex.getId(), shadowIndex); + } } visibleVersion = in.readLong(); @@ -332,16 +391,16 @@ public boolean equals(Object obj) { } Partition partition = (Partition) obj; - if (idToRollupIndex != partition.idToRollupIndex) { - if (idToRollupIndex.size() != partition.idToRollupIndex.size()) { + if (idToVisibleRollupIndex != partition.idToVisibleRollupIndex) { + if (idToVisibleRollupIndex.size() != partition.idToVisibleRollupIndex.size()) { return false; } - for (Entry entry : idToRollupIndex.entrySet()) { + for (Entry entry : idToVisibleRollupIndex.entrySet()) { long key = entry.getKey(); - if (!partition.idToRollupIndex.containsKey(key)) { + if (!partition.idToVisibleRollupIndex.containsKey(key)) { return false; } - if (!entry.getValue().equals(partition.idToRollupIndex.get(key))) { + if (!entry.getValue().equals(partition.idToVisibleRollupIndex.get(key))) { return false; } } @@ -362,11 +421,11 @@ public String toString() { buffer.append("base_index: ").append(baseIndex.toString()).append("; "); - int rollupCount = (idToRollupIndex != null) ? idToRollupIndex.size() : 0; + int rollupCount = (idToVisibleRollupIndex != null) ? 
idToVisibleRollupIndex.size() : 0; buffer.append("rollup count: ").append(rollupCount).append("; "); - if (idToRollupIndex != null) { - for (Map.Entry entry : idToRollupIndex.entrySet()) { + if (idToVisibleRollupIndex != null) { + for (Map.Entry entry : idToVisibleRollupIndex.entrySet()) { buffer.append("rollup_index: ").append(entry.getValue().toString()).append("; "); } } diff --git a/fe/src/main/java/org/apache/doris/catalog/Replica.java b/fe/src/main/java/org/apache/doris/catalog/Replica.java index f567225ab9ca4b..7513644c7915c4 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Replica.java +++ b/fe/src/main/java/org/apache/doris/catalog/Replica.java @@ -39,13 +39,20 @@ public class Replica implements Writable { public enum ReplicaState { NORMAL, + @Deprecated ROLLUP, + @Deprecated SCHEMA_CHANGE, CLONE, + ALTER, // replica is under rollup or schema change DECOMMISSION; // replica is ready to be deleted - public boolean isLoadable() { - return this == ReplicaState.NORMAL || this == ReplicaState.SCHEMA_CHANGE; + public boolean canLoad() { + return this == NORMAL || this == SCHEMA_CHANGE || this == ALTER; + } + + public boolean canQuery() { + return this == NORMAL || this == SCHEMA_CHANGE; } } @@ -379,7 +386,20 @@ public synchronized void updateLastFailedVersion(long lastFailedVersion, long la this.lastSuccessVersion, this.lastSuccessVersionHash, dataSize, rowCount); } - public boolean checkVersionCatchUp(long expectedVersion, long expectedVersionHash) { + /* + * Check whether the replica's version catches up with the expected version. + * If ignoreAlter is true, and state is ALTER, and the replica's version is PARTITION_INIT_VERSION, just return true and ignore the version. + * This is for the case that, when altering a table, the newly created replica's version is PARTITION_INIT_VERSION, + * but we need to treat it as a "normal" replica whose version is supposed to have caught up. + * But if the state is ALTER and the version is larger than PARTITION_INIT_VERSION, this replica + * has already been updated by the load process, so we need to consider its version. + */ + public boolean checkVersionCatchUp(long expectedVersion, long expectedVersionHash, boolean ignoreAlter) { + if (ignoreAlter && state == ReplicaState.ALTER && version == Partition.PARTITION_INIT_VERSION + && versionHash == Partition.PARTITION_INIT_VERSION_HASH) { + return true; + } + if (expectedVersion == Partition.PARTITION_INIT_VERSION && expectedVersionHash == Partition.PARTITION_INIT_VERSION_HASH) { // no data is loaded into this replica, just return true diff --git a/fe/src/main/java/org/apache/doris/catalog/Table.java b/fe/src/main/java/org/apache/doris/catalog/Table.java index aedad489ff059d..6992c3d64a1e92 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/src/main/java/org/apache/doris/catalog/Table.java @@ -24,6 +24,7 @@ import org.apache.doris.thrift.TTableDescriptor; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.lang.NotImplementedException; @@ -33,7 +34,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -57,8 +57,17 @@ public enum TableType { protected long id; protected String name; protected TableType type; - protected List baseSchema; - // tree map for case-insensitive lookup + /* + * fullSchema and nameToColumn should contain all columns, both visible and shadow. + * e.g.
for OlapTable, when doing schema change, there will be some shadow columns which are not visible + to query but are visible to the load process. + * If you want to get all visible columns, you should call getBaseSchema() method, which is overridden in + * subclasses. + * + * NOTICE: the order of this fullSchema is meaningless to OlapTable + */ + protected List fullSchema; + // tree map for case-insensitive lookup. protected Map nameToColumn; // DO NOT persist this variable. @@ -66,19 +75,21 @@ public enum TableType { public Table(TableType type) { this.type = type; - this.baseSchema = new LinkedList(); + this.fullSchema = Lists.newArrayList(); this.nameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); } - public Table(long id, String tableName, TableType type, List baseSchema) { + public Table(long id, String tableName, TableType type, List fullSchema) { this.id = id; this.name = tableName; this.type = type; - this.baseSchema = baseSchema; - + // must copy the list, it should not be the same object as in indexIdToSchema + if (fullSchema != null) { + this.fullSchema = Lists.newArrayList(fullSchema); + } this.nameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - if (baseSchema != null) { - for (Column col : baseSchema) { + if (this.fullSchema != null) { + for (Column col : this.fullSchema) { nameToColumn.put(col.getName(), col); } } else { @@ -107,24 +118,19 @@ public TableType getType() { return type; } - public int getKeysNum() { - int keysNum = 0; - for (Column column : baseSchema) { - if (column.isKey()) { - keysNum += 1; - } - } - return keysNum; + public List getFullSchema() { + return fullSchema; } + // should be overridden in subclass if necessary public List getBaseSchema() { - return baseSchema; + return fullSchema; } - public void setNewBaseSchema(List newSchema) { - this.baseSchema = newSchema; + public void setNewFullSchema(List newSchema) { + this.fullSchema = newSchema; this.nameToColumn.clear(); - for (Column col : baseSchema) { + for (Column col : fullSchema) { nameToColumn.put(col.getName(), col); } } @@ -182,9 +188,9 @@ public void write(DataOutput out) throws IOException { Text.writeString(out, name); // base schema - int columnCount = baseSchema.size(); + int columnCount = fullSchema.size(); out.writeInt(columnCount); - for (Column column : baseSchema) { + for (Column column : fullSchema) { column.write(out); } } @@ -205,7 +211,7 @@ public void readFields(DataInput in) throws IOException { int columnCount = in.readInt(); for (int i = 0; i < columnCount; i++) { Column column = Column.read(in); - this.baseSchema.add(column); + this.fullSchema.add(column); this.nameToColumn.put(column.getName(), column); } } @@ -267,9 +273,10 @@ public String toString() { /* * 1. Only schedule OLAP table. * 2. If table is colocate with other table, not schedule it. - * 3. if table's state is ROLLUP or SCHEMA_CHANGE, but alter job's state is FINISHING, we should also + * 3. (deprecated) If table's state is ROLLUP or SCHEMA_CHANGE, but alter job's state is FINISHING, we should also * schedule the tablet to repair it(only for VERSION_IMCOMPLETE case, this will be checked in * TabletScheduler). + * 4. Even if table's state is ROLLUP or SCHEMA_CHANGE, check it, because we can repair tablets of the base index.
*/ public boolean needSchedule() { if (type != TableType.OLAP) { @@ -277,7 +284,7 @@ public boolean needSchedule() { } OlapTable olapTable = (OlapTable) this; - + if (Catalog.getCurrentColocateIndex().isColocateTable(olapTable.getId())) { LOG.debug("table {} is a colocate table, skip tablet checker.", name); return false; diff --git a/fe/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/src/main/java/org/apache/doris/catalog/Tablet.java index 2753e7d539e94a..d0dff0fcaa9351 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/src/main/java/org/apache/doris/catalog/Tablet.java @@ -178,8 +178,7 @@ public List getNormalReplicaBackendIds() { } ReplicaState state = replica.getState(); - if (infoService.checkBackendAlive(replica.getBackendId()) - && (state == ReplicaState.NORMAL || state == ReplicaState.SCHEMA_CHANGE)) { + if (infoService.checkBackendAlive(replica.getBackendId()) && state.canLoad()) { beIds.add(replica.getBackendId()); } } @@ -197,7 +196,7 @@ public Multimap getNormalReplicaBackendPathMap() { ReplicaState state = replica.getState(); if (infoService.checkBackendAlive(replica.getBackendId()) - && (state == ReplicaState.NORMAL || state == ReplicaState.SCHEMA_CHANGE)) { + && (state == ReplicaState.NORMAL || state == ReplicaState.ALTER)) { map.put(replica.getBackendId(), replica.getPathHash()); } } @@ -213,9 +212,9 @@ public void getQueryableReplicas(List allQuerableReplica, List } ReplicaState state = replica.getState(); - if (state == ReplicaState.NORMAL || state == ReplicaState.SCHEMA_CHANGE) { + if (state.canQuery()) { // replica.getSchemaHash() == -1 is for compatibility - if (replica.checkVersionCatchUp(visibleVersion, visibleVersionHash) + if (replica.checkVersionCatchUp(visibleVersion, visibleVersionHash, false) && (replica.getSchemaHash() == -1 || replica.getSchemaHash() == schemaHash)) { allQuerableReplica.add(replica); if (localBeId != -1 && replica.getBackendId() == localBeId) { diff --git a/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index a6006620554bff..9cdee89fbe2668 100644 --- a/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -347,6 +347,12 @@ private boolean needSync(Replica replicaInFe, TTabletInfo backendTabletInfo) { // it will be handled in needRecovery() return false; } + + if (replicaInFe.getState() == ReplicaState.ALTER) { + // ignore the replica is ALTER state. 
its version will be taken care by load process and alter table process + return false; + } + long versionInFe = replicaInFe.getVersion(); long versionHashInFe = replicaInFe.getVersionHash(); diff --git a/fe/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/src/main/java/org/apache/doris/catalog/TabletStatMgr.java index 67911c78f7b458..a2fcd62d055e92 100644 --- a/fe/src/main/java/org/apache/doris/catalog/TabletStatMgr.java +++ b/fe/src/main/java/org/apache/doris/catalog/TabletStatMgr.java @@ -17,6 +17,7 @@ package org.apache.doris.catalog; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.common.ClientPool; import org.apache.doris.common.Config; @@ -119,12 +120,12 @@ protected void runOneCycle() { for (Partition partition : olapTable.getPartitions()) { long version = partition.getVisibleVersion(); long versionHash = partition.getVisibleVersionHash(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { long indexRowCount = 0L; for (Tablet tablet : index.getTablets()) { long tabletRowCount = 0L; for (Replica replica : tablet.getReplicas()) { - if (replica.checkVersionCatchUp(version, versionHash) + if (replica.checkVersionCatchUp(version, versionHash, false) && replica.getRowCount() > tabletRowCount) { tabletRowCount = replica.getRowCount(); } diff --git a/fe/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java b/fe/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java index 19707dc884dbfb..60a092c861b4c2 100644 --- a/fe/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java +++ b/fe/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java @@ -23,6 +23,7 @@ import org.apache.doris.catalog.ColocateTableIndex.GroupId; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -233,7 +234,7 @@ private long selectSubstituteBackend(int tabletOrderIdx, GroupId groupId, long u } for (Partition partition : tbl.getPartitions()) { - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { long tabletId = index.getTabletIdsInOrder().get(tabletOrderIdx); Tablet tablet = index.getTablet(tabletId); Replica replica = tablet.getReplicaByBackendId(unavailableBeId); @@ -344,7 +345,9 @@ private void matchGroup() { short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); long visibleVersion = partition.getVisibleVersion(); long visibleVersionHash = partition.getVisibleVersionHash(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + // Here we only get VISIBLE indexes. All other indexes are not queryable. + // So it does not matter if tablets of other indexes are not matched. + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { Preconditions.checkState(backendBucketsSeq.size() == index.getTablets().size(), backendBucketsSeq.size() + " vs. 
" + index.getTablets().size()); int idx = 0; diff --git a/fe/src/main/java/org/apache/doris/clone/LoadBalancer.java b/fe/src/main/java/org/apache/doris/clone/LoadBalancer.java index 77d3ac9a2814e2..16baa7a7cface8 100644 --- a/fe/src/main/java/org/apache/doris/clone/LoadBalancer.java +++ b/fe/src/main/java/org/apache/doris/clone/LoadBalancer.java @@ -82,6 +82,9 @@ public List selectAlternativeTablets() { * * Here we only select tablets from high load node, do not set its src or dest, all this will be set * when this tablet is being scheduled in tablet scheduler. + * + * NOTICE that we may select any available tablets here, ignore their state. + * The state will be checked when being scheduled in tablet scheduler. */ private List selectAlternativeTabletsForCluster( String clusterName, ClusterLoadStatistic clusterStat, TStorageMedium medium) { diff --git a/fe/src/main/java/org/apache/doris/clone/TabletChecker.java b/fe/src/main/java/org/apache/doris/clone/TabletChecker.java index d2e9d643a99a2d..003e16a638dca0 100644 --- a/fe/src/main/java/org/apache/doris/clone/TabletChecker.java +++ b/fe/src/main/java/org/apache/doris/clone/TabletChecker.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Partition.PartitionState; @@ -207,7 +208,10 @@ private void checkTablets() { } boolean isInPrios = isInPrios(dbId, table.getId(), partition.getId()); boolean prioPartIsHealthy = true; - for (MaterializedIndex idx : partition.getMaterializedIndices()) { + /* + * Tablet in SHADOW index can not be repaired of balanced + */ + for (MaterializedIndex idx : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : idx.getTablets()) { totalTabletNum++; diff --git a/fe/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index 77166eb51d22b5..8e1c22f230aa4f 100644 --- a/fe/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -478,7 +478,7 @@ public void chooseSrcReplica(Map backendsWorkingSlots) throws Sc continue; } - if (!replica.checkVersionCatchUp(visibleVersion, visibleVersionHash)) { + if (!replica.checkVersionCatchUp(visibleVersion, visibleVersionHash, false)) { continue; } diff --git a/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java index 06445cb612a89c..feebc40aa06247 100644 --- a/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -442,7 +442,7 @@ private synchronized TabletSchedCtx takeRunningTablets(long tabletId) { * Try to schedule a single tablet. 
*/ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) throws SchedException { - LOG.debug("schedule tablet: {}", tabletCtx.getTabletId()); + LOG.debug("schedule tablet: {}, type: {}, status: {}", tabletCtx.getTabletId(), tabletCtx.getType(), tabletCtx.getTabletStatus()); long currentTime = System.currentTimeMillis(); tabletCtx.setLastSchedTime(currentTime); tabletCtx.setLastVisitedTime(currentTime); @@ -514,7 +514,8 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) throw new SchedException(Status.UNRECOVERABLE, "table's state is not NORMAL"); } - if (statusPair.first != TabletStatus.VERSION_INCOMPLETE && partition.getState() != PartitionState.NORMAL) { + if (statusPair.first != TabletStatus.VERSION_INCOMPLETE + && (partition.getState() != PartitionState.NORMAL || tableState != OlapTableState.NORMAL)) { // If table is under ALTER process(before FINISHING), do not allow to add or delete replica. // VERSION_INCOMPLETE will repair the replica in place, which is allowed. throw new SchedException(Status.UNRECOVERABLE, @@ -743,7 +744,7 @@ private boolean deleteReplicaWithFailedVersion(TabletSchedCtx tabletCtx, boolean private boolean deleteReplicaWithLowerVersion(TabletSchedCtx tabletCtx, boolean force) throws SchedException { for (Replica replica : tabletCtx.getReplicas()) { - if (!replica.checkVersionCatchUp(tabletCtx.getCommittedVersion(), tabletCtx.getCommittedVersionHash())) { + if (!replica.checkVersionCatchUp(tabletCtx.getCommittedVersion(), tabletCtx.getCommittedVersionHash(), false)) { deleteReplicaInternal(tabletCtx, replica, "lower version", force); return true; } @@ -874,7 +875,7 @@ private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, St * 2. Wait for any txns before the watermark txn id to be finished. If all are finished, which means this replica is * safe to be deleted. */ - if (!force && replica.getState().isLoadable() && replica.getWatermarkTxnId() == -1) { + if (!force && replica.getState().canLoad() && replica.getWatermarkTxnId() == -1) { long nextTxnId = Catalog.getCurrentGlobalTransactionMgr().getTransactionIDGenerator().getNextTransactionId(); replica.setWatermarkTxnId(nextTxnId); replica.setState(ReplicaState.DECOMMISSION); diff --git a/fe/src/main/java/org/apache/doris/common/CaseSensibility.java b/fe/src/main/java/org/apache/doris/common/CaseSensibility.java index 170f8b37ec2a76..fa11d1d5390ced 100644 --- a/fe/src/main/java/org/apache/doris/common/CaseSensibility.java +++ b/fe/src/main/java/org/apache/doris/common/CaseSensibility.java @@ -23,7 +23,7 @@ public enum CaseSensibility { TABLE(true), ROLUP(true), PARTITION(true), - COLUMN(true), + COLUMN(false), USER(true), ROLE(false), HOST(false), diff --git a/fe/src/main/java/org/apache/doris/common/Config.java b/fe/src/main/java/org/apache/doris/common/Config.java index 92f9bc290d8847..ab289927c2b85b 100644 --- a/fe/src/main/java/org/apache/doris/common/Config.java +++ b/fe/src/main/java/org/apache/doris/common/Config.java @@ -263,6 +263,11 @@ public class Config extends ConfigBase { */ @ConfField(mutable = true, masterOnly = true) public static int tablet_create_timeout_second = 1; + /* + * In order not to wait too long for create table(index), set a max timeout. 
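 * (Editor's note, an assumption not stated in this patch: the intent is presumably that the per-tablet
 * tablet_create_timeout_second is multiplied by the tablet count and then capped by
 * max_create_table_timeout_second, so creating a very large table does not wait indefinitely.)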
+ */ + @ConfField(mutable = true, masterOnly = true) + public static int max_create_table_timeout_second = 60; /* * Maximal waiting time for all publish version tasks of one transaction to be finished diff --git a/fe/src/main/java/org/apache/doris/common/FeConstants.java b/fe/src/main/java/org/apache/doris/common/FeConstants.java index 9174ec4c56aa79..79e2add21d541c 100644 --- a/fe/src/main/java/org/apache/doris/common/FeConstants.java +++ b/fe/src/main/java/org/apache/doris/common/FeConstants.java @@ -28,12 +28,15 @@ public class FeConstants { public static int checkpoint_interval_second = 60; // 1 minutes // dpp version - public static String dpp_version = "3_1_0"; + public static String dpp_version = "3_2_0"; // bloom filter false positive probability public static double default_bloom_filter_fpp = 0.05; + // set to true to skip some step when running FE unit test + public static boolean runningUnitTest = false; + // general model // Current meta data version. Use this version to write journals and image - public static int meta_version = FeMetaVersion.VERSION_60; + public static int meta_version = FeMetaVersion.VERSION_61; } diff --git a/fe/src/main/java/org/apache/doris/common/FeMetaVersion.java b/fe/src/main/java/org/apache/doris/common/FeMetaVersion.java index 9e7c42ce2c5357..f2e7b0e5a188b9 100644 --- a/fe/src/main/java/org/apache/doris/common/FeMetaVersion.java +++ b/fe/src/main/java/org/apache/doris/common/FeMetaVersion.java @@ -130,4 +130,6 @@ public final class FeMetaVersion { public static final int VERSION_59 = 59; // refactor date literal public static final int VERSION_60 = 60; + // for alter job v2 + public static final int VERSION_61 = 61; } diff --git a/fe/src/main/java/org/apache/doris/common/FeNameFormat.java b/fe/src/main/java/org/apache/doris/common/FeNameFormat.java index 722d825a6a230b..f050e1ad2c64fb 100644 --- a/fe/src/main/java/org/apache/doris/common/FeNameFormat.java +++ b/fe/src/main/java/org/apache/doris/common/FeNameFormat.java @@ -17,6 +17,7 @@ package org.apache.doris.common; +import org.apache.doris.alter.SchemaChangeHandler; import org.apache.doris.mysql.privilege.PaloRole; import org.apache.doris.system.SystemInfoService; @@ -63,6 +64,9 @@ public static void checkColumnName(String columnName) throws AnalysisException { if (Strings.isNullOrEmpty(columnName) || !columnName.matches(COMMON_NAME_REGEX)) { ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, columnName); } + if (columnName.startsWith(SchemaChangeHandler.SHADOW_NAME_PRFIX)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, columnName); + } } public static void checkLabel(String label) throws AnalysisException { diff --git a/fe/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java b/fe/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java index bc25b3877f2b8f..74a42ee6c76b8b 100644 --- a/fe/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java +++ b/fe/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java @@ -28,6 +28,7 @@ public class MarkedCountDownLatch extends CountDownLatch { private Multimap marks; + private Status st = Status.OK; public MarkedCountDownLatch(int count) { super(count); @@ -50,9 +51,16 @@ public synchronized List> getLeftMarks() { return Lists.newArrayList(marks.entries()); } - public synchronized void countDownToZero() { + public Status getStatus() { + return st; + } + + public synchronized void countDownToZero(Status status) { while(getCount() > 0) { super.countDown(); } + if (st.ok()) { + st = 
status; + } } } diff --git a/fe/src/main/java/org/apache/doris/common/proc/IndicesProcDir.java b/fe/src/main/java/org/apache/doris/common/proc/IndicesProcDir.java index be4986cfca2970..63bf96d2f0c7be 100644 --- a/fe/src/main/java/org/apache/doris/common/proc/IndicesProcDir.java +++ b/fe/src/main/java/org/apache/doris/common/proc/IndicesProcDir.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.common.AnalysisException; @@ -63,7 +64,7 @@ public ProcResult fetchResult() throws AnalysisException { db.readLock(); try { result.setNames(TITLE_NAMES); - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { List indexInfo = new ArrayList(); indexInfo.add(materializedIndex.getId()); indexInfo.add(olapTable.getIndexNameById(materializedIndex.getId())); diff --git a/fe/src/main/java/org/apache/doris/common/proc/JobsProcDir.java b/fe/src/main/java/org/apache/doris/common/proc/JobsProcDir.java index 17fd645c8d092e..437ba82dcbd915 100644 --- a/fe/src/main/java/org/apache/doris/common/proc/JobsProcDir.java +++ b/fe/src/main/java/org/apache/doris/common/proc/JobsProcDir.java @@ -94,22 +94,22 @@ public ProcResult fetchResult() throws AnalysisException { // load Load load = Catalog.getInstance().getLoadInstance(); LoadManager loadManager = Catalog.getCurrentCatalog().getLoadManager(); - Integer pendingNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.PENDING, dbId) + Long pendingNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.PENDING, dbId) + loadManager.getLoadJobNum(org.apache.doris.load.loadv2.JobState.PENDING, dbId); - Integer runningNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.ETL, dbId) + Long runningNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.ETL, dbId) + load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.LOADING, dbId) + loadManager.getLoadJobNum(org.apache.doris.load.loadv2.JobState.LOADING, dbId); - Integer finishedNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.QUORUM_FINISHED, dbId) + Long finishedNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.QUORUM_FINISHED, dbId) + load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.FINISHED, dbId) + loadManager.getLoadJobNum(org.apache.doris.load.loadv2.JobState.FINISHED, dbId); - Integer cancelledNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.CANCELLED, dbId) + Long cancelledNum = load.getLoadJobNum(org.apache.doris.load.LoadJob.JobState.CANCELLED, dbId) + loadManager.getLoadJobNum(org.apache.doris.load.loadv2.JobState.CANCELLED, dbId); - Integer totalNum = pendingNum + runningNum + finishedNum + cancelledNum; + Long totalNum = pendingNum + runningNum + finishedNum + cancelledNum; result.addRow(Lists.newArrayList(LOAD, pendingNum.toString(), runningNum.toString(), finishedNum.toString(), cancelledNum.toString(), totalNum.toString())); // delete - pendingNum = 0; + pendingNum = 0L; runningNum = load.getDeleteJobNumByState(dbId, org.apache.doris.load.LoadJob.JobState.LOADING); finishedNum = load.getDeleteJobNumByState(dbId, org.apache.doris.load.LoadJob.JobState.FINISHED); cancelledNum = load.getDeleteJobNumByState(dbId, 
org.apache.doris.load.LoadJob.JobState.CANCELLED); @@ -119,52 +119,26 @@ public ProcResult fetchResult() throws AnalysisException { // rollup RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); - pendingNum = rollupHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.PENDING, dbId); - runningNum = rollupHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.RUNNING, dbId) - + rollupHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.FINISHING, dbId); - finishedNum = rollupHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.FINISHED, dbId); - cancelledNum = rollupHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.CANCELLED, dbId); + pendingNum = rollupHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.PENDING, dbId); + runningNum = rollupHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.WAITING_TXN, dbId) + + rollupHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.RUNNING, dbId); + finishedNum = rollupHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.FINISHED, dbId); + cancelledNum = rollupHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.CANCELLED, dbId); totalNum = pendingNum + runningNum + finishedNum + cancelledNum; result.addRow(Lists.newArrayList(ROLLUP, pendingNum.toString(), runningNum.toString(), finishedNum.toString(), cancelledNum.toString(), totalNum.toString())); // schema change SchemaChangeHandler schemaChangeHandler = Catalog.getInstance().getSchemaChangeHandler(); - pendingNum = schemaChangeHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.PENDING, dbId); - runningNum = schemaChangeHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.RUNNING, dbId) - + schemaChangeHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.FINISHING, dbId); - finishedNum = schemaChangeHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.FINISHED, dbId); - cancelledNum = schemaChangeHandler.getAlterJobNum(org.apache.doris.alter.AlterJob.JobState.CANCELLED, dbId); + pendingNum = schemaChangeHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.PENDING, dbId); + runningNum = schemaChangeHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.WAITING_TXN, dbId) + + schemaChangeHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.RUNNING, dbId); + finishedNum = schemaChangeHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.FINISHED, dbId); + cancelledNum = schemaChangeHandler.getAlterJobV2Num(org.apache.doris.alter.AlterJobV2.JobState.CANCELLED, dbId); totalNum = pendingNum + runningNum + finishedNum + cancelledNum; result.addRow(Lists.newArrayList(SCHEMA_CHANGE, pendingNum.toString(), runningNum.toString(), finishedNum.toString(), cancelledNum.toString(), totalNum.toString())); - /* - // backup - BackupHandler backupHandler = Catalog.getInstance().getBackupHandler(); - pendingNum = backupHandler.getBackupJobNum(BackupJobState.PENDING, dbId); - runningNum = backupHandler.getBackupJobNum(BackupJobState.SNAPSHOT, dbId) - + backupHandler.getBackupJobNum(BackupJobState.UPLOAD, dbId) - + backupHandler.getBackupJobNum(BackupJobState.UPLOADING, dbId) - + backupHandler.getBackupJobNum(BackupJobState.FINISHING, dbId); - finishedNum = backupHandler.getBackupJobNum(BackupJobState.FINISHED, dbId); - cancelledNum = backupHandler.getBackupJobNum(BackupJobState.CANCELLED, dbId); - totalNum = pendingNum + runningNum + finishedNum + cancelledNum; - 
result.addRow(Lists.newArrayList(BACKUP, pendingNum.toString(), runningNum.toString(), - finishedNum.toString(), cancelledNum.toString(), totalNum.toString())); - - // restore - pendingNum = backupHandler.getRestoreJobNum(RestoreJobState.PENDING, dbId); - runningNum = backupHandler.getRestoreJobNum(RestoreJobState.RESTORE_META, dbId) - + backupHandler.getRestoreJobNum(RestoreJobState.DOWNLOAD, dbId) - + backupHandler.getRestoreJobNum(RestoreJobState.DOWNLOADING, dbId); - finishedNum = backupHandler.getRestoreJobNum(RestoreJobState.FINISHED, dbId); - cancelledNum = backupHandler.getRestoreJobNum(RestoreJobState.CANCELLED, dbId); - totalNum = pendingNum + runningNum + finishedNum + cancelledNum; - result.addRow(Lists.newArrayList(RESTORE, pendingNum.toString(), runningNum.toString(), - finishedNum.toString(), cancelledNum.toString(), totalNum.toString())); - */ - // export ExportMgr exportMgr = Catalog.getInstance().getExportMgr(); pendingNum = exportMgr.getJobNum(ExportJob.JobState.PENDING, dbId); diff --git a/fe/src/main/java/org/apache/doris/common/proc/RollupJobProcDir.java b/fe/src/main/java/org/apache/doris/common/proc/RollupJobProcDir.java index 8e2096367d7bf1..741e0038a45d6f 100644 --- a/fe/src/main/java/org/apache/doris/common/proc/RollupJobProcDir.java +++ b/fe/src/main/java/org/apache/doris/common/proc/RollupJobProcDir.java @@ -17,23 +17,23 @@ package org.apache.doris.common.proc; -import org.apache.doris.alter.RollupJob; +import org.apache.doris.alter.RollupJobV2; import org.apache.doris.common.AnalysisException; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; -import java.util.ArrayList; import java.util.List; -public class RollupJobProcDir implements ProcDirInterface { +// Show unfinished rollup tasks of rollup job v2 +public class RollupJobProcDir implements ProcNodeInterface { public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() - .add("PartitionId").add("RollupIndexId").add("IndexState") + .add("BackendId").add("BaseTabletId").add("RollupTabletId") .build(); - private RollupJob rollupJob; + private RollupJobV2 rollupJob; - public RollupJobProcDir(RollupJob rollupJob) { + public RollupJobProcDir(RollupJobV2 rollupJob) { this.rollupJob = rollupJob; } @@ -44,32 +44,8 @@ public ProcResult fetchResult() throws AnalysisException { BaseProcResult result = new BaseProcResult(); result.setNames(TITLE_NAMES); - List> rollupJobInfos = rollupJob.getInfos(); - for (List infoStr : rollupJobInfos) { - List oneInfo = new ArrayList(TITLE_NAMES.size()); - for (Comparable element : infoStr) { - oneInfo.add(element.toString()); - } - result.addRow(oneInfo); - } + List> unfinishedRollupTasks = rollupJob.getUnfinishedTasks(2000); + result.setRows(unfinishedRollupTasks); return result; } - - @Override - public boolean register(String name, ProcNodeInterface node) { - return false; - } - - @Override - public ProcNodeInterface lookup(String partitionIdStr) throws AnalysisException { - long partitionId; - try { - partitionId = Long.valueOf(partitionIdStr); - } catch (NumberFormatException e) { - throw new AnalysisException("Invalid table id format: " + partitionIdStr); - } - - return new RollupTabletsProcNode(rollupJob, partitionId); - } - } diff --git a/fe/src/main/java/org/apache/doris/common/proc/RollupProcDir.java b/fe/src/main/java/org/apache/doris/common/proc/RollupProcDir.java index 42c31523c65819..520939e1b2d828 100644 --- a/fe/src/main/java/org/apache/doris/common/proc/RollupProcDir.java +++ 
b/fe/src/main/java/org/apache/doris/common/proc/RollupProcDir.java @@ -17,9 +17,9 @@ package org.apache.doris.common.proc; -import org.apache.doris.alter.AlterJob; +import org.apache.doris.alter.AlterJobV2; import org.apache.doris.alter.RollupHandler; -import org.apache.doris.alter.RollupJob; +import org.apache.doris.alter.RollupJobV2; import org.apache.doris.catalog.Database; import org.apache.doris.common.AnalysisException; @@ -34,7 +34,7 @@ public class RollupProcDir implements ProcDirInterface { public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() .add("JobId").add("TableName").add("CreateTime").add("FinishedTime") .add("BaseIndexName").add("RollupIndexName").add("RollupId").add("TransactionId") - .add("State").add("Msg") .add("Progress") + .add("State").add("Msg").add("Progress").add("Timeout") .build(); private RollupHandler rollupHandler; @@ -70,25 +70,25 @@ public boolean register(String name, ProcNodeInterface node) { } @Override - public ProcNodeInterface lookup(String tableIdStr) throws AnalysisException { - if (Strings.isNullOrEmpty(tableIdStr)) { - throw new AnalysisException("Table id is null"); + public ProcNodeInterface lookup(String jobIdStr) throws AnalysisException { + if (Strings.isNullOrEmpty(jobIdStr)) { + throw new AnalysisException("Job id is null"); } - long tableId = -1L; + long jobId = -1L; try { - tableId = Long.valueOf(tableIdStr); + jobId = Long.valueOf(jobIdStr); } catch (Exception e) { - throw new AnalysisException("Table id is invalid"); + throw new AnalysisException("Job id is invalid"); } - Preconditions.checkState(tableId != -1L); - AlterJob job = rollupHandler.getAlterJob(tableId); + Preconditions.checkState(jobId != -1L); + AlterJobV2 job = rollupHandler.getUnfinishedAlterJobV2(jobId); if (job == null) { return null; } - return new RollupJobProcDir((RollupJob) job); + return new RollupJobProcDir((RollupJobV2) job); } } diff --git a/fe/src/main/java/org/apache/doris/common/proc/RollupTabletsProcNode.java b/fe/src/main/java/org/apache/doris/common/proc/RollupTabletsProcNode.java deleted file mode 100644 index 08ad1ff195bd42..00000000000000 --- a/fe/src/main/java/org/apache/doris/common/proc/RollupTabletsProcNode.java +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.common.proc; - -import org.apache.doris.alter.RollupJob; -import org.apache.doris.common.AnalysisException; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; - -import java.util.ArrayList; -import java.util.List; - -public class RollupTabletsProcNode implements ProcNodeInterface { - public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() - .add("TabletId").add("ReplicaId").add("BackendId").add("Version") - .add("VersionHash").add("DataSize").add("RowCount").add("State") - .build(); - - private RollupJob rollupJob; - private long partitionId; - - public RollupTabletsProcNode(RollupJob rollupJob, long partitionId) { - this.rollupJob = rollupJob; - this.partitionId = partitionId; - } - - @Override - public ProcResult fetchResult() throws AnalysisException { - Preconditions.checkNotNull(rollupJob); - - BaseProcResult result = new BaseProcResult(); - result.setNames(TITLE_NAMES); - - List> tabletInfos = rollupJob.getRollupIndexInfo(partitionId); - for (int i = 0; i < tabletInfos.size(); i++) { - List info = tabletInfos.get(i); - List row = new ArrayList(info.size()); - for (int j = 0; j < info.size(); j++) { - row.add(info.get(j).toString()); - } - result.addRow(row); - } - return result; - } - -} diff --git a/fe/src/main/java/org/apache/doris/common/proc/SchemaChangeProcNode.java b/fe/src/main/java/org/apache/doris/common/proc/SchemaChangeProcNode.java index 3ec874aa8c9c01..a5806beed5c477 100644 --- a/fe/src/main/java/org/apache/doris/common/proc/SchemaChangeProcNode.java +++ b/fe/src/main/java/org/apache/doris/common/proc/SchemaChangeProcNode.java @@ -30,8 +30,8 @@ public class SchemaChangeProcNode implements ProcNodeInterface { public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() .add("JobId").add("TableName").add("CreateTime").add("FinishTime") - .add("IndexName").add("IndexId").add("SchemaVersion").add("IndexState") - .add("TransactionId").add("State").add("Progress").add("Msg") + .add("IndexName").add("IndexId").add("OriginIndexId").add("SchemaVersion") + .add("TransactionId").add("State").add("Msg").add("Progress").add("Timeout") .build(); private SchemaChangeHandler schemaChangeHandler; diff --git a/fe/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java b/fe/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java index da46a36c0d36fa..3e320e17180d98 100644 --- a/fe/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java +++ b/fe/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.Tablet.TabletStatus; import org.apache.doris.clone.TabletSchedCtx.Priority; import org.apache.doris.common.AnalysisException; @@ -117,7 +118,7 @@ public ProcResult fetchResult() throws AnalysisException { for (Partition partition : olapTable.getPartitions()) { short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); ++dbPartitionNum; - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { ++dbIndexNum; for (Tablet tablet : materializedIndex.getTablets()) { ++dbTabletNum; diff --git a/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java 
b/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index caa8e6d705bf2a..4dcb15798baa50 100644 --- a/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -68,6 +68,8 @@ public class PropertyAnalyzer { public static final String PROPERTIES_COLOCATE_WITH = "colocate_with"; + public static final String PROPERTIES_TIMEOUT = "timeout"; + public static DataProperty analyzeDataProperty(Map properties, DataProperty oldDataProperty) throws AnalysisException { DataProperty dataProperty = oldDataProperty; @@ -353,4 +355,18 @@ public static String analyzeColocate(Map properties) throws Anal } return colocateGroup; } + + public static long analyzeTimeout(Map properties, long defaultTimeout) throws AnalysisException { + long timeout = defaultTimeout; + if (properties != null && properties.containsKey(PROPERTIES_TIMEOUT)) { + String timeoutStr = properties.get(PROPERTIES_TIMEOUT); + try { + timeout = Long.valueOf(timeoutStr); + } catch (NumberFormatException e) { + throw new AnalysisException("Invalid timeout format: " + timeoutStr); + } + properties.remove(PROPERTIES_TIMEOUT); + } + return timeout; + } } diff --git a/fe/src/main/java/org/apache/doris/common/util/TimeUtils.java b/fe/src/main/java/org/apache/doris/common/util/TimeUtils.java index 4e0c41df71d82b..571ca2b7a5b911 100644 --- a/fe/src/main/java/org/apache/doris/common/util/TimeUtils.java +++ b/fe/src/main/java/org/apache/doris/common/util/TimeUtils.java @@ -20,15 +20,15 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; - import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.VariableMgr; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -48,10 +48,12 @@ public class TimeUtils { private static final Logger LOG = LogManager.getLogger(TimeUtils.class); + public static final String DEFAULT_TIME_ZONE = "Asia/Shanghai"; + private static final TimeZone TIME_ZONE; // set CST to +08:00 instead of America/Chicago - public static final ImmutableMap timeZoneAliasMap = ImmutableMap.of("CST", "Asia/Shanghai"); + public static final ImmutableMap timeZoneAliasMap = ImmutableMap.of("CST", DEFAULT_TIME_ZONE); // NOTICE: Date formats are not synchronized. // it must be used as synchronized externally. 
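As a hedged illustration of the new PropertyAnalyzer.analyzeTimeout() helper added above (this sketch is not part of the patch; the method name and the 86400-second default are made up for the example):

    // Sketch: read an optional "timeout" property for an alter job, falling back to a default.
    // analyzeTimeout() parses the value, removes the "timeout" key from the map, and throws
    // AnalysisException if the value is not a valid number.
    static long readAlterTimeoutSecond(Map<String, String> properties) throws AnalysisException {
        return PropertyAnalyzer.analyzeTimeout(properties, 86400L /* assumed default */);
    }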
@@ -79,8 +81,6 @@ public class TimeUtils { public static int MIN_TIME; public static int MAX_TIME; - public static String DEFAULT_TIME_ZONE = "Asia/Shanghai"; - static { TIME_ZONE = new SimpleTimeZone(8 * 3600 * 1000, ""); diff --git a/fe/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java b/fe/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java index 3e329a5cc531ff..fbd727b4d66815 100644 --- a/fe/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java +++ b/fe/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MetaObject; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -304,7 +305,7 @@ private long chooseTablet() { // sort materializedIndices Queue indexQueue = new PriorityQueue(1, COMPARATOR); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { indexQueue.add(index); } diff --git a/fe/src/main/java/org/apache/doris/http/rest/RowCountAction.java b/fe/src/main/java/org/apache/doris/http/rest/RowCountAction.java index 54a420b442d3e3..0ed9bc7e8a0a87 100644 --- a/fe/src/main/java/org/apache/doris/http/rest/RowCountAction.java +++ b/fe/src/main/java/org/apache/doris/http/rest/RowCountAction.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -92,12 +93,12 @@ public void execute(BaseRequest request, BaseResponse response) throws DdlExcept for (Partition partition : olapTable.getPartitions()) { long version = partition.getVisibleVersion(); long versionHash = partition.getVisibleVersionHash(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { long indexRowCount = 0L; for (Tablet tablet : index.getTablets()) { long tabletRowCount = 0L; for (Replica replica : tablet.getReplicas()) { - if (replica.checkVersionCatchUp(version, versionHash) + if (replica.checkVersionCatchUp(version, versionHash, false) && replica.getRowCount() > tabletRowCount) { tabletRowCount = replica.getRowCount(); } diff --git a/fe/src/main/java/org/apache/doris/http/rest/ShowMetaInfoAction.java b/fe/src/main/java/org/apache/doris/http/rest/ShowMetaInfoAction.java index 9ff93a253a8418..28adf4ef65369a 100644 --- a/fe/src/main/java/org/apache/doris/http/rest/ShowMetaInfoAction.java +++ b/fe/src/main/java/org/apache/doris/http/rest/ShowMetaInfoAction.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -34,9 +35,8 @@ import org.apache.doris.http.BaseResponse; import org.apache.doris.http.IllegalArgException; import org.apache.doris.persist.Storage; -import com.google.gson.Gson; 
-import io.netty.handler.codec.http.HttpMethod; +import com.google.gson.Gson; import org.apache.commons.lang.StringUtils; import org.apache.logging.log4j.LogManager; @@ -49,6 +49,8 @@ import java.util.List; import java.util.Map; +import io.netty.handler.codec.http.HttpMethod; + public class ShowMetaInfoAction extends RestBaseAction { private enum Action { SHOW_DB_SIZE, @@ -164,7 +166,7 @@ public Map getDataSize() { long tableSize = 0; for (Partition partition : olapTable.getPartitions()) { long partitionSize = 0; - for (MaterializedIndex mIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { long indexSize = 0; for (Tablet tablet : mIndex.getTablets()) { long maxReplicaSize = 0; diff --git a/fe/src/main/java/org/apache/doris/journal/JournalEntity.java b/fe/src/main/java/org/apache/doris/journal/JournalEntity.java index a28ebfb7b38ab6..06a8515b4f5861 100644 --- a/fe/src/main/java/org/apache/doris/journal/JournalEntity.java +++ b/fe/src/main/java/org/apache/doris/journal/JournalEntity.java @@ -18,6 +18,7 @@ package org.apache.doris.journal; import org.apache.doris.alter.AlterJob; +import org.apache.doris.alter.AlterJobV2; import org.apache.doris.analysis.UserIdentity; import org.apache.doris.backup.BackupJob; import org.apache.doris.backup.Repository; @@ -411,6 +412,11 @@ public void readFields(DataInput in) throws IOException { needRead = false; break; } + case OperationType.OP_ALTER_JOB_V2: { + data = AlterJobV2.read(in); + needRead = false; + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); diff --git a/fe/src/main/java/org/apache/doris/load/BrokerFileGroup.java b/fe/src/main/java/org/apache/doris/load/BrokerFileGroup.java index bf69f4a76cdaed..bc1b62386c8f3e 100644 --- a/fe/src/main/java/org/apache/doris/load/BrokerFileGroup.java +++ b/fe/src/main/java/org/apache/doris/load/BrokerFileGroup.java @@ -21,15 +21,20 @@ import org.apache.doris.analysis.DataDescription; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ImportColumnDesc; +import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.BrokerTable; import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Table; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeMetaVersion; +import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; @@ -53,25 +58,22 @@ public class BrokerFileGroup implements Writable { private static final Logger LOG = LogManager.getLogger(BrokerFileGroup.class); - // input - private DataDescription dataDescription; - private long tableId; private String valueSeparator; private String lineDelimiter; // fileFormat may be null, which means format will be decided by file's suffix private String fileFormat; - private List columnsFromPath; private boolean isNegative; private List partitionIds; - // this is a compatible param which only happens before the function of broker has been supported. 
- private List fileFieldNames; private List filePaths; - // this is a compatible param which only happens before the function of broker has been supported. - private Map exprColumnMap; + private List fileFieldNames; + private List columnsFromPath; + // columnExprList includes all fileFieldNames, columnsFromPath and column mappings // this param will be recreated by data desc when the log replay private List columnExprList; + // this is only for hadoop function check + private Map>> columnToHadoopFunction; // Used for recovery from edit log private BrokerFileGroup() { @@ -88,23 +90,23 @@ public BrokerFileGroup(BrokerTable table) throws AnalysisException { } public BrokerFileGroup(DataDescription dataDescription) { - this.dataDescription = dataDescription; + this.fileFieldNames = dataDescription.getFileFieldNames(); this.columnsFromPath = dataDescription.getColumnsFromPath(); - this.exprColumnMap = null; this.columnExprList = dataDescription.getParsedColumnExprList(); + this.columnToHadoopFunction = dataDescription.getColumnToHadoopFunction(); } // NOTE: DBLock will be held // This will parse the input DataDescription to list for BrokerFileInfo - public void parse(Database db) throws DdlException { + public void parse(Database db, DataDescription dataDescription) throws DdlException { // tableId Table table = db.getTable(dataDescription.getTableName()); if (table == null) { - throw new DdlException("Unknown table(" + dataDescription.getTableName() - + ") in database(" + db.getFullName() + ")"); + throw new DdlException("Unknown table " + dataDescription.getTableName() + + " in database " + db.getFullName()); } if (!(table instanceof OlapTable)) { - throw new DdlException("Table(" + table.getName() + ") is not OlapTable"); + throw new DdlException("Table " + table.getName() + " is not OlapTable"); } OlapTable olapTable = (OlapTable) table; tableId = table.getId(); @@ -118,13 +120,29 @@ public void parse(Database db) throws DdlException { for (String pName : dataDescription.getPartitionNames()) { Partition partition = olapTable.getPartition(pName); if (partition == null) { - throw new DdlException("Unknown partition(" + pName + ") in table(" - + table.getName() + ")"); + throw new DdlException("Unknown partition" + pName + " in table" + table.getName()); } partitionIds.add(partition.getId()); } } + if (olapTable.getState() == OlapTableState.RESTORE) { + throw new DdlException("Table [" + table.getName() + "] is under restore"); + } + + if (olapTable.getKeysType() != KeysType.AGG_KEYS && dataDescription.isNegative()) { + throw new DdlException("Load for AGG_KEYS table should not specify NEGATIVE"); + } + + // check negative for sum aggregate type + if (dataDescription.isNegative()) { + for (Column column : table.getBaseSchema()) { + if (!column.isKey() && column.getAggregationType() != AggregateType.SUM) { + throw new DdlException("Column is not SUM AggreateType. column:" + column.getName()); + } + } + } + // column valueSeparator = dataDescription.getColumnSeparator(); if (valueSeparator == null) { @@ -138,7 +156,7 @@ public void parse(Database db) throws DdlException { fileFormat = dataDescription.getFileFormat(); if (fileFormat != null) { if (!fileFormat.toLowerCase().equals("parquet") && !fileFormat.toLowerCase().equals("csv")) { - throw new DdlException("File Format Type("+fileFormat+") Is Invalid. Only support 'csv' or 'parquet'"); + throw new DdlException("File Format Type "+fileFormat+" is invalid. 
Only support 'csv' or 'parquet'"); } } isNegative = dataDescription.isNegative(); @@ -163,10 +181,6 @@ public String getFileFormat() { return fileFormat; } - public List getColumnsFromPath() { - return columnsFromPath; - } - public boolean isNegative() { return isNegative; } @@ -179,10 +193,18 @@ public List getFilePaths() { return filePaths; } + public List getColumnsFromPath() { + return columnsFromPath; + } + public List getColumnExprList() { return columnExprList; } + public Map>> getColumnToHadoopFunction() { + return columnToHadoopFunction; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -273,16 +295,8 @@ public void write(DataOutput out) throws IOException { Text.writeString(out, path); } // expr column map will be null after broker load supports function - if (exprColumnMap == null) { - out.writeInt(0); - } else { - int size = exprColumnMap.size(); - out.writeInt(size); - for (Map.Entry entry : exprColumnMap.entrySet()) { - Text.writeString(out, entry.getKey()); - Expr.writeTo(entry.getValue(), out); - } - } + out.writeInt(0); + // fileFormat if (fileFormat == null) { out.writeBoolean(false); @@ -327,14 +341,12 @@ public void readFields(DataInput in) throws IOException { } } // expr column map + Map exprColumnMap = Maps.newHashMap(); { int size = in.readInt(); - if (size > 0) { - exprColumnMap = Maps.newHashMap(); - for (int i = 0; i < size; ++i) { - final String name = Text.readString(in); - exprColumnMap.put(name, Expr.readIn(in)); - } + for (int i = 0; i < size; ++i) { + final String name = Text.readString(in); + exprColumnMap.put(name, Expr.readIn(in)); } } // file format diff --git a/fe/src/main/java/org/apache/doris/load/ExportJob.java b/fe/src/main/java/org/apache/doris/load/ExportJob.java index 6207ae03ed610c..fbfd965c120aca 100644 --- a/fe/src/main/java/org/apache/doris/load/ExportJob.java +++ b/fe/src/main/java/org/apache/doris/load/ExportJob.java @@ -352,7 +352,8 @@ private void genCoordinators(List fragments, List nodes) ScanNode scanNode = nodes.get(i); TUniqueId queryId = new TUniqueId(uuid.getMostSignificantBits() + i, uuid.getLeastSignificantBits()); Coordinator coord = new Coordinator( - id, queryId, desc, Lists.newArrayList(fragment), Lists.newArrayList(scanNode), clusterName); + id, queryId, desc, Lists.newArrayList(fragment), Lists.newArrayList(scanNode), clusterName, + TimeUtils.DEFAULT_TIME_ZONE); coord.setExecMemoryLimit(getExecMemLimit()); this.coordList.add(coord); } diff --git a/fe/src/main/java/org/apache/doris/load/ExportMgr.java b/fe/src/main/java/org/apache/doris/load/ExportMgr.java index 0de207c53369b0..08133298c8d047 100644 --- a/fe/src/main/java/org/apache/doris/load/ExportMgr.java +++ b/fe/src/main/java/org/apache/doris/load/ExportMgr.java @@ -277,7 +277,7 @@ public void replayUpdateJobState(long jobId, ExportJob.JobState newState) { } } - public Integer getJobNum(ExportJob.JobState state, long dbId) { + public long getJobNum(ExportJob.JobState state, long dbId) { int size = 0; readLock(); try { diff --git a/fe/src/main/java/org/apache/doris/load/Load.java b/fe/src/main/java/org/apache/doris/load/Load.java index 6daeaf87b09e72..f7f238920ba585 100644 --- a/fe/src/main/java/org/apache/doris/load/Load.java +++ b/fe/src/main/java/org/apache/doris/load/Load.java @@ -17,19 +17,29 @@ package org.apache.doris.load; +import org.apache.doris.alter.SchemaChangeHandler; +import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.CancelLoadStmt; import 
org.apache.doris.analysis.ColumnSeparator; import org.apache.doris.analysis.DataDescription; import org.apache.doris.analysis.DeleteStmt; import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.ExprSubstitutionMap; +import org.apache.doris.analysis.FunctionCallExpr; +import org.apache.doris.analysis.FunctionName; +import org.apache.doris.analysis.FunctionParams; import org.apache.doris.analysis.ImportColumnDesc; import org.apache.doris.analysis.IsNullPredicate; import org.apache.doris.analysis.LabelName; import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.analysis.LoadStmt; +import org.apache.doris.analysis.NullLiteral; import org.apache.doris.analysis.Predicate; +import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.StringLiteral; +import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.backup.BlobStorage; import org.apache.doris.backup.Status; import org.apache.doris.catalog.AggregateType; @@ -38,6 +48,7 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; @@ -45,6 +56,7 @@ import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.catalog.Tablet; @@ -61,12 +73,11 @@ import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.LabelAlreadyUsedException; import org.apache.doris.common.LoadException; -import org.apache.doris.common.MarkedCountDownLatch; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.Pair; +import org.apache.doris.common.UserException; import org.apache.doris.common.util.ListComparator; import org.apache.doris.common.util.TimeUtils; -import org.apache.doris.common.util.Util; import org.apache.doris.load.AsyncDeleteJob.DeleteState; import org.apache.doris.load.FailMsg.CancelType; import org.apache.doris.load.LoadJob.JobState; @@ -76,17 +87,14 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.service.FrontendOptions; import org.apache.doris.system.Backend; -import org.apache.doris.task.AgentBatchTask; import org.apache.doris.task.AgentClient; -import org.apache.doris.task.AgentTask; -import org.apache.doris.task.AgentTaskExecutor; import org.apache.doris.task.AgentTaskQueue; import org.apache.doris.task.PushTask; +import org.apache.doris.thrift.TBrokerScanRangeParams; import org.apache.doris.thrift.TEtlState; import org.apache.doris.thrift.TMiniLoadRequest; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPriority; -import org.apache.doris.thrift.TPushType; import org.apache.doris.transaction.PartitionCommitInfo; import org.apache.doris.transaction.TableCommitInfo; import org.apache.doris.transaction.TransactionState; @@ -117,7 +125,6 @@ import java.util.Map.Entry; import java.util.Set; import java.util.UUID; -import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; public class Load { @@ -239,7 +246,8 @@ private void writeUnlock() { // return true if we truly add the load job // return false 
otherwise (eg: a retry request) - public boolean addLoadJob(TMiniLoadRequest request) throws DdlException { + @Deprecated + public boolean addMiniLoadJob(TMiniLoadRequest request) throws DdlException { // get params String fullDbName = request.getDb(); String tableName = request.getTbl(); @@ -302,8 +310,7 @@ public boolean addLoadJob(TMiniLoadRequest request) throws DdlException { } DataDescription dataDescription = new DataDescription(tableName, partitionNames, filePaths, - columnNames, - columnSeparator, formatType, false, null); + columnNames, columnSeparator, formatType, false, null); dataDescription.setLineDelimiter(lineDelimiter); dataDescription.setBeAddr(beAddr); // parse hll param pair @@ -451,12 +458,7 @@ private LoadJob createLoadJob(LoadStmt stmt, EtlJobType etlJobType, } if (properties.containsKey(LoadStmt.LOAD_DELETE_FLAG_PROPERTY)) { - String flag = properties.get(LoadStmt.LOAD_DELETE_FLAG_PROPERTY); - if (flag.equalsIgnoreCase("true") || flag.equalsIgnoreCase("false")) { - job.setDeleteFlag(Boolean.parseBoolean(flag)); - } else { - throw new DdlException("Value of delete flag is invalid"); - } + throw new DdlException("Do not support load_delete_flag"); } if (properties.containsKey(LoadStmt.EXEC_MEM_LIMIT)) { @@ -474,7 +476,7 @@ private LoadJob createLoadJob(LoadStmt stmt, EtlJobType etlJobType, Map>> tableToPartitionSources = Maps.newHashMap(); for (DataDescription dataDescription : dataDescriptions) { // create source - checkAndCreateSource(db, dataDescription, tableToPartitionSources, job.getDeleteFlag(), etlJobType); + checkAndCreateSource(db, dataDescription, tableToPartitionSources, etlJobType); job.addTableName(dataDescription.getTableName()); } for (Entry>> tableEntry : tableToPartitionSources.entrySet()) { @@ -494,11 +496,11 @@ private LoadJob createLoadJob(LoadStmt stmt, EtlJobType etlJobType, PullLoadSourceInfo sourceInfo = new PullLoadSourceInfo(); for (DataDescription dataDescription : dataDescriptions) { BrokerFileGroup fileGroup = new BrokerFileGroup(dataDescription); - fileGroup.parse(db); + fileGroup.parse(db, dataDescription); sourceInfo.addFileGroup(fileGroup); } job.setPullLoadSourceInfo(sourceInfo); - LOG.info("Source info is {}", sourceInfo); + LOG.info("source info is {}", sourceInfo); } if (etlJobType == EtlJobType.MINI) { @@ -601,10 +603,11 @@ private LoadJob createLoadJob(LoadStmt stmt, EtlJobType etlJobType, return job; } + /* + * This is only used for hadoop load + */ public static void checkAndCreateSource(Database db, DataDescription dataDescription, - Map>> tableToPartitionSources, - boolean deleteFlag, EtlJobType jobType) - throws DdlException { + Map>> tableToPartitionSources, EtlJobType jobType) throws DdlException { Source source = new Source(dataDescription.getFilePaths()); long tableId = -1; Set sourcePartitionIds = Sets.newHashSet(); @@ -643,34 +646,30 @@ public static void checkAndCreateSource(Database db, DataDescription dataDescrip throw new DdlException("Load for AGG_KEYS table should not specify NEGATIVE"); } - if (((OlapTable) table).getKeysType() != KeysType.UNIQUE_KEYS && deleteFlag) { - throw new DdlException("Delete flag can only be used for UNIQUE_KEYS table"); - } - // get table schema - List tableSchema = table.getBaseSchema(); - Map nameToTableColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - for (Column column : tableSchema) { - nameToTableColumn.put(column.getName(), column); - } + List baseSchema = table.getBaseSchema(); + // fill the column info if user does not specify them + 
dataDescription.fillColumnInfoIfNotSpecified(baseSchema); + + // source columns List columnNames = Lists.newArrayList(); List assignColumnNames = Lists.newArrayList(); - if (dataDescription.getColumnNames() != null) { - assignColumnNames.addAll(dataDescription.getColumnNames()); + if (dataDescription.getFileFieldNames() != null) { + assignColumnNames.addAll(dataDescription.getFileFieldNames()); if (dataDescription.getColumnsFromPath() != null) { assignColumnNames.addAll(dataDescription.getColumnsFromPath()); } } if (assignColumnNames.isEmpty()) { // use table columns - for (Column column : tableSchema) { + for (Column column : baseSchema) { columnNames.add(column.getName()); } } else { // convert column to schema format for (String assignCol : assignColumnNames) { - if (nameToTableColumn.containsKey(assignCol)) { - columnNames.add(nameToTableColumn.get(assignCol).getName()); + if (table.getColumn(assignCol) != null) { + columnNames.add(table.getColumn(assignCol).getName()); } else { columnNames.add(assignCol); } @@ -685,7 +684,7 @@ public static void checkAndCreateSource(Database db, DataDescription dataDescrip for (ImportColumnDesc importColumnDesc : parsedColumnExprList) { parsedColumnExprMap.put(importColumnDesc.getColumnName(), importColumnDesc.getExpr()); } - for (Column column : tableSchema) { + for (Column column : baseSchema) { String columnName = column.getName(); if (columnNames.contains(columnName)) { continue; @@ -704,7 +703,7 @@ public static void checkAndCreateSource(Database db, DataDescription dataDescrip // check negative for sum aggregate type if (dataDescription.isNegative()) { - for (Column column : tableSchema) { + for (Column column : baseSchema) { if (!column.isKey() && column.getAggregationType() != AggregateType.SUM) { throw new DdlException("Column is not SUM AggreateType. column:" + column.getName()); } @@ -712,29 +711,83 @@ public static void checkAndCreateSource(Database db, DataDescription dataDescrip } // check hll - for (Column column : tableSchema) { + for (Column column : baseSchema) { if (column.getDataType() == PrimitiveType.HLL) { if (columnToHadoopFunction != null && !columnToHadoopFunction.containsKey(column.getName())) { throw new DdlException("Hll column is not assigned. column:" + column.getName()); } } } + // check mapping column exist in table // check function // convert mapping column and func arg columns to schema format + + // When doing schema change, there may have some 'shadow' columns, with prefix '__doris_shadow_' in + // their names. These columns are invisible to user, but we need to generate data for these columns. + // So we add column mappings for these column. 
+ // eg1: + // base schema is (A, B, C), and B is under schema change, so there will be a shadow column: '__doris_shadow_B' + // So the final column mapping should looks like: (A, B, C, __doris_shadow_B = substitute(B)); + for (Column column : table.getFullSchema()) { + if (column.isNameWithPrefix(SchemaChangeHandler.SHADOW_NAME_PRFIX)) { + String originCol = column.getNameWithoutPrefix(SchemaChangeHandler.SHADOW_NAME_PRFIX); + if (parsedColumnExprMap.containsKey(originCol)) { + Expr mappingExpr = parsedColumnExprMap.get(originCol); + if (mappingExpr != null) { + /* + * eg: + * (A, C) SET (B = func(xx)) + * -> + * (A, C) SET (B = func(xx), __doris_shadow_B = func(xxx)) + */ + if (columnToHadoopFunction.containsKey(originCol)) { + columnToHadoopFunction.put(column.getName(), columnToHadoopFunction.get(originCol)); + } + ImportColumnDesc importColumnDesc = new ImportColumnDesc(column.getName(), mappingExpr); + parsedColumnExprList.add(importColumnDesc); + } else { + /* + * eg: + * (A, B, C) + * -> + * (A, B, C) SET (__doris_shadow_B = substitute(B)) + */ + columnToHadoopFunction.put(column.getName(), Pair.create("substitute", Lists.newArrayList(originCol))); + ImportColumnDesc importColumnDesc = new ImportColumnDesc(column.getName(), new SlotRef(null, originCol)); + parsedColumnExprList.add(importColumnDesc); + } + } else { + /* + * There is a case that if user does not specify the related origin column, eg: + * COLUMNS (A, C), and B is not specified, but B is being modified so there is a shadow column '__doris_shadow_B'. + * We can not just add a mapping function "__doris_shadow_B = substitute(B)", because Doris can not find column B. + * In this case, __doris_shadow_B can use its default value, so no need to add it to column mapping + */ + // do nothing + } + + } + } + + LOG.debug("after add shadow column. parsedColumnExprList: {}, columnToHadoopFunction: {}", + parsedColumnExprList, columnToHadoopFunction); + Map columnNameMap = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); for (String columnName : columnNames) { columnNameMap.put(columnName, columnName); } + + // validate hadoop functions if (columnToHadoopFunction != null) { columnToFunction = Maps.newHashMap(); for (Entry>> entry : columnToHadoopFunction.entrySet()) { String mappingColumnName = entry.getKey(); - if (!nameToTableColumn.containsKey(mappingColumnName)) { + Column mappingColumn = table.getColumn(mappingColumnName); + if (mappingColumn == null) { throw new DdlException("Mapping column is not in table. column: " + mappingColumnName); } - Column mappingColumn = nameToTableColumn.get(mappingColumnName); Pair> function = entry.getValue(); try { DataDescription.validateMappingFunction(function.first, function.second, columnNameMap, @@ -807,6 +860,330 @@ public static void checkAndCreateSource(Database db, DataDescription dataDescrip } } + /* + * This function will do followings: + * 1. fill the column exprs if user does not specify any column or column mapping. + * 2. For not specified columns, check if they have default value. + * 3. Add any shadow columns if have. + * 4. validate hadoop functions + * 5. init slot descs and expr map for load plan + * + * This function should be used for broker load v2 and stream load. + * And it must be called in same db lock when planing. 
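 *
 * For example (the column names k1, k2 and v1 below are only illustrative, they are not taken from this patch):
 * a stream load declared with COLUMNS (k1, k2, v1 = k1 + k2) ends up with source slot descriptors for
 * k1 and k2 in slotDescByName, and an analyzed expression "k1 + k2" registered under v1 in exprsByName.
 * Any base schema column that is neither listed nor mapped must have a default value or be nullable,
 * otherwise a DdlException is thrown.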
+ */ + public static void initColumns(Table tbl, List columnExprs, + Map>> columnToHadoopFunction, + Map exprsByName, Analyzer analyzer, TupleDescriptor srcTupleDesc, + Map slotDescByName, TBrokerScanRangeParams params) throws UserException { + // If user does not specify the file field names, generate it by using base schema of table. + // So that the following process can be unified + boolean specifyFileFieldNames = columnExprs.stream().anyMatch(p -> p.isColumn()); + if (!specifyFileFieldNames) { + List columns = tbl.getBaseSchema(); + for (Column column : columns) { + ImportColumnDesc columnDesc = new ImportColumnDesc(column.getName()); + LOG.debug("add base column {} to stream load task", column.getName()); + columnExprs.add(columnDesc); + } + } + // generate a map for checking easily + Map columnExprMap = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + for (ImportColumnDesc importColumnDesc : columnExprs) { + columnExprMap.put(importColumnDesc.getColumnName(), importColumnDesc.getExpr()); + } + + // check default value + for (Column column : tbl.getBaseSchema()) { + String columnName = column.getName(); + if (columnExprMap.containsKey(columnName)) { + continue; + } + if (column.getDefaultValue() != null || column.isAllowNull()) { + continue; + } + throw new DdlException("Column has no default value. column: " + columnName); + } + + // When doing schema change, there may have some 'shadow' columns, with prefix '__doris_shadow_' in + // their names. These columns are invisible to user, but we need to generate data for these columns. + // So we add column mappings for these column. + // eg1: + // base schema is (A, B, C), and B is under schema change, so there will be a shadow column: '__doris_shadow_B' + // So the final column mapping should looks like: (A, B, C, __doris_shadow_B = substitute(B)); + for (Column column : tbl.getFullSchema()) { + if (!column.isNameWithPrefix(SchemaChangeHandler.SHADOW_NAME_PRFIX)) { + continue; + } + + String originCol = column.getNameWithoutPrefix(SchemaChangeHandler.SHADOW_NAME_PRFIX); + if (columnExprMap.containsKey(originCol)) { + Expr mappingExpr = columnExprMap.get(originCol); + if (mappingExpr != null) { + /* + * eg: + * (A, C) SET (B = func(xx)) + * -> + * (A, C) SET (B = func(xx), __doris_shadow_B = func(xxx)) + */ + ImportColumnDesc importColumnDesc = new ImportColumnDesc(column.getName(), mappingExpr); + columnExprs.add(importColumnDesc); + } else { + /* + * eg: + * (A, B, C) + * -> + * (A, B, C) SET (__doris_shadow_B = B) + */ + ImportColumnDesc importColumnDesc = new ImportColumnDesc(column.getName(), + new SlotRef(null, originCol)); + columnExprs.add(importColumnDesc); + } + } else { + /* + * There is a case that if user does not specify the related origin column, eg: + * COLUMNS (A, C), and B is not specified, but B is being modified so there is a shadow column '__doris_shadow_B'. + * We can not just add a mapping function "__doris_shadow_B = substitute(B)", because Doris can not find column B. 
+ * In this case, __doris_shadow_B can use its default value, so no need to add it to column mapping + */ + // do nothing + } + } + + // validate hadoop functions + if (columnToHadoopFunction != null) { + Map columnNameMap = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + for (ImportColumnDesc importColumnDesc : columnExprs) { + if (importColumnDesc.isColumn()) { + columnNameMap.put(importColumnDesc.getColumnName(), importColumnDesc.getColumnName()); + } + } + for (Entry>> entry : columnToHadoopFunction.entrySet()) { + String mappingColumnName = entry.getKey(); + Column mappingColumn = tbl.getColumn(mappingColumnName); + if (mappingColumn == null) { + throw new DdlException("Mapping column is not in table. column: " + mappingColumnName); + } + + Pair> function = entry.getValue(); + try { + DataDescription.validateMappingFunction(function.first, function.second, columnNameMap, + mappingColumn, false); + } catch (AnalysisException e) { + throw new DdlException(e.getMessage()); + } + } + } + + // init slot desc add expr map, also transform hadoop functions + for (ImportColumnDesc importColumnDesc : columnExprs) { + // make column name case match with real column name + String columnName = importColumnDesc.getColumnName(); + String realColName = tbl.getColumn(columnName) == null ? columnName + : tbl.getColumn(columnName).getName(); + if (importColumnDesc.getExpr() != null) { + Expr expr = transformHadoopFunctionExpr(tbl, realColName, importColumnDesc.getExpr()); + exprsByName.put(realColName, expr); + } else { + SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc); + slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR)); + slotDesc.setIsMaterialized(true); + // ISSUE A: src slot should be nullable even if the column is not nullable. + // because src slot is what we read from file, not represent to real column value. + // If column is not nullable, error will be thrown when filling the dest slot, + // which is not nullable. + slotDesc.setIsNullable(true); + params.addToSrc_slot_ids(slotDesc.getId().asInt()); + slotDescByName.put(realColName, slotDesc); + } + } + LOG.debug("slotDescByName: {}, exprsByName: {}", slotDescByName, exprsByName); + + // analyze all exprs + for (Map.Entry entry : exprsByName.entrySet()) { + ExprSubstitutionMap smap = new ExprSubstitutionMap(); + List slots = Lists.newArrayList(); + entry.getValue().collect(SlotRef.class, slots); + for (SlotRef slot : slots) { + SlotDescriptor slotDesc = slotDescByName.get(slot.getColumnName()); + if (slotDesc == null) { + throw new UserException("unknown reference column, column=" + entry.getKey() + + ", reference=" + slot.getColumnName()); + } + smap.getLhs().add(slot); + smap.getRhs().add(new SlotRef(slotDesc)); + } + Expr expr = entry.getValue().clone(smap); + expr.analyze(analyzer); + + // check if contain aggregation + List funcs = Lists.newArrayList(); + expr.collect(FunctionCallExpr.class, funcs); + for (FunctionCallExpr fn : funcs) { + if (fn.isAggregateFunction()) { + throw new AnalysisException("Don't support aggregation function in load expression"); + } + } + exprsByName.put(entry.getKey(), expr); + } + LOG.debug("after init column, exprMap: {}", exprsByName); + } + + /** + * This method is used to transform hadoop function. + * The hadoop function includes: replace_value, strftime, time_format, alignment_timestamp, default_value, now. + * It rewrites those function with real function name and param. 
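 * For example, with an illustrative column k1 (the argument names below are placeholders, not from this patch):
 *   replace_value(null, v)     -> if(k1 is not null, k1, v)
 *   replace_value(a, b)        -> if(k1 is not null, if(k1 != a, k1, b), null)
 *   strftime(fmt, ts)          -> FROM_UNIXTIME(ts)
 *   time_format(f1, f2, str)   -> DATE_FORMAT(STR_TO_DATE(str, f2), f1)
 *   alignment_timestamp(p, ts) -> UNIX_TIMESTAMP(DATE_FORMAT(FROM_UNIXTIME(ts), fmt)), fmt derived from precision p
 *   default_value(v)           -> v
 *   now()                      -> NOW()
 *   substitute(col)            -> col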
+ * For the other function, the expr only go through this function and the origin expr is returned. + * + * @param columnName + * @param originExpr + * @return + * @throws UserException + */ + private static Expr transformHadoopFunctionExpr(Table tbl, String columnName, Expr originExpr) + throws UserException { + Column column = tbl.getColumn(columnName); + if (column == null) { + // the unknown column will be checked later. + return originExpr; + } + + // To compatible with older load version + if (originExpr instanceof FunctionCallExpr) { + FunctionCallExpr funcExpr = (FunctionCallExpr) originExpr; + String funcName = funcExpr.getFnName().getFunction(); + + if (funcName.equalsIgnoreCase("replace_value")) { + List exprs = Lists.newArrayList(); + SlotRef slotRef = new SlotRef(null, columnName); + // We will convert this to IF(`col` != child0, `col`, child1), + // because we need the if return type equal to `col`, we use NE + + /* + * We will convert this based on different cases: + * case 1: k1 = replace_value(null, anyval); + * to: k1 = if (k1 is not null, k1, anyval); + * + * case 2: k1 = replace_value(anyval1, anyval2); + * to: k1 = if (k1 is not null, if(k1 != anyval1, k1, anyval2), null); + */ + if (funcExpr.getChild(0) instanceof NullLiteral) { + // case 1 + exprs.add(new IsNullPredicate(slotRef, true)); + exprs.add(slotRef); + if (funcExpr.hasChild(1)) { + exprs.add(funcExpr.getChild(1)); + } else { + if (column.getDefaultValue() != null) { + exprs.add(new StringLiteral(column.getDefaultValue())); + } else { + if (column.isAllowNull()) { + exprs.add(NullLiteral.create(Type.VARCHAR)); + } else { + throw new UserException("Column(" + columnName + ") has no default value."); + } + } + } + } else { + // case 2 + exprs.add(new IsNullPredicate(slotRef, true)); + List innerIfExprs = Lists.newArrayList(); + innerIfExprs.add(new BinaryPredicate(BinaryPredicate.Operator.NE, slotRef, funcExpr.getChild(0))); + innerIfExprs.add(slotRef); + if (funcExpr.hasChild(1)) { + innerIfExprs.add(funcExpr.getChild(1)); + } else { + if (column.getDefaultValue() != null) { + innerIfExprs.add(new StringLiteral(column.getDefaultValue())); + } else { + if (column.isAllowNull()) { + innerIfExprs.add(NullLiteral.create(Type.VARCHAR)); + } else { + throw new UserException("Column(" + columnName + ") has no default value."); + } + } + } + FunctionCallExpr innerIfFn = new FunctionCallExpr("if", innerIfExprs); + exprs.add(innerIfFn); + exprs.add(NullLiteral.create(Type.VARCHAR)); + } + + LOG.debug("replace_value expr: {}", exprs); + FunctionCallExpr newFn = new FunctionCallExpr("if", exprs); + return newFn; + } else if (funcName.equalsIgnoreCase("strftime")) { + // FROM_UNIXTIME(val) + FunctionName fromUnixName = new FunctionName("FROM_UNIXTIME"); + List fromUnixArgs = Lists.newArrayList(funcExpr.getChild(1)); + FunctionCallExpr fromUnixFunc = new FunctionCallExpr( + fromUnixName, new FunctionParams(false, fromUnixArgs)); + + return fromUnixFunc; + } else if (funcName.equalsIgnoreCase("time_format")) { + // DATE_FORMAT(STR_TO_DATE(dt_str, dt_fmt)) + FunctionName strToDateName = new FunctionName("STR_TO_DATE"); + List strToDateExprs = Lists.newArrayList(funcExpr.getChild(2), funcExpr.getChild(1)); + FunctionCallExpr strToDateFuncExpr = new FunctionCallExpr( + strToDateName, new FunctionParams(false, strToDateExprs)); + + FunctionName dateFormatName = new FunctionName("DATE_FORMAT"); + List dateFormatArgs = Lists.newArrayList(strToDateFuncExpr, funcExpr.getChild(0)); + FunctionCallExpr dateFormatFunc = new 
FunctionCallExpr( + dateFormatName, new FunctionParams(false, dateFormatArgs)); + + return dateFormatFunc; + } else if (funcName.equalsIgnoreCase("alignment_timestamp")) { + /* + * change to: + * UNIX_TIMESTAMP(DATE_FORMAT(FROM_UNIXTIME(ts), "%Y-01-01 00:00:00")); + * + */ + + // FROM_UNIXTIME + FunctionName fromUnixName = new FunctionName("FROM_UNIXTIME"); + List fromUnixArgs = Lists.newArrayList(funcExpr.getChild(1)); + FunctionCallExpr fromUnixFunc = new FunctionCallExpr( + fromUnixName, new FunctionParams(false, fromUnixArgs)); + + // DATE_FORMAT + StringLiteral precision = (StringLiteral) funcExpr.getChild(0); + StringLiteral format; + if (precision.getStringValue().equalsIgnoreCase("year")) { + format = new StringLiteral("%Y-01-01 00:00:00"); + } else if (precision.getStringValue().equalsIgnoreCase("month")) { + format = new StringLiteral("%Y-%m-01 00:00:00"); + } else if (precision.getStringValue().equalsIgnoreCase("day")) { + format = new StringLiteral("%Y-%m-%d 00:00:00"); + } else if (precision.getStringValue().equalsIgnoreCase("hour")) { + format = new StringLiteral("%Y-%m-%d %H:00:00"); + } else { + throw new UserException("Unknown precision(" + precision.getStringValue() + ")"); + } + FunctionName dateFormatName = new FunctionName("DATE_FORMAT"); + List dateFormatArgs = Lists.newArrayList(fromUnixFunc, format); + FunctionCallExpr dateFormatFunc = new FunctionCallExpr( + dateFormatName, new FunctionParams(false, dateFormatArgs)); + + // UNIX_TIMESTAMP + FunctionName unixTimeName = new FunctionName("UNIX_TIMESTAMP"); + List unixTimeArgs = Lists.newArrayList(); + unixTimeArgs.add(dateFormatFunc); + FunctionCallExpr unixTimeFunc = new FunctionCallExpr( + unixTimeName, new FunctionParams(false, unixTimeArgs)); + + return unixTimeFunc; + } else if (funcName.equalsIgnoreCase("default_value")) { + return funcExpr.getChild(0); + } else if (funcName.equalsIgnoreCase("now")) { + FunctionName nowFunctionName = new FunctionName("NOW"); + FunctionCallExpr newFunc = new FunctionCallExpr(nowFunctionName, new FunctionParams(null)); + return newFunc; + } else if (funcName.equalsIgnoreCase("substitute")) { + return funcExpr.getChild(0); + } + } + return originExpr; + } + public void unprotectAddLoadJob(LoadJob job, boolean isReplay) throws DdlException { long jobId = job.getId(); long dbId = job.getDbId(); @@ -1349,7 +1726,7 @@ public List getLoadJobs(JobState jobState) { return jobs; } - public int getLoadJobNum(JobState jobState, long dbId) { + public long getLoadJobNum(JobState jobState, long dbId) { readLock(); try { List loadJobs = this.dbToLoadJobs.get(dbId); @@ -1640,7 +2017,7 @@ public List> getLoadJobUnfinishedInfo(long jobId) { long versionHash = partitionLoadInfo.getVersionHash(); for (Replica replica : tablet.getReplicas()) { - if (replica.checkVersionCatchUp(version, versionHash)) { + if (replica.checkVersionCatchUp(version, versionHash, false)) { continue; } @@ -1862,8 +2239,8 @@ public void unprotectQuorumLoadJob(LoadJob job, Database db) { partitionLoadInfo.getVersionHash(), jobId); // update table row count - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { - long tableRowCount = 0L; + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { + long indexRowCount = 0L; for (Tablet tablet : materializedIndex.getTablets()) { long tabletRowCount = 0L; for (Replica replica : tablet.getReplicas()) { @@ -1872,9 +2249,9 @@ public void unprotectQuorumLoadJob(LoadJob job, Database db) { tabletRowCount = 
replicaRowCount; } } - tableRowCount += tabletRowCount; + indexRowCount += tabletRowCount; } - materializedIndex.setRowCount(tableRowCount); + materializedIndex.setRowCount(indexRowCount); } // end for indices } // end for partitions } // end for tables @@ -2392,7 +2769,7 @@ private boolean processQuorumFinished(LoadJob job, Database db) { updatePartitionVersion(partition, partitionLoadInfo.getVersion(), partitionLoadInfo.getVersionHash(), jobId); - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { long tableRowCount = 0L; for (Tablet tablet : materializedIndex.getTablets()) { long tabletRowCount = 0L; @@ -2751,11 +3128,11 @@ private void checkDeleteV2(OlapTable table, Partition partition, List slotRef.setCol(column.getName()); } Map> indexIdToSchema = table.getIndexIdToSchema(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { // check table has condition column - Map indexNameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + Map indexColNameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); for (Column column : indexIdToSchema.get(index.getId())) { - indexNameToColumn.put(column.getName(), column); + indexColNameToColumn.put(column.getName(), column); } String indexName = table.getIndexNameById(index.getId()); for (Predicate condition : conditions) { @@ -2767,7 +3144,7 @@ private void checkDeleteV2(OlapTable table, Partition partition, List IsNullPredicate isNullPredicate = (IsNullPredicate) condition; columnName = ((SlotRef) isNullPredicate.getChild(0)).getColumnName(); } - Column column = indexNameToColumn.get(columnName); + Column column = indexColNameToColumn.get(columnName); if (column == null) { ErrorReport.reportDdlException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, indexName); } @@ -2811,221 +3188,6 @@ private void checkDeleteV2(OlapTable table, Partition partition, List } } - private void checkDelete(OlapTable table, Partition partition, List conditions, - long checkVersion, long checkVersionHash, List deleteConditions, - Map> asyncTabletIdToBackends, boolean preCheck) - throws DdlException { - // check partition state - PartitionState state = partition.getState(); - if (state != PartitionState.NORMAL) { - // ErrorReport.reportDdlException(ErrorCode.ERR_BAD_PARTITION_STATE, partition.getName(), state.name()); - throw new DdlException("Partition[" + partition.getName() + "]' state is not NORNAL: " + state.name()); - } - - // check running load job - List quorumFinishedLoadJobs = Lists.newArrayList(); - if (!checkPartitionLoadFinished(partition.getId(), quorumFinishedLoadJobs)) { - // ErrorReport.reportDdlException(ErrorCode.ERR_PARTITION_HAS_LOADING_JOBS, partition.getName()); - throw new DdlException("Partition[" + partition.getName() + "] has unfinished load jobs"); - } - - // get running async delete job - List asyncDeleteJobs = getCopiedAsyncDeleteJobs(); - - // check condition column is key column and condition value - Map nameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - for (Column column : table.getBaseSchema()) { - nameToColumn.put(column.getName(), column); - } - for (Predicate condition : conditions) { - SlotRef slotRef = null; - if (condition instanceof BinaryPredicate) { - BinaryPredicate binaryPredicate = (BinaryPredicate) condition; - slotRef = (SlotRef) binaryPredicate.getChild(0); - } 
else if (condition instanceof IsNullPredicate) { - IsNullPredicate isNullPredicate = (IsNullPredicate) condition; - slotRef = (SlotRef) isNullPredicate.getChild(0); - } - String columnName = slotRef.getColumnName(); - if (!nameToColumn.containsKey(columnName)) { - ErrorReport.reportDdlException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, table.getName()); - } - - Column column = nameToColumn.get(columnName); - if (!column.isKey()) { - // ErrorReport.reportDdlException(ErrorCode.ERR_NOT_KEY_COLUMN, columnName); - throw new DdlException("Column[" + columnName + "] is not key column"); - } - - if (condition instanceof BinaryPredicate) { - String value = null; - try { - BinaryPredicate binaryPredicate = (BinaryPredicate) condition; - value = ((LiteralExpr) binaryPredicate.getChild(1)).getStringValue(); - LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())); - } catch (AnalysisException e) { - // ErrorReport.reportDdlException(ErrorCode.ERR_INVALID_VALUE, value); - throw new DdlException("Invalid column value[" + value + "]"); - } - } - - // set schema column name - slotRef.setCol(column.getName()); - } - - long tableId = table.getId(); - long partitionId = partition.getId(); - Map> indexIdToSchema = table.getIndexIdToSchema(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { - // check table has condition column - Map indexNameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - for (Column column : indexIdToSchema.get(index.getId())) { - indexNameToColumn.put(column.getName(), column); - } - String indexName = table.getIndexNameById(index.getId()); - for (Predicate condition : conditions) { - String columnName = null; - if (condition instanceof BinaryPredicate) { - BinaryPredicate binaryPredicate = (BinaryPredicate) condition; - columnName = ((SlotRef) binaryPredicate.getChild(0)).getColumnName(); - } else if (condition instanceof IsNullPredicate) { - IsNullPredicate isNullPredicate = (IsNullPredicate) condition; - columnName = ((SlotRef) isNullPredicate.getChild(0)).getColumnName(); - } - Column column = indexNameToColumn.get(columnName); - if (column == null) { - ErrorReport.reportDdlException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, indexName); - } - - if (table.getKeysType() == KeysType.DUP_KEYS && !column.isKey()) { - throw new DdlException("Column[" + columnName + "] is not key column in index[" + indexName + "]"); - } - } - - // check replica version and backend alive - short replicationNum = table.getPartitionInfo().getReplicationNum(partition.getId()); - for (Tablet tablet : index.getTablets()) { - Set needAsyncBackendIds = Sets.newHashSet(); - for (Replica replica : tablet.getReplicas()) { - if (!Catalog.getCurrentSystemInfo().checkBackendAvailable(replica.getBackendId())) { - LOG.warn("backend[{}] is not alive when delete check. pre: {}", - replica.getBackendId(), preCheck); - needAsyncBackendIds.add(replica.getBackendId()); - continue; - } - - // check replica version. - // here is a little bit confused. the main idea is - // 1. check if replica catch up the version - // 2. if not catch up and this is pre check, make sure there will be right quorum finished load jobs - // to fill the version gap between 'replica committed version' and 'partition committed version'. - // 3. if not catch up and this is after check - // 1) if diff version == 1, some sync delete task may failed. add async delete task. 
- // 2) if diff version > 1, make sure there will be right quorum finished load jobs - // to fill the version gap between 'replica committed version' and 'delete version - 1'. - // if ok, add async delete task. - if (!replica.checkVersionCatchUp(checkVersion, checkVersionHash)) { - long replicaVersion = replica.getVersion(); - if (replicaVersion == checkVersion) { - // in this case, version is same but version hash is not. - // which mean the current replica version is a non-committed version. - // so the replica's committed version should be the previous one. - --replicaVersion; - } - - // the *diffVersion* is number of versions need to be check - // for now: - // *replicaVersion* : the 'committed version' of the replica - // *checkVersion* : - // 1) if preCheck, this is partition committed version - // 2) if not preCheck, this is delete version - long diffVersion = checkVersion - replicaVersion; - Preconditions.checkState(diffVersion > 0); - for (int i = 1; i <= diffVersion; i++) { - boolean find = false; - long theVersion = replicaVersion + i; - for (LoadJob loadJob : quorumFinishedLoadJobs) { - if (theVersion == loadJob.getPartitionLoadInfo(tableId, partitionId).getVersion()) { - find = true; - break; - } - } - - for (AsyncDeleteJob deleteJob : asyncDeleteJobs) { - if (tableId == deleteJob.getTableId() && partitionId == deleteJob.getPartitionId() - && theVersion == deleteJob.getPartitionVersion()) { - find = true; - break; - } - } - - if (!find) { - if (theVersion == checkVersion && !preCheck) { - // the sync delete task of this replica may failed. - // add async delete task after. - continue; - } else { - // this should not happend. add log to observe. - LOG.error("replica version does not catch up with version: {}-{}. " - + "replica: {}-{}-{}-{}", - checkVersion, checkVersionHash, replica.getId(), tablet.getId(), - replica.getBackendId(), replica.getState()); - throw new DdlException("Replica[" + tablet.getId() + "-" + replica.getId() - + "] is not catch up with version: " + checkVersion + "-" - + replica.getVersion()); - } - } - } - - needAsyncBackendIds.add(replica.getBackendId()); - } // end check replica version - } // end for replicas - - if (replicationNum - needAsyncBackendIds.size() < replicationNum / 2 + 1) { - String backendsStr = Joiner.on(", ").join(needAsyncBackendIds); - LOG.warn("too many unavailable replica in tablet[{}], backends:[{}]", tablet.getId(), backendsStr); - throw new DdlException("Too many replicas are not available. Wait 10 mins and try again." - + " if still not work, contact Palo RD"); - } - - if (!needAsyncBackendIds.isEmpty()) { - LOG.info("add tablet[{}] to async delete. 
backends: {}", - tablet.getId(), needAsyncBackendIds); - asyncTabletIdToBackends.put(tablet.getId(), needAsyncBackendIds); - } - } // end for tablets - } // end for indices - - if (deleteConditions == null) { - return; - } - - // save delete conditions - for (Predicate condition : conditions) { - if (condition instanceof BinaryPredicate) { - BinaryPredicate binaryPredicate = (BinaryPredicate) condition; - SlotRef slotRef = (SlotRef) binaryPredicate.getChild(0); - String columnName = slotRef.getColumnName(); - StringBuilder sb = new StringBuilder(); - sb.append(columnName).append(" ").append(binaryPredicate.getOp().name()).append(" \"") - .append(((LiteralExpr) binaryPredicate.getChild(1)).getStringValue()).append("\""); - deleteConditions.add(sb.toString()); - } else if (condition instanceof IsNullPredicate) { - IsNullPredicate isNullPredicate = (IsNullPredicate) condition; - SlotRef slotRef = (SlotRef) isNullPredicate.getChild(0); - String columnName = slotRef.getColumnName(); - StringBuilder sb = new StringBuilder(); - sb.append(columnName); - if (isNullPredicate.isNotNull()) { - sb.append(" IS NOT NULL"); - } else { - sb.append(" IS NULL"); - } - deleteConditions.add(sb.toString()); - } - } - } - private boolean checkAndAddRunningSyncDeleteJob(long partitionId, String partitionName) throws DdlException { // check if there are synchronized delete job under going writeLock(); @@ -3140,7 +3302,7 @@ public void delete(DeleteStmt stmt) throws DdlException { loadDeleteJob = new LoadJob(jobId, db.getId(), tableId, partitionId, jobLabel, olapTable.getIndexIdToSchemaHash(), conditions, deleteInfo); Map idToTabletLoadInfo = Maps.newHashMap(); - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : materializedIndex.getTablets()) { long tabletId = tablet.getId(); // tabletLoadInfo is empty, because delete load does not need filepath filesize info @@ -3210,227 +3372,6 @@ public void delete(DeleteStmt stmt) throws DdlException { } } - @Deprecated - public void deleteOld(DeleteStmt stmt) throws DdlException { - String dbName = stmt.getDbName(); - String tableName = stmt.getTableName(); - String partitionName = stmt.getPartitionName(); - List conditions = stmt.getDeleteConditions(); - Database db = Catalog.getInstance().getDb(dbName); - if (db == null) { - throw new DdlException("Db does not exist. name: " + dbName); - } - - DeleteInfo deleteInfo = null; - - long tableId = -1; - long partitionId = -1; - long visibleVersion = -1; - long visibleVersionHash = -1; - long newVersion = -1; - long newVersionHash = -1; - AgentBatchTask deleteBatchTask = null; - int totalReplicaNum = 0; - Map> asyncTabletIdToBackends = Maps.newHashMap(); - db.readLock(); - try { - Table table = db.getTable(tableName); - if (table == null) { - throw new DdlException("Table does not exist. name: " + tableName); - } - - if (table.getType() != TableType.OLAP) { - throw new DdlException("Not olap type table. type: " + table.getType().name()); - } - OlapTable olapTable = (OlapTable) table; - - if (olapTable.getState() != OlapTableState.NORMAL) { - throw new DdlException("Table's state is not normal: " + tableName); - } - - tableId = olapTable.getId(); - Partition partition = olapTable.getPartition(partitionName); - if (partition == null) { - throw new DdlException("Partition does not exist. 
name: " + partitionName); - } - partitionId = partition.getId(); - - // pre check - visibleVersion = partition.getVisibleVersion(); - visibleVersionHash = partition.getVisibleVersionHash(); - checkDelete(olapTable, partition, conditions, visibleVersion, visibleVersionHash, - null, asyncTabletIdToBackends, true); - - newVersion = visibleVersion + 1; - newVersionHash = Util.generateVersionHash(); - deleteInfo = new DeleteInfo(db.getId(), tableId, tableName, - partition.getId(), partitionName, - newVersion, newVersionHash, null); - - checkAndAddRunningSyncDeleteJob(deleteInfo.getPartitionId(), partitionName); - - // create sync delete tasks - deleteBatchTask = new AgentBatchTask(); - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { - int schemaHash = olapTable.getSchemaHashByIndexId(materializedIndex.getId()); - for (Tablet tablet : materializedIndex.getTablets()) { - long tabletId = tablet.getId(); - for (Replica replica : tablet.getReplicas()) { - - if (asyncTabletIdToBackends.containsKey(tabletId) - && asyncTabletIdToBackends.get(tabletId).contains(replica.getBackendId())) { - continue; - } - - AgentTask pushTask = new PushTask(null, replica.getBackendId(), db.getId(), - tableId, partition.getId(), - materializedIndex.getId(), tabletId, replica.getId(), - schemaHash, newVersion, - newVersionHash, null, -1L, 0, -1L, TPushType.DELETE, - conditions, false, TPriority.HIGH); - if (AgentTaskQueue.addTask(pushTask)) { - deleteBatchTask.addTask(pushTask); - ++totalReplicaNum; - } - } - } - } - } finally { - db.readUnlock(); - } - - // send tasks to backends - MarkedCountDownLatch countDownLatch = new MarkedCountDownLatch(totalReplicaNum); - for (AgentTask task : deleteBatchTask.getAllTasks()) { - countDownLatch.addMark(task.getBackendId(), task.getSignature()); - ((PushTask) task).setCountDownLatch(countDownLatch); - } - AgentTaskExecutor.submit(deleteBatchTask); - long timeout = Config.tablet_delete_timeout_second * 1000L * totalReplicaNum; - boolean ok = false; - try { - ok = countDownLatch.await(timeout, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - LOG.warn("InterruptedException: ", e); - ok = false; - } - - if (!ok) { - // sync delete failed for unknown reason. - // use async delete to try to make up after. - LOG.warn("sync delete failed. try async delete. table: {}, partition: {}", tableName, partitionName); - } - - Partition partition = null; - try { - // after check - db.writeLock(); - try { - OlapTable table = (OlapTable) db.getTable(tableName); - if (table == null) { - throw new DdlException("Table does not exist. name: " + tableName); - } - - partition = table.getPartition(partitionName); - if (partition == null) { - throw new DdlException("Partition does not exist. name: " + partitionName); - } - - // after check - // 1. check partition committed version first - if (partition.getVisibleVersion() > visibleVersion - || (visibleVersion == partition.getVisibleVersion() - && visibleVersionHash != partition.getVisibleVersionHash())) { - LOG.warn("before delete version: {}-{}. after delete version: {}-{}", - visibleVersion, visibleVersionHash, - partition.getVisibleVersion(), partition.getVisibleVersionHash()); - throw new DdlException("There may have some load job done during delete job. Try again"); - } - - // 2. 
after check - List deleteConditions = Lists.newArrayList(); - checkDelete(table, partition, conditions, newVersion, newVersionHash, deleteConditions, - asyncTabletIdToBackends, false); - deleteInfo.setDeleteConditions(deleteConditions); - - // update partition's version - updatePartitionVersion(partition, newVersion, newVersionHash, -1); - - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { - long indexId = materializedIndex.getId(); - for (Tablet tablet : materializedIndex.getTablets()) { - long tabletId = tablet.getId(); - for (Replica replica : tablet.getReplicas()) { - ReplicaPersistInfo info = - ReplicaPersistInfo.createForCondDelete(indexId, - tabletId, - replica.getId(), - replica.getVersion(), - replica.getVersionHash(), - table.getSchemaHashByIndexId(indexId), - replica.getDataSize(), - replica.getRowCount(), - replica.getLastFailedVersion(), - replica.getLastFailedVersionHash(), - replica.getLastSuccessVersion(), - replica.getLastSuccessVersionHash()); - deleteInfo.addReplicaPersistInfo(info); - } - } - } - - writeLock(); - try { - // handle async delete jobs - if (!asyncTabletIdToBackends.isEmpty()) { - AsyncDeleteJob asyncDeleteJob = new AsyncDeleteJob(db.getId(), tableId, partition.getId(), - newVersion, newVersionHash, - conditions); - for (Long tabletId : asyncTabletIdToBackends.keySet()) { - asyncDeleteJob.addTabletId(tabletId); - } - deleteInfo.setAsyncDeleteJob(asyncDeleteJob); - idToQuorumFinishedDeleteJob.put(asyncDeleteJob.getJobId(), asyncDeleteJob); - LOG.info("finished create async delete job: {}", asyncDeleteJob.getJobId()); - } - - // save delete info - List deleteInfos = dbToDeleteInfos.get(db.getId()); - if (deleteInfos == null) { - deleteInfos = Lists.newArrayList(); - dbToDeleteInfos.put(db.getId(), deleteInfos); - } - deleteInfos.add(deleteInfo); - } finally { - writeUnlock(); - } - - // Write edit log - Catalog.getInstance().getEditLog().logFinishSyncDelete(deleteInfo); - LOG.info("delete job finished at: {}. 
table: {}, partition: {}", - TimeUtils.longToTimeString(System.currentTimeMillis()), tableName, partitionName); - } finally { - db.writeUnlock(); - } - } finally { - // clear tasks - List tasks = deleteBatchTask.getAllTasks(); - for (AgentTask task : tasks) { - PushTask pushTask = (PushTask) task; - AgentTaskQueue.removePushTask(pushTask.getBackendId(), pushTask.getSignature(), - pushTask.getVersion(), pushTask.getVersionHash(), - pushTask.getPushType(), pushTask.getTaskType()); - } - - writeLock(); - try { - partitionUnderDelete.remove(partitionId); - } finally { - writeUnlock(); - } - } - } - public List> getAsyncDeleteJobInfo(long jobId) { LinkedList> infos = new LinkedList>(); readLock(); @@ -3461,7 +3402,7 @@ public List> getAsyncDeleteJobInfo(long jobId) { return infos; } - public int getDeleteJobNumByState(long dbId, JobState state) { + public long getDeleteJobNumByState(long dbId, JobState state) { readLock(); try { List deleteJobs = dbToDeleteJobs.get(dbId); @@ -3688,5 +3629,3 @@ public Integer getLoadJobNumByTypeAndState(EtlJobType type, JobState state) { return num; } } - - diff --git a/fe/src/main/java/org/apache/doris/load/LoadChecker.java b/fe/src/main/java/org/apache/doris/load/LoadChecker.java index 83bb0abe3ca7f3..5f9aafdf99ef71 100644 --- a/fe/src/main/java/org/apache/doris/load/LoadChecker.java +++ b/fe/src/main/java/org/apache/doris/load/LoadChecker.java @@ -21,7 +21,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; -import org.apache.doris.catalog.MaterializedIndex.IndexState; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -395,19 +395,25 @@ private Set submitPushTasks(LoadJob job, Database db) { short replicationNum = table.getPartitionInfo().getReplicationNum(partition.getId()); // check all indices (base + roll up (not include ROLLUP state index)) - List indices = partition.getMaterializedIndices(); + List indices = partition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex index : indices) { long indexId = index.getId(); - // if index is in rollup, then not load into it, be will automatically convert the data - if (index.getState() == IndexState.ROLLUP) { - LOG.error("skip table under rollup[{}]", indexId); - continue; - } + // 1. the load job's etl is started before rollup finished // 2. rollup job comes into finishing state, add rollup index to catalog // 3. load job's etl finished, begin to load // 4. load will send data to new rollup index, but could not get schema hash, load will failed + /* + * new: + * 1. load job is started before alter table, and etl task does not contains new indexes + * 2. 
just send push tasks to indexes which it contains, ignore others + */ if (!tableLoadInfo.containsIndex(indexId)) { + if (rollupJob == null) { + // new process, just continue + continue; + } + if (rollupJob.getRollupIndexId() == indexId) { continue; } else { @@ -451,8 +457,6 @@ private Set submitPushTasks(LoadJob job, Database db) { TPushType type = TPushType.LOAD; if (job.isSyncDeleteJob()) { type = TPushType.DELETE; - } else if (job.getDeleteFlag()) { - type = TPushType.LOAD_DELETE; } // add task to batchTask diff --git a/fe/src/main/java/org/apache/doris/load/LoadJob.java b/fe/src/main/java/org/apache/doris/load/LoadJob.java index cb788a48af0600..d8c288f7f2a1b5 100644 --- a/fe/src/main/java/org/apache/doris/load/LoadJob.java +++ b/fe/src/main/java/org/apache/doris/load/LoadJob.java @@ -83,7 +83,6 @@ public enum JobState { long timestamp; private int timeoutSecond; private double maxFilterRatio; - private boolean deleteFlag; private JobState state; private BrokerDesc brokerDesc; @@ -148,7 +147,6 @@ public LoadJob(long id, long dbId, long tableId, long partitionId, String label, this.transactionId = -1; this.timestamp = -1; this.timeoutSecond = DEFAULT_TIMEOUT_S; - this.deleteFlag = true; this.state = JobState.LOADING; this.progress = 0; this.createTimeMs = System.currentTimeMillis(); @@ -200,7 +198,6 @@ public LoadJob(String label, int timeoutSecond, double maxFilterRatio) { this.timestamp = -1; this.timeoutSecond = timeoutSecond; this.maxFilterRatio = maxFilterRatio; - this.deleteFlag = false; this.state = JobState.PENDING; this.progress = 0; this.createTimeMs = System.currentTimeMillis(); @@ -284,14 +281,6 @@ public void setMaxFilterRatio(double maxFilterRatio) { public double getMaxFilterRatio() { return maxFilterRatio; } - - public void setDeleteFlag(boolean deleteFlag) { - this.deleteFlag = deleteFlag; - } - - public boolean getDeleteFlag() { - return deleteFlag; - } public JobState getState() { return state; @@ -647,7 +636,7 @@ public long getDeleteJobTimeout() { @Override public String toString() { return "LoadJob [id=" + id + ", dbId=" + dbId + ", label=" + label + ", timeoutSecond=" + timeoutSecond - + ", maxFilterRatio=" + maxFilterRatio + ", deleteFlag=" + deleteFlag + ", state=" + state + + ", maxFilterRatio=" + maxFilterRatio + ", state=" + state + ", progress=" + progress + ", createTimeMs=" + createTimeMs + ", etlStartTimeMs=" + etlStartTimeMs + ", etlFinishTimeMs=" + etlFinishTimeMs + ", loadStartTimeMs=" + loadStartTimeMs + ", loadFinishTimeMs=" + loadFinishTimeMs + ", failMsg=" + failMsg + ", etlJobType=" + etlJobType @@ -706,7 +695,7 @@ public void write(DataOutput out) throws IOException { out.writeLong(timestamp); out.writeInt(timeoutSecond); out.writeDouble(maxFilterRatio); - out.writeBoolean(deleteFlag); + out.writeBoolean(true); // delete flag, does not use anymore Text.writeString(out, state.name()); out.writeInt(progress); out.writeLong(createTimeMs); @@ -853,7 +842,7 @@ public void readFields(DataInput in) throws IOException { timeoutSecond = in.readInt(); maxFilterRatio = in.readDouble(); - deleteFlag = false; + boolean deleteFlag = false; if (version >= FeMetaVersion.VERSION_30) { deleteFlag = in.readBoolean(); } diff --git a/fe/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java b/fe/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java index acee0b8b7f7ac5..7a5f6829b26086 100644 --- a/fe/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java +++ b/fe/src/main/java/org/apache/doris/load/loadv2/BrokerLoadJob.java @@ -76,7 +76,6 @@ 
public class BrokerLoadJob extends LoadJob { private static final Logger LOG = LogManager.getLogger(BrokerLoadJob.class); // input params - private List dataDescriptions = Lists.newArrayList(); private BrokerDesc brokerDesc; // this param is used to persist the expr of columns // the origin stmt is persisted instead of columns expr @@ -93,12 +92,10 @@ public BrokerLoadJob() { this.jobType = EtlJobType.BROKER; } - public BrokerLoadJob(long dbId, String label, BrokerDesc brokerDesc, List dataDescriptions, - String originStmt) + private BrokerLoadJob(long dbId, String label, BrokerDesc brokerDesc, String originStmt) throws MetaNotFoundException { super(dbId, label); this.timeoutSecond = Config.broker_load_default_timeout_second; - this.dataDescriptions = dataDescriptions; this.brokerDesc = brokerDesc; this.originStmt = originStmt; this.jobType = EtlJobType.BROKER; @@ -112,27 +109,30 @@ public static BrokerLoadJob fromLoadStmt(LoadStmt stmt, String originStmt) throw if (db == null) { throw new DdlException("Database[" + dbName + "] does not exist"); } - // check data source info - LoadJob.checkDataSourceInfo(db, stmt.getDataDescriptions(), EtlJobType.BROKER); // create job try { BrokerLoadJob brokerLoadJob = new BrokerLoadJob(db.getId(), stmt.getLabel().getLabelName(), - stmt.getBrokerDesc(), stmt.getDataDescriptions(), - originStmt); + stmt.getBrokerDesc(), originStmt); brokerLoadJob.setJobProperties(stmt.getProperties()); - brokerLoadJob.setDataSourceInfo(db, stmt.getDataDescriptions()); + brokerLoadJob.checkAndSetDataSourceInfo(db, stmt.getDataDescriptions()); return brokerLoadJob; } catch (MetaNotFoundException e) { throw new DdlException(e.getMessage()); } } - private void setDataSourceInfo(Database db, List dataDescriptions) throws DdlException { - for (DataDescription dataDescription : dataDescriptions) { - BrokerFileGroup fileGroup = new BrokerFileGroup(dataDescription); - fileGroup.parse(db); - dataSourceInfo.addFileGroup(fileGroup); + private void checkAndSetDataSourceInfo(Database db, List dataDescriptions) throws DdlException { + // check data source info + db.readLock(); + try { + for (DataDescription dataDescription : dataDescriptions) { + BrokerFileGroup fileGroup = new BrokerFileGroup(dataDescription); + fileGroup.parse(db, dataDescription); + dataSourceInfo.addFileGroup(fileGroup); + } + } finally { + db.readUnlock(); } } @@ -277,7 +277,7 @@ public void analyze() { if (db == null) { throw new DdlException("Database[" + dbId + "] does not exist"); } - setDataSourceInfo(db, stmt.getDataDescriptions()); + checkAndSetDataSourceInfo(db, stmt.getDataDescriptions()); } catch (Exception e) { LOG.info(new LogBuilder(LogKey.LOAD_JOB, id) .add("origin_stmt", originStmt) @@ -339,6 +339,7 @@ private void createLoadingTask(Database db, BrokerPendingTaskAttachment attachme // divide job into broker loading task by table db.readLock(); try { + List newLoadingTasks = Lists.newArrayList(); for (Map.Entry> entry : dataSourceInfo.getIdToFileGroups().entrySet()) { long tableId = entry.getKey(); @@ -355,16 +356,27 @@ private void createLoadingTask(Database db, BrokerPendingTaskAttachment attachme // Generate loading task and init the plan of task LoadLoadingTask task = new LoadLoadingTask(db, table, brokerDesc, - entry.getValue(), getDeadlineMs(), execMemLimit, - strictMode, transactionId, this); + entry.getValue(), getDeadlineMs(), execMemLimit, + strictMode, transactionId, this, timezone); UUID uuid = UUID.randomUUID(); TUniqueId loadId = new TUniqueId(uuid.getMostSignificantBits(), 
uuid.getLeastSignificantBits()); - task.init(loadId, attachment.getFileStatusByTable(tableId), - attachment.getFileNumByTable(tableId)); - // Add tasks into list and pool + task.init(loadId, attachment.getFileStatusByTable(tableId), attachment.getFileNumByTable(tableId)); idToTasks.put(task.getSignature(), task); + // idToTasks contains previous LoadPendingTasks, so idToTasks is just used to save all tasks. + // use newLoadingTasks to save new created loading tasks and submit them later. + newLoadingTasks.add(task); loadStatistic.numLoadedRowsMap.put(loadId, new AtomicLong(0)); - Catalog.getCurrentCatalog().getLoadTaskScheduler().submit(task); + + // save all related tables and rollups in transaction state + TransactionState txnState = Catalog.getCurrentGlobalTransactionMgr().getTransactionState(transactionId); + if (txnState == null) { + throw new UserException("txn does not exist: " + transactionId); + } + txnState.addTableIndexes(table); + } + // submit all tasks together + for (LoadTask loadTask : newLoadingTasks) { + Catalog.getCurrentCatalog().getLoadTaskScheduler().submit(loadTask); } } finally { db.readUnlock(); @@ -492,7 +504,6 @@ protected void executeReplayOnVisible(TransactionState txnState) { public void write(DataOutput out) throws IOException { super.write(out); brokerDesc.write(out); - dataSourceInfo.write(out); Text.writeString(out, originStmt); } @@ -500,9 +511,9 @@ public void write(DataOutput out) throws IOException { public void readFields(DataInput in) throws IOException { super.readFields(in); brokerDesc = BrokerDesc.read(in); - // The data source info also need to be replayed - // because the load properties of old broker load has been saved in here. - dataSourceInfo.readFields(in); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_61) { + dataSourceInfo.readFields(in); + } if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_58) { originStmt = Text.readString(in); diff --git a/fe/src/main/java/org/apache/doris/load/loadv2/LoadJob.java b/fe/src/main/java/org/apache/doris/load/loadv2/LoadJob.java index bc3c66e0f50991..42191067ca56a8 100644 --- a/fe/src/main/java/org/apache/doris/load/loadv2/LoadJob.java +++ b/fe/src/main/java/org/apache/doris/load/loadv2/LoadJob.java @@ -17,7 +17,6 @@ package org.apache.doris.load.loadv2; -import org.apache.doris.analysis.DataDescription; import org.apache.doris.analysis.LoadStmt; import org.apache.doris.catalog.AuthorizationInfo; import org.apache.doris.catalog.Catalog; @@ -40,7 +39,6 @@ import org.apache.doris.load.EtlStatus; import org.apache.doris.load.FailMsg; import org.apache.doris.load.Load; -import org.apache.doris.load.Source; import org.apache.doris.metric.MetricRepo; import org.apache.doris.mysql.privilege.PaloPrivilege; import org.apache.doris.mysql.privilege.PrivPredicate; @@ -94,9 +92,10 @@ public abstract class LoadJob extends AbstractTxnStateChangeCallback implements protected long timeoutSecond = Config.broker_load_default_timeout_second; protected long execMemLimit = 2147483648L; // 2GB; protected double maxFilterRatio = 0; + protected boolean strictMode = true; + protected String timezone = TimeUtils.DEFAULT_TIME_ZONE; @Deprecated protected boolean deleteFlag = false; - protected boolean strictMode = true; protected long createTimestamp = System.currentTimeMillis(); protected long loadStartTimestamp = -1; @@ -303,17 +302,13 @@ protected void setJobProperties(Map properties) throws DdlExcept if (properties.containsKey(LoadStmt.STRICT_MODE)) { strictMode = 
Boolean.valueOf(properties.get(LoadStmt.STRICT_MODE)); } - } - } - protected static void checkDataSourceInfo(Database db, List dataDescriptions, - EtlJobType jobType) throws DdlException { - for (DataDescription dataDescription : dataDescriptions) { - // loadInfo is a temporary param for the method of checkAndCreateSource. - // >> - Map>> loadInfo = Maps.newHashMap(); - // only support broker load now - Load.checkAndCreateSource(db, dataDescription, loadInfo, false, jobType); + if (properties.containsKey(LoadStmt.TIMEZONE)) { + timezone = properties.get(LoadStmt.TIMEZONE); + } else if (ConnectContext.get() != null) { + // get timezone for session variable + timezone = ConnectContext.get().getSessionVariable().getTimeZone(); + } } } @@ -880,6 +875,7 @@ public void write(DataOutput out) throws IOException { out.writeBoolean(true); authorizationInfo.write(out); } + Text.writeString(out, timezone); } @Override @@ -920,5 +916,8 @@ public void readFields(DataInput in) throws IOException { authorizationInfo.readFields(in); } } + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_61) { + timezone = Text.readString(in); + } } } diff --git a/fe/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java b/fe/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java index 4f25a90b12136e..3ee8a437563c51 100644 --- a/fe/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java +++ b/fe/src/main/java/org/apache/doris/load/loadv2/LoadLoadingTask.java @@ -57,13 +57,14 @@ public class LoadLoadingTask extends LoadTask { private final long execMemLimit; private final boolean strictMode; private final long txnId; + private final String timezone; private LoadingTaskPlanner planner; public LoadLoadingTask(Database db, OlapTable table, BrokerDesc brokerDesc, List fileGroups, long jobDeadlineMs, long execMemLimit, boolean strictMode, - long txnId, LoadTaskCallback callback) { + long txnId, LoadTaskCallback callback, String timezone) { super(callback); this.db = db; this.table = table; @@ -75,11 +76,12 @@ public LoadLoadingTask(Database db, OlapTable table, this.txnId = txnId; this.failMsg = new FailMsg(FailMsg.CancelType.LOAD_RUN_FAIL); this.retryTime = 2; // 2 times is enough + this.timezone = timezone; } public void init(TUniqueId loadId, List> fileStatusList, int fileNum) throws UserException { this.loadId = loadId; - planner = new LoadingTaskPlanner(callback.getCallbackId(), txnId, db.getId(), table, brokerDesc, fileGroups, strictMode); + planner = new LoadingTaskPlanner(callback.getCallbackId(), txnId, db.getId(), table, brokerDesc, fileGroups, strictMode, timezone); planner.plan(loadId, fileStatusList, fileNum); } @@ -98,7 +100,7 @@ protected void executeTask() throws Exception{ private void executeOnce() throws Exception { // New one query id, Coordinator curCoordinator = new Coordinator(callback.getCallbackId(), loadId, planner.getDescTable(), - planner.getFragments(), planner.getScanNodes(), db.getClusterName()); + planner.getFragments(), planner.getScanNodes(), db.getClusterName(), planner.getTimezone()); curCoordinator.setQueryType(TQueryType.LOAD); curCoordinator.setExecMemoryLimit(execMemLimit); curCoordinator.setTimeout((int) (getLeftTimeMs() / 1000)); diff --git a/fe/src/main/java/org/apache/doris/load/loadv2/LoadManager.java b/fe/src/main/java/org/apache/doris/load/loadv2/LoadManager.java index 36f6d929e3ce64..fb76b393cdc325 100644 --- a/fe/src/main/java/org/apache/doris/load/loadv2/LoadManager.java +++ 
b/fe/src/main/java/org/apache/doris/load/loadv2/LoadManager.java @@ -202,6 +202,7 @@ public void createLoadJobV1FromStmt(LoadStmt stmt, EtlJobType jobType, long time * else: return true. * @throws DdlException */ + @Deprecated public boolean createLoadJobV1FromRequest(TMiniLoadRequest request) throws DdlException { String cluster = SystemInfoService.DEFAULT_CLUSTER; if (request.isSetCluster()) { @@ -211,7 +212,7 @@ public boolean createLoadJobV1FromRequest(TMiniLoadRequest request) throws DdlEx writeLock(); try { checkLabelUsed(database.getId(), request.getLabel(), null); - return Catalog.getCurrentCatalog().getLoadInstance().addLoadJob(request); + return Catalog.getCurrentCatalog().getLoadInstance().addMiniLoadJob(request); } finally { writeUnlock(); } diff --git a/fe/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java b/fe/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java index e45d502dc24327..e0019e68a720cd 100644 --- a/fe/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java +++ b/fe/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java @@ -80,7 +80,7 @@ public class LoadingTaskPlanner { public LoadingTaskPlanner(Long loadJobId, long txnId, long dbId, OlapTable table, BrokerDesc brokerDesc, List brokerFileGroups, - boolean strictMode) { + boolean strictMode, String timezone) { this.loadJobId = loadJobId; this.txnId = txnId; this.dbId = dbId; @@ -88,6 +88,7 @@ public LoadingTaskPlanner(Long loadJobId, long txnId, long dbId, OlapTable table this.brokerDesc = brokerDesc; this.fileGroups = brokerFileGroups; this.strictMode = strictMode; + this.analyzer.setTimezone(timezone); } public void plan(TUniqueId loadId, List> fileStatusesList, int filesAdded) @@ -95,7 +96,8 @@ public void plan(TUniqueId loadId, List> fileStatusesLis // Generate tuple descriptor List slotRefs = Lists.newArrayList(); TupleDescriptor tupleDesc = descTable.createTupleDescriptor(); - for (Column col : table.getBaseSchema()) { + // use full schema to fill the descriptor table + for (Column col : table.getFullSchema()) { SlotDescriptor slotDesc = descTable.addSlotDescriptor(tupleDesc); slotDesc.setIsMaterialized(true); slotDesc.setColumn(col); @@ -153,6 +155,10 @@ public List getScanNodes() { return scanNodes; } + public String getTimezone() { + return analyzer.getTimezone(); + } + private String convertBrokerDescPartitionInfo() throws LoadException, MetaNotFoundException { String result = ""; for (BrokerFileGroup brokerFileGroup : fileGroups) { diff --git a/fe/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java b/fe/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java index 1d35f9b1066f89..9b4c0a90ab2b12 100644 --- a/fe/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java +++ b/fe/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java @@ -101,9 +101,9 @@ protected String getTaskDataSourceProperties() { private TExecPlanFragmentParams rePlan(RoutineLoadJob routineLoadJob) throws UserException { TUniqueId loadId = new TUniqueId(id.getMostSignificantBits(), id.getLeastSignificantBits()); // plan for each task, in case table has change(rollup or schema change) - TExecPlanFragmentParams tExecPlanFragmentParams = routineLoadJob.plan(loadId); + TExecPlanFragmentParams tExecPlanFragmentParams = routineLoadJob.plan(loadId, txnId); TPlanFragment tPlanFragment = tExecPlanFragmentParams.getFragment(); - tPlanFragment.getOutput_sink().getOlap_table_sink().setTxn_id(this.txnId); + 
tPlanFragment.getOutput_sink().getOlap_table_sink().setTxn_id(txnId); return tExecPlanFragmentParams; } } diff --git a/fe/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index 801f63156ac4e8..29d4c6ed1fa4a6 100644 --- a/fe/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -196,7 +196,7 @@ public boolean isFinalState() { protected List routineLoadTaskInfoList = Lists.newArrayList(); // stream load planer will be initialized during job schedule - StreamLoadPlanner planner; + protected StreamLoadPlanner planner; // this is the origin stmt of CreateRoutineLoadStmt, we use it to persist the RoutineLoadJob, // because we can not serialize the Expressions contained in job. @@ -574,7 +574,7 @@ private void initPlanner() throws UserException { planner = new StreamLoadPlanner(db, (OlapTable) db.getTable(this.tableId), streamLoadTask); } - public TExecPlanFragmentParams plan(TUniqueId loadId) throws UserException { + public TExecPlanFragmentParams plan(TUniqueId loadId, long txnId) throws UserException { Preconditions.checkNotNull(planner); Database db = Catalog.getCurrentCatalog().getDb(dbId); if (db == null) { @@ -582,7 +582,15 @@ public TExecPlanFragmentParams plan(TUniqueId loadId) throws UserException { } db.readLock(); try { - return planner.plan(loadId); + TExecPlanFragmentParams planParams = planner.plan(loadId); + // add table indexes to transaction state + TransactionState txnState = Catalog.getCurrentGlobalTransactionMgr().getTransactionState(txnId); + if (txnState == null) { + throw new MetaNotFoundException("txn does not exist: " + txnId); + } + txnState.addTableIndexes(planner.getDestTable()); + + return planParams; } finally { db.readUnlock(); } diff --git a/fe/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/src/main/java/org/apache/doris/master/Checkpoint.java index b40c77fb82ac88..f356dc6fd30a8b 100644 --- a/fe/src/main/java/org/apache/doris/master/Checkpoint.java +++ b/fe/src/main/java/org/apache/doris/master/Checkpoint.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.common.Config; import org.apache.doris.common.util.Daemon; import org.apache.doris.metric.MetricRepo; @@ -249,7 +250,7 @@ private boolean checkMemoryEnoughToDoCheckpoint() { OlapTable olapTable = (OlapTable) table; for (Partition partition : olapTable.getPartitions()) { totalPartitionNum++; - for (MaterializedIndex materializedIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { totalIndexNum++; for (Tablet tablet : materializedIndex.getTablets()) { totalTabletNum++; diff --git a/fe/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/src/main/java/org/apache/doris/master/MasterImpl.java index a94b4e22485c82..1393fd6300cb98 100644 --- a/fe/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/src/main/java/org/apache/doris/master/MasterImpl.java @@ -18,6 +18,7 @@ package org.apache.doris.master; import org.apache.doris.alter.AlterJob; +import org.apache.doris.alter.AlterJobV2.JobType; import org.apache.doris.alter.RollupHandler; import org.apache.doris.alter.RollupJob; import org.apache.doris.alter.SchemaChangeHandler; @@ 
-40,6 +41,7 @@ import org.apache.doris.system.Backend; import org.apache.doris.task.AgentTask; import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.AlterReplicaTask; import org.apache.doris.task.CheckConsistencyTask; import org.apache.doris.task.ClearAlterTask; import org.apache.doris.task.ClearTransactionTask; @@ -132,7 +134,8 @@ public TMasterResult finishTask(TFinishTaskRequest request) throws TException { // We start to let FE perceive the task's error msg if (taskType != TTaskType.MAKE_SNAPSHOT && taskType != TTaskType.UPLOAD && taskType != TTaskType.DOWNLOAD && taskType != TTaskType.MOVE - && taskType != TTaskType.CLONE && taskType != TTaskType.PUBLISH_VERSION) { + && taskType != TTaskType.CLONE && taskType != TTaskType.PUBLISH_VERSION + && taskType != TTaskType.CREATE) { return result; } } @@ -199,6 +202,9 @@ public TMasterResult finishTask(TFinishTaskRequest request) throws TException { case RECOVER_TABLET: finishRecoverTablet(task); break; + case ALTER: + finishAlterTask(task); + break; default: break; } @@ -228,24 +234,29 @@ private void finishCreateReplica(AgentTask task, TFinishTaskRequest request) { // if we get here, this task will be removed from AgentTaskQueue for certain. // because in this function, the only problem that cause failure is meta missing. // and if meta is missing, we no longer need to resend this task + try { + CreateReplicaTask createReplicaTask = (CreateReplicaTask) task; + if (request.getTask_status().getStatus_code() != TStatusCode.OK) { + createReplicaTask.countDownToZero(task.getBackendId() + ": " + request.getTask_status().getError_msgs().toString()); + } else { + long tabletId = createReplicaTask.getTabletId(); - CreateReplicaTask createReplicaTask = (CreateReplicaTask) task; - long tabletId = createReplicaTask.getTabletId(); - - if (request.isSetFinish_tablet_infos()) { - Replica replica = Catalog.getCurrentInvertedIndex().getReplica(createReplicaTask.getTabletId(), - createReplicaTask.getBackendId()); - replica.setPathHash(request.getFinish_tablet_infos().get(0).getPath_hash()); + if (request.isSetFinish_tablet_infos()) { + Replica replica = Catalog.getCurrentInvertedIndex().getReplica(createReplicaTask.getTabletId(), + createReplicaTask.getBackendId()); + replica.setPathHash(request.getFinish_tablet_infos().get(0).getPath_hash()); + } + + // this should be called before 'countDownLatch()' + Catalog.getCurrentSystemInfo().updateBackendReportVersion(task.getBackendId(), request.getReport_version(), task.getDbId()); + + createReplicaTask.countDownLatch(task.getBackendId(), task.getSignature()); + LOG.debug("finish create replica. tablet id: {}, be: {}, report version: {}", + tabletId, task.getBackendId(), request.getReport_version()); + } + } finally { + AgentTaskQueue.removeTask(task.getBackendId(), TTaskType.CREATE, task.getSignature()); } - - // this should be called before 'countDownLatch()' - Catalog.getCurrentSystemInfo().updateBackendReportVersion(task.getBackendId(), request.getReport_version(), - task.getDbId()); - - createReplicaTask.countDownLatch(task.getBackendId(), task.getSignature()); - LOG.debug("finish create replica. 
tablet id: {}, be: {}, report version: {}", - tabletId, task.getBackendId(), request.getReport_version()); - AgentTaskQueue.removeTask(task.getBackendId(), TTaskType.CREATE, task.getSignature()); } private void finishRealtimePush(AgentTask task, TFinishTaskRequest request) { @@ -766,4 +777,18 @@ public TFetchResourceResult fetchResource() { return Catalog.getInstance().getAuth().toResourceThrift(); } + private void finishAlterTask(AgentTask task) { + AlterReplicaTask alterTask = (AlterReplicaTask) task; + try { + if (alterTask.getJobType() == JobType.ROLLUP) { + Catalog.getCurrentCatalog().getRollupHandler().handleFinishAlterTask(alterTask); + } else if (alterTask.getJobType() == JobType.SCHEMA_CHANGE) { + Catalog.getCurrentCatalog().getSchemaChangeHandler().handleFinishAlterTask(alterTask); + } + alterTask.setFinished(true); + } catch (MetaNotFoundException e) { + LOG.warn("failed to handle finish alter task: {}, {}", task.getSignature(), e.getMessage()); + } + AgentTaskQueue.removeTask(task.getBackendId(), TTaskType.ALTER, task.getSignature()); + } } diff --git a/fe/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/src/main/java/org/apache/doris/master/ReportHandler.java index 462ddde216144e..83123823097e2c 100644 --- a/fe/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/src/main/java/org/apache/doris/master/ReportHandler.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -134,23 +135,23 @@ public TMasterResult handleReport(TReportRequest request) throws TException { String reportType = ""; if (request.isSetTasks()) { tasks = request.getTasks(); - reportType += " task"; + reportType += "task"; } if (request.isSetDisks()) { disks = request.getDisks(); - reportType += " disk"; + reportType += "disk"; } if (request.isSetTablets()) { tablets = request.getTablets(); reportVersion = request.getReport_version(); - reportType += " tablet"; + reportType += "tablet"; } else if (request.isSetTablet_list()) { // the 'tablets' member will be deprecated in future. tablets = buildTabletMap(request.getTablet_list()); reportVersion = request.getReport_version(); - reportType += " tablet"; + reportType += "tablet"; } if (request.isSetForce_recovery()) { @@ -521,6 +522,11 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta if (index == null) { continue; } + if (index.getState() == IndexState.SHADOW) { + // This index is under schema change or rollup, tablet may not be created on BE. + // ignore it. 
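// Illustrative sketch (assumed shape; the enum itself is not shown in this excerpt): the index
// visibility filter that this patch threads through partition.getMaterializedIndices(...) and the
// report handling above. A SHADOW index is still being built by a rollup or schema change job, so
// its tablets may not exist on the BE yet and must not be treated as stale metadata.
enum IndexExtStateSketch {
    VISIBLE, // normal indexes that serve queries
    SHADOW,  // in-progress copies created by rollup or schema change
    ALL      // VISIBLE + SHADOW, for sinks and reports that must also cover shadow indexes
}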
+ continue; + } Tablet tablet = index.getTablet(tabletId); if (tablet == null) { @@ -591,9 +597,8 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta tablet.deleteReplicaByBackendId(backendId); ++deleteCounter; - // handle related task - Catalog.getInstance().handleJobsWhenDeleteReplica(tableId, partitionId, indexId, tabletId, - replica.getId(), backendId); + // remove replica related tasks + AgentTaskQueue.removeReplicaRelatedTasks(backendId, tabletId); // write edit log ReplicaPersistInfo info = ReplicaPersistInfo.createForDelete(dbId, tableId, partitionId, diff --git a/fe/src/main/java/org/apache/doris/persist/EditLog.java b/fe/src/main/java/org/apache/doris/persist/EditLog.java index 3a115ad939900e..9fe20493981176 100644 --- a/fe/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/src/main/java/org/apache/doris/persist/EditLog.java @@ -17,9 +17,12 @@ package org.apache.doris.persist; +import org.apache.doris.alter.AlterJobV2; import org.apache.doris.alter.DecommissionBackendJob; import org.apache.doris.alter.RollupJob; +import org.apache.doris.alter.RollupJobV2; import org.apache.doris.alter.SchemaChangeJob; +import org.apache.doris.alter.SchemaChangeJobV2; import org.apache.doris.analysis.UserIdentity; import org.apache.doris.backup.BackupJob; import org.apache.doris.backup.Repository; @@ -654,28 +657,42 @@ public static void loadJournal(Catalog catalog, JournalEntity journal) { } case OperationType.OP_REMOVE_ROUTINE_LOAD_JOB: { RoutineLoadOperation operation = (RoutineLoadOperation) journal.getData(); - Catalog.getCurrentCatalog().getRoutineLoadManager().replayRemoveOldRoutineLoad(operation); + catalog.getRoutineLoadManager().replayRemoveOldRoutineLoad(operation); break; } case OperationType.OP_CREATE_LOAD_JOB: { org.apache.doris.load.loadv2.LoadJob loadJob = (org.apache.doris.load.loadv2.LoadJob) journal.getData(); - Catalog.getCurrentCatalog().getLoadManager().replayCreateLoadJob(loadJob); + catalog.getLoadManager().replayCreateLoadJob(loadJob); break; } case OperationType.OP_END_LOAD_JOB: { LoadJobFinalOperation operation = (LoadJobFinalOperation) journal.getData(); - Catalog.getCurrentCatalog().getLoadManager().replayEndLoadJob(operation); + catalog.getLoadManager().replayEndLoadJob(operation); break; } case OperationType.OP_CREATE_SMALL_FILE: { SmallFile smallFile = (SmallFile) journal.getData(); - Catalog.getCurrentCatalog().getSmallFileMgr().replayCreateFile(smallFile); + catalog.getSmallFileMgr().replayCreateFile(smallFile); break; } case OperationType.OP_DROP_SMALL_FILE: { SmallFile smallFile = (SmallFile) journal.getData(); - Catalog.getCurrentCatalog().getSmallFileMgr().replayRemoveFile(smallFile); + catalog.getSmallFileMgr().replayRemoveFile(smallFile); + break; + } + case OperationType.OP_ALTER_JOB_V2: { + AlterJobV2 alterJob = (AlterJobV2) journal.getData(); + switch (alterJob.getType()) { + case ROLLUP: + catalog.getRollupHandler().replayAlterJobV2((RollupJobV2) alterJob); + break; + case SCHEMA_CHANGE: + catalog.getSchemaChangeHandler().replayAlterJobV2((SchemaChangeJobV2) alterJob); + break; + default: + break; + } break; } default: { @@ -1167,4 +1184,8 @@ public void logCreateSmallFile(SmallFile info) { public void logDropSmallFile(SmallFile info) { logEdit(OperationType.OP_DROP_SMALL_FILE, info); } + + public void logAlterJob(AlterJobV2 alterJob) { + logEdit(OperationType.OP_ALTER_JOB_V2, alterJob); + } } diff --git a/fe/src/main/java/org/apache/doris/persist/OperationType.java 
b/fe/src/main/java/org/apache/doris/persist/OperationType.java index 1eb8108b0e9c4d..5041cbc793ab28 100644 --- a/fe/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/src/main/java/org/apache/doris/persist/OperationType.java @@ -54,6 +54,7 @@ public class OperationType { public static final short OP_CLEAR_ROLLUP_INFO = 28; public static final short OP_FINISH_CONSISTENCY_CHECK = 29; public static final short OP_RENAME_ROLLUP = 120; + public static final short OP_ALTER_JOB_V2 = 121; // 30~39 130~139 230~239 ... // load job for only hadoop load diff --git a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java index ef342ec767d588..e5817d8dadc96a 100644 --- a/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -19,14 +19,9 @@ import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.ArithmeticExpr; -import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.BrokerDesc; import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.ExprSubstitutionMap; import org.apache.doris.analysis.FunctionCallExpr; -import org.apache.doris.analysis.FunctionName; -import org.apache.doris.analysis.FunctionParams; -import org.apache.doris.analysis.ImportColumnDesc; import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.NullLiteral; import org.apache.doris.analysis.SlotDescriptor; @@ -39,7 +34,6 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.FsBroker; import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -47,6 +41,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.util.BrokerUtil; import org.apache.doris.load.BrokerFileGroup; +import org.apache.doris.load.Load; import org.apache.doris.system.Backend; import org.apache.doris.thrift.TBrokerFileStatus; import org.apache.doris.thrift.TBrokerRangeDesc; @@ -123,7 +118,6 @@ public int compare(TBrokerFileStatus o1, TBrokerFileStatus o2) { private int nextBe = 0; private Analyzer analyzer; - private List partitionExprs; private static class ParamCreateContext { public BrokerFileGroup fileGroup; @@ -131,6 +125,7 @@ private static class ParamCreateContext { public TupleDescriptor tupleDescriptor; public Map exprMap; public Map slotDescByName; + public String timezone; } private List paramCreateContexts; @@ -163,15 +158,18 @@ public void init(Analyzer analyzer) throws UserException { getFileStatusAndCalcInstance(); paramCreateContexts = Lists.newArrayList(); + int i = 0; for (BrokerFileGroup fileGroup : fileGroups) { ParamCreateContext context = new ParamCreateContext(); context.fileGroup = fileGroup; + context.timezone = analyzer.getTimezone(); try { - initParams(context); + initParams(context, fileStatusesList.get(i)); } catch (AnalysisException e) { throw new UserException(e.getMessage()); } paramCreateContexts.add(context); + ++i; } } @@ -203,7 +201,8 @@ public void setLoadInfo(long loadJobId, } // Called from init, construct source tuple information - private void initParams(ParamCreateContext context) throws AnalysisException, UserException { + private void initParams(ParamCreateContext context, List fileStatus) + throws AnalysisException, UserException { TBrokerScanRangeParams params = new 
TBrokerScanRangeParams(); context.params = params; @@ -225,190 +224,13 @@ private void initParams(ParamCreateContext context) throws AnalysisException, Us * @throws UserException */ private void initColumns(ParamCreateContext context) throws UserException { - // This tuple descriptor is used for origin file - TupleDescriptor srcTupleDesc = analyzer.getDescTbl().createTupleDescriptor(); - context.tupleDescriptor = srcTupleDesc; - Map slotDescByName = Maps.newHashMap(); - context.slotDescByName = slotDescByName; - - TBrokerScanRangeParams params = context.params; - // there are no columns transform - List originColumnNameToExprList = context.fileGroup.getColumnExprList(); - if (originColumnNameToExprList == null || originColumnNameToExprList.isEmpty()) { - for (Column column : targetTable.getBaseSchema()) { - SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc); - slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR)); - slotDesc.setIsMaterialized(true); - // ISSUE A: src slot should be nullable even if the column is not nullable. - // because src slot is what we read from file, not represent to real column value. - // If column is not nullable, error will be thrown when filling the dest slot, - // which is not nullable - slotDesc.setIsNullable(true); - slotDescByName.put(column.getName(), slotDesc); - params.addToSrc_slot_ids(slotDesc.getId().asInt()); - } - params.setSrc_tuple_id(srcTupleDesc.getId().asInt()); - return; - } - - // there are columns expr which belong to load - Map columnNameToExpr = Maps.newHashMap(); - context.exprMap = columnNameToExpr; - for (ImportColumnDesc originColumnNameToExpr : originColumnNameToExprList) { - // make column name case match with real column name - String columnName = originColumnNameToExpr.getColumnName(); - Expr columnExpr = originColumnNameToExpr.getExpr(); - String realColName = targetTable.getColumn(columnName) == null ? 
columnName - : targetTable.getColumn(columnName).getName(); - if (columnExpr != null) { - columnExpr = transformHadoopFunctionExpr(columnName, columnExpr); - columnNameToExpr.put(realColName, columnExpr); - } else { - SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc); - slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR)); - slotDesc.setIsMaterialized(true); - // same as ISSUE A - slotDesc.setIsNullable(true); - slotDesc.setColumn(new Column(realColName, PrimitiveType.VARCHAR)); - params.addToSrc_slot_ids(slotDesc.getId().asInt()); - slotDescByName.put(realColName, slotDesc); - } - } - // analyze all exprs - for (Map.Entry entry : columnNameToExpr.entrySet()) { - ExprSubstitutionMap smap = new ExprSubstitutionMap(); - List slots = Lists.newArrayList(); - entry.getValue().collect(SlotRef.class, slots); - for (SlotRef slot : slots) { - SlotDescriptor slotDesc = slotDescByName.get(slot.getColumnName()); - if (slotDesc == null) { - throw new UserException("unknown reference column, column=" + entry.getKey() - + ", reference=" + slot.getColumnName()); - } - smap.getLhs().add(slot); - smap.getRhs().add(new SlotRef(slotDesc)); - } - Expr expr = entry.getValue().clone(smap); - expr.analyze(analyzer); - - // check if contain aggregation - List funcs = Lists.newArrayList(); - expr.collect(FunctionCallExpr.class, funcs); - for (FunctionCallExpr fn : funcs) { - if (fn.isAggregateFunction()) { - throw new AnalysisException("Don't support aggregation function in load expression"); - } - } - - columnNameToExpr.put(entry.getKey(), expr); - } - params.setSrc_tuple_id(srcTupleDesc.getId().asInt()); - - } - - /** - * This method is used to transform hadoop function. - * The hadoop function includes: replace_value, strftime, time_format, alignment_timestamp, default_value, now. - * It rewrites those function with real function name and param. - * For the other function, the expr only go through this function and the origin expr is returned. 
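// Worked example (illustrative only; the helper below is hypothetical) of the legacy Hadoop-style
// column function rewrite performed by this removed block and by its replacement, Load.initColumns():
//   c1=replace_value('-1', '0')            ->  if(`c1` != '-1', `c1`, '0')
//   c2=strftime('%Y-%m-%d %H:%M:%S', src)  ->  from_unixtime(src)
class HadoopFnRewriteSketch {
    static String rewriteReplaceValue(String col, String oldVal, String newVal) {
        // NE is used so the rewrite keeps the form if(`col` != old, `col`, replacement),
        // letting the IF expression take its result type from the destination column
        return "if(`" + col + "` != '" + oldVal + "', `" + col + "`, '" + newVal + "')";
    }

    static String rewriteStrftime(String unixTimeArg) {
        // the explicit strftime format is dropped; FROM_UNIXTIME's default format is used
        return "from_unixtime(" + unixTimeArg + ")";
    }
}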
- * @param columnName - * @param originExpr - * @return - * @throws UserException - */ - private Expr transformHadoopFunctionExpr(String columnName, Expr originExpr) throws UserException { - Column column = targetTable.getColumn(columnName); - if (column == null) { - throw new UserException("Unknown column(" + columnName + ")"); - } + context.tupleDescriptor = analyzer.getDescTbl().createTupleDescriptor(); + context.slotDescByName = Maps.newHashMap(); + context.exprMap = Maps.newHashMap(); - // To compatible with older load version - if (originExpr instanceof FunctionCallExpr) { - FunctionCallExpr funcExpr = (FunctionCallExpr) originExpr; - String funcName = funcExpr.getFnName().getFunction(); - - if (funcName.equalsIgnoreCase("replace_value")) { - List exprs = Lists.newArrayList(); - SlotRef slotRef = new SlotRef(null, columnName); - // We will convert this to IF(`col` != child0, `col`, child1), - // because we need the if return type equal to `col`, we use NE - // - exprs.add(new BinaryPredicate(BinaryPredicate.Operator.NE, slotRef, funcExpr.getChild(0))); - exprs.add(slotRef); - if (funcExpr.hasChild(1)) { - exprs.add(funcExpr.getChild(1)); - } else { - if (column.getDefaultValue() != null) { - exprs.add(new StringLiteral(column.getDefaultValue())); - } else { - if (column.isAllowNull()) { - exprs.add(NullLiteral.create(Type.VARCHAR)); - } else { - throw new UserException("Column(" + columnName + ") has no default value."); - } - } - } - FunctionCallExpr newFn = new FunctionCallExpr("if", exprs); - return newFn; - } else if (funcName.equalsIgnoreCase("strftime")) { - FunctionName fromUnixName = new FunctionName("FROM_UNIXTIME"); - List fromUnixArgs = Lists.newArrayList(funcExpr.getChild(1)); - FunctionCallExpr fromUnixFunc = new FunctionCallExpr( - fromUnixName, new FunctionParams(false, fromUnixArgs)); - - return fromUnixFunc; - } else if (funcName.equalsIgnoreCase("time_format")) { - FunctionName strToDateName = new FunctionName("STR_TO_DATE"); - List strToDateExprs = Lists.newArrayList(funcExpr.getChild(2), funcExpr.getChild(1)); - FunctionCallExpr strToDateFuncExpr = new FunctionCallExpr( - strToDateName, new FunctionParams(false, strToDateExprs)); - - FunctionName dateFormatName = new FunctionName("DATE_FORMAT"); - List dateFormatArgs = Lists.newArrayList(strToDateFuncExpr, funcExpr.getChild(0)); - FunctionCallExpr dateFormatFunc = new FunctionCallExpr( - dateFormatName, new FunctionParams(false, dateFormatArgs)); - - return dateFormatFunc; - } else if (funcName.equalsIgnoreCase("alignment_timestamp")) { - FunctionName fromUnixName = new FunctionName("FROM_UNIXTIME"); - List fromUnixArgs = Lists.newArrayList(funcExpr.getChild(1)); - FunctionCallExpr fromUnixFunc = new FunctionCallExpr( - fromUnixName, new FunctionParams(false, fromUnixArgs)); - - StringLiteral precision = (StringLiteral) funcExpr.getChild(0); - StringLiteral format; - if (precision.getStringValue().equalsIgnoreCase("year")) { - format = new StringLiteral("%Y-01-01 00:00:00"); - } else if (precision.getStringValue().equalsIgnoreCase("month")) { - format = new StringLiteral("%Y-%m-01 00:00:00"); - } else if (precision.getStringValue().equalsIgnoreCase("day")) { - format = new StringLiteral("%Y-%m-%d 00:00:00"); - } else if (precision.getStringValue().equalsIgnoreCase("hour")) { - format = new StringLiteral("%Y-%m-%d %H:00:00"); - } else { - throw new UserException("Unknown precision(" + precision.getStringValue() + ")"); - } - FunctionName dateFormatName = new FunctionName("DATE_FORMAT"); - List dateFormatArgs = 
Lists.newArrayList(fromUnixFunc, format); - FunctionCallExpr dateFormatFunc = new FunctionCallExpr( - dateFormatName, new FunctionParams(false, dateFormatArgs)); - - FunctionName unixTimeName = new FunctionName("UNIX_TIMESTAMP"); - List unixTimeArgs = Lists.newArrayList(); - unixTimeArgs.add(dateFormatFunc); - FunctionCallExpr unixTimeFunc = new FunctionCallExpr( - unixTimeName, new FunctionParams(false, unixTimeArgs)); - - return unixTimeFunc; - } else if (funcName.equalsIgnoreCase("default_value")) { - return funcExpr.getChild(0); - } else if (funcName.equalsIgnoreCase("now")) { - FunctionName nowFunctionName = new FunctionName("NOW"); - FunctionCallExpr newFunc = new FunctionCallExpr(nowFunctionName, new FunctionParams(null)); - return newFunc; - } - } - return originExpr; + Load.initColumns(targetTable, context.fileGroup.getColumnExprList(), + context.fileGroup.getColumnToHadoopFunction(), context.exprMap, analyzer, + context.tupleDescriptor, context.slotDescByName, context.params); } private void finalizeParams(ParamCreateContext context) throws UserException, AnalysisException { @@ -474,6 +296,7 @@ private void finalizeParams(ParamCreateContext context) throws UserException, An context.params.putToExpr_of_dest_slot(destSlotDesc.getId().asInt(), expr.treeToThrift()); } context.params.setDest_sid_to_src_sid_without_trans(destSidToSrcSidWithoutTrans); + context.params.setSrc_tuple_id(context.tupleDescriptor.getId().asInt()); context.params.setDest_tuple_id(desc.getId().asInt()); context.params.setStrict_mode(strictMode); // Need re compute memory layout after set some slot descriptor to nullable diff --git a/fe/src/main/java/org/apache/doris/planner/DataSplitSink.java b/fe/src/main/java/org/apache/doris/planner/DataSplitSink.java index 6bdfb2e81d648f..05e7fb74dd9dc5 100644 --- a/fe/src/main/java/org/apache/doris/planner/DataSplitSink.java +++ b/fe/src/main/java/org/apache/doris/planner/DataSplitSink.java @@ -62,7 +62,7 @@ // This class used to split data read from file to batch @Deprecated public class DataSplitSink extends DataSink { - private static final Logger LOG = LogManager.getLogger(Planner.class); + private static final Logger LOG = LogManager.getLogger(DataSplitSink.class); private final OlapTable targetTable; diff --git a/fe/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/src/main/java/org/apache/doris/planner/OlapTableSink.java index c1afb3170919aa..f330b0403ada1d 100644 --- a/fe/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionKey; @@ -81,11 +82,6 @@ public class OlapTableSink extends DataSink { // set after init called private TDataSink tDataSink; - public OlapTableSink(OlapTable dstTable, TupleDescriptor tupleDescriptor) { - this.dstTable = dstTable; - this.tupleDescriptor = tupleDescriptor; - } - public OlapTableSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, String partitions) { this.dstTable = dstTable; this.tupleDescriptor = tupleDescriptor; @@ -183,8 +179,9 @@ private TOlapTableSchemaParam createSchema(long dbId, OlapTable table) { for (Map.Entry> pair : table.getIndexIdToSchema().entrySet()) { List columns = 
Lists.newArrayList(); columns.addAll(pair.getValue().stream().map(Column::getName).collect(Collectors.toList())); - schemaParam.addToIndexes(new TOlapTableIndexSchema(pair.getKey(), columns, - table.getSchemaHashByIndexId(pair.getKey()))); + TOlapTableIndexSchema indexSchema = new TOlapTableIndexSchema(pair.getKey(), columns, + table.getSchemaHashByIndexId(pair.getKey())); + schemaParam.addToIndexes(indexSchema); } return schemaParam; } @@ -246,8 +243,8 @@ private TOlapTablePartitionParam createPartition(long dbId, OlapTable table) thr tPartition.addToEnd_keys(range.upperEndpoint().getKeys().get(i).treeToThrift().getNodes().get(0)); } } - - for (MaterializedIndex index : partition.getMaterializedIndices()) { + + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { tPartition.addToIndexes(new TOlapTableIndexTablets(index.getId(), Lists.newArrayList( index.getTablets().stream().map(Tablet::getId).collect(Collectors.toList())))); tPartition.setNum_buckets(index.getTablets().size()); @@ -277,7 +274,7 @@ private TOlapTablePartitionParam createPartition(long dbId, OlapTable table) thr TOlapTablePartition tPartition = new TOlapTablePartition(); tPartition.setId(partition.getId()); // No lowerBound and upperBound for this range - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { tPartition.addToIndexes(new TOlapTableIndexTablets(index.getId(), Lists.newArrayList( index.getTablets().stream().map(Tablet::getId).collect(Collectors.toList())))); tPartition.setNum_buckets(index.getTablets().size()); @@ -300,7 +297,7 @@ private TOlapTableLocationParam createLocation(OlapTable table) throws UserExcep Multimap allBePathsMap = HashMultimap.create(); for (Partition partition : table.getPartitions()) { int quorum = table.getPartitionInfo().getReplicationNum(partition.getId()) / 2 + 1; - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { // we should ensure the replica backend is alive // otherwise, there will be a 'unknown node id, id=xxx' error for stream load for (Tablet tablet : index.getTablets()) { diff --git a/fe/src/main/java/org/apache/doris/planner/Planner.java b/fe/src/main/java/org/apache/doris/planner/Planner.java index 857fc9e923c570..3b34fc8989a2bd 100644 --- a/fe/src/main/java/org/apache/doris/planner/Planner.java +++ b/fe/src/main/java/org/apache/doris/planner/Planner.java @@ -145,7 +145,6 @@ public void createPlanFragments(StatementBase statment, Analyzer analyzer, TQuer singleNodePlanner = new SingleNodePlanner(plannerContext); PlanNode singleNodePlan = singleNodePlanner.createSingleNodePlan(); - List resultExprs = queryStmt.getResultExprs(); if (statment instanceof InsertStmt) { InsertStmt insertStmt = (InsertStmt) statment; insertStmt.prepareExpressions(); diff --git a/fe/src/main/java/org/apache/doris/planner/RollupSelector.java b/fe/src/main/java/org/apache/doris/planner/RollupSelector.java index b9abc376f15a4c..610040552efa02 100644 --- a/fe/src/main/java/org/apache/doris/planner/RollupSelector.java +++ b/fe/src/main/java/org/apache/doris/planner/RollupSelector.java @@ -17,22 +17,25 @@ package org.apache.doris.planner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.BinaryPredicate; import 
org.apache.doris.analysis.CastExpr; import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.common.UserException; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -104,9 +107,7 @@ private List selectBestPrefixIndexRollup( outputColumns.add(col.getName()); } - final List rollups = Lists.newArrayList(); - rollups.add(partition.getBaseIndex()); - rollups.addAll(partition.getRollupIndices()); + final List rollups = partition.getMaterializedIndices(IndexExtState.VISIBLE); LOG.debug("num of rollup(base included): {}, pre aggr: {}", rollups.size(), isPreAggregation); // 1. find all rollup indexes which contains all tuple columns diff --git a/fe/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java b/fe/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java index bb6b28de23f543..8d5dfa5cb9082d 100644 --- a/fe/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java +++ b/fe/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java @@ -80,12 +80,17 @@ public StreamLoadPlanner(Database db, OlapTable destTable, StreamLoadTask stream descTable = analyzer.getDescTbl(); } + public OlapTable getDestTable() { + return destTable; + } + // create the plan. 
the plan's query id and load id are same, using the parameter 'loadId' public TExecPlanFragmentParams plan(TUniqueId loadId) throws UserException { // construct tuple descriptor, used for scanNode and dataSink TupleDescriptor tupleDesc = descTable.createTupleDescriptor("DstTableTuple"); boolean negative = streamLoadTask.getNegative(); - for (Column col : destTable.getBaseSchema()) { + // here we should be full schema to fill the descriptor table + for (Column col : destTable.getFullSchema()) { SlotDescriptor slotDesc = descTable.addSlotDescriptor(tupleDesc); slotDesc.setIsMaterialized(true); slotDesc.setColumn(col); diff --git a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java index b012d083e520b9..3a4a6a54128440 100644 --- a/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java @@ -22,7 +22,6 @@ import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ExprSubstitutionMap; import org.apache.doris.analysis.FunctionCallExpr; -import org.apache.doris.analysis.ImportColumnDesc; import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.NullLiteral; import org.apache.doris.analysis.SlotDescriptor; @@ -32,11 +31,11 @@ import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; +import org.apache.doris.load.Load; import org.apache.doris.task.StreamLoadTask; import org.apache.doris.thrift.TBrokerRangeDesc; import org.apache.doris.thrift.TBrokerScanNode; @@ -120,72 +119,8 @@ public void init(Analyzer analyzer) throws UserException { TBrokerScanRangeParams params = new TBrokerScanRangeParams(); params.setStrict_mode(streamLoadTask.isStrictMode()); - // parse columns header. this contain map from input column to column of destination table - // columns: k1, k2, v1, v2=k1 + k2 - // this means that there are three columns(k1, k2, v1) in source file, - // and v2 is derived from (k1 + k2) - if (streamLoadTask.getColumnExprDesc() != null && !streamLoadTask.getColumnExprDesc().isEmpty()) { - for (ImportColumnDesc importColumnDesc : streamLoadTask.getColumnExprDesc()) { - // make column name case match with real column name - String columnName = importColumnDesc.getColumnName(); - String realColName = dstTable.getColumn(columnName) == null ? columnName - : dstTable.getColumn(columnName).getName(); - if (importColumnDesc.getExpr() != null) { - exprsByName.put(realColName, importColumnDesc.getExpr()); - } else { - SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc); - slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR)); - slotDesc.setIsMaterialized(true); - // ISSUE A: src slot should be nullable even if the column is not nullable. - // because src slot is what we read from file, not represent to real column value. 
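// Illustrative sketch (hypothetical helper) of the source-slot rule described in the comment
// above, which Load.initColumns() now applies for broker load and stream load alike: every slot
// read from the input file is typed VARCHAR and marked nullable regardless of the destination
// column, because nullability is only enforced later, when the value is written to the dest slot.
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;

class SrcSlotSketch {
    static SlotDescriptor addSrcSlot(Analyzer analyzer, TupleDescriptor srcTupleDesc) {
        SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc);
        slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR));
        slotDesc.setIsMaterialized(true);
        slotDesc.setIsNullable(true); // raw file value, not yet the real column value ("ISSUE A")
        return slotDesc;
    }
}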
- // If column is not nullable, error will be thrown when filling the dest slot, - // which is not nullable - slotDesc.setIsNullable(true); - params.addToSrc_slot_ids(slotDesc.getId().asInt()); - slotDescByName.put(realColName, slotDesc); - } - } - - // analyze all exprs - for (Map.Entry entry : exprsByName.entrySet()) { - ExprSubstitutionMap smap = new ExprSubstitutionMap(); - List slots = Lists.newArrayList(); - entry.getValue().collect(SlotRef.class, slots); - for (SlotRef slot : slots) { - SlotDescriptor slotDesc = slotDescByName.get(slot.getColumnName()); - if (slotDesc == null) { - throw new UserException("unknown reference column, column=" + entry.getKey() - + ", reference=" + slot.getColumnName()); - } - smap.getLhs().add(slot); - smap.getRhs().add(new SlotRef(slotDesc)); - } - Expr expr = entry.getValue().clone(smap); - expr.analyze(analyzer); - - // check if contain aggregation - List funcs = Lists.newArrayList(); - expr.collect(FunctionCallExpr.class, funcs); - for (FunctionCallExpr fn : funcs) { - if (fn.isAggregateFunction()) { - throw new AnalysisException("Don't support aggregation function in load expression"); - } - } - - exprsByName.put(entry.getKey(), expr); - } - } else { - for (Column column : dstTable.getBaseSchema()) { - SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc); - slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR)); - slotDesc.setIsMaterialized(true); - // same as ISSUE A - slotDesc.setIsNullable(true); - params.addToSrc_slot_ids(slotDesc.getId().asInt()); - - slotDescByName.put(column.getName(), slotDesc); - } - } + Load.initColumns(dstTable, streamLoadTask.getColumnExprDescs(), null /* no hadoop function */, + exprsByName, analyzer, srcTupleDesc, slotDescByName, params); // analyze where statement if (streamLoadTask.getWhereExpr() != null) { diff --git a/fe/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/src/main/java/org/apache/doris/qe/Coordinator.java index c583ad18b77d5f..4b874e53cf0ead 100644 --- a/fe/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/src/main/java/org/apache/doris/qe/Coordinator.java @@ -216,7 +216,7 @@ public Coordinator(ConnectContext context, Analyzer analyzer, Planner planner) { // Used for broker load task/export task coordinator public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable descTable, - List fragments, List scanNodes, String cluster) { + List fragments, List scanNodes, String cluster, String timezone) { this.isBlockQuery = true; this.jobId = jobId; this.queryId = queryId; @@ -226,7 +226,7 @@ public Coordinator(Long jobId, TUniqueId queryId, DescriptorTable descTable, this.queryOptions = new TQueryOptions(); this.queryGlobals.setNow_string(DATE_FORMAT.format(new Date())); this.queryGlobals.setTimestamp_ms(new Date().getTime()); - this.queryGlobals.setTime_zone(TimeUtils.DEFAULT_TIME_ZONE); + this.queryGlobals.setTime_zone(timezone); this.tResourceInfo = new TResourceInfo("", ""); this.needReport = true; this.clusterName = cluster; @@ -676,7 +676,7 @@ private void cancelInternal(PPlanFragmentCancelReason cancelReason) { cancelRemoteFragmentsAsync(cancelReason); if (profileDoneSignal != null) { // count down to zero to notify all objects waiting for this - profileDoneSignal.countDownToZero(); + profileDoneSignal.countDownToZero(new Status()); LOG.info("unfinished instance: {}", profileDoneSignal.getLeftMarks().stream().map(e->DebugUtil.printId(e.getKey())).toArray()); } } diff --git a/fe/src/main/java/org/apache/doris/qe/DdlExecutor.java 
b/fe/src/main/java/org/apache/doris/qe/DdlExecutor.java index e6364302e63bec..56137aeadcb58c 100644 --- a/fe/src/main/java/org/apache/doris/qe/DdlExecutor.java +++ b/fe/src/main/java/org/apache/doris/qe/DdlExecutor.java @@ -115,7 +115,7 @@ public static void execute(Catalog catalog, DdlStmt ddlStmt, String origStmt) th } jobType = EtlJobType.HADOOP; } - if (loadStmt.getVersion().equals(Load.VERSION) || loadStmt.getBrokerDesc() == null) { + if (loadStmt.getVersion().equals(Load.VERSION) || jobType == EtlJobType.HADOOP) { catalog.getLoadManager().createLoadJobV1FromStmt(loadStmt, jobType, System.currentTimeMillis()); } else { catalog.getLoadManager().createLoadJobFromStmt(loadStmt, origStmt); diff --git a/fe/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/src/main/java/org/apache/doris/qe/ShowExecutor.java index 8176ff3dad6e42..3ecd5e71e3ec10 100644 --- a/fe/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -70,9 +70,11 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Function; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MetadataViewer; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.ScalarFunction; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; @@ -930,7 +932,7 @@ private void handleShowRoutineLoad() throws AnalysisException { // if the jobName has been specified throw new AnalysisException("There is no job named " + showRoutineLoadStmt.getName() + " in db " + showRoutineLoadStmt.getDbFullName() - + " include history " + showRoutineLoadStmt.isIncludeHistory()); + + ". Include history? " + showRoutineLoadStmt.isIncludeHistory()); } resultSet = new ShowResultSet(showRoutineLoadStmt.getMetaData(), rows); } @@ -1119,6 +1121,21 @@ private void handleShowTablet() throws AnalysisException { isSync = false; break; } + + List replicas = tablet.getReplicas(); + for (Replica replica : replicas) { + Replica tmp = invertedIndex.getReplica(tabletId, replica.getBackendId()); + if (tmp == null) { + isSync = false; + break; + } + // use !=, not equals(), because this should be the same object. 
+ if (tmp != replica) { + isSync = false; + break; + } + } + } finally { db.readUnlock(); } @@ -1180,7 +1197,7 @@ private void handleShowTablet() throws AnalysisException { if (stop) { break; } - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { if (indexId > -1 && index.getId() != indexId) { continue; } diff --git a/fe/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 072a5b834386ad..7a82187bda4769 100644 --- a/fe/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -335,6 +335,7 @@ public TFetchResourceResult fetchResource() throws TException { return masterImpl.fetchResource(); } + @Deprecated @Override public TFeResult miniLoad(TMiniLoadRequest request) throws TException { LOG.info("receive mini load request: label: {}, db: {}, tbl: {}, backend: {}", @@ -790,7 +791,7 @@ private TExecPlanFragmentParams streamLoadPutImpl(TStreamLoadPutRequest request) cluster = SystemInfoService.DEFAULT_CLUSTER; } - Catalog catalog = Catalog.getInstance(); + Catalog catalog = Catalog.getCurrentCatalog(); String fullDbName = ClusterNamespace.getFullName(cluster, request.getDb()); Database db = catalog.getDb(fullDbName); if (db == null) { @@ -812,7 +813,15 @@ private TExecPlanFragmentParams streamLoadPutImpl(TStreamLoadPutRequest request) } StreamLoadTask streamLoadTask = StreamLoadTask.fromTStreamLoadPutRequest(request); StreamLoadPlanner planner = new StreamLoadPlanner(db, (OlapTable) table, streamLoadTask); - return planner.plan(streamLoadTask.getId()); + TExecPlanFragmentParams plan = planner.plan(streamLoadTask.getId()); + // add table indexes to transaction state + TransactionState txnState = Catalog.getCurrentGlobalTransactionMgr().getTransactionState(request.getTxnId()); + if (txnState == null) { + throw new UserException("txn does not exist: " + request.getTxnId()); + } + txnState.addTableIndexes((OlapTable) table); + + return plan; } finally { db.readUnlock(); } diff --git a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java index 6f09c3016f4099..ea5077b475e6eb 100644 --- a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java +++ b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java @@ -24,6 +24,7 @@ import org.apache.doris.thrift.TAgentServiceVersion; import org.apache.doris.thrift.TAgentTaskRequest; import org.apache.doris.thrift.TAlterTabletReq; +import org.apache.doris.thrift.TAlterTabletReqV2; import org.apache.doris.thrift.TCheckConsistencyReq; import org.apache.doris.thrift.TClearAlterTaskRequest; import org.apache.doris.thrift.TClearTransactionTaskRequest; @@ -44,6 +45,8 @@ import org.apache.doris.thrift.TUpdateTabletMetaInfoReq; import org.apache.doris.thrift.TUploadReq; +import com.google.common.collect.Lists; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -101,6 +104,47 @@ public int getTaskNum() { return num; } + // return true only if all tasks are finished. + // NOTICE that even if AgentTask.isFinished() return false, it does not mean that task is not finished. + // this depends on caller's logic. See comments on 'isFinished' member. 
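// Illustrative sketch (hypothetical helper) of a pattern this patch applies in three places
// (BrokerLoadJob.createLoadingTask(), RoutineLoadJob.plan(), FrontendServiceImpl.streamLoadPutImpl()):
// right after a load plan is built, every index of the destination table (base, rollups, and any
// shadow index of a running alter job) is registered in the TransactionState, so the transaction
// records all indexes that must receive the loaded data.
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.common.UserException;
import org.apache.doris.transaction.TransactionState;

class TxnIndexRegistrationSketch {
    static void registerTxnIndexes(long txnId, OlapTable destTable) throws UserException {
        TransactionState txnState =
                Catalog.getCurrentGlobalTransactionMgr().getTransactionState(txnId);
        if (txnState == null) {
            throw new UserException("txn does not exist: " + txnId);
        }
        txnState.addTableIndexes(destTable);
    }
}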
+ public boolean isFinished() { + for (List tasks : this.backendIdToTasks.values()) { + for (AgentTask agentTask : tasks) { + if (!agentTask.isFinished()) { + return false; + } + } + } + return true; + } + + // return the limit number of unfinished tasks. + public List getUnfinishedTasks(int limit) { + List res = Lists.newArrayList(); + for (List tasks : this.backendIdToTasks.values()) { + for (AgentTask agentTask : tasks) { + if (!agentTask.isFinished()) { + if (res.size() < limit) { + res.add(agentTask); + } + } + } + } + return res; + } + + public int getFinishedTaskNum() { + int count = 0; + for (List tasks : this.backendIdToTasks.values()) { + for (AgentTask agentTask : tasks) { + if (agentTask.isFinished()) { + count++; + } + } + } + return count; + } + @Override public void run() { for (Long backendId : this.backendIdToTasks.keySet()) { @@ -315,6 +359,14 @@ private TAgentTaskRequest toAgentTaskRequest(AgentTask task) { LOG.debug(request.toString()); } tAgentTaskRequest.setUpdate_tablet_meta_info_req(request); + } + case ALTER: { + AlterReplicaTask createRollupTask = (AlterReplicaTask) task; + TAlterTabletReqV2 request = createRollupTask.toThrift(); + if (LOG.isDebugEnabled()) { + LOG.debug(request.toString()); + } + tAgentTaskRequest.setAlter_tablet_req_v2(request); return tAgentTaskRequest; } default: diff --git a/fe/src/main/java/org/apache/doris/task/AgentTask.java b/fe/src/main/java/org/apache/doris/task/AgentTask.java index 807d859c557d3e..60486e73791d77 100644 --- a/fe/src/main/java/org/apache/doris/task/AgentTask.java +++ b/fe/src/main/java/org/apache/doris/task/AgentTask.java @@ -34,6 +34,10 @@ public abstract class AgentTask { protected TResourceInfo resourceInfo; protected int failedTimes; + // some of process may use this member to check if the task is finished. + // some of are not. + // so whether the task is finished depends on caller's logic, not the value of this member. + protected boolean isFinished = false; public AgentTask(TResourceInfo resourceInfo, long backendId, TTaskType taskType, long dbId, long tableId, long partitionId, long indexId, long tabletId, long signature) { @@ -101,6 +105,14 @@ public int getFailedTimes() { return this.failedTimes; } + public void setFinished(boolean isFinished) { + this.isFinished = isFinished; + } + + public boolean isFinished() { + return isFinished; + } + @Override public String toString() { return "[" + taskType + "], signature: " + signature + ", backendId: " + backendId + ", tablet id: " + tabletId; diff --git a/fe/src/main/java/org/apache/doris/task/AgentTaskQueue.java b/fe/src/main/java/org/apache/doris/task/AgentTaskQueue.java index 38e4fdfab3a8b9..6b701caa36b2c8 100644 --- a/fe/src/main/java/org/apache/doris/task/AgentTaskQueue.java +++ b/fe/src/main/java/org/apache/doris/task/AgentTaskQueue.java @@ -44,6 +44,12 @@ public class AgentTaskQueue { // backend id -> (task type -> (signature -> agent task)) private static Table> tasks = HashBasedTable.create(); private static int taskNum = 0; + + public static synchronized void addBatchTask(AgentBatchTask batchTask) { + for (AgentTask task : batchTask.getAllTasks()) { + addTask(task); + } + } public static synchronized boolean addTask(AgentTask task) { long backendId = task.getBackendId(); @@ -70,6 +76,14 @@ public static synchronized boolean addTask(AgentTask task) { return true; } + // remove all task in AgentBatchTask. 
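
Note on the AgentBatchTask helpers above: isFinished(), getUnfinishedTasks() and getFinishedTaskNum() let a long-running job poll a batch of per-backend tasks on each scheduler cycle instead of blocking on a latch, and, as the comment says, "finished" only means whatever the caller chose to record via setFinished(). A rough illustration of that polling pattern with simplified stand-in types, not the actual Doris classes:

    import java.util.*;

    public class BatchTaskPollingSketch {
        static class Task {
            private volatile boolean finished = false;
            void setFinished(boolean finished) { this.finished = finished; }
            boolean isFinished() { return finished; }
        }

        // backend id -> tasks sent to that backend (mirrors backendIdToTasks)
        private final Map<Long, List<Task>> backendIdToTasks = new HashMap<>();

        void addTask(long backendId, Task task) {
            backendIdToTasks.computeIfAbsent(backendId, k -> new ArrayList<>()).add(task);
        }

        // true only if every task in the batch has been marked finished by the caller
        boolean isFinished() {
            for (List<Task> tasks : backendIdToTasks.values()) {
                for (Task task : tasks) {
                    if (!task.isFinished()) {
                        return false;
                    }
                }
            }
            return true;
        }

        public static void main(String[] args) {
            BatchTaskPollingSketch batch = new BatchTaskPollingSketch();
            Task t1 = new Task();
            Task t2 = new Task();
            batch.addTask(10001L, t1);
            batch.addTask(10002L, t2);

            // each "run one cycle" of a job checks the batch and reacts to what BEs reported
            System.out.println(batch.isFinished()); // false, nothing reported yet
            t1.setFinished(true);
            System.out.println(batch.isFinished()); // false, t2 still outstanding
            t2.setFinished(true);
            System.out.println(batch.isFinished()); // true, the job can advance its state
        }
    }
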
+ // the caller should make sure all tasks in AgentBatchTask is type of 'type' + public static synchronized void removeBatchTask(AgentBatchTask batchTask, TTaskType type) { + for (AgentTask task : batchTask.getAllTasks()) { + removeTask(task.getBackendId(), type, task.getSignature()); + } + } + public static synchronized void removeTask(long backendId, TTaskType type, long signature) { if (!tasks.contains(backendId, type)) { return; @@ -128,6 +142,15 @@ public static synchronized AgentTask getTask(long backendId, TTaskType type, lon return signatureMap.get(signature); } + // this is just for unit test + public static synchronized List getTask(TTaskType type) { + List res = Lists.newArrayList(); + for (Map agentTasks : tasks.column(TTaskType.ALTER).values()) { + res.addAll(agentTasks.values()); + } + return res; + } + public static synchronized List getDiffTasks(long backendId, Map> runningTasks) { List diffTasks = new ArrayList(); if (!tasks.containsRow(backendId)) { diff --git a/fe/src/main/java/org/apache/doris/task/AlterReplicaTask.java b/fe/src/main/java/org/apache/doris/task/AlterReplicaTask.java new file mode 100644 index 00000000000000..d682383b14c38c --- /dev/null +++ b/fe/src/main/java/org/apache/doris/task/AlterReplicaTask.java @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.task; + +import org.apache.doris.alter.AlterJobV2; +import org.apache.doris.thrift.TAlterTabletReqV2; +import org.apache.doris.thrift.TTaskType; + +/* + * This task is used for alter table process, such as rollup and schema change + * The task will do data transformation from base replica to new replica. + * The new replica should be created before. + * The new replica can be a rollup replica, or a shadow replica of schema change. 
+ */ +public class AlterReplicaTask extends AgentTask { + + private long baseTabletId; + private long newReplicaId; + private int baseSchemaHash; + private int newSchemaHash; + private long version; + private long versionHash; + private long jobId; + private AlterJobV2.JobType jobType; + + public AlterReplicaTask(long backendId, long dbId, long tableId, + long partitionId, long rollupIndexId, long baseIndexId, long rollupTabletId, + long baseTabletId, long newReplicaId, int newSchemaHash, int baseSchemaHash, + long version, long versionHash, long jobId, AlterJobV2.JobType jobType) { + super(null, backendId, TTaskType.ALTER, dbId, tableId, partitionId, rollupIndexId, rollupTabletId); + + this.baseTabletId = baseTabletId; + this.newReplicaId = newReplicaId; + + this.newSchemaHash = newSchemaHash; + this.baseSchemaHash = baseSchemaHash; + + this.version = version; + this.versionHash = versionHash; + this.jobId = jobId; + + this.jobType = jobType; + } + + public long getBaseTabletId() { + return baseTabletId; + } + + public long getNewReplicaId() { + return newReplicaId; + } + + public int getNewSchemaHash() { + return newSchemaHash; + } + + public int getBaseSchemaHash() { + return baseSchemaHash; + } + + public long getVersion() { + return version; + } + + public long getVersionHash() { + return versionHash; + } + + public long getJobId() { + return jobId; + } + + public AlterJobV2.JobType getJobType() { + return jobType; + } + + public TAlterTabletReqV2 toThrift() { + TAlterTabletReqV2 req = new TAlterTabletReqV2(baseTabletId, signature, baseSchemaHash, newSchemaHash); + req.setAlter_version(version); + req.setAlter_version_hash(versionHash); + return req; + } +} diff --git a/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 4cb938d698f9db..9da5f4f66af7fb 100644 --- a/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -17,11 +17,14 @@ package org.apache.doris.task; +import org.apache.doris.alter.SchemaChangeHandler; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.KeysType; import org.apache.doris.common.MarkedCountDownLatch; +import org.apache.doris.common.Status; import org.apache.doris.thrift.TColumn; import org.apache.doris.thrift.TCreateTabletReq; +import org.apache.doris.thrift.TStatusCode; import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TStorageType; import org.apache.doris.thrift.TTabletSchema; @@ -54,15 +57,19 @@ public class CreateReplicaTask extends AgentTask { private double bfFpp; // used for synchronous process - private MarkedCountDownLatch latch; + private MarkedCountDownLatch latch; private boolean inRestoreMode = false; + // if base tablet id is set, BE will create the replica on same disk as this base tablet + private long baseTabletId = -1; + private int baseSchemaHash = -1; + public CreateReplicaTask(long backendId, long dbId, long tableId, long partitionId, long indexId, long tabletId, short shortKeyColumnCount, int schemaHash, long version, long versionHash, KeysType keysType, TStorageType storageType, TStorageMedium storageMedium, List columns, - Set bfColumns, double bfFpp, MarkedCountDownLatch latch) { + Set bfColumns, double bfFpp, MarkedCountDownLatch latch) { super(null, backendId, TTaskType.CREATE, dbId, tableId, partitionId, indexId, tabletId); this.shortKeyColumnCount = shortKeyColumnCount; @@ -92,7 +99,15 @@ public void countDownLatch(long 
backendId, long tabletId) { } } - public void setLatch(MarkedCountDownLatch latch) { + // call this always means one of tasks is failed. count down to zero to finish entire task + public void countDownToZero(String errMsg) { + if (this.latch != null) { + latch.countDownToZero(new Status(TStatusCode.CANCELLED, errMsg)); + LOG.debug("CreateReplicaTask download to zero. error msg: {}", errMsg); + } + } + + public void setLatch(MarkedCountDownLatch latch) { this.latch = latch; } @@ -100,6 +115,11 @@ public void setInRestoreMode(boolean inRestoreMode) { this.inRestoreMode = inRestoreMode; } + public void setBaseTablet(long baseTabletId, int baseSchemaHash) { + this.baseTabletId = baseTabletId; + this.baseSchemaHash = baseSchemaHash; + } + public TCreateTabletReq toThrift() { TCreateTabletReq createTabletReq = new TCreateTabletReq(); createTabletReq.setTablet_id(tabletId); @@ -117,6 +137,11 @@ public TCreateTabletReq toThrift() { if (bfColumns != null && bfColumns.contains(column.getName())) { tColumn.setIs_bloom_filter_column(true); } + // when doing schema change, some modified column has a prefix in name. + // this prefix is only used in FE, not visible to BE, so we should remove this prefix. + if(column.getName().startsWith(SchemaChangeHandler.SHADOW_NAME_PRFIX)) { + tColumn.setColumn_name(column.getName().substring(SchemaChangeHandler.SHADOW_NAME_PRFIX.length())); + } tColumns.add(tColumn); } tSchema.setColumns(tColumns); @@ -136,6 +161,11 @@ public TCreateTabletReq toThrift() { createTabletReq.setTable_id(tableId); createTabletReq.setPartition_id(partitionId); + if (baseTabletId != -1) { + createTabletReq.setBase_tablet_id(baseTabletId); + createTabletReq.setBase_schema_hash(baseSchemaHash); + } + return createTabletReq; } } diff --git a/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java b/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java index 4d8334b08020ef..8c22d9d51d8778 100644 --- a/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java +++ b/fe/src/main/java/org/apache/doris/task/HadoopLoadPendingTask.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.HashDistributionInfo; @@ -41,6 +42,7 @@ import org.apache.doris.load.Source; import org.apache.doris.load.TableLoadInfo; import org.apache.doris.thrift.TStatusCode; +import org.apache.doris.transaction.TransactionState; import com.google.common.base.Function; import com.google.common.base.Preconditions; @@ -67,7 +69,6 @@ public HadoopLoadPendingTask(LoadJob job) { @Override protected void createEtlRequest() throws Exception { - // yiguolei: add a db read lock here? 
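
Note on the CreateReplicaTask change above: setBaseTablet() lets BE place the new replica on the same disk as its base tablet, and shadow columns created during schema change carry an FE-internal name prefix (SchemaChangeHandler.SHADOW_NAME_PRFIX) that is stripped before the schema is sent to BE. A small sketch of that stripping step; the exact prefix string used below is an assumed placeholder, the real value is whatever the constant defines:

    import java.util.*;
    import java.util.stream.Collectors;

    public class ShadowColumnNameSketch {
        // placeholder for SchemaChangeHandler.SHADOW_NAME_PRFIX; treat the exact value as an assumption
        static final String SHADOW_NAME_PREFIX = "__doris_shadow_";

        // BE never sees the FE-internal prefix, so strip it when building the create-tablet request
        static String toBeColumnName(String feColumnName) {
            if (feColumnName.startsWith(SHADOW_NAME_PREFIX)) {
                return feColumnName.substring(SHADOW_NAME_PREFIX.length());
            }
            return feColumnName;
        }

        public static void main(String[] args) {
            List<String> feColumns = Arrays.asList("k1", SHADOW_NAME_PREFIX + "v1", "v2");
            List<String> beColumns = feColumns.stream()
                    .map(ShadowColumnNameSketch::toBeColumnName)
                    .collect(Collectors.toList());
            System.out.println(beColumns); // [k1, v1, v2]
        }
    }
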
because the schema maybe changed during create etl task db.readLock(); try { EtlTaskConf taskConf = new EtlTaskConf(); @@ -88,6 +89,19 @@ protected void createEtlRequest() throws Exception { etlTaskConf = taskConf.toDppTaskConf(); Preconditions.checkNotNull(etlTaskConf); + + // add table indexes to transaction state + TransactionState txnState = Catalog.getCurrentGlobalTransactionMgr().getTransactionState(job.getTransactionId()); + if (txnState == null) { + throw new LoadException("txn does not exist: " + job.getTransactionId()); + } + for (long tableId : job.getIdToTableLoadInfo().keySet()) { + OlapTable table = (OlapTable) db.getTable(tableId); + if (table == null) { + throw new LoadException("table does not exist. id: " + tableId); + } + txnState.addTableIndexes(table); + } } finally { db.readUnlock(); } @@ -204,7 +218,7 @@ private Map createEtlIndices(OlapTable table, long partitionId } else { aggregation = aggregateType.name(); } - } else if ("UNIQUE_KEYS" == table.getKeysType().name()) { + } else if (table.getKeysType().name().equalsIgnoreCase("UNIQUE_KEYS")) { aggregation = "REPLACE"; } dppColumn.put("aggregation_method", aggregation); diff --git a/fe/src/main/java/org/apache/doris/task/LoadEtlTask.java b/fe/src/main/java/org/apache/doris/task/LoadEtlTask.java index 236775cde2d56a..4f37819c4800b1 100644 --- a/fe/src/main/java/org/apache/doris/task/LoadEtlTask.java +++ b/fe/src/main/java/org/apache/doris/task/LoadEtlTask.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.DistributionInfo.DistributionInfoType; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Tablet; @@ -264,7 +265,7 @@ protected Map getTabletLoadInfos(Map> fileStatusesList, int filesAdded) // Generate tuple descriptor List slotRefs = Lists.newArrayList(); TupleDescriptor tupleDesc = descTable.createTupleDescriptor(); - for (Column col : table.getBaseSchema()) { + for (Column col : table.getFullSchema()) { SlotDescriptor slotDesc = descTable.addSlotDescriptor(tupleDesc); slotDesc.setIsMaterialized(true); slotDesc.setColumn(col); diff --git a/fe/src/main/java/org/apache/doris/task/StreamLoadTask.java b/fe/src/main/java/org/apache/doris/task/StreamLoadTask.java index 6feb27e417c5f2..3ac9722c444ccc 100644 --- a/fe/src/main/java/org/apache/doris/task/StreamLoadTask.java +++ b/fe/src/main/java/org/apache/doris/task/StreamLoadTask.java @@ -34,6 +34,7 @@ import org.apache.doris.thrift.TUniqueId; import com.google.common.base.Joiner; +import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -51,7 +52,7 @@ public class StreamLoadTask { private TFileFormatType formatType; // optional - private List columnExprDesc; + private List columnExprDescs = Lists.newArrayList(); private Expr whereExpr; private ColumnSeparator columnSeparator; private String partitions; @@ -83,8 +84,8 @@ public TFileFormatType getFormatType() { return formatType; } - public List getColumnExprDesc() { - return columnExprDesc; + public List getColumnExprDescs() { + return columnExprDescs; } public Expr getWhereExpr() { @@ -162,13 +163,18 @@ public static StreamLoadTask fromRoutineLoadJob(RoutineLoadJob routineLoadJob) { } private void setOptionalFromRoutineLoadJob(RoutineLoadJob routineLoadJob) { - columnExprDesc = routineLoadJob.getColumnDescs(); + // 
copy the columnExprDescs, cause it may be changed when planning. + // so we keep the columnExprDescs in routine load job as origin. + if (routineLoadJob.getColumnDescs() != null) { + columnExprDescs = Lists.newArrayList(routineLoadJob.getColumnDescs()); + } whereExpr = routineLoadJob.getWhereExpr(); columnSeparator = routineLoadJob.getColumnSeparator(); partitions = routineLoadJob.getPartitions() == null ? null : Joiner.on(",").join(routineLoadJob.getPartitions()); strictMode = routineLoadJob.isStrictMode(); } + // used for stream load private void setColumnToColumnExpr(String columns) throws UserException { String columnsSQL = new String("COLUMNS (" + columns + ")"); SqlParser parser = new SqlParser(new SqlScanner(new StringReader(columnsSQL))); @@ -193,7 +199,7 @@ private void setColumnToColumnExpr(String columns) throws UserException { } if (columnsStmt.getColumns() != null && !columnsStmt.getColumns().isEmpty()) { - columnExprDesc = columnsStmt.getColumns(); + columnExprDescs = columnsStmt.getColumns(); } } diff --git a/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java b/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java index 4c358dbb2f0de9..c83aa89a74468c 100644 --- a/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java +++ b/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java @@ -17,11 +17,10 @@ package org.apache.doris.transaction; -import org.apache.doris.alter.RollupJob; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; -import org.apache.doris.catalog.MaterializedIndex.IndexState; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; @@ -329,15 +328,20 @@ public void commitTransaction(long dbId, long transactionId, List allIndices = new ArrayList<>(); - allIndices.addAll(partition.getMaterializedIndices()); - MaterializedIndex rollingUpIndex = null; - RollupJob rollupJob = null; - if (table.getState() == OlapTableState.ROLLUP) { - rollupJob = (RollupJob) catalog.getRollupHandler().getAlterJob(tableId); - rollingUpIndex = rollupJob.getRollupIndex(partition.getId()); + + List allIndices = null; + if (transactionState.getLoadedTblIndexes().isEmpty()) { + allIndices = partition.getMaterializedIndices(IndexExtState.ALL); + } else { + allIndices = Lists.newArrayList(); + for (long indexId : transactionState.getLoadedTblIndexes().get(tableId)) { + MaterializedIndex index = partition.getIndex(indexId); + if (index != null) { + allIndices.add(index); + } + } } - + if (table.getState() == OlapTableState.ROLLUP || table.getState() == OlapTableState.SCHEMA_CHANGE) { /* * This is just a optimization that do our best to not let publish version tasks @@ -355,15 +359,7 @@ public void commitTransaction(long dbId, long transactionId, List errorReplicaIds) thr continue; } int quorumReplicaNum = partitionInfo.getReplicationNum(partitionId) / 2 + 1; - MaterializedIndex baseIndex = partition.getBaseIndex(); - MaterializedIndex rollingUpIndex = null; - RollupJob rollupJob = null; - if (table.getState() == OlapTableState.ROLLUP) { - rollupJob = (RollupJob) catalog.getRollupHandler().getAlterJob(tableId); - rollingUpIndex = rollupJob.getRollupIndex(partitionId); - } - List allInices = new ArrayList<>(); - allInices.addAll(partition.getMaterializedIndices()); - if (rollingUpIndex != null) 
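
Note on the StreamLoadTask change above: the column expression descriptions are copied out of the routine load job with Lists.newArrayList(...) because planning may rewrite them, and the job must keep its original list untouched. A tiny demonstration of why the copy matters; plain strings stand in for the real ImportColumnDesc objects:

    import java.util.*;

    public class DefensiveCopySketch {
        public static void main(String[] args) {
            // the routine load job keeps its canonical column descriptions
            List<String> jobColumnDescs = new ArrayList<>(Arrays.asList("k1", "k2", "v1"));

            // aliasing: the task shares the job's list, so a rewrite during planning leaks back
            List<String> aliased = jobColumnDescs;
            aliased.add("__generated_expr");
            System.out.println(jobColumnDescs); // [k1, k2, v1, __generated_expr], job polluted

            // defensive copy, as the patch does: planning only touches the task's private list
            jobColumnDescs = new ArrayList<>(Arrays.asList("k1", "k2", "v1"));
            List<String> copied = new ArrayList<>(jobColumnDescs);
            copied.add("__generated_expr");
            System.out.println(jobColumnDescs); // [k1, k2, v1], job untouched
        }
    }
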
{ - allInices.add(rollingUpIndex); + + List allIndices = null; + if (transactionState.getLoadedTblIndexes().isEmpty()) { + allIndices = partition.getMaterializedIndices(IndexExtState.ALL); + } else { + allIndices = Lists.newArrayList(); + for (long indexId : transactionState.getLoadedTblIndexes().get(tableId)) { + MaterializedIndex index = partition.getIndex(indexId); + if (index != null) { + allIndices.add(index); + } + } } - for (MaterializedIndex index : allInices) { + + for (MaterializedIndex index : allIndices) { for (Tablet tablet : index.getTablets()) { int healthReplicaNum = 0; for (Replica replica : tablet.getReplicas()) { @@ -688,7 +676,7 @@ public void finishTransaction(long transactionId, Set errorReplicaIds) thr // it is healthy in the past and does not have error in current load if (replica.checkVersionCatchUp(partition.getVisibleVersion(), - partition.getVisibleVersionHash())) { + partition.getVisibleVersionHash(), true)) { // during rollup, the rollup replica's last failed version < 0, // it may be treated as a normal replica. // the replica is not failed during commit or publish @@ -726,17 +714,9 @@ public void finishTransaction(long transactionId, Set errorReplicaIds) thr errorReplicaIds.remove(replica.getId()); ++healthReplicaNum; } - if (replica.getLastFailedVersion() > 0) { - // if this error replica is a base replica and it is under rollup - // then remove the rollup task and rollup job will remove the rollup replica automatically - if (index.getId() == baseIndex.getId() && rollupJob != null) { - LOG.info("base replica [{}] has errors during load, remove rollup task on related replica", replica); - rollupJob.removeReplicaRelatedTask(partition.getId(), - tablet.getId(), replica.getId(), replica.getBackendId()); - } - } } - if (index.getState() != IndexState.ROLLUP && healthReplicaNum < quorumReplicaNum) { + + if (healthReplicaNum < quorumReplicaNum) { LOG.info("publish version failed for transaction {} on tablet {}, with only {} replicas less than quorum {}", transactionState, tablet, healthReplicaNum, quorumReplicaNum); hasError = true; @@ -778,8 +758,8 @@ public boolean isPreviousTransactionsFinished(long endTransactionId, long dbId) continue; } if (entry.getKey() <= endTransactionId) { - LOG.debug("find a running txn with txn_id={}, less than schema change txn_id {}", - entry.getKey(), endTransactionId); + LOG.info("find a running txn with txn_id={} on db: {}, less than watermark txn_id {}", + entry.getKey(), dbId, endTransactionId); return false; } } @@ -833,6 +813,7 @@ public void removeOldTransactions() { } // 3. use dbIdToTxnIds to remove old transactions, without holding load locks again + List abortedTxns = Lists.newArrayList(); writeLock(); try { List transactionsToDelete = Lists.newArrayList(); @@ -859,6 +840,7 @@ public void removeOldTransactions() { transactionState.setFinishTime(System.currentTimeMillis()); transactionState.setReason("transaction is timeout and is cancelled automatically"); unprotectUpsertTransactionState(transactionState); + abortedTxns.add(transactionState); } } } @@ -871,10 +853,19 @@ public void removeOldTransactions() { } finally { writeUnlock(); } + + for (TransactionState abortedTxn : abortedTxns) { + try { + abortedTxn.afterStateTransform(TransactionStatus.ABORTED, true, abortedTxn.getReason()); + } catch (UserException e) { + // just print a log, it does not matter. + LOG.warn("after abort timeout txn failed. 
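
Note on the GlobalTransactionMgr changes above: both commitTransaction and finishTransaction now take the set of indexes to check from TransactionState.getLoadedTblIndexes(), falling back to all materialized indices (IndexExtState.ALL) only when the transaction recorded nothing; this is what lets the old special-casing of the in-progress rollup index be deleted. A self-contained sketch of that selection rule, using simplified types rather than the real catalog classes (the extra containsKey guard is only for the sketch's null safety):

    import java.util.*;

    public class LoadedIndexSelectionSketch {
        static class Index {
            final long id;
            Index(long id) { this.id = id; }
            @Override
            public String toString() { return "index-" + id; }
        }

        // if the txn recorded the indexes it loaded, check only those; otherwise check them all
        static List<Index> selectIndices(long tableId,
                                         Map<Long, Set<Long>> loadedTblIndexes,
                                         Map<Long, Index> partitionIndexById) {
            if (loadedTblIndexes.isEmpty() || !loadedTblIndexes.containsKey(tableId)) {
                return new ArrayList<>(partitionIndexById.values());
            }
            List<Index> res = new ArrayList<>();
            for (long indexId : loadedTblIndexes.get(tableId)) {
                Index index = partitionIndexById.get(indexId);
                if (index != null) { // the index may have been dropped since the load began
                    res.add(index);
                }
            }
            return res;
        }

        public static void main(String[] args) {
            Map<Long, Index> partition = new LinkedHashMap<>();
            partition.put(1L, new Index(1L)); // base index
            partition.put(2L, new Index(2L)); // shadow/rollup index

            // a load that recorded nothing: every index is checked
            System.out.println(selectIndices(100L, Collections.emptyMap(), partition));

            // a load that only wrote the base index: the shadow index is skipped
            Map<Long, Set<Long>> loaded = new HashMap<>();
            loaded.put(100L, new HashSet<>(Collections.singletonList(1L)));
            System.out.println(selectIndices(100L, loaded, partition));
        }
    }
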
txn id: {}", abortedTxn.getTransactionId(), e); + } + } } private boolean checkTxnHasRelatedJob(TransactionState txnState, Map> dbIdToTxnIds) { - // TODO: put checkTxnHasRelaredJob into Load + // TODO: put checkTxnHasRelatedJob into Load Set txnIds = dbIdToTxnIds.get(txnState.getDbId()); if (txnIds == null) { // We can't find the related load job of this database. @@ -1030,18 +1021,7 @@ private void updateCatalogAfterCommitted(TransactionState transactionState, Data for (PartitionCommitInfo partitionCommitInfo : tableCommitInfo.getIdToPartitionCommitInfo().values()) { long partitionId = partitionCommitInfo.getPartitionId(); Partition partition = table.getPartition(partitionId); - List allIndices = new ArrayList<>(); - allIndices.addAll(partition.getMaterializedIndices()); - MaterializedIndex baseIndex = partition.getBaseIndex(); - MaterializedIndex rollingUpIndex = null; - RollupJob rollupJob = null; - if (table.getState() == OlapTableState.ROLLUP) { - rollupJob = (RollupJob) catalog.getRollupHandler().getAlterJob(tableId); - rollingUpIndex = rollupJob.getRollupIndex(partition.getId()); - } - if (rollingUpIndex != null) { - allIndices.add(rollingUpIndex); - } + List allIndices = partition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex index : allIndices) { List tablets = index.getTablets(); for (Tablet tablet : tablets) { @@ -1051,14 +1031,6 @@ private void updateCatalogAfterCommitted(TransactionState transactionState, Data // should get from transaction state replica.updateLastFailedVersion(partitionCommitInfo.getVersion(), partitionCommitInfo.getVersionHash()); - // if this error replica is a base replica and it is under rollup - // then remove the rollup task and rollup job will remove the rollup replica automatically - if (index.getId() == baseIndex.getId() && rollupJob != null) { - LOG.debug("the base replica [{}] has error, remove the related rollup replica from rollupjob [{}]", - replica, rollupJob); - rollupJob.removeReplicaRelatedTask(partition.getId(), - tablet.getId(), replica.getId(), replica.getBackendId()); - } } } } @@ -1086,18 +1058,7 @@ private boolean updateCatalogAfterVisible(TransactionState transactionState, Dat long newCommitVersion = partitionCommitInfo.getVersion(); long newCommitVersionHash = partitionCommitInfo.getVersionHash(); Partition partition = table.getPartition(partitionId); - MaterializedIndex baseIndex = partition.getBaseIndex(); - MaterializedIndex rollingUpIndex = null; - RollupJob rollupJob = null; - if (table.getState() == OlapTableState.ROLLUP) { - rollupJob = (RollupJob) catalog.getRollupHandler().getAlterJob(tableId); - rollingUpIndex = rollupJob.getRollupIndex(partitionId); - } - List allIndices = new ArrayList<>(); - allIndices.addAll(partition.getMaterializedIndices()); - if (rollingUpIndex != null) { - allIndices.add(rollingUpIndex); - } + List allIndices = partition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex index : allIndices) { for (Tablet tablet : index.getTablets()) { for (Replica replica : tablet.getReplicas()) { @@ -1113,7 +1074,7 @@ private boolean updateCatalogAfterVisible(TransactionState transactionState, Dat newVersion = replica.getVersion(); newVersionHash = replica.getVersionHash(); } else if (!replica.checkVersionCatchUp(partition.getVisibleVersion(), - partition.getVisibleVersionHash())) { + partition.getVisibleVersionHash(), true)) { // this means the replica has error in the past, but we did not observe it // during upgrade, one job maybe in quorum finished state, for example, A,B,C 3 
replica // A,B 's version is 10, C's version is 10 but C' 10 is abnormal should be rollback @@ -1142,15 +1103,6 @@ private boolean updateCatalogAfterVisible(TransactionState transactionState, Dat } } replica.updateVersionInfo(newVersion, newVersionHash, lastFailedVersion, lastFailedVersionHash, lastSucessVersion, lastSuccessVersionHash); - // if this error replica is a base replica and it is under rollup - // then remove the rollup task and rollup job will remove the rollup replica automatically - if (index.getId() == baseIndex.getId() - && replica.getLastFailedVersion() > 0 - && rollupJob != null) { - LOG.debug("base replica [{}] has errors during load, remove rollup task on related replica", replica); - rollupJob.removeReplicaRelatedTask(partition.getId(), - tablet.getId(), replica.getId(), replica.getBackendId()); - } } } } // end for indices diff --git a/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java b/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java index 129c03948bd531..1e4c40799205b4 100644 --- a/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java +++ b/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java @@ -40,7 +40,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; public class PublishVersionDaemon extends Daemon { @@ -134,7 +133,7 @@ private void publishVersion() throws UserException { continue; } Map transTasks = transactionState.getPublishVersionTasks(); - Set transErrorReplicas = Sets.newHashSet(); + Set publishErrorReplicaIds = Sets.newHashSet(); List unfinishedTasks = Lists.newArrayList(); for (PublishVersionTask publishVersionTask : transTasks.values()) { if (publishVersionTask.isFinished()) { @@ -153,7 +152,7 @@ private void publishVersion() throws UserException { } Replica replica = tabletInvertedIndex.getReplica(tabletId, publishVersionTask.getBackendId()); if (replica != null) { - transErrorReplicas.add(replica); + publishErrorReplicaIds.add(replica.getId()); } else { LOG.info("could not find related replica with tabletid={}, backendid={}", tabletId, publishVersionTask.getBackendId()); @@ -189,7 +188,7 @@ private void publishVersion() throws UserException { Replica replica = tabletInvertedIndex.getReplica(tabletId, unfinishedTask.getBackendId()); if (replica != null) { - transErrorReplicas.add(replica); + publishErrorReplicaIds.add(replica.getId()); } else { LOG.info("could not find related replica with tabletid={}, backendid={}", tabletId, unfinishedTask.getBackendId()); @@ -207,14 +206,13 @@ private void publishVersion() throws UserException { } if (shouldFinishTxn) { - Set allErrorReplicas = transErrorReplicas.stream().map(v -> v.getId()).collect(Collectors.toSet()); - globalTransactionMgr.finishTransaction(transactionState.getTransactionId(), allErrorReplicas); + globalTransactionMgr.finishTransaction(transactionState.getTransactionId(), publishErrorReplicaIds); if (transactionState.getTransactionStatus() != TransactionStatus.VISIBLE) { // if finish transaction state failed, then update publish version time, should check // to finish after some interval transactionState.updateSendTaskTime(); LOG.debug("publish version for transation {} failed, has {} error replicas during publish", - transactionState, transErrorReplicas.size()); + transactionState, publishErrorReplicaIds.size()); } } diff --git a/fe/src/main/java/org/apache/doris/transaction/TransactionState.java 
b/fe/src/main/java/org/apache/doris/transaction/TransactionState.java index b44a81a1142f90..82ae27574a3072 100644 --- a/fe/src/main/java/org/apache/doris/transaction/TransactionState.java +++ b/fe/src/main/java/org/apache/doris/transaction/TransactionState.java @@ -18,6 +18,7 @@ package org.apache.doris.transaction; import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.common.Config; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.UserException; @@ -141,6 +142,11 @@ public String toString() { // optional private TxnCommitAttachment txnCommitAttachment; + // this map should be set when load execution begin, so that when the txn commit, it will know + // which tables and rollups it loaded. + // tbl id -> (index ids) + private Map> loadedTblIndexes = Maps.newHashMap(); + private String errorLogUrl = null; public TransactionState() { @@ -408,6 +414,28 @@ public void setTxnCommitAttachment(TxnCommitAttachment txnCommitAttachment) { this.txnCommitAttachment = txnCommitAttachment; } + /* + * Add related table indexes to the transaction. + * If function should always be called before adding this transaction state to transaction manager, + * No other thread will access this state. So no need to lock + */ + public void addTableIndexes(OlapTable table) { + Set indexIds = loadedTblIndexes.get(table.getId()); + if (indexIds == null) { + indexIds = Sets.newHashSet(); + loadedTblIndexes.put(table.getId(), indexIds); + } + // always rewrite the index ids + indexIds.clear(); + for (Long indexId : table.getIndexIdToSchema().keySet()) { + indexIds.add(indexId); + } + } + + public Map> getLoadedTblIndexes() { + return loadedTblIndexes; + } + @Override public String toString() { StringBuilder sb = new StringBuilder("TransactionState. "); diff --git a/fe/src/test/java/org/apache/doris/alter/RollupJobTest.java b/fe/src/test/java/org/apache/doris/alter/RollupJobTest.java deleted file mode 100644 index 87b49af93f8ac2..00000000000000 --- a/fe/src/test/java/org/apache/doris/alter/RollupJobTest.java +++ /dev/null @@ -1,351 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
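
Note on the TransactionState change above: the new loadedTblIndexes map (table id to index ids) is filled while the load plan is being built, before the state is handed to the transaction manager, which is why no locking is needed; stream load records it in streamLoadPutImpl and Hadoop load in HadoopLoadPendingTask, and the commit/publish paths read it back. A minimal sketch of the recording side, using a hypothetical stand-in class rather than the real TransactionState:

    import java.util.*;

    public class TxnIndexRecordingSketch {
        // tbl id -> index ids loaded by this transaction (mirrors loadedTblIndexes)
        private final Map<Long, Set<Long>> loadedTblIndexes = new HashMap<>();

        // called once per table when the load plan is built, before the txn is committed
        void addTableIndexes(long tableId, Collection<Long> indexIds) {
            Set<Long> ids = loadedTblIndexes.computeIfAbsent(tableId, k -> new HashSet<>());
            ids.clear();        // always rewrite with the latest index set
            ids.addAll(indexIds);
        }

        Map<Long, Set<Long>> getLoadedTblIndexes() {
            return loadedTblIndexes;
        }

        public static void main(String[] args) {
            TxnIndexRecordingSketch txn = new TxnIndexRecordingSketch();
            // e.g. a stream load into table 100, which currently has base index 1 and rollup 2
            txn.addTableIndexes(100L, Arrays.asList(1L, 2L));
            // if the plan is rebuilt later, the set is simply rewritten
            txn.addTableIndexes(100L, Arrays.asList(1L, 2L, 3L));
            System.out.println(txn.getLoadedTblIndexes()); // {100=[1, 2, 3]}
        }
    }
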
- -package org.apache.doris.alter; - -import static org.junit.Assert.assertEquals; - -import org.apache.doris.alter.AlterJob.JobState; -import org.apache.doris.analysis.AccessTestUtil; -import org.apache.doris.analysis.AddRollupClause; -import org.apache.doris.analysis.AlterClause; -import org.apache.doris.analysis.Analyzer; -import org.apache.doris.catalog.Catalog; -import org.apache.doris.catalog.CatalogTestUtil; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.FakeCatalog; -import org.apache.doris.catalog.FakeEditLog; -import org.apache.doris.catalog.MaterializedIndex; -import org.apache.doris.catalog.MaterializedIndex.IndexState; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.OlapTable.OlapTableState; -import org.apache.doris.catalog.Partition; -import org.apache.doris.catalog.Partition.PartitionState; -import org.apache.doris.catalog.Replica; -import org.apache.doris.catalog.Tablet; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.Config; -import org.apache.doris.common.FeMetaVersion; -import org.apache.doris.meta.MetaContext; -import org.apache.doris.task.AgentTask; -import org.apache.doris.task.AgentTaskQueue; -import org.apache.doris.thrift.TTabletInfo; -import org.apache.doris.thrift.TTaskType; -import org.apache.doris.transaction.FakeTransactionIDGenerator; -import org.apache.doris.transaction.GlobalTransactionMgr; -import org.apache.doris.transaction.TabletCommitInfo; -import org.apache.doris.transaction.TransactionState; -import org.apache.doris.transaction.TransactionState.LoadJobSourceType; -import org.apache.doris.transaction.TransactionStatus; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import mockit.internal.startup.Startup; - -public class RollupJobTest { - - private static FakeEditLog fakeEditLog; - private static FakeCatalog fakeCatalog; - private static FakeTransactionIDGenerator fakeTransactionIDGenerator; - private static GlobalTransactionMgr masterTransMgr; - private static GlobalTransactionMgr slaveTransMgr; - private static Catalog masterCatalog; - private static Catalog slaveCatalog; - - private String transactionSource = "localfe"; - private static Analyzer analyzer; - private static AddRollupClause clause; - - static { - Startup.initializeIfPossible(); - } - - @Before - public void setUp() throws InstantiationException, IllegalAccessException, IllegalArgumentException, - InvocationTargetException, NoSuchMethodException, SecurityException, AnalysisException { - fakeEditLog = new FakeEditLog(); - fakeCatalog = new FakeCatalog(); - fakeTransactionIDGenerator = new FakeTransactionIDGenerator(); - masterCatalog = CatalogTestUtil.createTestCatalog(); - slaveCatalog = CatalogTestUtil.createTestCatalog(); - MetaContext metaContext = new MetaContext(); - metaContext.setMetaVersion(FeMetaVersion.VERSION_40); - metaContext.setThreadLocalInfo(); - // masterCatalog.setJournalVersion(FeMetaVersion.VERSION_40); - // slaveCatalog.setJournalVersion(FeMetaVersion.VERSION_40); - masterTransMgr = masterCatalog.getGlobalTransactionMgr(); - masterTransMgr.setEditLog(masterCatalog.getEditLog()); - - slaveTransMgr = slaveCatalog.getGlobalTransactionMgr(); - slaveTransMgr.setEditLog(slaveCatalog.getEditLog()); - analyzer = 
AccessTestUtil.fetchAdminAnalyzer(false); - clause = new AddRollupClause(CatalogTestUtil.testRollupIndex2, Lists.newArrayList("k1", "v"), null, - CatalogTestUtil.testIndex1, null); - clause.analyze(analyzer); - } - - @Test - public void testAddRollup() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(clause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - rollupHandler.process(alterClauses, db, olapTable, false); - RollupJob rollupJob = (RollupJob) rollupHandler.getAlterJob(CatalogTestUtil.testTableId1); - Assert.assertEquals(CatalogTestUtil.testIndexId1, rollupJob.getBaseIndexId()); - Assert.assertEquals(CatalogTestUtil.testRollupIndex2, rollupJob.getRollupIndexName()); - } - - // start a rollup, then finished - @Test - public void testRollup1() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); - - // add a rollup job - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(clause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); - rollupHandler.process(alterClauses, db, olapTable, false); - RollupJob rollupJob = (RollupJob) rollupHandler.getAlterJob(CatalogTestUtil.testTableId1); - Assert.assertEquals(CatalogTestUtil.testIndexId1, rollupJob.getBaseIndexId()); - Assert.assertEquals(CatalogTestUtil.testRollupIndex2, rollupJob.getRollupIndexName()); - MaterializedIndex rollupIndex = rollupJob.getRollupIndex(CatalogTestUtil.testPartitionId1); - MaterializedIndex baseIndex = testPartition.getBaseIndex(); - assertEquals(IndexState.ROLLUP, rollupIndex.getState()); - assertEquals(IndexState.NORMAL, baseIndex.getState()); - assertEquals(OlapTableState.ROLLUP, olapTable.getState()); - assertEquals(PartitionState.ROLLUP, testPartition.getState()); - Tablet rollupTablet = rollupIndex.getTablets().get(0); - List replicas = rollupTablet.getReplicas(); - Replica rollupReplica1 = replicas.get(0); - Replica rollupReplica2 = replicas.get(1); - Replica rollupReplica3 = replicas.get(2); - - assertEquals(-1, rollupReplica1.getVersion()); - assertEquals(-1, rollupReplica2.getVersion()); - assertEquals(-1, rollupReplica3.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica2.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica3.getLastFailedVersion()); - assertEquals(-1, rollupReplica1.getLastSuccessVersion()); - assertEquals(-1, rollupReplica2.getLastSuccessVersion()); - assertEquals(-1, rollupReplica3.getLastSuccessVersion()); - - // rollup handler run one cycle, agent task is generated and send tasks - rollupHandler.runOneCycle(); - AgentTask task1 = AgentTaskQueue.getTask(rollupReplica1.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - AgentTask task2 = AgentTaskQueue.getTask(rollupReplica2.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - AgentTask task3 = AgentTaskQueue.getTask(rollupReplica3.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - - // be report finishe rollup success - TTabletInfo tTabletInfo = new 
TTabletInfo(rollupTablet.getId(), CatalogTestUtil.testSchemaHash1, - CatalogTestUtil.testStartVersion, CatalogTestUtil.testStartVersionHash, 0, 0); - rollupHandler.handleFinishedReplica(task1, tTabletInfo, -1); - rollupHandler.handleFinishedReplica(task2, tTabletInfo, -1); - rollupHandler.handleFinishedReplica(task3, tTabletInfo, -1); - - // rollup hander run one cycle again, the rollup job is finishing - rollupHandler.runOneCycle(); - Assert.assertEquals(JobState.FINISHING, rollupJob.getState()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica2.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica3.getVersion()); - assertEquals(-1, rollupReplica1.getLastFailedVersion()); - assertEquals(-1, rollupReplica2.getLastFailedVersion()); - assertEquals(-1, rollupReplica3.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastSuccessVersion()); - } - - // load some data and one replica has errors - // start a rollup and then load data - // load finished and rollup finished - @Test - public void testRollup2() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - // load one transaction with backend 2 has errors - long transactionId = masterTransMgr.beginTransaction(CatalogTestUtil.testDbId1, - CatalogTestUtil.testTxnLable1, transactionSource, - LoadJobSourceType.FRONTEND, Config.stream_load_default_timeout_second); - // commit a transaction, backend 2 has errors - TabletCommitInfo tabletCommitInfo1 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId1); - // TabletCommitInfo tabletCommitInfo2 = new - // TabletCommitInfo(CatalogTestUtil.testTabletId1, - // CatalogTestUtil.testBackendId2); - TabletCommitInfo tabletCommitInfo3 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId3); - List transTablets = Lists.newArrayList(); - transTablets.add(tabletCommitInfo1); - // transTablets.add(tabletCommitInfo2); - transTablets.add(tabletCommitInfo3); - masterTransMgr.commitTransaction(CatalogTestUtil.testDbId1, transactionId, transTablets); - TransactionState transactionState = fakeEditLog.getTransaction(transactionId); - assertEquals(TransactionStatus.COMMITTED, transactionState.getTransactionStatus()); - Set errorReplicaIds = Sets.newHashSet(); - errorReplicaIds.add(CatalogTestUtil.testReplicaId2); - masterTransMgr.finishTransaction(transactionId, errorReplicaIds); - transactionState = fakeEditLog.getTransaction(transactionId); - assertEquals(TransactionStatus.VISIBLE, transactionState.getTransactionStatus()); - - // start a rollup - RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); - // add a rollup job - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(clause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); - rollupHandler.process(alterClauses, db, olapTable, false); - RollupJob rollupJob = (RollupJob) rollupHandler.getAlterJob(CatalogTestUtil.testTableId1); - Assert.assertEquals(CatalogTestUtil.testIndexId1, rollupJob.getBaseIndexId()); - 
Assert.assertEquals(CatalogTestUtil.testRollupIndex2, rollupJob.getRollupIndexName()); - MaterializedIndex rollupIndex = rollupJob.getRollupIndex(CatalogTestUtil.testPartitionId1); - MaterializedIndex baseIndex = testPartition.getBaseIndex(); - assertEquals(IndexState.ROLLUP, rollupIndex.getState()); - assertEquals(IndexState.NORMAL, baseIndex.getState()); - assertEquals(OlapTableState.ROLLUP, olapTable.getState()); - assertEquals(PartitionState.ROLLUP, testPartition.getState()); - Tablet rollupTablet = rollupIndex.getTablets().get(0); - List replicas = rollupTablet.getReplicas(); - Replica rollupReplica1 = replicas.get(0); - Replica rollupReplica3 = replicas.get(1); - assertEquals(2, rollupTablet.getReplicas().size()); - - assertEquals(-1, rollupReplica1.getVersion()); - assertEquals(-1, rollupReplica3.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica1.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica3.getLastFailedVersion()); - assertEquals(-1, rollupReplica1.getLastSuccessVersion()); - assertEquals(-1, rollupReplica3.getLastSuccessVersion()); - - // rollup handler run one cycle, agent task is generated and send tasks - rollupHandler.runOneCycle(); - AgentTask task1 = AgentTaskQueue.getTask(rollupReplica1.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - AgentTask task3 = AgentTaskQueue.getTask(rollupReplica3.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - - // be report finishe rollup success - TTabletInfo tTabletInfo = new TTabletInfo(rollupTablet.getId(), CatalogTestUtil.testSchemaHash1, - CatalogTestUtil.testStartVersion + 1, CatalogTestUtil.testPartitionNextVersionHash, 0, 0); - rollupHandler.handleFinishedReplica(task1, tTabletInfo, -1); - rollupHandler.handleFinishedReplica(task3, tTabletInfo, -1); - - // rollup hander run one cycle again, the rollup job is finishing - rollupHandler.runOneCycle(); - Assert.assertEquals(JobState.FINISHING, rollupJob.getState()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica1.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica3.getVersion()); - assertEquals(-1, rollupReplica1.getLastFailedVersion()); - assertEquals(-1, rollupReplica3.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica3.getLastSuccessVersion()); - } - - // start a rollup and then load data - // but load to rolluping index failed, then rollup is cancelled - @Test - public void testRollup3() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); - - // add a rollup job - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(clause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); - rollupHandler.process(alterClauses, db, olapTable, false); - RollupJob rollupJob = (RollupJob) rollupHandler.getAlterJob(CatalogTestUtil.testTableId1); - Assert.assertEquals(CatalogTestUtil.testIndexId1, rollupJob.getBaseIndexId()); - Assert.assertEquals(CatalogTestUtil.testRollupIndex2, rollupJob.getRollupIndexName()); - MaterializedIndex rollupIndex = rollupJob.getRollupIndex(CatalogTestUtil.testPartitionId1); - MaterializedIndex baseIndex = 
testPartition.getBaseIndex(); - assertEquals(IndexState.ROLLUP, rollupIndex.getState()); - assertEquals(IndexState.NORMAL, baseIndex.getState()); - assertEquals(OlapTableState.ROLLUP, olapTable.getState()); - assertEquals(PartitionState.ROLLUP, testPartition.getState()); - Tablet rollupTablet = rollupIndex.getTablets().get(0); - List replicas = rollupTablet.getReplicas(); - Replica rollupReplica1 = replicas.get(0); - Replica rollupReplica2 = replicas.get(1); - Replica rollupReplica3 = replicas.get(2); - - // rollup handler run one cycle, agent task is generated and send tasks - rollupHandler.runOneCycle(); - AgentTask task1 = AgentTaskQueue.getTask(rollupReplica1.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - AgentTask task2 = AgentTaskQueue.getTask(rollupReplica2.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - AgentTask task3 = AgentTaskQueue.getTask(rollupReplica3.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); - - // load a transaction, but rollup tablet failed, then the rollup job should be - // cancelled - long transactionId = masterTransMgr.beginTransaction(CatalogTestUtil.testDbId1, - CatalogTestUtil.testTxnLable1, - transactionSource, - LoadJobSourceType.FRONTEND, Config.stream_load_default_timeout_second); - // commit a transaction, backend 2 has errors - TabletCommitInfo tabletCommitInfo1 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId1); - TabletCommitInfo tabletCommitInfo2 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId2); - TabletCommitInfo tabletCommitInfo3 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId3); - List transTablets = Lists.newArrayList(); - transTablets.add(tabletCommitInfo1); - transTablets.add(tabletCommitInfo2); - transTablets.add(tabletCommitInfo3); - masterTransMgr.commitTransaction(CatalogTestUtil.testDbId1, transactionId, transTablets); - TransactionState transactionState = fakeEditLog.getTransaction(transactionId); - assertEquals(TransactionStatus.COMMITTED, transactionState.getTransactionStatus()); - Set errorReplicaIds = Sets.newHashSet(); - errorReplicaIds.add(CatalogTestUtil.testReplicaId2); - masterTransMgr.finishTransaction(transactionId, errorReplicaIds); - transactionState = fakeEditLog.getTransaction(transactionId); - - // rollup replca's last failed version should change to 13 - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica1.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica2.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, rollupReplica3.getLastFailedVersion()); - - // be report finishe rollup success - TTabletInfo tTabletInfo = new TTabletInfo(rollupTablet.getId(), CatalogTestUtil.testSchemaHash1, - CatalogTestUtil.testStartVersion, CatalogTestUtil.testStartVersionHash, 0, 0); - rollupHandler.handleFinishedReplica(task1, tTabletInfo, -1); - rollupHandler.handleFinishedReplica(task2, tTabletInfo, -1); - rollupHandler.handleFinishedReplica(task3, tTabletInfo, -1); - - // rollup hander run one cycle again, the rollup job is finishing - rollupHandler.runOneCycle(); - Assert.assertEquals(JobState.CANCELLED, rollupJob.getState()); - assertEquals(1, testPartition.getMaterializedIndices().size()); - } -} diff --git a/fe/src/test/java/org/apache/doris/alter/RollupJobV2Test.java b/fe/src/test/java/org/apache/doris/alter/RollupJobV2Test.java new file mode 100644 index 00000000000000..a8dabe71fa804d --- /dev/null +++ 
b/fe/src/test/java/org/apache/doris/alter/RollupJobV2Test.java @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.alter; + +import org.apache.doris.alter.AlterJobV2.JobState; +import org.apache.doris.analysis.AccessTestUtil; +import org.apache.doris.analysis.AddRollupClause; +import org.apache.doris.analysis.AlterClause; +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.CatalogTestUtil; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.FakeCatalog; +import org.apache.doris.catalog.FakeEditLog; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.FeConstants; +import org.apache.doris.common.FeMetaVersion; +import org.apache.doris.common.UserException; +import org.apache.doris.meta.MetaContext; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.thrift.TTaskType; +import org.apache.doris.transaction.FakeTransactionIDGenerator; +import org.apache.doris.transaction.GlobalTransactionMgr; + +import com.google.common.collect.Lists; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/* + * Author: Chenmingyu + * Date: Aug 21, 2019 + */ + +public class RollupJobV2Test { + + private static FakeEditLog fakeEditLog; + private static FakeCatalog fakeCatalog; + private static FakeTransactionIDGenerator fakeTransactionIDGenerator; + private static GlobalTransactionMgr masterTransMgr; + private static GlobalTransactionMgr slaveTransMgr; + private static Catalog masterCatalog; + private static Catalog slaveCatalog; + + private String transactionSource = "localfe"; + private static Analyzer analyzer; + private static AddRollupClause clause; + + @Before + public void setUp() throws InstantiationException, IllegalAccessException, IllegalArgumentException, + InvocationTargetException, NoSuchMethodException, SecurityException, AnalysisException { + fakeEditLog = new FakeEditLog(); + fakeCatalog = new FakeCatalog(); + fakeTransactionIDGenerator = new FakeTransactionIDGenerator(); + masterCatalog = CatalogTestUtil.createTestCatalog(); + slaveCatalog = CatalogTestUtil.createTestCatalog(); + MetaContext metaContext = new MetaContext(); + 
metaContext.setMetaVersion(FeMetaVersion.VERSION_61); + metaContext.setThreadLocalInfo(); + // masterCatalog.setJournalVersion(FeMetaVersion.VERSION_40); + // slaveCatalog.setJournalVersion(FeMetaVersion.VERSION_40); + masterTransMgr = masterCatalog.getGlobalTransactionMgr(); + masterTransMgr.setEditLog(masterCatalog.getEditLog()); + + slaveTransMgr = slaveCatalog.getGlobalTransactionMgr(); + slaveTransMgr.setEditLog(slaveCatalog.getEditLog()); + analyzer = AccessTestUtil.fetchAdminAnalyzer(false); + clause = new AddRollupClause(CatalogTestUtil.testRollupIndex2, Lists.newArrayList("k1", "v"), null, + CatalogTestUtil.testIndex1, null); + clause.analyze(analyzer); + + FeConstants.runningUnitTest = true; + } + + @Test + public void testAddSchemaChange() throws UserException { + FakeCatalog.setCatalog(masterCatalog); + RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); + ArrayList alterClauses = new ArrayList<>(); + alterClauses.add(clause); + Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); + OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); + rollupHandler.process(alterClauses, db.getClusterName(), db, olapTable); + Map alterJobsV2 = rollupHandler.getAlterJobsV2(); + Assert.assertEquals(1, alterJobsV2.size()); + Assert.assertEquals(OlapTableState.ROLLUP, olapTable.getState()); + } + + // start a schema change, then finished + @Test + public void testSchemaChange1() throws Exception { + FakeCatalog.setCatalog(masterCatalog); + RollupHandler rollupHandler = Catalog.getInstance().getRollupHandler(); + + // add a rollup job + ArrayList alterClauses = new ArrayList<>(); + alterClauses.add(clause); + Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); + OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); + Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); + rollupHandler.process(alterClauses, db.getClusterName(), db, olapTable); + Map alterJobsV2 = rollupHandler.getAlterJobsV2(); + Assert.assertEquals(1, alterJobsV2.size()); + RollupJobV2 rollupJob = (RollupJobV2) alterJobsV2.values().stream().findAny().get(); + + // runPendingJob + rollupHandler.runOneCycle(); + Assert.assertEquals(JobState.WAITING_TXN, rollupJob.getJobState()); + Assert.assertEquals(2, testPartition.getMaterializedIndices(IndexExtState.ALL).size()); + Assert.assertEquals(1, testPartition.getMaterializedIndices(IndexExtState.VISIBLE).size()); + Assert.assertEquals(1, testPartition.getMaterializedIndices(IndexExtState.SHADOW).size()); + + // runWaitingTxnJob + rollupHandler.runOneCycle(); + Assert.assertEquals(JobState.RUNNING, rollupJob.getJobState()); + + // runWaitingTxnJob, task not finished + rollupHandler.runOneCycle(); + Assert.assertEquals(JobState.RUNNING, rollupJob.getJobState()); + + // finish all tasks + List tasks = AgentTaskQueue.getTask(TTaskType.ALTER); + Assert.assertEquals(3, tasks.size()); + for (AgentTask agentTask : tasks) { + agentTask.setFinished(true); + } + MaterializedIndex shadowIndex = testPartition.getMaterializedIndices(IndexExtState.SHADOW).get(0); + for (Tablet shadowTablet : shadowIndex.getTablets()) { + for (Replica shadowReplica : shadowTablet.getReplicas()) { + shadowReplica.updateVersionInfo(testPartition.getVisibleVersion(), + testPartition.getVisibleVersionHash(), shadowReplica.getDataSize(), + shadowReplica.getRowCount()); + } + } + + rollupHandler.runOneCycle(); + Assert.assertEquals(JobState.FINISHED, rollupJob.getJobState()); + + /* + 
Assert.assertEquals(CatalogTestUtil.testIndexId1, rollupJob.getBaseIndexId()); + Assert.assertEquals(CatalogTestUtil.testRollupIndex2, rollupJob.getRollupIndexName()); + MaterializedIndex rollupIndex = rollupJob.getRollupIndex(CatalogTestUtil.testPartitionId1); + MaterializedIndex baseIndex = testPartition.getBaseIndex(); + assertEquals(IndexState.ROLLUP, rollupIndex.getState()); + assertEquals(IndexState.NORMAL, baseIndex.getState()); + assertEquals(OlapTableState.ROLLUP, olapTable.getState()); + assertEquals(PartitionState.ROLLUP, testPartition.getState()); + Tablet rollupTablet = rollupIndex.getTablets().get(0); + List replicas = rollupTablet.getReplicas(); + Replica rollupReplica1 = replicas.get(0); + Replica rollupReplica2 = replicas.get(1); + Replica rollupReplica3 = replicas.get(2); + + assertEquals(-1, rollupReplica1.getVersion()); + assertEquals(-1, rollupReplica2.getVersion()); + assertEquals(-1, rollupReplica3.getVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastFailedVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica2.getLastFailedVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica3.getLastFailedVersion()); + assertEquals(-1, rollupReplica1.getLastSuccessVersion()); + assertEquals(-1, rollupReplica2.getLastSuccessVersion()); + assertEquals(-1, rollupReplica3.getLastSuccessVersion()); + + // rollup handler run one cycle, agent task is generated and send tasks + rollupHandler.runOneCycle(); + AgentTask task1 = AgentTaskQueue.getTask(rollupReplica1.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); + AgentTask task2 = AgentTaskQueue.getTask(rollupReplica2.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); + AgentTask task3 = AgentTaskQueue.getTask(rollupReplica3.getBackendId(), TTaskType.ROLLUP, rollupTablet.getId()); + + // be report finishe rollup success + TTabletInfo tTabletInfo = new TTabletInfo(rollupTablet.getId(), CatalogTestUtil.testSchemaHash1, + CatalogTestUtil.testStartVersion, CatalogTestUtil.testStartVersionHash, 0, 0); + rollupHandler.handleFinishedReplica(task1, tTabletInfo, -1); + rollupHandler.handleFinishedReplica(task2, tTabletInfo, -1); + rollupHandler.handleFinishedReplica(task3, tTabletInfo, -1); + + // rollup hander run one cycle again, the rollup job is finishing + rollupHandler.runOneCycle(); + Assert.assertEquals(JobState.FINISHING, rollupJob.getState()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica2.getVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica3.getVersion()); + assertEquals(-1, rollupReplica1.getLastFailedVersion()); + assertEquals(-1, rollupReplica2.getLastFailedVersion()); + assertEquals(-1, rollupReplica3.getLastFailedVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastSuccessVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastSuccessVersion()); + assertEquals(CatalogTestUtil.testStartVersion, rollupReplica1.getLastSuccessVersion()); + */ + } + +} diff --git a/fe/src/test/java/org/apache/doris/alter/SchemaChangeJobTest.java b/fe/src/test/java/org/apache/doris/alter/SchemaChangeJobTest.java deleted file mode 100644 index f37704569b8447..00000000000000 --- a/fe/src/test/java/org/apache/doris/alter/SchemaChangeJobTest.java +++ /dev/null @@ -1,280 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.alter; - -import static org.junit.Assert.assertEquals; - -import org.apache.doris.alter.AlterJob.JobState; -import org.apache.doris.analysis.AccessTestUtil; -import org.apache.doris.analysis.AddColumnClause; -import org.apache.doris.analysis.AlterClause; -import org.apache.doris.analysis.Analyzer; -import org.apache.doris.analysis.ColumnDef; -import org.apache.doris.analysis.ColumnDef.DefaultValue; -import org.apache.doris.analysis.ColumnPosition; -import org.apache.doris.analysis.TypeDef; -import org.apache.doris.catalog.AggregateType; -import org.apache.doris.catalog.Catalog; -import org.apache.doris.catalog.CatalogTestUtil; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.FakeCatalog; -import org.apache.doris.catalog.FakeEditLog; -import org.apache.doris.catalog.MaterializedIndex; -import org.apache.doris.catalog.MaterializedIndex.IndexState; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.OlapTable.OlapTableState; -import org.apache.doris.catalog.Partition; -import org.apache.doris.catalog.Partition.PartitionState; -import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Replica; -import org.apache.doris.catalog.Replica.ReplicaState; -import org.apache.doris.catalog.ScalarType; -import org.apache.doris.catalog.Tablet; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.Config; -import org.apache.doris.common.FeMetaVersion; -import org.apache.doris.meta.MetaContext; -import org.apache.doris.task.AgentTask; -import org.apache.doris.task.AgentTaskQueue; -import org.apache.doris.thrift.TTabletInfo; -import org.apache.doris.thrift.TTaskType; -import org.apache.doris.transaction.FakeTransactionIDGenerator; -import org.apache.doris.transaction.GlobalTransactionMgr; -import org.apache.doris.transaction.TabletCommitInfo; -import org.apache.doris.transaction.TransactionState; -import org.apache.doris.transaction.TransactionState.LoadJobSourceType; -import org.apache.doris.transaction.TransactionStatus; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -public class SchemaChangeJobTest { - - private static FakeEditLog fakeEditLog; - private static FakeCatalog fakeCatalog; - private static FakeTransactionIDGenerator fakeTransactionIDGenerator; - private static GlobalTransactionMgr masterTransMgr; - private static GlobalTransactionMgr slaveTransMgr; - private static Catalog masterCatalog; - private static Catalog slaveCatalog; - - private String transactionSource = "localfe"; - private static Analyzer analyzer; - private static 
ColumnDef newCol = new ColumnDef("add_v", new TypeDef(ScalarType.createType(PrimitiveType.INT)), false, AggregateType.MAX, - false, new DefaultValue(true, "1"), ""); - private static AddColumnClause addColumnClause = new AddColumnClause(newCol, new ColumnPosition("v"), null, null); - - @Before - public void setUp() throws InstantiationException, IllegalAccessException, IllegalArgumentException, - InvocationTargetException, NoSuchMethodException, SecurityException, AnalysisException { - fakeEditLog = new FakeEditLog(); - fakeCatalog = new FakeCatalog(); - fakeTransactionIDGenerator = new FakeTransactionIDGenerator(); - masterCatalog = CatalogTestUtil.createTestCatalog(); - slaveCatalog = CatalogTestUtil.createTestCatalog(); - MetaContext metaContext = new MetaContext(); - metaContext.setMetaVersion(FeMetaVersion.VERSION_40); - metaContext.setThreadLocalInfo(); - - // masterCatalog.setJournalVersion(FeMetaVersion.VERSION_40); - // slaveCatalog.setJournalVersion(FeMetaVersion.VERSION_40); - masterTransMgr = masterCatalog.getGlobalTransactionMgr(); - masterTransMgr.setEditLog(masterCatalog.getEditLog()); - slaveTransMgr = slaveCatalog.getGlobalTransactionMgr(); - slaveTransMgr.setEditLog(slaveCatalog.getEditLog()); - analyzer = AccessTestUtil.fetchAdminAnalyzer(false); - addColumnClause.analyze(analyzer); - } - - @Test - public void testAddSchemaChange() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - SchemaChangeHandler schemaChangeHandler = Catalog.getInstance().getSchemaChangeHandler(); - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(addColumnClause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - schemaChangeHandler.process(alterClauses, "default", db, olapTable); - SchemaChangeJob schemaChangeJob = (SchemaChangeJob) schemaChangeHandler - .getAlterJob(CatalogTestUtil.testTableId1); - Assert.assertEquals(OlapTableState.SCHEMA_CHANGE, olapTable.getState()); - } - - // start a schema change, then finished - @Test - public void testSchemaChange1() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - SchemaChangeHandler schemaChangeHandler = Catalog.getInstance().getSchemaChangeHandler(); - - // add a schema change job - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(addColumnClause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); - schemaChangeHandler.process(alterClauses, "default", db, olapTable); - SchemaChangeJob schemaChangeJob = (SchemaChangeJob) schemaChangeHandler - .getAlterJob(CatalogTestUtil.testTableId1); - MaterializedIndex baseIndex = testPartition.getBaseIndex(); - assertEquals(IndexState.SCHEMA_CHANGE, baseIndex.getState()); - assertEquals(OlapTableState.SCHEMA_CHANGE, olapTable.getState()); - assertEquals(PartitionState.SCHEMA_CHANGE, testPartition.getState()); - Tablet baseTablet = baseIndex.getTablets().get(0); - List replicas = baseTablet.getReplicas(); - Replica replica1 = replicas.get(0); - Replica replica2 = replicas.get(1); - Replica replica3 = replicas.get(2); - - assertEquals(CatalogTestUtil.testStartVersion, replica1.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica2.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica3.getVersion()); - assertEquals(-1, replica1.getLastFailedVersion()); - 
assertEquals(-1, replica2.getLastFailedVersion()); - assertEquals(-1, replica3.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica2.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica3.getLastSuccessVersion()); - - // schemachange handler run one cycle, agent task is generated and send tasks - schemaChangeHandler.runOneCycle(); - AgentTask task1 = AgentTaskQueue.getTask(replica1.getBackendId(), TTaskType.SCHEMA_CHANGE, baseTablet.getId()); - AgentTask task2 = AgentTaskQueue.getTask(replica2.getBackendId(), TTaskType.SCHEMA_CHANGE, baseTablet.getId()); - AgentTask task3 = AgentTaskQueue.getTask(replica3.getBackendId(), TTaskType.SCHEMA_CHANGE, baseTablet.getId()); - - // be report finishe schema change success, report the new schema hash - TTabletInfo tTabletInfo = new TTabletInfo(baseTablet.getId(), - schemaChangeJob.getSchemaHashByIndexId(CatalogTestUtil.testIndexId1), CatalogTestUtil.testStartVersion, - CatalogTestUtil.testStartVersionHash, 0, 0); - schemaChangeHandler.handleFinishedReplica(task1, tTabletInfo, -1); - schemaChangeHandler.handleFinishedReplica(task2, tTabletInfo, -1); - schemaChangeHandler.handleFinishedReplica(task3, tTabletInfo, -1); - - // schema change hander run one cycle again, the rollup job is finishing - schemaChangeHandler.runOneCycle(); - Assert.assertEquals(JobState.FINISHING, schemaChangeJob.getState()); - assertEquals(CatalogTestUtil.testStartVersion, replica1.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica2.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica3.getVersion()); - assertEquals(-1, replica1.getLastFailedVersion()); - assertEquals(-1, replica2.getLastFailedVersion()); - assertEquals(-1, replica3.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica2.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion, replica3.getLastSuccessVersion()); - } - - // load some data and one replica has errors - // start a schema change and then load data - // load finished and schema change finished - @Test - public void testSchemaChange2() throws Exception { - FakeCatalog.setCatalog(masterCatalog); - SchemaChangeHandler schemaChangeHandler = Catalog.getInstance().getSchemaChangeHandler(); - // load one transaction with backend 2 has errors - long transactionId = masterTransMgr.beginTransaction(CatalogTestUtil.testDbId1, - CatalogTestUtil.testTxnLable1, - transactionSource, - LoadJobSourceType.FRONTEND, Config.stream_load_default_timeout_second); - // commit a transaction, backend 2 has errors - TabletCommitInfo tabletCommitInfo1 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId1); - TabletCommitInfo tabletCommitInfo2 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId2); - TabletCommitInfo tabletCommitInfo3 = new TabletCommitInfo(CatalogTestUtil.testTabletId1, - CatalogTestUtil.testBackendId3); - List transTablets = Lists.newArrayList(); - transTablets.add(tabletCommitInfo1); - transTablets.add(tabletCommitInfo2); - transTablets.add(tabletCommitInfo3); - masterTransMgr.commitTransaction(CatalogTestUtil.testDbId1, transactionId, transTablets); - TransactionState transactionState = fakeEditLog.getTransaction(transactionId); - assertEquals(TransactionStatus.COMMITTED, 
transactionState.getTransactionStatus()); - Set errorReplicaIds = Sets.newHashSet(); - // errorReplicaIds.add(CatalogTestUtil.testReplicaId2); - masterTransMgr.finishTransaction(transactionId, errorReplicaIds); - transactionState = fakeEditLog.getTransaction(transactionId); - assertEquals(TransactionStatus.VISIBLE, transactionState.getTransactionStatus()); - - // start a schema change - ArrayList alterClauses = new ArrayList<>(); - alterClauses.add(addColumnClause); - Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); - OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); - Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); - schemaChangeHandler.process(alterClauses, "default", db, olapTable); - SchemaChangeJob schemaChangeJob = (SchemaChangeJob) schemaChangeHandler - .getAlterJob(CatalogTestUtil.testTableId1); - MaterializedIndex baseIndex = testPartition.getBaseIndex(); - assertEquals(IndexState.SCHEMA_CHANGE, baseIndex.getState()); - assertEquals(OlapTableState.SCHEMA_CHANGE, olapTable.getState()); - assertEquals(PartitionState.SCHEMA_CHANGE, testPartition.getState()); - Tablet baseTablet = baseIndex.getTablets().get(0); - List replicas = baseTablet.getReplicas(); - Replica replica1 = replicas.get(0); - Replica replica2 = replicas.get(1); - Replica replica3 = replicas.get(2); - assertEquals(3, baseTablet.getReplicas().size()); - - assertEquals(ReplicaState.SCHEMA_CHANGE, replica1.getState()); - assertEquals(ReplicaState.SCHEMA_CHANGE, replica2.getState()); - assertEquals(ReplicaState.SCHEMA_CHANGE, replica3.getState()); - - assertEquals(CatalogTestUtil.testStartVersion + 1, replica1.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica2.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica3.getVersion()); - assertEquals(-1, replica1.getLastFailedVersion()); - assertEquals(-1, replica2.getLastFailedVersion()); - assertEquals(-1, replica3.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica2.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica3.getLastSuccessVersion()); - - // schemachange handler run one cycle, agent task is generated and send tasks - schemaChangeHandler.runOneCycle(); - AgentTask task1 = AgentTaskQueue.getTask(replica1.getBackendId(), TTaskType.SCHEMA_CHANGE, baseTablet.getId()); - AgentTask task2 = AgentTaskQueue.getTask(replica2.getBackendId(), TTaskType.SCHEMA_CHANGE, baseTablet.getId()); - AgentTask task3 = AgentTaskQueue.getTask(replica3.getBackendId(), TTaskType.SCHEMA_CHANGE, baseTablet.getId()); - - // be report finish schema change success, report the new schema hash - TTabletInfo tTabletInfo = new TTabletInfo(baseTablet.getId(), - schemaChangeJob.getSchemaHashByIndexId(CatalogTestUtil.testIndexId1), CatalogTestUtil.testStartVersion, - CatalogTestUtil.testStartVersionHash, 0, 0); - schemaChangeHandler.handleFinishedReplica(task1, tTabletInfo, -1); - schemaChangeHandler.handleFinishedReplica(task2, tTabletInfo, -1); - schemaChangeHandler.handleFinishedReplica(task3, tTabletInfo, -1); - - // rollup hander run one cycle again, the rollup job is finishing - schemaChangeHandler.runOneCycle(); - Assert.assertEquals(JobState.FINISHING, schemaChangeJob.getState()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica1.getVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, 
replica3.getVersion()); - assertEquals(-1, replica1.getLastFailedVersion()); - assertEquals(-1, replica3.getLastFailedVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica1.getLastSuccessVersion()); - assertEquals(CatalogTestUtil.testStartVersion + 1, replica3.getLastSuccessVersion()); - } -} diff --git a/fe/src/test/java/org/apache/doris/alter/SchemaChangeJobV2Test.java b/fe/src/test/java/org/apache/doris/alter/SchemaChangeJobV2Test.java new file mode 100644 index 00000000000000..897507bc4e9421 --- /dev/null +++ b/fe/src/test/java/org/apache/doris/alter/SchemaChangeJobV2Test.java @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.alter; + +import static org.junit.Assert.assertEquals; + +import org.apache.doris.alter.AlterJobV2.JobState; +import org.apache.doris.analysis.AccessTestUtil; +import org.apache.doris.analysis.AddColumnClause; +import org.apache.doris.analysis.AlterClause; +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.ColumnDef; +import org.apache.doris.analysis.ColumnDef.DefaultValue; +import org.apache.doris.analysis.ColumnPosition; +import org.apache.doris.analysis.TypeDef; +import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.CatalogTestUtil; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.FakeCatalog; +import org.apache.doris.catalog.FakeEditLog; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; +import org.apache.doris.catalog.MaterializedIndex.IndexState; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Partition.PartitionState; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.FeConstants; +import org.apache.doris.common.FeMetaVersion; +import org.apache.doris.common.UserException; +import org.apache.doris.meta.MetaContext; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.thrift.TTaskType; +import org.apache.doris.transaction.FakeTransactionIDGenerator; +import org.apache.doris.transaction.GlobalTransactionMgr; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/* + * Author: Chenmingyu + * Date: Aug 21, 2019 + */ + 
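+// SchemaChangeJobV2Test drives a V2 schema change job through its full
+// lifecycle on the fake catalog: process() registers the job, successive
+// runOneCycle() calls move it PENDING -> WAITING_TXN -> RUNNING, a shadow
+// index is created next to the visible one, and the job only reaches
+// FINISHED after the ALTER agent tasks are marked done and the shadow
+// replicas catch up to the partition's visible version.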
+public class SchemaChangeJobV2Test { + + private static FakeEditLog fakeEditLog; + private static FakeCatalog fakeCatalog; + private static FakeTransactionIDGenerator fakeTransactionIDGenerator; + private static GlobalTransactionMgr masterTransMgr; + private static GlobalTransactionMgr slaveTransMgr; + private static Catalog masterCatalog; + private static Catalog slaveCatalog; + + private static Analyzer analyzer; + private static ColumnDef newCol = new ColumnDef("add_v", new TypeDef(ScalarType.createType(PrimitiveType.INT)), + false, AggregateType.MAX, false, new DefaultValue(true, "1"), ""); + private static AddColumnClause addColumnClause = new AddColumnClause(newCol, new ColumnPosition("v"), null, null); + + @Before + public void setUp() throws InstantiationException, IllegalAccessException, IllegalArgumentException, + InvocationTargetException, NoSuchMethodException, SecurityException, AnalysisException { + fakeEditLog = new FakeEditLog(); + fakeCatalog = new FakeCatalog(); + fakeTransactionIDGenerator = new FakeTransactionIDGenerator(); + masterCatalog = CatalogTestUtil.createTestCatalog(); + slaveCatalog = CatalogTestUtil.createTestCatalog(); + MetaContext metaContext = new MetaContext(); + metaContext.setMetaVersion(FeMetaVersion.VERSION_61); + metaContext.setThreadLocalInfo(); + + masterTransMgr = masterCatalog.getGlobalTransactionMgr(); + masterTransMgr.setEditLog(masterCatalog.getEditLog()); + slaveTransMgr = slaveCatalog.getGlobalTransactionMgr(); + slaveTransMgr.setEditLog(slaveCatalog.getEditLog()); + analyzer = AccessTestUtil.fetchAdminAnalyzer(false); + addColumnClause.analyze(analyzer); + + FeConstants.runningUnitTest = true; + AgentTaskQueue.clearAllTasks(); + } + + @Test + public void testAddSchemaChange() throws UserException { + FakeCatalog.setCatalog(masterCatalog); + SchemaChangeHandler schemaChangeHandler = Catalog.getInstance().getSchemaChangeHandler(); + ArrayList alterClauses = new ArrayList<>(); + alterClauses.add(addColumnClause); + Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); + OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); + schemaChangeHandler.process(alterClauses, "default_cluster", db, olapTable); + Map alterJobsV2 = schemaChangeHandler.getAlterJobsV2(); + Assert.assertEquals(1, alterJobsV2.size()); + Assert.assertEquals(OlapTableState.SCHEMA_CHANGE, olapTable.getState()); + } + + // start a schema change, then finished + @Test + public void testSchemaChange1() throws Exception { + FakeCatalog.setCatalog(masterCatalog); + SchemaChangeHandler schemaChangeHandler = Catalog.getInstance().getSchemaChangeHandler(); + + // add a schema change job + ArrayList alterClauses = new ArrayList<>(); + alterClauses.add(addColumnClause); + Database db = masterCatalog.getDb(CatalogTestUtil.testDbId1); + OlapTable olapTable = (OlapTable) db.getTable(CatalogTestUtil.testTableId1); + Partition testPartition = olapTable.getPartition(CatalogTestUtil.testPartitionId1); + schemaChangeHandler.process(alterClauses, "default_cluster", db, olapTable); + Map alterJobsV2 = schemaChangeHandler.getAlterJobsV2(); + Assert.assertEquals(1, alterJobsV2.size()); + SchemaChangeJobV2 schemaChangeJob = (SchemaChangeJobV2) alterJobsV2.values().stream().findAny().get(); + + MaterializedIndex baseIndex = testPartition.getBaseIndex(); + assertEquals(IndexState.NORMAL, baseIndex.getState()); + assertEquals(PartitionState.NORMAL, testPartition.getState()); + assertEquals(OlapTableState.SCHEMA_CHANGE, olapTable.getState()); + + Tablet baseTablet = 
baseIndex.getTablets().get(0); + List replicas = baseTablet.getReplicas(); + Replica replica1 = replicas.get(0); + Replica replica2 = replicas.get(1); + Replica replica3 = replicas.get(2); + + assertEquals(CatalogTestUtil.testStartVersion, replica1.getVersion()); + assertEquals(CatalogTestUtil.testStartVersion, replica2.getVersion()); + assertEquals(CatalogTestUtil.testStartVersion, replica3.getVersion()); + assertEquals(-1, replica1.getLastFailedVersion()); + assertEquals(-1, replica2.getLastFailedVersion()); + assertEquals(-1, replica3.getLastFailedVersion()); + assertEquals(CatalogTestUtil.testStartVersion, replica1.getLastSuccessVersion()); + assertEquals(CatalogTestUtil.testStartVersion, replica2.getLastSuccessVersion()); + assertEquals(CatalogTestUtil.testStartVersion, replica3.getLastSuccessVersion()); + + // runPendingJob + schemaChangeHandler.runOneCycle(); + Assert.assertEquals(JobState.WAITING_TXN, schemaChangeJob.getJobState()); + Assert.assertEquals(2, testPartition.getMaterializedIndices(IndexExtState.ALL).size()); + Assert.assertEquals(1, testPartition.getMaterializedIndices(IndexExtState.VISIBLE).size()); + Assert.assertEquals(1, testPartition.getMaterializedIndices(IndexExtState.SHADOW).size()); + + // runWaitingTxnJob + schemaChangeHandler.runOneCycle(); + Assert.assertEquals(JobState.RUNNING, schemaChangeJob.getJobState()); + + // runWaitingTxnJob, task not finished + schemaChangeHandler.runOneCycle(); + Assert.assertEquals(JobState.RUNNING, schemaChangeJob.getJobState()); + + // runRunningJob + schemaChangeHandler.runOneCycle(); + // task not finished, still running + Assert.assertEquals(JobState.RUNNING, schemaChangeJob.getJobState()); + + // finish alter tasks + List tasks = AgentTaskQueue.getTask(TTaskType.ALTER); + Assert.assertEquals(3, tasks.size()); + for (AgentTask agentTask : tasks) { + agentTask.setFinished(true); + } + MaterializedIndex shadowIndex = testPartition.getMaterializedIndices(IndexExtState.SHADOW).get(0); + for (Tablet shadowTablet : shadowIndex.getTablets()) { + for (Replica shadowReplica : shadowTablet.getReplicas()) { + shadowReplica.updateVersionInfo(testPartition.getVisibleVersion(), testPartition.getVisibleVersionHash(), shadowReplica.getDataSize(), shadowReplica.getRowCount()); + } + } + + schemaChangeHandler.runOneCycle(); + Assert.assertEquals(JobState.FINISHED, schemaChangeJob.getJobState()); + } + +} diff --git a/fe/src/test/java/org/apache/doris/analysis/DataDescriptionTest.java b/fe/src/test/java/org/apache/doris/analysis/DataDescriptionTest.java index a01cac0c26f2b0..6a1a2808b7236a 100644 --- a/fe/src/test/java/org/apache/doris/analysis/DataDescriptionTest.java +++ b/fe/src/test/java/org/apache/doris/analysis/DataDescriptionTest.java @@ -76,7 +76,7 @@ public void testNormal() throws AnalysisException { desc.analyze("testDb"); Assert.assertEquals("DATA INFILE ('abc.txt') NEGATIVE INTO TABLE testTable (col1, col2)", desc.toString()); Assert.assertEquals("testTable", desc.getTableName()); - Assert.assertEquals("[col1, col2]", desc.getColumnNames().toString()); + Assert.assertEquals("[col1, col2]", desc.getFileFieldNames().toString()); Assert.assertEquals("[abc.txt]", desc.getFilePaths().toString()); Assert.assertTrue(desc.isNegative()); Assert.assertNull(desc.getColumnSeparator()); diff --git a/fe/src/test/java/org/apache/doris/backup/BackupHandlerTest.java b/fe/src/test/java/org/apache/doris/backup/BackupHandlerTest.java index db90cba79c5d9b..2a4c22cf0f8f29 100644 --- a/fe/src/test/java/org/apache/doris/backup/BackupHandlerTest.java +++ 
b/fe/src/test/java/org/apache/doris/backup/BackupHandlerTest.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Table; @@ -205,7 +206,7 @@ public Status getSnapshotInfoFile(String label, String backupTimestamp, List snapshotInfos = Maps.newHashMap(); for (Partition part : tbl.getPartitions()) { - for (MaterializedIndex idx : part.getMaterializedIndices()) { + for (MaterializedIndex idx : part.getMaterializedIndices(IndexExtState.VISIBLE)) { for (Tablet tablet : idx.getTablets()) { List files = Lists.newArrayList(); SnapshotInfo sinfo = new SnapshotInfo(db.getId(), tbl.getId(), part.getId(), idx.getId(), diff --git a/fe/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/src/test/java/org/apache/doris/backup/RestoreJobTest.java index 304cf58e0d43eb..07d8c770c6c5cd 100644 --- a/fe/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Table; @@ -214,7 +215,7 @@ boolean await(long timeout, TimeUnit unit) { partInfo.name = partition.getName(); tblInfo.partitions.put(partInfo.name, partInfo); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { BackupIndexInfo idxInfo = new BackupIndexInfo(); idxInfo.id = index.getId(); idxInfo.name = expectedRestoreTbl.getIndexNameById(index.getId()); diff --git a/fe/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java b/fe/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java index d8548a967d6680..648b898971abe4 100644 --- a/fe/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java +++ b/fe/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.PartitionKeyDesc; import org.apache.doris.analysis.SingleRangePartitionDesc; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.MaterializedIndex.IndexState; import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.common.DdlException; @@ -122,7 +123,7 @@ public static boolean compareCatalog(Catalog masterCatalog, Catalog slaveCatalog || masterPartition.getCommittedVersionHash() != slavePartition.getCommittedVersionHash()) { return false; } - List allMaterializedIndices = masterPartition.getMaterializedIndices(); + List allMaterializedIndices = masterPartition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex masterIndex : allMaterializedIndices) { MaterializedIndex slaveIndex = slavePartition.getIndex(masterIndex.getId()); if (slaveIndex == null) { diff --git a/fe/src/test/java/org/apache/doris/catalog/FakeEditLog.java b/fe/src/test/java/org/apache/doris/catalog/FakeEditLog.java index 62d5e3937b07e0..4d45afcb644665 100644 --- a/fe/src/test/java/org/apache/doris/catalog/FakeEditLog.java +++ b/fe/src/test/java/org/apache/doris/catalog/FakeEditLog.java @@ -17,6 
+17,7 @@ package org.apache.doris.catalog; +import org.apache.doris.alter.AlterJobV2; import org.apache.doris.alter.RollupJob; import org.apache.doris.alter.SchemaChangeJob; import org.apache.doris.cluster.Cluster; @@ -91,6 +92,11 @@ public void logOpRoutineLoadJob(RoutineLoadOperation operation) { } + @Mock + public void logAlterJob(AlterJobV2 alterJob) { + + } + public TransactionState getTransaction(long transactionId) { return allTransactionState.get(transactionId); } diff --git a/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java b/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java index 4f0e210d6c00ef..2b396c8a90fcad 100644 --- a/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java +++ b/fe/src/test/java/org/apache/doris/catalog/ReplicaTest.java @@ -85,9 +85,9 @@ public void getMethodTest() { Assert.assertEquals(newRowCount, replica.getRowCount()); // check version catch up - Assert.assertFalse(replica.checkVersionCatchUp(5, 98765)); - Assert.assertFalse(replica.checkVersionCatchUp(newVersion, 76543)); - Assert.assertTrue(replica.checkVersionCatchUp(newVersion, newVersionHash)); + Assert.assertFalse(replica.checkVersionCatchUp(5, 98765, false)); + Assert.assertFalse(replica.checkVersionCatchUp(newVersion, 76543, false)); + Assert.assertTrue(replica.checkVersionCatchUp(newVersion, newVersionHash, false)); } @Test diff --git a/fe/src/test/java/org/apache/doris/common/util/BrokerUtilTest.java b/fe/src/test/java/org/apache/doris/common/util/BrokerUtilTest.java index f8f1815142f21f..55a6e00c000d55 100644 --- a/fe/src/test/java/org/apache/doris/common/util/BrokerUtilTest.java +++ b/fe/src/test/java/org/apache/doris/common/util/BrokerUtilTest.java @@ -17,15 +17,18 @@ package org.apache.doris.common.util; -import com.google.common.collect.Lists; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + import org.apache.doris.common.UserException; + +import com.google.common.collect.Lists; + import org.junit.Test; import java.util.Collections; import java.util.List; -import static org.junit.Assert.*; - public class BrokerUtilTest { @Test @@ -110,5 +113,13 @@ public void parseColumnsFromPath() { } catch (UserException ignored) { } + path = "/path/to/dir/k1=2/a/xxx.csv"; + try { + List columns = BrokerUtil.parseColumnsFromPath(path, Collections.singletonList("k1")); + fail(); + } catch (UserException ignored) { + ignored.printStackTrace(); + } + } } diff --git a/fe/src/test/java/org/apache/doris/http/DorisHttpTestCase.java b/fe/src/test/java/org/apache/doris/http/DorisHttpTestCase.java index e6757ad4322333..00832c66064a3a 100644 --- a/fe/src/test/java/org/apache/doris/http/DorisHttpTestCase.java +++ b/fe/src/test/java/org/apache/doris/http/DorisHttpTestCase.java @@ -86,7 +86,6 @@ abstract public class DorisHttpTestCase { public static final String DB_NAME = "testDb"; public static final String TABLE_NAME = "testTbl"; - private static long testBackendId1 = 1000; private static long testBackendId2 = 1001; private static long testBackendId3 = 1002; @@ -95,7 +94,6 @@ abstract public class DorisHttpTestCase { private static long testReplicaId2 = 2001; private static long testReplicaId3 = 2002; - private static long testDbId = 100L; private static long testTableId = 200L; private static long testPartitionId = 201L; @@ -112,10 +110,8 @@ abstract public class DorisHttpTestCase { protected static final String URI = "http://localhost:" + HTTP_PORT + "/api/" + DB_NAME + "/" + TABLE_NAME; - protected String rootAuth = Credentials.basic("root", ""); - 
public static OlapTable newTable(String name) { Catalog.getCurrentInvertedIndex().clear(); Column k1 = new Column("k1", PrimitiveType.BIGINT); @@ -158,7 +154,7 @@ public static OlapTable newTable(String name) { distributionInfo); table.addPartition(partition); table.setIndexSchemaInfo(testIndexId, "testIndex", columns, 0, testSchemaHash, (short) 1); - + table.setBaseIndexId(testIndexId); return table; } @@ -186,7 +182,6 @@ private static EsTable newEsTable(String name) { return table; } - private static Catalog newDelegateCatalog() { try { Catalog catalog = EasyMock.createMock(Catalog.class); @@ -227,7 +222,6 @@ private static Catalog newDelegateCatalog() { Startup.initializeIfPossible(); } - private static void assignBackends() { Backend backend1 = new Backend(testBackendId1, "node-1", 9308); backend1.setBePort(9300); diff --git a/fe/src/test/java/org/apache/doris/load/LoadCheckerTest.java b/fe/src/test/java/org/apache/doris/load/LoadCheckerTest.java index f69ca373f7c5bb..1b99e0dec09fb7 100644 --- a/fe/src/test/java/org/apache/doris/load/LoadCheckerTest.java +++ b/fe/src/test/java/org/apache/doris/load/LoadCheckerTest.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; @@ -254,7 +255,7 @@ public void testRunLoadingJobs() throws Exception { // set tablet load infos int replicaNum = 0; Map tabletLoadInfos = new HashMap(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { replicaNum += tablet.getReplicas().size(); TabletLoadInfo tabletLoadInfo = new TabletLoadInfo("/label/path", 1L); @@ -285,7 +286,7 @@ public void testRunLoadingJobs() throws Exception { Assert.assertEquals(0, AgentTaskQueue.getTaskNum()); // update replica to new version - for (MaterializedIndex olapIndex : partition.getMaterializedIndices()) { + for (MaterializedIndex olapIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : olapIndex.getTablets()) { for (Replica replica : tablet.getReplicas()) { replica.updateVersionInfo(newVersion, newVersionHash, 0L, 0L); @@ -327,7 +328,7 @@ public void testRunQuorumFinishedJobs() throws Exception { job.setIdToTableLoadInfo(idToTableLoadInfo); // set tablet load infos Map tabletLoadInfos = new HashMap(); - for (MaterializedIndex index : partition.getMaterializedIndices()) { + for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : index.getTablets()) { for (Replica replica : tablet.getReplicas()) { replica.updateVersionInfo(newVersion, newVersionHash, 0L, 0L); diff --git a/fe/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java b/fe/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java index c308e2f72d8ffc..dec2c6b0331953 100644 --- a/fe/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java +++ b/fe/src/test/java/org/apache/doris/load/loadv2/BrokerLoadJobTest.java @@ -98,7 +98,7 @@ public void testFromLoadStmt(@Injectable LoadStmt loadStmt, } @Test - public void testFromLoadStmt(@Injectable LoadStmt loadStmt, + public void testFromLoadStmt2(@Injectable LoadStmt loadStmt, @Injectable DataDescription dataDescription, @Injectable 
LabelName labelName, @Injectable Database database, @@ -139,8 +139,8 @@ public void testFromLoadStmt(@Injectable LoadStmt loadStmt, new MockUp() { @Mock public void checkAndCreateSource(Database db, DataDescription dataDescription, - Map>> tableToPartitionSources, - boolean deleteFlag, EtlJobType jobType) { + Map>> tableToPartitionSources, EtlJobType jobType) { + } }; @@ -150,7 +150,6 @@ public void checkAndCreateSource(Database db, DataDescription dataDescription, Assert.assertEquals(label, Deencapsulation.getField(brokerLoadJob, "label")); Assert.assertEquals(JobState.PENDING, Deencapsulation.getField(brokerLoadJob, "state")); Assert.assertEquals(EtlJobType.BROKER, Deencapsulation.getField(brokerLoadJob, "jobType")); - Assert.assertEquals(dataDescriptionList, Deencapsulation.getField(brokerLoadJob, "dataDescriptions")); } catch (DdlException e) { Assert.fail(e.getMessage()); } diff --git a/fe/src/test/java/org/apache/doris/load/loadv2/LoadJobTest.java b/fe/src/test/java/org/apache/doris/load/loadv2/LoadJobTest.java index 3f5a72289c0b8b..0f32c130ffee08 100644 --- a/fe/src/test/java/org/apache/doris/load/loadv2/LoadJobTest.java +++ b/fe/src/test/java/org/apache/doris/load/loadv2/LoadJobTest.java @@ -169,9 +169,15 @@ public void testUpdateStateToLoading() { @Test public void testUpdateStateToFinished(@Mocked MetricRepo metricRepo, - @Mocked LongCounterMetric longCounterMetric) { - metricRepo.COUNTER_LOAD_FINISHED = longCounterMetric; + @Mocked LongCounterMetric longCounterMetric) { + + MetricRepo.COUNTER_LOAD_FINISHED = longCounterMetric; LoadJob loadJob = new BrokerLoadJob(); + + // TxnStateCallbackFactory factory = Catalog.getCurrentCatalog().getGlobalTransactionMgr().getCallbackFactory(); + Catalog catalog = Catalog.getCurrentCatalog(); + GlobalTransactionMgr mgr = new GlobalTransactionMgr(catalog); + Deencapsulation.setField(catalog, "globalTransactionMgr", mgr); loadJob.updateState(JobState.FINISHED); Assert.assertEquals(JobState.FINISHED, loadJob.getState()); Assert.assertNotEquals(-1, (long) Deencapsulation.getField(loadJob, "finishTimestamp")); diff --git a/fe/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java b/fe/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java index 891c78de6c2490..9aad7920cf1ad0 100644 --- a/fe/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java +++ b/fe/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java @@ -98,7 +98,7 @@ public void testSinglePartition() throws UserException { dstTable.getPartitions(); result = Lists.newArrayList(partition); }}; - OlapTableSink sink = new OlapTableSink(dstTable, tuple); + OlapTableSink sink = new OlapTableSink(dstTable, tuple, ""); sink.init(new TUniqueId(1, 2), 3, 4); sink.finalize(); LOG.info("sink is {}", sink.toThrift()); diff --git a/fe/src/test/java/org/apache/doris/planner/StreamLoadScanNodeTest.java b/fe/src/test/java/org/apache/doris/planner/StreamLoadScanNodeTest.java index 0ce5b1850a6cc0..24cbe98c6b4bae 100644 --- a/fe/src/test/java/org/apache/doris/planner/StreamLoadScanNodeTest.java +++ b/fe/src/test/java/org/apache/doris/planner/StreamLoadScanNodeTest.java @@ -153,6 +153,11 @@ public void testNormal() throws UserException { StreamLoadScanNode scanNode = getStreamLoadScanNode(dstDesc, request); new Expectations() {{ dstTable.getBaseSchema(); result = columns; + dstTable.getFullSchema(); result = columns; + dstTable.getColumn("k1"); result = columns.get(0); + dstTable.getColumn("k2"); result = columns.get(1); + dstTable.getColumn("v1"); result = columns.get(2); + 
dstTable.getColumn("v2"); result = columns.get(3); }}; scanNode.init(analyzer); scanNode.finalize(analyzer); diff --git a/fe/src/test/java/org/apache/doris/qe/ShowExecutorTest.java b/fe/src/test/java/org/apache/doris/qe/ShowExecutorTest.java index 62ee101b1deceb..f89fae150002d8 100644 --- a/fe/src/test/java/org/apache/doris/qe/ShowExecutorTest.java +++ b/fe/src/test/java/org/apache/doris/qe/ShowExecutorTest.java @@ -95,7 +95,6 @@ public void setUp() throws Exception { // mock partition Partition partition = EasyMock.createMock(Partition.class); - EasyMock.expect(partition.getRollupIndices()).andReturn(Lists.newArrayList(index1, index2)).anyTimes(); EasyMock.expect(partition.getBaseIndex()).andReturn(index1).anyTimes(); EasyMock.replay(partition); diff --git a/fe/src/test/java/org/apache/doris/task/LoadEtlTaskTest.java b/fe/src/test/java/org/apache/doris/task/LoadEtlTaskTest.java index 32cd8fa9f7ac63..392d6ce243825e 100644 --- a/fe/src/test/java/org/apache/doris/task/LoadEtlTaskTest.java +++ b/fe/src/test/java/org/apache/doris/task/LoadEtlTaskTest.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Tablet; @@ -175,7 +176,7 @@ public void testRunEtlTask() throws Exception { .getIdToPartitionLoadInfo().get(paritionId).getVersion()); int tabletNum = 0; Map tabletLoadInfos = job.getIdToTabletLoadInfo(); - for (MaterializedIndex olapTable : partition.getMaterializedIndices()) { + for (MaterializedIndex olapTable : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : olapTable.getTablets()) { ++tabletNum; Assert.assertTrue(tabletLoadInfos.containsKey(tablet.getId())); diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index a82f0b10caef25..3f6c906d90be70 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -65,7 +65,7 @@ struct TDropTabletReq { 2: optional Types.TSchemaHash schema_hash } -struct TAlterTabletReq{ +struct TAlterTabletReq { 1: required Types.TTabletId base_tablet_id 2: required Types.TSchemaHash base_schema_hash 3: required TCreateTabletReq new_tablet_req diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index c354e89390b725..b88a9de3814868 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -162,7 +162,8 @@ enum TTaskType { RECOVER_TABLET, STREAM_LOAD, UPDATE_TABLET_META_INFO, - ALTER_TASK + // this type of task will replace both ROLLUP and SCHEMA_CHANGE + ALTER } enum TStmtType {