diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index ebf121df8168a5..9c8358d60bb35b 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -331,7 +331,7 @@ set(CXX_GCC_FLAGS "-g -Wno-unused-local-typedefs")
 # Debug information is stored as dwarf2 to be as compatible as possible
 # -Werror: compile warnings should be errors when using the toolchain compiler.
 # Only enable for debug builds because this is what we test in pre-commit tests.
-set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Werror -ggdb")
+set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Werror -ggdb -O0 -gdwarf-2")
 
 # For CMAKE_BUILD_TYPE=Release
 #   -O3: Enable all compiler optimizations
@@ -476,6 +476,7 @@ set(DORIS_LINK_LIBS
     Exprs
     Gutil
     Olap
+    Rowset
     Runtime
     Service
     Udf
@@ -604,8 +605,9 @@ add_subdirectory(${SRC_DIR}/olap)
 add_subdirectory(${SRC_DIR}/runtime)
 add_subdirectory(${SRC_DIR}/service)
 add_subdirectory(${SRC_DIR}/testutil)
-add_subdirectory(${SRC_DIR}/tools)
+#add_subdirectory(${SRC_DIR}/tools)
 add_subdirectory(${SRC_DIR}/udf)
+add_subdirectory(${SRC_DIR}/tools)
 add_subdirectory(${SRC_DIR}/udf_samples)
 add_subdirectory(${SRC_DIR}/util)
 
diff --git a/be/src/agent/CMakeLists.txt b/be/src/agent/CMakeLists.txt
index 378890ca4dab2c..570c19de12013e 100644
--- a/be/src/agent/CMakeLists.txt
+++ b/be/src/agent/CMakeLists.txt
@@ -23,13 +23,10 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/agent")
 
 add_library(Agent STATIC
     agent_server.cpp
-    pusher.cpp
     heartbeat_server.cpp
     task_worker_pool.cpp
     utils.cpp
     cgroups_mgr.cpp
     topic_subscriber.cpp
    user_resource_listener.cpp
-)
-
-
+)
\ No newline at end of file
diff --git a/be/src/agent/agent_server.cpp b/be/src/agent/agent_server.cpp
index 740a6e4de10a94..53b9dc4b60109e 100644
--- a/be/src/agent/agent_server.cpp
+++ b/be/src/agent/agent_server.cpp
@@ -35,6 +35,7 @@
 #include "gen_cpp/MasterService_types.h"
 #include "gen_cpp/Status_types.h"
 #include "olap/utils.h"
+#include "olap/snapshot_manager.h"
 #include "runtime/exec_env.h"
 #include "runtime/etl_job_mgr.h"
 #include "util/debug_util.h"
@@ -71,11 +72,11 @@ AgentServer::AgentServer(ExecEnv* exec_env,
     }
 
     // init task worker pool
-    _create_table_workers = new TaskWorkerPool(
+    _create_tablet_workers = new TaskWorkerPool(
             TaskWorkerPool::TaskWorkerType::CREATE_TABLE,
             _exec_env,
             master_info);
-    _drop_table_workers = new TaskWorkerPool(
+    _drop_tablet_workers = new TaskWorkerPool(
             TaskWorkerPool::TaskWorkerType::DROP_TABLE,
             _exec_env,
             master_info);
@@ -99,7 +100,7 @@ AgentServer::AgentServer(ExecEnv* exec_env,
             TaskWorkerPool::TaskWorkerType::DELETE,
             _exec_env,
             master_info);
-    _alter_table_workers = new TaskWorkerPool(
+    _alter_tablet_workers = new TaskWorkerPool(
             TaskWorkerPool::TaskWorkerType::ALTER_TABLE,
             _exec_env,
             master_info);
@@ -111,10 +112,6 @@ AgentServer::AgentServer(ExecEnv* exec_env,
             TaskWorkerPool::TaskWorkerType::STORAGE_MEDIUM_MIGRATE,
             _exec_env,
             master_info);
-    _cancel_delete_data_workers = new TaskWorkerPool(
-            TaskWorkerPool::TaskWorkerType::CANCEL_DELETE_DATA,
-            _exec_env,
-            master_info);
     _check_consistency_workers = new TaskWorkerPool(
             TaskWorkerPool::TaskWorkerType::CHECK_CONSISTENCY,
             _exec_env,
@@ -127,7 +124,7 @@ AgentServer::AgentServer(ExecEnv* exec_env,
             TaskWorkerPool::TaskWorkerType::REPORT_DISK_STATE,
             _exec_env,
             master_info);
-    _report_olap_table_workers = new TaskWorkerPool(
+    _report_tablet_workers = new TaskWorkerPool(
             TaskWorkerPool::TaskWorkerType::REPORT_OLAP_TABLE,
             _exec_env,
             master_info);
@@ -156,21 +153,20 @@ AgentServer::AgentServer(ExecEnv* exec_env,
             _exec_env,
             master_info);
 #ifndef BE_TEST
-
_create_table_workers->start(); - _drop_table_workers->start(); + _create_tablet_workers->start(); + _drop_tablet_workers->start(); _push_workers->start(); _publish_version_workers->start(); _clear_alter_task_workers->start(); _clear_transaction_task_workers->start(); _delete_workers->start(); - _alter_table_workers->start(); + _alter_tablet_workers->start(); _clone_workers->start(); _storage_medium_migrate_workers->start(); - _cancel_delete_data_workers->start(); _check_consistency_workers->start(); _report_task_workers->start(); _report_disk_state_workers->start(); - _report_olap_table_workers->start(); + _report_tablet_workers->start(); _upload_workers->start(); _download_workers->start(); _make_snapshot_workers->start(); @@ -185,11 +181,11 @@ AgentServer::AgentServer(ExecEnv* exec_env, } AgentServer::~AgentServer() { - if (_create_table_workers != NULL) { - delete _create_table_workers; + if (_create_tablet_workers != NULL) { + delete _create_tablet_workers; } - if (_drop_table_workers != NULL) { - delete _drop_table_workers; + if (_drop_tablet_workers != NULL) { + delete _drop_tablet_workers; } if (_push_workers != NULL) { delete _push_workers; @@ -206,8 +202,8 @@ AgentServer::~AgentServer() { if (_delete_workers != NULL) { delete _delete_workers; } - if (_alter_table_workers != NULL) { - delete _alter_table_workers; + if (_alter_tablet_workers != NULL) { + delete _alter_tablet_workers; } if (_clone_workers != NULL) { delete _clone_workers; @@ -215,9 +211,6 @@ AgentServer::~AgentServer() { if (_storage_medium_migrate_workers != NULL) { delete _storage_medium_migrate_workers; } - if (_cancel_delete_data_workers != NULL) { - delete _cancel_delete_data_workers; - } if (_check_consistency_workers != NULL) { delete _check_consistency_workers; } @@ -227,8 +220,8 @@ AgentServer::~AgentServer() { if (_report_disk_state_workers != NULL) { delete _report_disk_state_workers; } - if (_report_olap_table_workers != NULL) { - delete _report_olap_table_workers; + if (_report_tablet_workers != NULL) { + delete _report_tablet_workers; } if (_upload_workers != NULL) { delete _upload_workers; @@ -277,14 +270,14 @@ void AgentServer::submit_tasks( switch (task_type) { case TTaskType::CREATE: if (task.__isset.create_tablet_req) { - _create_table_workers->submit_task(task); + _create_tablet_workers->submit_task(task); } else { status_code = TStatusCode::ANALYSIS_ERROR; } break; case TTaskType::DROP: if (task.__isset.drop_tablet_req) { - _drop_table_workers->submit_task(task); + _drop_tablet_workers->submit_task(task); } else { status_code = TStatusCode::ANALYSIS_ERROR; } @@ -328,7 +321,7 @@ void AgentServer::submit_tasks( case TTaskType::ROLLUP: case TTaskType::SCHEMA_CHANGE: if (task.__isset.alter_tablet_req) { - _alter_table_workers->submit_task(task); + _alter_tablet_workers->submit_task(task); } else { status_code = TStatusCode::ANALYSIS_ERROR; } @@ -347,13 +340,6 @@ void AgentServer::submit_tasks( status_code = TStatusCode::ANALYSIS_ERROR; } break; - case TTaskType::CANCEL_DELETE: - if (task.__isset.cancel_delete_data_req) { - _cancel_delete_data_workers->submit_task(task); - } else { - status_code = TStatusCode::ANALYSIS_ERROR; - } - break; case TTaskType::CHECK_CONSISTENCY: if (task.__isset.check_consistency_req) { _check_consistency_workers->submit_task(task); @@ -423,10 +409,10 @@ void AgentServer::make_snapshot(TAgentResult& return_value, TStatus status; vector error_msgs; TStatusCode::type status_code = TStatusCode::OK; - + return_value.__set_snapshot_version(PREFERRED_SNAPSHOT_VERSION); string 
snapshot_path; OLAPStatus make_snapshot_status = - _exec_env->olap_engine()->make_snapshot(snapshot_request, &snapshot_path); + SnapshotManager::instance()->make_snapshot(snapshot_request, &snapshot_path); if (make_snapshot_status != OLAP_SUCCESS) { status_code = TStatusCode::RUNTIME_ERROR; OLAP_LOG_WARNING("make_snapshot failed. tablet_id: %ld, schema_hash: %ld, status: %d", @@ -453,7 +439,7 @@ void AgentServer::release_snapshot(TAgentResult& return_value, const std::string TStatusCode::type status_code = TStatusCode::OK; OLAPStatus release_snapshot_status = - _exec_env->olap_engine()->release_snapshot(snapshot_path); + SnapshotManager::instance()->release_snapshot(snapshot_path); if (release_snapshot_status != OLAP_SUCCESS) { status_code = TStatusCode::RUNTIME_ERROR; LOG(WARNING) << "release_snapshot failed. snapshot_path: " << snapshot_path << ", status: " << release_snapshot_status; diff --git a/be/src/agent/agent_server.h b/be/src/agent/agent_server.h index f1e584d88d952d..aa74c075123425 100644 --- a/be/src/agent/agent_server.h +++ b/be/src/agent/agent_server.h @@ -93,21 +93,20 @@ class AgentServer { ExecEnv* _exec_env; const TMasterInfo& _master_info; - TaskWorkerPool* _create_table_workers; - TaskWorkerPool* _drop_table_workers; + TaskWorkerPool* _create_tablet_workers; + TaskWorkerPool* _drop_tablet_workers; TaskWorkerPool* _push_workers; TaskWorkerPool* _publish_version_workers; TaskWorkerPool* _clear_alter_task_workers; TaskWorkerPool* _clear_transaction_task_workers; TaskWorkerPool* _delete_workers; - TaskWorkerPool* _alter_table_workers; + TaskWorkerPool* _alter_tablet_workers; TaskWorkerPool* _clone_workers; TaskWorkerPool* _storage_medium_migrate_workers; - TaskWorkerPool* _cancel_delete_data_workers; TaskWorkerPool* _check_consistency_workers; TaskWorkerPool* _report_task_workers; TaskWorkerPool* _report_disk_state_workers; - TaskWorkerPool* _report_olap_table_workers; + TaskWorkerPool* _report_tablet_workers; TaskWorkerPool* _upload_workers; TaskWorkerPool* _download_workers; TaskWorkerPool* _make_snapshot_workers; diff --git a/be/src/agent/cgroups_mgr.cpp b/be/src/agent/cgroups_mgr.cpp index 66d763472f9c8d..8a2faac2f95115 100644 --- a/be/src/agent/cgroups_mgr.cpp +++ b/be/src/agent/cgroups_mgr.cpp @@ -27,8 +27,8 @@ #include #include "boost/filesystem.hpp" #include "common/logging.h" -#include "olap/store.h" -#include "olap/olap_engine.h" +#include "olap/data_dir.h" +#include "olap/storage_engine.h" #include "runtime/exec_env.h" #include "runtime/load_path_mgr.h" @@ -188,7 +188,7 @@ AgentStatus CgroupsMgr::_config_disk_throttle(std::string user_name, } // add olap engine data path here - auto stores = OLAPEngine::get_instance()->get_stores(); + auto stores = StorageEngine::instance()->get_stores(); // buld load data path, it is alreay in data path // _exec_env->load_path_mgr()->get_load_data_path(&data_paths); diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp index 35f67f8ddeb337..5e04c99cb899f1 100644 --- a/be/src/agent/heartbeat_server.cpp +++ b/be/src/agent/heartbeat_server.cpp @@ -25,7 +25,7 @@ #include "common/status.h" #include "gen_cpp/HeartbeatService.h" #include "gen_cpp/Status_types.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "olap/utils.h" #include "service/backend_options.h" #include "util/thrift_server.h" @@ -41,7 +41,7 @@ namespace doris { HeartbeatServer::HeartbeatServer(TMasterInfo* master_info) : _master_info(master_info), _epoch(0) { - _olap_engine = OLAPEngine::get_instance(); + 
_olap_engine = StorageEngine::instance(); } void HeartbeatServer::init_cluster_id() { diff --git a/be/src/agent/heartbeat_server.h b/be/src/agent/heartbeat_server.h index 5b3165dcb07739..3df17f06b009b0 100644 --- a/be/src/agent/heartbeat_server.h +++ b/be/src/agent/heartbeat_server.h @@ -31,7 +31,7 @@ namespace doris { const uint32_t HEARTBEAT_INTERVAL = 10; -class OLAPEngine; +class StorageEngine; class Status; class ThriftServer; @@ -55,7 +55,7 @@ class HeartbeatServer : public HeartbeatServiceIf { Status _heartbeat( const TMasterInfo& master_info); - OLAPEngine* _olap_engine; + StorageEngine* _olap_engine; // mutex to protect master_info and _epoch std::mutex _hb_mtx; diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 2719090a1e8d52..1f635ff201fb51 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -31,16 +31,23 @@ #include "boost/filesystem.hpp" #include "boost/lexical_cast.hpp" -#include "agent/pusher.h" #include "agent/status.h" #include "agent/utils.h" #include "gen_cpp/FrontendService.h" #include "gen_cpp/Types_types.h" #include "http/http_client.h" #include "olap/olap_common.h" -#include "olap/olap_engine.h" -#include "olap/olap_table.h" -#include "olap/store.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" +#include "olap/data_dir.h" +#include "olap/snapshot_manager.h" +#include "olap/task/engine_checksum_task.h" +#include "olap/task/engine_clear_alter_task.h" +#include "olap/task/engine_clone_task.h" +#include "olap/task/engine_schema_change_task.h" +#include "olap/task/engine_batch_load_task.h" +#include "olap/task/engine_storage_migration_task.h" +#include "olap/task/engine_publish_version_task.h" #include "olap/utils.h" #include "common/resource_tls.h" #include "common/status.h" @@ -64,20 +71,11 @@ using std::vector; namespace doris { -const uint32_t DOWNLOAD_FILE_MAX_RETRY = 3; const uint32_t TASK_FINISH_MAX_RETRY = 3; -const uint32_t PUSH_MAX_RETRY = 1; const uint32_t PUBLISH_VERSION_MAX_RETRY = 3; const uint32_t REPORT_TASK_WORKER_COUNT = 1; const uint32_t REPORT_DISK_STATE_WORKER_COUNT = 1; const uint32_t REPORT_OLAP_TABLE_WORKER_COUNT = 1; -const uint32_t LIST_REMOTE_FILE_TIMEOUT = 15; -const std::string HTTP_REQUEST_PREFIX = "/api/_tablet/_download?"; -const std::string HTTP_REQUEST_TOKEN_PARAM = "token="; -const std::string HTTP_REQUEST_FILE_PARAM = "&file="; - -const uint32_t GET_LENGTH_TIMEOUT = 10; -const uint32_t CURL_OPT_CONNECTTIMEOUT = 120; std::atomic_ulong TaskWorkerPool::_s_report_version(time(NULL) * 10000); Mutex TaskWorkerPool::_s_task_signatures_lock; @@ -118,12 +116,12 @@ void TaskWorkerPool::start() { // Init task pool and task workers switch (_task_worker_type) { case TaskWorkerType::CREATE_TABLE: - _worker_count = config::create_table_worker_count; - _callback_function = _create_table_worker_thread_callback; + _worker_count = config::create_tablet_worker_count; + _callback_function = _create_tablet_worker_thread_callback; break; case TaskWorkerType::DROP_TABLE: - _worker_count = config::drop_table_worker_count; - _callback_function = _drop_table_worker_thread_callback; + _worker_count = config::drop_tablet_worker_count; + _callback_function = _drop_tablet_worker_thread_callback; break; case TaskWorkerType::PUSH: case TaskWorkerType::REALTIME_PUSH: @@ -148,8 +146,8 @@ void TaskWorkerPool::start() { _callback_function = _push_worker_thread_callback; break; case TaskWorkerType::ALTER_TABLE: - _worker_count = config::alter_table_worker_count; - _callback_function = 
_alter_table_worker_thread_callback; + _worker_count = config::alter_tablet_worker_count; + _callback_function = _alter_tablet_worker_thread_callback; break; case TaskWorkerType::CLONE: _worker_count = config::clone_worker_count; @@ -159,10 +157,6 @@ void TaskWorkerPool::start() { _worker_count = config::storage_medium_migrate_count; _callback_function = _storage_medium_migrate_worker_thread_callback; break; - case TaskWorkerType::CANCEL_DELETE_DATA: - _worker_count = config::cancel_delete_data_worker_count; - _callback_function = _cancel_delete_data_worker_thread_callback; - break; case TaskWorkerType::CHECK_CONSISTENCY: _worker_count = config::check_consistency_worker_count; _callback_function = _check_consistency_worker_thread_callback; @@ -177,7 +171,7 @@ void TaskWorkerPool::start() { break; case TaskWorkerType::REPORT_OLAP_TABLE: _worker_count = REPORT_OLAP_TABLE_WORKER_COUNT; - _callback_function = _report_olap_table_worker_thread_callback; + _callback_function = _report_tablet_worker_thread_callback; break; case TaskWorkerType::UPLOAD: _worker_count = config::upload_worker_count; @@ -223,14 +217,13 @@ void TaskWorkerPool::submit_task(const TAgentTaskRequest& task) { if (task.__isset.resource_info) { user = task.resource_info.user; } - bool ret = _record_task_info(task_type, signature, user); if (ret == true) { - { - lock_guard worker_thread_lock(_worker_thread_lock); - _tasks.push_back(task); - _worker_thread_condition_lock.notify(); - } + lock_guard worker_thread_lock(_worker_thread_lock); + // set the task receive time + (const_cast(task)).__set_recv_time(time(nullptr)); + _tasks.push_back(task); + _worker_thread_condition_lock.notify(); } } @@ -253,7 +246,7 @@ bool TaskWorkerPool::_record_task_info( signature_set.insert(signature); LOG(INFO) << "type: " << task_name << ", signature: " << signature << ", has been inserted" - << ". queue size: " << signature_set.size(); + << ", queue size: " << signature_set.size(); if (task_type == TTaskType::PUSH) { _s_total_task_user_count[task_type][user] += 1; _s_total_task_count[task_type] += 1; @@ -412,7 +405,7 @@ uint32_t TaskWorkerPool::_get_next_task_index( return index; } -void* TaskWorkerPool::_create_table_worker_thread_callback(void* arg_this) { +void* TaskWorkerPool::_create_tablet_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; #ifndef BE_TEST @@ -436,7 +429,7 @@ void* TaskWorkerPool::_create_table_worker_thread_callback(void* arg_this) { TStatus task_status; OLAPStatus create_status = - worker_pool_this->_env->olap_engine()->create_table(create_tablet_req); + worker_pool_this->_env->storage_engine()->create_tablet(create_tablet_req); if (create_status != OLAPStatus::OLAP_SUCCESS) { OLAP_LOG_WARNING("create table failed. status: %d, signature: %ld", create_status, agent_task_req.signature); @@ -464,7 +457,7 @@ void* TaskWorkerPool::_create_table_worker_thread_callback(void* arg_this) { return (void*)0; } -void* TaskWorkerPool::_drop_table_worker_thread_callback(void* arg_this) { +void* TaskWorkerPool::_drop_tablet_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; #ifndef BE_TEST @@ -486,13 +479,19 @@ void* TaskWorkerPool::_drop_table_worker_thread_callback(void* arg_this) { TStatusCode::type status_code = TStatusCode::OK; vector error_msgs; TStatus task_status; - - AgentStatus status = worker_pool_this->_drop_table(drop_tablet_req); - if (status != DORIS_SUCCESS) { - OLAP_LOG_WARNING( - "drop table failed! 
signature: %ld", agent_task_req.signature); - error_msgs.push_back("drop table failed!"); - status_code = TStatusCode::RUNTIME_ERROR; + TabletSharedPtr dropped_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + drop_tablet_req.tablet_id, drop_tablet_req.schema_hash); + if (dropped_tablet != nullptr) { + OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( + drop_tablet_req.tablet_id, drop_tablet_req.schema_hash); + if (drop_status != OLAP_SUCCESS ) { + LOG(WARNING) << "drop table failed! signature: " << agent_task_req.signature; + error_msgs.push_back("drop table failed!"); + status_code = TStatusCode::RUNTIME_ERROR; + } + // if tablet is dropped by fe, then the related txn should also be removed + StorageEngine::instance()->txn_manager()->force_rollback_tablet_related_txns(dropped_tablet->data_dir()->get_meta(), + drop_tablet_req.tablet_id, drop_tablet_req.schema_hash, dropped_tablet->tablet_uid()); } task_status.__set_status_code(status_code); task_status.__set_error_msgs(error_msgs); @@ -511,7 +510,7 @@ void* TaskWorkerPool::_drop_table_worker_thread_callback(void* arg_this) { return (void*)0; } -void* TaskWorkerPool::_alter_table_worker_thread_callback(void* arg_this) { +void* TaskWorkerPool::_alter_tablet_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; #ifndef BE_TEST @@ -533,23 +532,33 @@ void* TaskWorkerPool::_alter_table_worker_thread_callback(void* arg_this) { CgroupsMgr::apply_system_cgroup(); int64_t signatrue = agent_task_req.signature; LOG(INFO) << "get alter table task, signature: " << agent_task_req.signature; - - TFinishTaskRequest finish_task_request; - TTaskType::type task_type = agent_task_req.task_type; - switch (task_type) { - case TTaskType::SCHEMA_CHANGE: - case TTaskType::ROLLUP: - worker_pool_this->_alter_table(alter_tablet_request, - signatrue, - task_type, - &finish_task_request); - break; - default: - // pass - break; + bool is_task_timeout = false; + if (agent_task_req.__isset.recv_time) { + int64_t time_elapsed = time(nullptr) - agent_task_req.recv_time; + if (time_elapsed > config::report_task_interval_seconds * 20) { + LOG(INFO) << "task elapsed " << time_elapsed + << " since it is inserted to queue, it is timeout"; + is_task_timeout = true; + } + } + if (!is_task_timeout) { + TFinishTaskRequest finish_task_request; + TTaskType::type task_type = agent_task_req.task_type; + switch (task_type) { + case TTaskType::SCHEMA_CHANGE: + case TTaskType::ROLLUP: + worker_pool_this->_alter_tablet(worker_pool_this, + alter_tablet_request, + signatrue, + task_type, + &finish_task_request); + break; + default: + // pass + break; + } + worker_pool_this->_finish_task(finish_task_request); } - - worker_pool_this->_finish_task(finish_task_request); worker_pool_this->_remove_task_info(agent_task_req.task_type, agent_task_req.signature, ""); #ifndef BE_TEST } @@ -557,7 +566,8 @@ void* TaskWorkerPool::_alter_table_worker_thread_callback(void* arg_this) { return (void*)0; } -void TaskWorkerPool::_alter_table( +void TaskWorkerPool::_alter_tablet( + TaskWorkerPool* worker_pool_this, const TAlterTabletReq& alter_tablet_request, int64_t signature, const TTaskType::type task_type, @@ -583,58 +593,16 @@ void TaskWorkerPool::_alter_table( break; } - TTabletId base_tablet_id = alter_tablet_request.base_tablet_id; - TSchemaHash base_schema_hash = alter_tablet_request.base_schema_hash; - // Check last schema change status, if failed delete tablet file // Do not need to adjust delete success 
or not // Because if delete failed create rollup will failed if (status == DORIS_SUCCESS) { - // Check lastest schema change status - AlterTableStatus alter_table_status = _show_alter_table_status( - base_tablet_id, - base_schema_hash); - LOG(INFO) << "get alter table status:" << alter_table_status - << ", signature:" << signature; - - // Delete failed alter table tablet file - if (alter_table_status == ALTER_TABLE_FAILED) { - TDropTabletReq drop_tablet_req; - drop_tablet_req.__set_tablet_id(alter_tablet_request.new_tablet_req.tablet_id); - drop_tablet_req.__set_schema_hash(alter_tablet_request.new_tablet_req.tablet_schema.schema_hash); - status = _drop_table(drop_tablet_req); - - if (status != DORIS_SUCCESS) { - OLAP_LOG_WARNING("delete failed rollup file failed, status: %d, " - "signature: %ld.", - status, signature); - error_msgs.push_back("delete failed rollup file failed, " - "signature: " + to_string(signature)); - } - } - - if (status == DORIS_SUCCESS) { - if (alter_table_status == ALTER_TABLE_FINISHED - || alter_table_status == ALTER_TABLE_FAILED - || alter_table_status == ALTER_TABLE_WAITING) { - // Create rollup table - OLAPStatus ret = OLAPStatus::OLAP_SUCCESS; - switch (task_type) { - case TTaskType::ROLLUP: - ret = _env->olap_engine()->create_rollup_table(alter_tablet_request); - break; - case TTaskType::SCHEMA_CHANGE: - ret = _env->olap_engine()->schema_change(alter_tablet_request); - break; - default: - // pass - break; - } - if (ret != OLAPStatus::OLAP_SUCCESS) { - status = DORIS_ERROR; - LOG(WARNING) << process_name << " failed. signature: " << signature << " status: " << status; - } - } + EngineSchemaChangeTask engine_task(alter_tablet_request, signature, task_type, &error_msgs, process_name); + OLAPStatus sc_status = worker_pool_this->_env->storage_engine()->execute_task(&engine_task); + if (sc_status != OLAP_SUCCESS) { + status = DORIS_ERROR; + } else { + status = DORIS_SUCCESS; } } @@ -659,12 +627,10 @@ void TaskWorkerPool::_alter_table( &tablet_info); if (status != DORIS_SUCCESS) { - OLAP_LOG_WARNING("%s success, but get new tablet info failed." - "tablet_id: %ld, schema_hash: %ld, signature: %ld.", - process_name.c_str(), - alter_tablet_request.new_tablet_req.tablet_id, - alter_tablet_request.new_tablet_req.tablet_schema.schema_hash, - signature); + LOG(WARNING) << process_name<< " success, but get new tablet info failed." + << "tablet_id: " << alter_tablet_request.new_tablet_req.tablet_id + << ", schema_hash: " << alter_tablet_request.new_tablet_req.tablet_schema.schema_hash + << ", signature: " << signature; } else { finish_tablet_infos.push_back(tablet_info); } @@ -676,8 +642,8 @@ void TaskWorkerPool::_alter_table( error_msgs.push_back(process_name + " success"); task_status.__set_status_code(TStatusCode::OK); } else if (status == DORIS_TASK_REQUEST_ERROR) { - OLAP_LOG_WARNING("alter table request task type invalid. " - "signature: %ld", signature); + LOG(WARNING) << "alter table request task type invalid. " + << "signature:" << signature; error_msgs.push_back("alter table request new tablet id or schema count invalid."); task_status.__set_status_code(TStatusCode::ANALYSIS_ERROR); } else { @@ -752,49 +718,9 @@ void* TaskWorkerPool::_push_worker_thread_callback(void* arg_this) { LOG(INFO) << "get push task. 
signature: " << agent_task_req.signature << " user: " << user << " priority: " << priority; vector tablet_infos; - if (push_req.push_type == TPushType::LOAD || push_req.push_type == TPushType::LOAD_DELETE) { -#ifndef BE_TEST - Pusher pusher(worker_pool_this->_env->olap_engine(), push_req); - status = pusher.init(); -#else - status = worker_pool_this->_pusher->init(); -#endif - - if (status == DORIS_SUCCESS) { - uint32_t retry_time = 0; - while (retry_time < PUSH_MAX_RETRY) { -#ifndef BE_TEST - status = pusher.process(&tablet_infos); -#else - status = worker_pool_this->_pusher->process(&tablet_infos); -#endif - if (status == DORIS_PUSH_HAD_LOADED) { - OLAP_LOG_WARNING("transaction exists when realtime push, " - "but unfinished, do not report to fe, signature: %ld", - agent_task_req.signature); - break; // not retry any more - } - // Internal error, need retry - if (status == DORIS_ERROR) { - OLAP_LOG_WARNING("push internal error, need retry.signature: %ld", - agent_task_req.signature); - retry_time += 1; - } else { - break; - } - } - } - } else if (push_req.push_type == TPushType::DELETE) { - OLAPStatus delete_data_status = - worker_pool_this->_env->olap_engine()->delete_data(push_req, &tablet_infos); - if (delete_data_status != OLAPStatus::OLAP_SUCCESS) { - OLAP_LOG_WARNING("delete data failed. status: %d, signature: %ld", - delete_data_status, agent_task_req.signature); - status = DORIS_ERROR; - } - } else { - status = DORIS_TASK_REQUEST_ERROR; - } + + EngineBatchLoadTask engine_task(push_req, &tablet_infos, agent_task_req.signature, &status); + worker_pool_this->_env->storage_engine()->execute_task(&engine_task); #ifndef BE_TEST if (status == DORIS_PUSH_HAD_LOADED) { @@ -881,8 +807,8 @@ void* TaskWorkerPool::_publish_version_worker_thread_callback(void* arg_this) { OLAPStatus res = OLAP_SUCCESS; while (retry_time < PUBLISH_VERSION_MAX_RETRY) { error_tablet_ids.clear(); - res = worker_pool_this->_env->olap_engine()->publish_version( - publish_version_req, &error_tablet_ids); + EnginePublishVersionTask engine_task(publish_version_req, &error_tablet_ids); + res = worker_pool_this->_env->storage_engine()->execute_task(&engine_task); if (res == OLAP_SUCCESS) { break; } else { @@ -896,8 +822,10 @@ void* TaskWorkerPool::_publish_version_worker_thread_callback(void* arg_this) { TFinishTaskRequest finish_task_request; if (res != OLAP_SUCCESS) { + // if publish failed, return failed, fe will ignore this error and + // check error tablet ids and fe will also republish this task status_code = TStatusCode::RUNTIME_ERROR; - OLAP_LOG_WARNING("publish version failed. signature: %ld", agent_task_req.signature); + LOG(WARNING) << "publish version failed. signature:" << agent_task_req.signature; error_msgs.push_back("publish version failed"); finish_task_request.__set_error_tablet_ids(error_tablet_ids); DorisMetrics::publish_task_failed_total.increment(1); @@ -946,9 +874,8 @@ void* TaskWorkerPool::_clear_alter_task_worker_thread_callback(void* arg_this) { TStatusCode::type status_code = TStatusCode::OK; vector error_msgs; TStatus task_status; - - OLAPStatus clear_status = worker_pool_this->_env->olap_engine()-> - clear_alter_task(clear_alter_task_req.tablet_id, clear_alter_task_req.schema_hash); + EngineClearAlterTask engine_task(clear_alter_task_req); + OLAPStatus clear_status = worker_pool_this->_env->storage_engine()->execute_task(&engine_task); if (clear_status != OLAPStatus::OLAP_SUCCESS) { OLAP_LOG_WARNING("clear alter task failed. 
[signature: %ld status=%d]", agent_task_req.signature, clear_status); @@ -1000,7 +927,7 @@ void* TaskWorkerPool::_clear_transaction_task_worker_thread_callback(void* arg_t vector error_msgs; TStatus task_status; - worker_pool_this->_env->olap_engine()->clear_transaction_task( + worker_pool_this->_env->storage_engine()->clear_transaction_task( clear_transaction_task_req.transaction_id, clear_transaction_task_req.partition_id); LOG(INFO) << "finish to clear transaction task. signature:" << agent_task_req.signature << ", transaction_id:" << clear_transaction_task_req.transaction_id; @@ -1049,213 +976,12 @@ void* TaskWorkerPool::_clone_worker_thread_callback(void* arg_this) { LOG(INFO) << "get clone task. signature:" << agent_task_req.signature; vector error_msgs; - string src_file_path; - TBackend src_host; - // Check local tablet exist or not - OLAPTablePtr tablet = - worker_pool_this->_env->olap_engine()->get_table( - clone_req.tablet_id, clone_req.schema_hash); - - int64_t copy_size = 0; - int64_t copy_time_ms = 0; - if (tablet.get() != NULL) { - LOG(INFO) << "clone tablet exist yet, begin to incremental clone. " - << "signature:" << agent_task_req.signature - << ", tablet_id:" << clone_req.tablet_id - << ", schema_hash:" << clone_req.schema_hash - << ", committed_version:" << clone_req.committed_version; - - // try to incremental clone - vector missing_versions; - string local_data_path = worker_pool_this->_env->olap_engine()-> - get_info_before_incremental_clone(tablet, clone_req.committed_version, &missing_versions); - - bool allow_incremental_clone = false; - status = worker_pool_this->_clone_copy(clone_req, - agent_task_req.signature, - local_data_path, - &src_host, - &src_file_path, - &error_msgs, - &missing_versions, - &allow_incremental_clone, - ©_size, - ©_time_ms); - if (status == DORIS_SUCCESS) { - OLAPStatus olap_status = worker_pool_this->_env->olap_engine()-> - finish_clone(tablet, local_data_path, clone_req.committed_version, allow_incremental_clone); - if (olap_status != OLAP_SUCCESS) { - LOG(WARNING) << "failed to finish incremental clone. [table=" << tablet->full_name() - << " res=" << olap_status << "]"; - error_msgs.push_back("incremental clone error."); - status = DORIS_ERROR; - } - } else { - // begin to full clone if incremental failed - LOG(INFO) << "begin to full clone. [table=" << tablet->full_name(); - status = worker_pool_this->_clone_copy(clone_req, - agent_task_req.signature, - local_data_path, - &src_host, - &src_file_path, - &error_msgs, - NULL, NULL, - ©_size, - ©_time_ms); - if (status == DORIS_SUCCESS) { - LOG(INFO) << "download successfully when full clone. [table=" << tablet->full_name() - << " src_host=" << src_host.host << " src_file_path=" << src_file_path - << " local_data_path=" << local_data_path << "]"; - - OLAPStatus olap_status = worker_pool_this->_env->olap_engine()-> - finish_clone(tablet, local_data_path, clone_req.committed_version, false); - - if (olap_status != OLAP_SUCCESS) { - LOG(WARNING) << "fail to finish full clone. 
[table=" << tablet->full_name() - << " res=" << olap_status << "]"; - error_msgs.push_back("full clone error."); - status = DORIS_ERROR; - } - } - } - } else { // create a new tablet - // Get local disk from olap - string local_shard_root_path; - OlapStore* store = nullptr; - OLAPStatus olap_status = OLAP_ERR_OTHER_ERROR; - if (clone_req.__isset.task_version && clone_req.task_version == 2) { - // use path specified in clone request - olap_status = worker_pool_this->_env->olap_engine()->obtain_shard_path_by_hash( - clone_req.dest_path_hash, &local_shard_root_path, &store); - } - - // if failed to get path by hash, or path hash is not specified, get arbitrary one - if (olap_status != OLAP_SUCCESS || clone_req.task_version == 1) { - olap_status = worker_pool_this->_env->olap_engine()->obtain_shard_path( - clone_req.storage_medium, &local_shard_root_path, &store); - } - - if (olap_status != OLAP_SUCCESS) { - OLAP_LOG_WARNING("clone get local root path failed. signature: %ld", - agent_task_req.signature); - error_msgs.push_back("clone get local root path failed."); - status = DORIS_ERROR; - } - - if (status == DORIS_SUCCESS) { - stringstream tablet_dir_stream; - tablet_dir_stream << local_shard_root_path - << "/" << clone_req.tablet_id - << "/" << clone_req.schema_hash; - status = worker_pool_this->_clone_copy(clone_req, - agent_task_req.signature, - tablet_dir_stream.str(), - &src_host, - &src_file_path, - &error_msgs, - NULL, NULL, - ©_size, - ©_time_ms); - } - - if (status == DORIS_SUCCESS) { - LOG(INFO) << "clone copy done. src_host: " << src_host.host - << " src_file_path: " << src_file_path; - // Load header - OLAPStatus load_header_status = - worker_pool_this->_env->olap_engine()->load_header( - store, - local_shard_root_path, - clone_req.tablet_id, - clone_req.schema_hash); - if (load_header_status != OLAP_SUCCESS) { - LOG(WARNING) << "load header failed. local_shard_root_path: '" << local_shard_root_path - << "' schema_hash: " << clone_req.schema_hash << ". status: " << load_header_status - << ". signature: " << agent_task_req.signature; - error_msgs.push_back("load header failed."); - status = DORIS_ERROR; - } - } - -#ifndef BE_TEST - // Clean useless dir, if failed, ignore it. - if (status != DORIS_SUCCESS && status != DORIS_CREATE_TABLE_EXIST) { - stringstream local_data_path_stream; - local_data_path_stream << local_shard_root_path - << "/" << clone_req.tablet_id; - string local_data_path = local_data_path_stream.str(); - LOG(INFO) << "clone failed. want to delete local dir: " << local_data_path - << ". signature: " << agent_task_req.signature; - try { - boost::filesystem::path local_path(local_data_path); - if (boost::filesystem::exists(local_path)) { - boost::filesystem::remove_all(local_path); - } - } catch (boost::filesystem::filesystem_error e) { - // Ignore the error, OLAP will delete it - OLAP_LOG_WARNING("clone delete useless dir failed. " - "error: %s, local dir: %s, signature: %ld", - e.what(), local_data_path.c_str(), - agent_task_req.signature); - } - } -#endif - } - - // Get clone tablet info vector tablet_infos; - if (status == DORIS_SUCCESS || status == DORIS_CREATE_TABLE_EXIST) { - TTabletInfo tablet_info; - AgentStatus get_tablet_info_status = worker_pool_this->_get_tablet_info( - clone_req.tablet_id, - clone_req.schema_hash, - agent_task_req.signature, - &tablet_info); - if (get_tablet_info_status != DORIS_SUCCESS) { - OLAP_LOG_WARNING("clone success, but get tablet info failed." 
- "tablet id: %ld, schema hash: %ld, signature: %ld", - clone_req.tablet_id, clone_req.schema_hash, - agent_task_req.signature); - error_msgs.push_back("clone success, but get tablet info failed."); - status = DORIS_ERROR; - } else if ( - (clone_req.__isset.committed_version - && clone_req.__isset.committed_version_hash) - && (tablet_info.version < clone_req.committed_version || - (tablet_info.version == clone_req.committed_version - && tablet_info.version_hash != clone_req.committed_version_hash))) { - - // we need to check if this cloned table's version is what we expect. - // if not, maybe this is a stale remaining table which is waiting for drop. - // we drop it. - LOG(INFO) << "begin to drop the stale table. tablet_id:" << clone_req.tablet_id - << ", schema_hash:" << clone_req.schema_hash - << ", signature:" << agent_task_req.signature - << ", version:" << tablet_info.version - << ", version_hash:" << tablet_info.version_hash - << ", expected_version: " << clone_req.committed_version - << ", version_hash:" << clone_req.committed_version_hash; - TDropTabletReq drop_req; - drop_req.tablet_id = clone_req.tablet_id; - drop_req.schema_hash = clone_req.schema_hash; - AgentStatus drop_status = worker_pool_this->_drop_table(drop_req); - if (drop_status != DORIS_SUCCESS) { - // just log - OLAP_LOG_WARNING( - "drop stale cloned table failed! tabelt id: %ld", clone_req.tablet_id); - } - - status = DORIS_ERROR; - } else { - LOG(INFO) << "clone get tablet info success. tablet_id:" << clone_req.tablet_id - << ", schema_hash:" << clone_req.schema_hash - << ", signature:" << agent_task_req.signature - << ", version:" << tablet_info.version - << ", version_hash:" << tablet_info.version_hash; - tablet_infos.push_back(tablet_info); - } - } - + EngineCloneTask engine_task(clone_req, worker_pool_this->_master_info, + agent_task_req.signature, + &error_msgs, &tablet_infos, + &status); + worker_pool_this->_env->storage_engine()->execute_task(&engine_task); // Return result to fe TStatus task_status; TFinishTaskRequest finish_task_request; @@ -1279,9 +1005,6 @@ void* TaskWorkerPool::_clone_worker_thread_callback(void* arg_this) { task_status.__set_error_msgs(error_msgs); finish_task_request.__set_task_status(task_status); - finish_task_request.__set_copy_size(copy_size); - finish_task_request.__set_copy_time_ms(copy_time_ms); - worker_pool_this->_finish_task(finish_task_request); worker_pool_this->_remove_task_info(agent_task_req.task_type, agent_task_req.signature, ""); #ifndef BE_TEST @@ -1291,269 +1014,6 @@ void* TaskWorkerPool::_clone_worker_thread_callback(void* arg_this) { return (void*)0; } -AgentStatus TaskWorkerPool::_clone_copy( - const TCloneReq& clone_req, - int64_t signature, - const string& local_data_path, - TBackend* src_host, - string* src_file_path, - vector* error_msgs, - const vector* missing_versions, - bool* allow_incremental_clone, - int64_t* copy_size, - int64_t* copy_time_ms) { - AgentStatus status = DORIS_SUCCESS; - - std::string token = _master_info.token; - for (auto& src_backend : clone_req.src_backends) { - stringstream http_host_stream; - http_host_stream << "http://" << src_backend.host << ":" << src_backend.http_port; - string http_host = http_host_stream.str(); - // Make snapshot in remote olap engine - *src_host = src_backend; -#ifndef BE_TEST - AgentServerClient agent_client(*src_host); -#endif - TAgentResult make_snapshot_result; - status = DORIS_SUCCESS; - - LOG(INFO) << "pre make snapshot. 
backend_ip: " << src_host->host; - TSnapshotRequest snapshot_request; - snapshot_request.__set_tablet_id(clone_req.tablet_id); - snapshot_request.__set_schema_hash(clone_req.schema_hash); - if (missing_versions != NULL) { - // TODO: missing version composed of singleton delta. - // if not, this place should be rewrote. - vector snapshot_versions; - for (Version version : *missing_versions) { - snapshot_versions.push_back(version.first); - } - snapshot_request.__set_missing_version(snapshot_versions); - } -#ifndef BE_TEST - agent_client.make_snapshot( - snapshot_request, - &make_snapshot_result); -#else - _agent_client->make_snapshot( - snapshot_request, - &make_snapshot_result); -#endif - - if (make_snapshot_result.__isset.allow_incremental_clone) { - // During upgrading, some BE nodes still be installed an old previous old. - // which incremental clone is not ready in those nodes. - // should add a symbol to indicate it. - *allow_incremental_clone = make_snapshot_result.allow_incremental_clone; - } - if (make_snapshot_result.status.status_code == TStatusCode::OK) { - if (make_snapshot_result.__isset.snapshot_path) { - *src_file_path = make_snapshot_result.snapshot_path; - if (src_file_path->at(src_file_path->length() - 1) != '/') { - src_file_path->append("/"); - } - LOG(INFO) << "make snapshot success. backend_ip: " << src_host->host << ". src_file_path: " - << *src_file_path << ". signature: " << signature; - } else { - OLAP_LOG_WARNING("clone make snapshot success, " - "but get src file path failed. signature: %ld", - signature); - status = DORIS_ERROR; - continue; - } - } else { - LOG(WARNING) << "make snapshot failed. tablet_id: " << clone_req.tablet_id - << ". schema_hash: " << clone_req.schema_hash - << ". backend_ip: " << src_host->host - << ". backend_port: " << src_host->be_port << ". signature: " << signature; - error_msgs->push_back("make snapshot failed. backend_ip: " + src_host->host); - status = DORIS_ERROR; - continue; - } - - // Get remote and local full path - stringstream src_file_full_path_stream; - stringstream local_file_full_path_stream; - - if (status == DORIS_SUCCESS) { - src_file_full_path_stream << *src_file_path - << "/" << clone_req.tablet_id - << "/" << clone_req.schema_hash << "/"; - local_file_full_path_stream << local_data_path << "/"; - } - string src_file_full_path = src_file_full_path_stream.str(); - string local_file_full_path = local_file_full_path_stream.str(); - - // Check local path exist, if exist, remove it, then create the dir - if (status == DORIS_SUCCESS) { - boost::filesystem::path local_file_full_dir(local_file_full_path); - if (boost::filesystem::exists(local_file_full_dir)) { - boost::filesystem::remove_all(local_file_full_dir); - } - boost::filesystem::create_directories(local_file_full_dir); - } - - // Get remove dir file list - HttpClient client; - std::string remote_file_path = http_host + HTTP_REQUEST_PREFIX - + HTTP_REQUEST_TOKEN_PARAM + token - + HTTP_REQUEST_FILE_PARAM + src_file_full_path; - - string file_list_str; - auto list_files_cb = [&remote_file_path, &file_list_str] (HttpClient* client) { - RETURN_IF_ERROR(client->init(remote_file_path)); - client->set_timeout_ms(LIST_REMOTE_FILE_TIMEOUT * 1000); - RETURN_IF_ERROR(client->execute(&file_list_str)); - return Status::OK(); - }; - - Status download_status = HttpClient::execute_with_retry( - DOWNLOAD_FILE_MAX_RETRY, 1, list_files_cb); - if (!download_status.ok()) { - OLAP_LOG_WARNING("clone get remote file list failed over max time. 
backend_ip: %s, " - "src_file_path: %s, signature: %ld", - src_host->host.c_str(), - remote_file_path.c_str(), - signature); - status = DORIS_ERROR; - } - - vector file_name_list; - if (status == DORIS_SUCCESS) { - size_t start_position = 0; - size_t end_position = file_list_str.find("\n"); - - // Split file name from file_list_str - while (end_position != string::npos) { - string file_name = file_list_str.substr( - start_position, end_position - start_position); - // If the header file is not exist, the table could't loaded by olap engine. - // Avoid of data is not complete, we copy the header file at last. - // The header file's name is end of .hdr. - if (file_name.size() > 4 && file_name.substr(file_name.size() - 4, 4) == ".hdr") { - file_name_list.push_back(file_name); - } else { - file_name_list.insert(file_name_list.begin(), file_name); - } - - start_position = end_position + 1; - end_position = file_list_str.find("\n", start_position); - } - if (start_position != file_list_str.size()) { - string file_name = file_list_str.substr( - start_position, file_list_str.size() - start_position); - if (file_name.size() > 4 && file_name.substr(file_name.size() - 4, 4) == ".hdr") { - file_name_list.push_back(file_name); - } else { - file_name_list.insert(file_name_list.begin(), file_name); - } - } - } - - // Get copy from remote - uint64_t total_file_size = 0; - MonotonicStopWatch watch; - watch.start(); - for (auto& file_name : file_name_list) { - remote_file_path = http_host + HTTP_REQUEST_PREFIX - + HTTP_REQUEST_TOKEN_PARAM + token - + HTTP_REQUEST_FILE_PARAM + src_file_full_path + file_name; - - // get file length - uint64_t file_size = 0; - auto get_file_size_cb = [&remote_file_path, &file_size] (HttpClient* client) { - RETURN_IF_ERROR(client->init(remote_file_path)); - client->set_timeout_ms(GET_LENGTH_TIMEOUT * 1000); - RETURN_IF_ERROR(client->head()); - file_size = client->get_content_length(); - return Status::OK(); - }; - download_status = HttpClient::execute_with_retry( - DOWNLOAD_FILE_MAX_RETRY, 1, get_file_size_cb); - if (!download_status.ok()) { - LOG(WARNING) << "clone copy get file length failed over max time. remote_path=" - << remote_file_path - << ", signature=" << signature; - status = DORIS_ERROR; - break; - } - - total_file_size += file_size; - uint64_t estimate_timeout = file_size / config::download_low_speed_limit_kbps / 1024; - if (estimate_timeout < config::download_low_speed_time) { - estimate_timeout = config::download_low_speed_time; - } - - std::string local_file_path = local_file_full_path + file_name; - - auto download_cb = [&remote_file_path, - estimate_timeout, - &local_file_path, - file_size] (HttpClient* client) { - RETURN_IF_ERROR(client->init(remote_file_path)); - client->set_timeout_ms(estimate_timeout * 1000); - RETURN_IF_ERROR(client->download(local_file_path)); - - // Check file length - uint64_t local_file_size = boost::filesystem::file_size(local_file_path); - if (local_file_size != file_size) { - LOG(WARNING) << "download file length error" - << ", remote_path=" << remote_file_path - << ", file_size=" << file_size - << ", local_file_size=" << local_file_size; - return Status::InternalError("downloaded file size is not equal"); - } - chmod(local_file_path.c_str(), S_IRUSR | S_IWUSR); - return Status::OK(); - }; - download_status = HttpClient::execute_with_retry( - DOWNLOAD_FILE_MAX_RETRY, 1, download_cb); - if (!download_status.ok()) { - LOG(WARNING) << "download file failed over max retry." 
- << ", remote_path=" << remote_file_path - << ", signature=" << signature - << ", errormsg=" << download_status.get_error_msg(); - status = DORIS_ERROR; - break; - } - } // Clone files from remote backend - - uint64_t total_time_ms = watch.elapsed_time() / 1000 / 1000; - total_time_ms = total_time_ms > 0 ? total_time_ms : 0; - double copy_rate = 0.0; - if (total_time_ms > 0) { - copy_rate = total_file_size / ((double) total_time_ms) / 1000; - } - *copy_size = (int64_t) total_file_size; - *copy_time_ms = (int64_t) total_time_ms; - LOG(INFO) << "succeed to copy tablet " << signature - << ", total file size: " << total_file_size << " B" - << ", cost: " << total_time_ms << " ms" - << ", rate: " << copy_rate << " B/s"; - - // Release snapshot, if failed, ignore it. OLAP engine will drop useless snapshot - TAgentResult release_snapshot_result; -#ifndef BE_TEST - agent_client.release_snapshot( - make_snapshot_result.snapshot_path, - &release_snapshot_result); -#else - _agent_client->release_snapshot( - make_snapshot_result.snapshot_path, - &release_snapshot_result); -#endif - if (release_snapshot_result.status.status_code != TStatusCode::OK) { - LOG(WARNING) << "release snapshot failed. src_file_path: " << *src_file_path - << ". signature: " << signature; - } - - if (status == DORIS_SUCCESS) { - break; - } - } // clone copy from one backend - return status; -} - void* TaskWorkerPool::_storage_medium_migrate_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; @@ -1578,13 +1038,9 @@ void* TaskWorkerPool::_storage_medium_migrate_worker_thread_callback(void* arg_t TStatusCode::type status_code = TStatusCode::OK; vector error_msgs; TStatus task_status; - - OLAPStatus res = OLAPStatus::OLAP_SUCCESS; - res = worker_pool_this->_env->olap_engine()->storage_medium_migrate( - storage_medium_migrate_req.tablet_id, - storage_medium_migrate_req.schema_hash, - storage_medium_migrate_req.storage_medium); - if (res != OLAPStatus::OLAP_SUCCESS) { + EngineStorageMigrationTask engine_task(storage_medium_migrate_req); + OLAPStatus res = worker_pool_this->_env->storage_engine()->execute_task(&engine_task); + if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("storage media migrate failed. status: %d, signature: %ld", res, agent_task_req.signature); status_code = TStatusCode::RUNTIME_ERROR; @@ -1610,60 +1066,6 @@ void* TaskWorkerPool::_storage_medium_migrate_worker_thread_callback(void* arg_t return (void*)0; } -void* TaskWorkerPool::_cancel_delete_data_worker_thread_callback(void* arg_this) { - TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; - -#ifndef BE_TEST - while (true) { -#endif - TAgentTaskRequest agent_task_req; - TCancelDeleteDataReq cancel_delete_data_req; - { - lock_guard worker_thread_lock(worker_pool_this->_worker_thread_lock); - while (worker_pool_this->_tasks.empty()) { - worker_pool_this->_worker_thread_condition_lock.wait(); - } - - agent_task_req = worker_pool_this->_tasks.front(); - cancel_delete_data_req = agent_task_req.cancel_delete_data_req; - worker_pool_this->_tasks.pop_front(); - } - - LOG(INFO) << "get cancel delete data task. 
signature:" << agent_task_req.signature; - TStatusCode::type status_code = TStatusCode::OK; - vector error_msgs; - TStatus task_status; - - OLAPStatus cancel_delete_data_status = OLAPStatus::OLAP_SUCCESS; - cancel_delete_data_status = - worker_pool_this->_env->olap_engine()->cancel_delete(cancel_delete_data_req); - if (cancel_delete_data_status != OLAPStatus::OLAP_SUCCESS) { - OLAP_LOG_WARNING("cancel delete data failed. statusta: %d, signature: %ld", - cancel_delete_data_status, agent_task_req.signature); - status_code = TStatusCode::RUNTIME_ERROR; - } else { - LOG(INFO) << "cancel delete data success. status:" << cancel_delete_data_status - << ", signature:" << agent_task_req.signature; - } - - task_status.__set_status_code(status_code); - task_status.__set_error_msgs(error_msgs); - - TFinishTaskRequest finish_task_request; - finish_task_request.__set_backend(worker_pool_this->_backend); - finish_task_request.__set_task_type(agent_task_req.task_type); - finish_task_request.__set_signature(agent_task_req.signature); - finish_task_request.__set_task_status(task_status); - - worker_pool_this->_finish_task(finish_task_request); - worker_pool_this->_remove_task_info( - agent_task_req.task_type, agent_task_req.signature, ""); -#ifndef BE_TEST - } -#endif - return (void*)0; -} - void* TaskWorkerPool::_check_consistency_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; @@ -1689,15 +1091,14 @@ void* TaskWorkerPool::_check_consistency_worker_thread_callback(void* arg_this) vector error_msgs; TStatus task_status; - OLAPStatus res = OLAPStatus::OLAP_SUCCESS; uint32_t checksum = 0; - res = worker_pool_this->_env->olap_engine()->compute_checksum( - check_consistency_req.tablet_id, + EngineChecksumTask engine_task(check_consistency_req.tablet_id, check_consistency_req.schema_hash, check_consistency_req.version, check_consistency_req.version_hash, &checksum); - if (res != OLAPStatus::OLAP_SUCCESS) { + OLAPStatus res = worker_pool_this->_env->storage_engine()->execute_task(&engine_task); + if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("check consistency failed. 
status: %d, signature: %ld", res, agent_task_req.signature); status_code = TStatusCode::RUNTIME_ERROR; @@ -1778,11 +1179,11 @@ void* TaskWorkerPool::_report_disk_state_worker_thread_callback(void* arg_this) continue; } #endif - vector root_paths_info; - worker_pool_this->_env->olap_engine()->get_all_root_path_info(&root_paths_info); + vector data_dir_infos; + worker_pool_this->_env->storage_engine()->get_all_data_dir_info(&data_dir_infos); map disks; - for (auto& root_path_info : root_paths_info) { + for (auto& root_path_info : data_dir_infos) { TDisk disk; disk.__set_root_path(root_path_info.path); disk.__set_path_hash(root_path_info.path_hash); @@ -1813,7 +1214,7 @@ void* TaskWorkerPool::_report_disk_state_worker_thread_callback(void* arg_this) #ifndef BE_TEST // wait for notifying until timeout - OLAPEngine::get_instance()->wait_for_report_notify( + StorageEngine::instance()->wait_for_report_notify( config::report_disk_state_interval_seconds, false); } #endif @@ -1821,7 +1222,7 @@ void* TaskWorkerPool::_report_disk_state_worker_thread_callback(void* arg_this) return (void*)0; } -void* TaskWorkerPool::_report_olap_table_worker_thread_callback(void* arg_this) { +void* TaskWorkerPool::_report_tablet_worker_thread_callback(void* arg_this) { TaskWorkerPool* worker_pool_this = (TaskWorkerPool*)arg_this; TReportRequest request; @@ -1844,14 +1245,14 @@ void* TaskWorkerPool::_report_olap_table_worker_thread_callback(void* arg_this) request.__set_report_version(_s_report_version); OLAPStatus report_all_tablets_info_status = - worker_pool_this->_env->olap_engine()->report_all_tablets_info(&request.tablets); + StorageEngine::instance()->tablet_manager()->report_all_tablets_info(&request.tablets); if (report_all_tablets_info_status != OLAP_SUCCESS) { OLAP_LOG_WARNING("report get all tablets info failed. status: %d", report_all_tablets_info_status); #ifndef BE_TEST // wait for notifying until timeout - OLAPEngine::get_instance()->wait_for_report_notify( - config::report_olap_table_interval_seconds, true); + StorageEngine::instance()->wait_for_report_notify( + config::report_tablet_interval_seconds, true); continue; #else return (void*)0; @@ -1870,8 +1271,8 @@ void* TaskWorkerPool::_report_olap_table_worker_thread_callback(void* arg_this) #ifndef BE_TEST // wait for notifying until timeout - OLAPEngine::get_instance()->wait_for_report_notify( - config::report_olap_table_interval_seconds, true); + StorageEngine::instance()->wait_for_report_notify( + config::report_tablet_interval_seconds, true); } #endif @@ -2033,7 +1434,7 @@ void* TaskWorkerPool::_make_snapshot_thread_callback(void* arg_this) { string snapshot_path; std::vector snapshot_files; - OLAPStatus make_snapshot_status = worker_pool_this->_env->olap_engine()->make_snapshot( + OLAPStatus make_snapshot_status = SnapshotManager::instance()->make_snapshot( snapshot_request, &snapshot_path); if (make_snapshot_status != OLAP_SUCCESS) { status_code = TStatusCode::RUNTIME_ERROR; @@ -2118,7 +1519,7 @@ void* TaskWorkerPool::_release_snapshot_thread_callback(void* arg_this) { string& snapshot_path = release_snapshot_request.snapshot_path; OLAPStatus release_snapshot_status = - worker_pool_this->_env->olap_engine()->release_snapshot(snapshot_path); + SnapshotManager::instance()->release_snapshot(snapshot_path); if (release_snapshot_status != OLAP_SUCCESS) { status_code = TStatusCode::RUNTIME_ERROR; LOG(WARNING) << "release_snapshot failed. 
snapshot_path: " << snapshot_path @@ -2147,23 +1548,6 @@ void* TaskWorkerPool::_release_snapshot_thread_callback(void* arg_this) { return (void*)0; } -AlterTableStatus TaskWorkerPool::_show_alter_table_status( - TTabletId tablet_id, - TSchemaHash schema_hash) { - AlterTableStatus alter_table_status = - _env->olap_engine()->show_alter_table_status(tablet_id, schema_hash); - return alter_table_status; -} - -AgentStatus TaskWorkerPool::_drop_table(const TDropTabletReq& req) { - AgentStatus status = DORIS_SUCCESS; - OLAPStatus drop_status = _env->olap_engine()->drop_table(req.tablet_id, req.schema_hash); - if (drop_status != OLAP_SUCCESS && drop_status != OLAP_ERR_TABLE_NOT_FOUND) { - status = DORIS_ERROR; - } - return status; -} - AgentStatus TaskWorkerPool::_get_tablet_info( const TTabletId tablet_id, const TSchemaHash schema_hash, @@ -2173,7 +1557,7 @@ AgentStatus TaskWorkerPool::_get_tablet_info( tablet_info->__set_tablet_id(tablet_id); tablet_info->__set_schema_hash(schema_hash); - OLAPStatus olap_status = _env->olap_engine()->report_tablet_info(tablet_info); + OLAPStatus olap_status = StorageEngine::instance()->tablet_manager()->report_tablet_info(tablet_info); if (olap_status != OLAP_SUCCESS) { OLAP_LOG_WARNING("get tablet info failed. status: %d, signature: %ld", olap_status, signature); @@ -2256,9 +1640,9 @@ AgentStatus TaskWorkerPool::_move_dir( bool overwrite, std::vector* error_msgs) { - OLAPTablePtr tablet = _env->olap_engine()->get_table( + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( tablet_id, schema_hash); - if (tablet.get() == NULL) { + if (tablet == nullptr) { LOG(INFO) << "failed to get tablet. tablet_id:" << tablet_id << ", schema hash:" << schema_hash; error_msgs->push_back("failed to get tablet"); @@ -2266,20 +1650,8 @@ AgentStatus TaskWorkerPool::_move_dir( } std::string dest_tablet_dir = tablet->tablet_path(); - std::string store_path = tablet->store()->path(); - - // same as finish_clone() in OlapEngine, lock them all - tablet->obtain_base_compaction_lock(); - tablet->obtain_cumulative_lock(); - tablet->obtain_push_lock(); - tablet->obtain_header_wrlock(); SnapshotLoader loader(_env, job_id, tablet_id); - Status status = loader.move(src, dest_tablet_dir, store_path, overwrite); - // unlock - tablet->release_header_lock(); - tablet->release_push_lock(); - tablet->release_cumulative_lock(); - tablet->release_base_compaction_lock(); + Status status = loader.move(src, tablet, overwrite); if (!status.ok()) { LOG(WARNING) << "move failed. job id: " << job_id << ", msg: " << status.get_error_msg(); @@ -2287,17 +1659,6 @@ AgentStatus TaskWorkerPool::_move_dir( return DORIS_INTERNAL_ERROR; } - // reload tablet - OLAPStatus ost = OLAPEngine::get_instance()->load_one_tablet( - tablet->store(), tablet_id, schema_hash, dest_tablet_dir, true); - if (ost != OLAP_SUCCESS) { - std::stringstream ss; - ss << "failed to reload tablet: " << tablet_id; - LOG(WARNING) << ss.str(); - error_msgs->push_back(ss.str()); - return DORIS_INTERNAL_ERROR; - } - LOG(INFO) << "finished to reload tablet: " << tablet_id << " after move dir"; return DORIS_SUCCESS; } @@ -2327,7 +1688,7 @@ void* TaskWorkerPool::_recover_tablet_thread_callback(void* arg_this) { LOG(INFO) << "begin to recover tablet." << ", tablet_id:" << recover_tablet_req.tablet_id << "." 
<< recover_tablet_req.schema_hash << ", version:" << recover_tablet_req.version << "-" << recover_tablet_req.version_hash; - OLAPStatus status = worker_pool_this->_env->olap_engine()->recover_tablet_until_specfic_version(recover_tablet_req); + OLAPStatus status = worker_pool_this->_env->storage_engine()->recover_tablet_until_specfic_version(recover_tablet_req); if (status != OLAP_SUCCESS) { status_code = TStatusCode::RUNTIME_ERROR; LOG(WARNING) << "failed to recover tablet." diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h index a3866bc6ba6959..ac55859a76654d 100644 --- a/be/src/agent/task_worker_pool.h +++ b/be/src/agent/task_worker_pool.h @@ -26,13 +26,12 @@ #include #include #include -#include "agent/pusher.h" #include "agent/status.h" #include "agent/utils.h" #include "gen_cpp/AgentService_types.h" #include "gen_cpp/HeartbeatService_types.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "olap/utils.h" namespace doris { @@ -54,7 +53,6 @@ class TaskWorkerPool { QUERY_SPLIT_KEY, CLONE, STORAGE_MEDIUM_MIGRATE, - CANCEL_DELETE_DATA, CHECK_CONSISTENCY, REPORT_TASK, REPORT_DISK_STATE, @@ -94,20 +92,19 @@ class TaskWorkerPool { uint32_t _get_next_task_index(int32_t thread_count, std::deque& tasks, TPriority::type priority); - static void* _create_table_worker_thread_callback(void* arg_this); - static void* _drop_table_worker_thread_callback(void* arg_this); + static void* _create_tablet_worker_thread_callback(void* arg_this); + static void* _drop_tablet_worker_thread_callback(void* arg_this); static void* _push_worker_thread_callback(void* arg_this); static void* _publish_version_worker_thread_callback(void* arg_this); static void* _clear_alter_task_worker_thread_callback(void* arg_this); static void* _clear_transaction_task_worker_thread_callback(void* arg_this); - static void* _alter_table_worker_thread_callback(void* arg_this); + static void* _alter_tablet_worker_thread_callback(void* arg_this); static void* _clone_worker_thread_callback(void* arg_this); static void* _storage_medium_migrate_worker_thread_callback(void* arg_this); - static void* _cancel_delete_data_worker_thread_callback(void* arg_this); static void* _check_consistency_worker_thread_callback(void* arg_this); static void* _report_task_worker_thread_callback(void* arg_this); static void* _report_disk_state_worker_thread_callback(void* arg_this); - static void* _report_olap_table_worker_thread_callback(void* arg_this); + static void* _report_tablet_worker_thread_callback(void* arg_this); static void* _upload_worker_thread_callback(void* arg_this); static void* _download_worker_thread_callback(void* arg_this); static void* _make_snapshot_thread_callback(void* arg_this); @@ -115,30 +112,13 @@ class TaskWorkerPool { static void* _move_dir_thread_callback(void* arg_this); static void* _recover_tablet_thread_callback(void* arg_this); - AgentStatus _clone_copy( - const TCloneReq& clone_req, - int64_t signature, - const std::string& local_data_path, - TBackend* src_host, - std::string* src_file_path, - std::vector* error_msgs, - const std::vector* missing_versions, - bool* allow_incremental_clone, - int64_t* copy_size, - int64_t* copy_time_ms); - - void _alter_table( + void _alter_tablet( + TaskWorkerPool* worker_pool_this, const TAlterTabletReq& create_rollup_request, int64_t signature, const TTaskType::type task_type, TFinishTaskRequest* finish_task_request); - AlterTableStatus _show_alter_table_status( - const TTabletId tablet_id, - const 
TSchemaHash schema_hash); - - AgentStatus _drop_table(const TDropTabletReq& drop_tablet_req); - AgentStatus _get_tablet_info( const TTabletId tablet_id, const TSchemaHash schema_hash, @@ -160,7 +140,6 @@ class TaskWorkerPool { ExecEnv* _env; #ifdef BE_TEST AgentServerClient* _agent_client; - Pusher * _pusher; #endif std::deque _tasks; diff --git a/be/src/common/config.h b/be/src/common/config.h index 7283d4ac045a88..e01ecf2baa50af 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -55,9 +55,9 @@ namespace config { // the count of heart beat service CONF_Int32(heartbeat_service_thread_count, "1"); // the count of thread to create table - CONF_Int32(create_table_worker_count, "3"); + CONF_Int32(create_tablet_worker_count, "3"); // the count of thread to drop table - CONF_Int32(drop_table_worker_count, "3"); + CONF_Int32(drop_tablet_worker_count, "3"); // the count of thread to batch load CONF_Int32(push_worker_count_normal_priority, "3"); // the count of thread to high priority batch load @@ -71,13 +71,11 @@ namespace config { // the count of thread to delete CONF_Int32(delete_worker_count, "3"); // the count of thread to alter table - CONF_Int32(alter_table_worker_count, "3"); + CONF_Int32(alter_tablet_worker_count, "3"); // the count of thread to clone CONF_Int32(clone_worker_count, "3"); // the count of thread to clone CONF_Int32(storage_medium_migrate_count, "1"); - // the count of thread to cancel delete data - CONF_Int32(cancel_delete_data_worker_count, "3"); // the count of thread to check consistency CONF_Int32(check_consistency_worker_count, "1"); // the count of thread to upload @@ -93,9 +91,9 @@ namespace config { // the interval time(seconds) for agent report disk state to FE CONF_Int32(report_disk_state_interval_seconds, "60"); // the interval time(seconds) for agent report olap table to FE - CONF_Int32(report_olap_table_interval_seconds, "60"); + CONF_Int32(report_tablet_interval_seconds, "60"); // the timeout(seconds) for alter table - CONF_Int32(alter_table_timeout_seconds, "86400"); + CONF_Int32(alter_tablet_timeout_seconds, "86400"); // the timeout(seconds) for make snapshot CONF_Int32(make_snapshot_timeout_seconds, "600"); // the timeout(seconds) for release snapshot @@ -203,16 +201,16 @@ namespace config { CONF_Int32(file_descriptor_cache_clean_interval, "3600"); CONF_Int32(disk_stat_monitor_interval, "5"); - CONF_Int32(unused_index_monitor_interval, "30"); - CONF_String(storage_root_path, "${DORIS_HOME}/data"); + CONF_Int32(unused_rowset_monitor_interval, "30"); + CONF_String(storage_root_path, "${DORIS_HOME}/storage"); CONF_Int32(min_percentage_of_error_disk, "50"); CONF_Int32(default_num_rows_per_data_block, "1024"); CONF_Int32(default_num_rows_per_column_file_block, "1024"); CONF_Int32(max_tablet_num_per_shard, "1024"); // pending data policy CONF_Int32(pending_data_expire_time_sec, "1800"); - // incremental delta policy - CONF_Int32(incremental_delta_expire_time_sec, "1800"); + // inc_rowset expired interval + CONF_Int32(inc_rowset_expired_sec, "1800"); // garbage sweep policy CONF_Int32(max_garbage_sweep_interval, "43200"); CONF_Int32(min_garbage_sweep_interval, "200"); @@ -416,6 +414,12 @@ namespace config { // Dir to save files downloaded by SmallFileMgr CONF_String(small_file_dir, "${DORIS_HOME}/lib/small_file/"); + // path gc + CONF_Bool(path_gc_check, "true"); + CONF_Int32(path_gc_check_interval_second, "86400"); + CONF_Int32(path_gc_check_step, "1000"); + CONF_Int32(path_gc_check_step_interval_ms, "10"); + CONF_Int32(path_scan_interval_second, 
"86400"); } // namespace config } // namespace doris diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index c2b1945013e07a..7a1d08e9568db3 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -56,8 +56,8 @@ set(EXEC_FILES olap_scanner.cpp olap_meta_reader.cpp olap_common.cpp - olap_table_info.cpp - olap_table_sink.cpp + tablet_info.cpp + tablet_sink.cpp plain_text_line_reader.cpp csv_scan_node.cpp csv_scanner.cpp diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index 026f7382f82e49..4746b20348f2cd 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -22,7 +22,7 @@ #include #include "exec/exec_node.h" -#include "exec/olap_table_sink.h" +#include "exec/tablet_sink.h" #include "exprs/expr.h" #include "gen_cpp/PaloInternalService_types.h" #include "runtime/data_stream_sender.h" diff --git a/be/src/exec/olap_meta_reader.cpp b/be/src/exec/olap_meta_reader.cpp index 191a2d29ee9c67..cebd9e6656f33d 100644 --- a/be/src/exec/olap_meta_reader.cpp +++ b/be/src/exec/olap_meta_reader.cpp @@ -43,9 +43,9 @@ Status EngineMetaReader::get_hints( auto tablet_id = scan_range->scan_range().tablet_id; int32_t schema_hash = strtoul(scan_range->scan_range().schema_hash.c_str(), NULL, 10); std::string err; - OLAPTablePtr table = OLAPEngine::get_instance()->get_table( + TabletSharedPtr table = StorageEngine::instance()->tablet_manager()->get_tablet( tablet_id, schema_hash, true, &err); - if (table.get() == NULL) { + if (table == nullptr) { std::stringstream ss; ss << "failed to get tablet: " << tablet_id << "with schema hash: " << schema_hash << ", reason: " << err; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index cde573a1828199..5fe1c3b3e6c51c 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -356,6 +356,20 @@ Status OlapScanNode::close(RuntimeState* state) { return ScanNode::close(state); } +// PlanFragmentExecutor will call this method to set scan range +// Doris scan range is defined in thrift file like this +// struct TPaloScanRange { +// 1: required list hosts +// 2: required string schema_hash +// 3: required string version +// 4: required string version_hash +// 5: required Types.TTabletId tablet_id +// 6: required string db_name +// 7: optional list partition_column_ranges +// 8: optional string index_name +// 9: optional string table_name +//} +// every doris_scan_range is related with one tablet so that one olap scan node contains multiple tablet Status OlapScanNode::set_scan_ranges(const std::vector& scan_ranges) { for (auto& scan_range : scan_ranges) { DCHECK(scan_range.scan_range.__isset.palo_scan_range); @@ -379,7 +393,7 @@ Status OlapScanNode::start_scan(RuntimeState* state) { RETURN_IF_ERROR(normalize_conjuncts()); VLOG(1) << "BuildOlapFilters"; - // 2. Using ColumnValueRange to Build OlapEngine filters + // 2. Using ColumnValueRange to Build StorageEngine filters RETURN_IF_ERROR(build_olap_filters()); VLOG(1) << "SelectScanRanges"; @@ -391,7 +405,7 @@ Status OlapScanNode::start_scan(RuntimeState* state) { RETURN_IF_ERROR(build_scan_key()); VLOG(1) << "SplitScanRange"; - // 5. Query OlapEngine to split `Sub ScanRange` to serval `Sub Sub ScanRange` + // 5. 
Query StorageEngine to split `Sub ScanRange` to serval `Sub Sub ScanRange` RETURN_IF_ERROR(split_scan_range()); VLOG(1) << "StartScanThread"; @@ -588,6 +602,8 @@ Status OlapScanNode::split_scan_range() { std::vector sub_ranges; VLOG(1) << "_doris_scan_ranges.size()=" << _doris_scan_ranges.size(); + // doris scan range is related with one tablet + // split scan range for every tablet for (auto scan_range : _doris_scan_ranges) { sub_ranges.clear(); RETURN_IF_ERROR(get_sub_scan_range(scan_range, &sub_ranges)); @@ -597,6 +613,7 @@ Status OlapScanNode::split_scan_range() { << sub_range.begin_scan_range << " : " << sub_range.end_scan_range << (sub_range.end_include ? "]" : ")"); + // just to get sub_range related scan_range? why not create a object? _query_key_ranges.push_back(sub_range); _query_scan_ranges.push_back(scan_range); } @@ -729,7 +746,7 @@ Status OlapScanNode::normalize_in_predicate(SlotDescriptor* slot, ColumnValueRan // 1.3 Push InPredicate value into ColumnValueRange HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin(); while (iter->has_next()) { - // column in (NULL,...) counldn't push down to OlapEngine + // column in (NULL,...) counldn't push down to StorageEngine // so that discard whole ColumnValueRange if (NULL == iter->get_value()) { range->clear(); diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 97257922f3642a..52b864f7221f8e 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -80,39 +80,54 @@ Status OlapScanner::_prepare( strtoul(scan_range->scan_range().version_hash.c_str(), nullptr, 10); { std::string err; - _olap_table = OLAPEngine::get_instance()->get_table(tablet_id, schema_hash, true, &err); - if (_olap_table.get() == nullptr) { + _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash, true, &err); + if (_tablet.get() == nullptr) { std::stringstream ss; - ss << "failed to get tablet: " << tablet_id << " with schema hash: " << schema_hash - << ", reason: " << err; + ss << "failed to get tablet. tablet_id=" << tablet_id + << ", with schema_hash=" << schema_hash + << ", reason=" << err; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } { - ReadLock rdlock(_olap_table->get_header_lock_ptr()); - const PDelta* delta = _olap_table->lastest_version(); - if (delta == NULL) { + ReadLock rdlock(_tablet->get_header_lock_ptr()); + const RowsetSharedPtr rowset = _tablet->rowset_with_max_version(); + if (rowset == nullptr) { std::stringstream ss; ss << "fail to get latest version of tablet: " << tablet_id; - OLAP_LOG_WARNING(ss.str().c_str()); + LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } - if (delta->end_version() == _version - && delta->version_hash() != version_hash) { - OLAP_LOG_WARNING("fail to check latest version hash. " - "[tablet_id=%ld version_hash=%ld request_version_hash=%ld]", - tablet_id, delta->version_hash(), version_hash); + if (rowset->end_version() == _version + && rowset->version_hash() != version_hash) { + LOG(WARNING) << "fail to check latest version hash. 
" + << " tablet_id=" << tablet_id + << " version_hash=" << rowset->version_hash() + << " request_version_hash=" << version_hash; std::stringstream ss; ss << "fail to check version hash of tablet: " << tablet_id; return Status::InternalError(ss.str()); } + + // acquire tablet rowset readers at the beginning of the scan node + // to prevent this case: when there are lots of olap scanners to run for example 10000 + // the rowsets maybe compacted when the last olap scanner starts + Version rd_version(0, _version); + OLAPStatus acquire_reader_st = _tablet->capture_rs_readers(rd_version, &_params.rs_readers); + if (acquire_reader_st != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; + std::stringstream ss; + ss << "failed to initialize storage reader. tablet=" << _tablet->full_name() + << ", res=" << acquire_reader_st << ", backend=" << BackendOptions::get_localhost(); + return Status::InternalError(ss.str().c_str()); + } } } - - // Initialize _params + { + // Initialize _params RETURN_IF_ERROR(_init_params(key_ranges, filters, is_nulls)); } @@ -131,20 +146,21 @@ Status OlapScanner::open() { if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to init reader.[res=%d]", res); std::stringstream ss; - ss << "failed to initialize storage reader. tablet=" << _params.olap_table->full_name() + ss << "failed to initialize storage reader. tablet=" << _params.tablet->full_name() << ", res=" << res << ", backend=" << BackendOptions::get_localhost(); return Status::InternalError(ss.str().c_str()); } return Status::OK(); } +// it will be called under tablet read lock because capture rs readers need Status OlapScanner::_init_params( const std::vector& key_ranges, const std::vector& filters, const std::vector& is_nulls) { RETURN_IF_ERROR(_init_return_columns()); - _params.olap_table = _olap_table; + _params.tablet = _tablet; _params.reader_type = READER_QUERY; _params.aggregation = _aggregation; _params.version = Version(0, _version); @@ -177,11 +193,11 @@ Status OlapScanner::_init_params( if (_aggregation) { _params.return_columns = _return_columns; } else { - for (size_t i = 0; i < _olap_table->num_key_fields(); ++i) { + for (size_t i = 0; i < _tablet->num_key_columns(); ++i) { _params.return_columns.push_back(i); } for (auto index : _return_columns) { - if (_olap_table->tablet_schema()[index].is_key) { + if (_tablet->tablet_schema().column(index).is_key()) { continue; } else { _params.return_columns.push_back(index); @@ -190,12 +206,12 @@ Status OlapScanner::_init_params( } // use _params.return_columns, because reader use this to merge sort - OLAPStatus res = _read_row_cursor.init(_olap_table->tablet_schema(), _params.return_columns); + OLAPStatus res = _read_row_cursor.init(_tablet->tablet_schema(), _params.return_columns); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to init row cursor.[res=%d]", res); return Status::InternalError("failed to initialize storage read row cursor"); } - _read_row_cursor.allocate_memory_for_string_type(_olap_table->tablet_schema()); + _read_row_cursor.allocate_memory_for_string_type(_tablet->tablet_schema()); for (auto cid : _return_columns) { _query_fields.push_back(_read_row_cursor.get_field_by_index(cid)); } @@ -208,7 +224,7 @@ Status OlapScanner::_init_return_columns() { if (!slot->is_materialized()) { continue; } - int32_t index = _olap_table->get_field_index(slot->col_name()); + int32_t index = _tablet->field_index(slot->col_name()); if (index < 0) { std::stringstream ss; ss << "field name is invalied. 
field=" << slot->col_name(); @@ -216,12 +232,13 @@ Status OlapScanner::_init_return_columns() { return Status::InternalError(ss.str()); } _return_columns.push_back(index); - if (_olap_table->tablet_schema()[index].type == OLAP_FIELD_TYPE_VARCHAR || - _olap_table->tablet_schema()[index].type == OLAP_FIELD_TYPE_HLL) { + const TabletColumn& column = _tablet->tablet_schema().column(index); + if (column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { _request_columns_size.push_back( - _olap_table->tablet_schema()[index].length - sizeof(StringLengthType)); + column.length() - sizeof(StringLengthType)); } else { - _request_columns_size.push_back(_olap_table->tablet_schema()[index].length); + _request_columns_size.push_back(column.length()); } _query_slots.push_back(slot); } @@ -477,6 +494,13 @@ Status OlapScanner::close(RuntimeState* state) { if (_is_closed) { return Status::OK(); } + // olap scan node will call scanner.close() when finished + // will release resources here + // if not clear rowset readers in read_params here + // readers will be release when runtime state deconstructed but + // deconstructor in reader references runtime state + // so that it will core + _params.rs_readers.clear(); update_counter(); _reader.reset(); Expr::close(_conjunct_ctxs, state); diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index 8e78eca90eb4b0..05ce18777a6644 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -35,9 +35,9 @@ #include "runtime/vectorized_row_batch.h" #include "olap/delete_handler.h" -#include "olap/column_data.h" +#include "olap/rowset/column_data.h" #include "olap/olap_cond.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "olap/reader.h" namespace doris { @@ -118,7 +118,7 @@ class OlapScanner { ReaderParams _params; std::unique_ptr _reader; - OLAPTablePtr _olap_table; + TabletSharedPtr _tablet; int64_t _version; std::vector _return_columns; diff --git a/be/src/exec/parquet_reader.h b/be/src/exec/parquet_reader.h index e3496976f4944f..defe3d9ebf09fd 100644 --- a/be/src/exec/parquet_reader.h +++ b/be/src/exec/parquet_reader.h @@ -37,7 +37,6 @@ #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/PlanNodes_types.h" - namespace doris { class ExecEnv; diff --git a/be/src/exec/schema_scanner/schema_tables_scanner.cpp b/be/src/exec/schema_scanner/schema_tables_scanner.cpp index de9340a973a573..034d514174ad1c 100644 --- a/be/src/exec/schema_scanner/schema_tables_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_tables_scanner.cpp @@ -174,7 +174,7 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) { { tuple->set_null(_tuple_desc->slots()[13]->null_indicator_offset()); } - // create_time + // creation_time { tuple->set_null(_tuple_desc->slots()[14]->null_indicator_offset()); } diff --git a/be/src/exec/olap_table_info.cpp b/be/src/exec/tablet_info.cpp similarity index 99% rename from be/src/exec/olap_table_info.cpp rename to be/src/exec/tablet_info.cpp index 9bee59d996115d..c4fc5b3b5cc996 100644 --- a/be/src/exec/olap_table_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/olap_table_info.h" +#include "exec/tablet_info.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" diff --git a/be/src/exec/olap_table_info.h b/be/src/exec/tablet_info.h similarity index 100% rename from be/src/exec/olap_table_info.h rename to be/src/exec/tablet_info.h diff --git a/be/src/exec/olap_table_sink.cpp b/be/src/exec/tablet_sink.cpp similarity index 99% rename from be/src/exec/olap_table_sink.cpp rename to be/src/exec/tablet_sink.cpp index d605be83b82a9c..9369c7370963f9 100644 --- a/be/src/exec/olap_table_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "exec/olap_table_sink.h" +#include "exec/tablet_sink.h" #include diff --git a/be/src/exec/olap_table_sink.h b/be/src/exec/tablet_sink.h similarity index 98% rename from be/src/exec/olap_table_sink.h rename to be/src/exec/tablet_sink.h index d4856bc0067496..3bd5941f3ce915 100644 --- a/be/src/exec/olap_table_sink.h +++ b/be/src/exec/tablet_sink.h @@ -27,7 +27,7 @@ #include "common/status.h" #include "common/object_pool.h" #include "exec/data_sink.h" -#include "exec/olap_table_info.h" +#include "exec/tablet_info.h" #include "gen_cpp/Types_types.h" #include "gen_cpp/internal_service.pb.h" #include "gen_cpp/palo_internal_service.pb.h" @@ -143,7 +143,7 @@ class IndexChannel { }; // write data to Olap Table. -// this class distributed data according to +// this class distributed data according to class OlapTableSink : public DataSink { public: // Construct from thrift struct which is generated by FE. diff --git a/be/src/http/action/checksum_action.cpp b/be/src/http/action/checksum_action.cpp index 39721e74f29341..603512e673583d 100644 --- a/be/src/http/action/checksum_action.cpp +++ b/be/src/http/action/checksum_action.cpp @@ -30,7 +30,8 @@ #include "http/http_response.h" #include "http/http_status.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" +#include "olap/task/engine_checksum_task.h" #include "runtime/exec_env.h" namespace doris { @@ -125,11 +126,11 @@ void ChecksumAction::handle(HttpRequest *req) { int64_t ChecksumAction::do_checksum(int64_t tablet_id, int64_t version, int64_t version_hash, int32_t schema_hash, HttpRequest *req) { - OLAPStatus res = OLAPStatus::OLAP_SUCCESS; + OLAPStatus res = OLAP_SUCCESS; uint32_t checksum; - res = _exec_env->olap_engine()->compute_checksum( - tablet_id, schema_hash, version, version_hash, &checksum); - if (res != OLAPStatus::OLAP_SUCCESS) { + EngineChecksumTask engine_task(tablet_id, schema_hash, version, version_hash, &checksum); + res = engine_task.execute(); + if (res != OLAP_SUCCESS) { LOG(WARNING) << "checksum failed. 
status: " << res << ", signature: " << tablet_id; return -1L; diff --git a/be/src/http/action/meta_action.cpp b/be/src/http/action/meta_action.cpp index 7f32c60fce14a0..e07786444a35a5 100644 --- a/be/src/http/action/meta_action.cpp +++ b/be/src/http/action/meta_action.cpp @@ -26,11 +26,11 @@ #include "http/http_headers.h" #include "http/http_status.h" -#include "olap/olap_header_manager.h" -#include "olap/olap_engine.h" +#include "olap/tablet_meta_manager.h" +#include "olap/storage_engine.h" #include "olap/olap_define.h" -#include "olap/olap_header.h" -#include "olap/olap_table.h" +#include "olap/tablet_meta.h" +#include "olap/tablet.h" #include "common/logging.h" #include "util/json_util.h" @@ -38,7 +38,7 @@ namespace doris { const static std::string HEADER_JSON = "application/json"; -Status MetaAction::_handle_header(HttpRequest *req, std::string* json_header) { +Status MetaAction::_handle_header(HttpRequest *req, std::string* json_meta) { req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); std::string req_tablet_id = req->param(TABLET_ID_KEY); std::string req_schema_hash = req->param(TABLET_SCHEMA_HASH_KEY); @@ -49,12 +49,12 @@ Status MetaAction::_handle_header(HttpRequest *req, std::string* json_header) { } uint64_t tablet_id = std::stoull(req_tablet_id); uint32_t schema_hash = std::stoul(req_schema_hash); - OLAPTablePtr olap_table = OLAPEngine::get_instance()->get_table(tablet_id, schema_hash); - if (olap_table == nullptr) { + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + if (tablet == nullptr) { LOG(WARNING) << "no tablet for tablet_id:" << tablet_id << " schema hash:" << schema_hash; return Status::InternalError("no tablet exist"); } - OLAPStatus s = OlapHeaderManager::get_json_header(olap_table->store(), tablet_id, schema_hash, json_header); + OLAPStatus s = TabletMetaManager::get_json_meta(tablet->data_dir(), tablet_id, schema_hash, json_meta); if (s == OLAP_ERR_META_KEY_NOT_FOUND) { return Status::InternalError("no header exist"); } else if (s != OLAP_SUCCESS) { @@ -65,12 +65,12 @@ Status MetaAction::_handle_header(HttpRequest *req, std::string* json_header) { void MetaAction::handle(HttpRequest *req) { if (_meta_type == META_TYPE::HEADER) { - std::string json_header; - Status status = _handle_header(req, &json_header); + std::string json_meta; + Status status = _handle_header(req, &json_meta); std::string status_result = to_json(status); LOG(INFO) << "handle request result:" << status_result; if (status.ok()) { - HttpChannel::send_reply(req, HttpStatus::OK, json_header); + HttpChannel::send_reply(req, HttpStatus::OK, json_meta); } else { HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, status_result); } diff --git a/be/src/http/action/reload_tablet_action.cpp b/be/src/http/action/reload_tablet_action.cpp index c66a5f5c283278..fc4a216c4185f2 100644 --- a/be/src/http/action/reload_tablet_action.cpp +++ b/be/src/http/action/reload_tablet_action.cpp @@ -30,7 +30,7 @@ #include "http/http_response.h" #include "http/http_status.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "runtime/exec_env.h" namespace doris { @@ -103,7 +103,7 @@ void ReloadTabletAction::reload( clone_req.__set_schema_hash(schema_hash); OLAPStatus res = OLAPStatus::OLAP_SUCCESS; - res = _exec_env->olap_engine()->load_header(path, clone_req); + res = _exec_env->storage_engine()->load_header(path, clone_req); if (res != OLAPStatus::OLAP_SUCCESS) { LOG(WARNING) << 
"load header failed. status: " << res << ", signature: " << tablet_id; diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index adbb02fe3f4a9c..745fa106bcb7d8 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -31,11 +31,11 @@ #include "http/http_status.h" #include "util/file_utils.h" #include "olap/utils.h" -#include "olap/olap_header.h" +#include "olap/tablet_meta.h" #include "util/json_util.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" -#include "olap/store.h" +#include "olap/storage_engine.h" +#include "olap/data_dir.h" #include "runtime/exec_env.h" using boost::filesystem::path; @@ -84,8 +84,8 @@ Status RestoreTabletAction::_handle(HttpRequest *req) { int32_t schema_hash = std::atoi(schema_hash_str.c_str()); LOG(INFO) << "get restore tablet action request: " << tablet_id << "-" << schema_hash; - OLAPTablePtr tablet = - OLAPEngine::get_instance()->get_table(tablet_id, schema_hash); + TabletSharedPtr tablet = + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); if (tablet.get() != nullptr) { LOG(WARNING) << "find tablet. tablet_id=" << tablet_id << " schema_hash=" << schema_hash; return Status::InternalError("tablet already exists, can not restore."); @@ -115,7 +115,7 @@ Status RestoreTabletAction::_reload_tablet( clone_req.__set_tablet_id(tablet_id); clone_req.__set_schema_hash(schema_hash); OLAPStatus res = OLAPStatus::OLAP_SUCCESS; - res = _exec_env->olap_engine()->load_header(shard_path, clone_req); + res = _exec_env->storage_engine()->load_header(shard_path, clone_req); if (res != OLAPStatus::OLAP_SUCCESS) { LOG(WARNING) << "load header failed. status: " << res << ", signature: " << tablet_id; @@ -163,8 +163,8 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, } LOG(INFO) << "tablet path in trash:" << latest_tablet_path; std::string original_header_path = latest_tablet_path + "/" + std::to_string(tablet_id) +".hdr"; - OLAPHeader header(original_header_path); - OLAPStatus load_status = header.load_and_init(); + TabletMeta tablet_meta; + OLAPStatus load_status = tablet_meta.create_from_file(original_header_path); if (load_status != OLAP_SUCCESS) { LOG(WARNING) << "header load and init error, header path:" << original_header_path; return Status::InternalError("load header failed"); @@ -176,9 +176,9 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, _tablet_path_map[key] = latest_tablet_path; } - std::string root_path = OlapStore::get_root_path_from_schema_hash_path_in_trash(latest_tablet_path); - OlapStore* store = OLAPEngine::get_instance()->get_store(root_path); - std::string restore_schema_hash_path = store->get_tablet_schema_hash_path_from_header(&header); + std::string root_path = DataDir::get_root_path_from_schema_hash_path_in_trash(latest_tablet_path); + DataDir* store = StorageEngine::instance()->get_store(root_path); + std::string restore_schema_hash_path = store->get_absolute_tablet_path(&tablet_meta, true); Status s = FileUtils::create_dir(restore_schema_hash_path); if (!s.ok()) { LOG(WARNING) << "create tablet path failed:" << restore_schema_hash_path; @@ -198,7 +198,7 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, if (link_ret != 0) { LOG(WARNING) << "link from:" << from << " to:" << to << " failed, link ret:" << link_ret; - std::string restore_tablet_path = store->get_tablet_path_from_header(&header); + 
std::string restore_tablet_path = store->get_absolute_tablet_path(&tablet_meta, false); LOG(WARNING) << "remove tablet_path:" << restore_tablet_path; Status s = FileUtils::remove_all(restore_tablet_path); if (!s.ok()) { @@ -207,7 +207,7 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, return Status::InternalError("create link path failed"); } } - std::string restore_shard_path = store->get_shard_path_from_header(std::to_string(header.shard())); + std::string restore_shard_path = store->get_absolute_shard_path(std::to_string(tablet_meta.shard_id())); Status status = _reload_tablet(key, restore_shard_path, tablet_id, schema_hash); return status; } @@ -215,7 +215,7 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, bool RestoreTabletAction::_get_latest_tablet_path_from_trash( int64_t tablet_id, int32_t schema_hash, std::string* path) { std::vector tablet_paths; - std::vector stores = OLAPEngine::get_instance()->get_stores(); + std::vector stores = StorageEngine::instance()->get_stores(); for (auto& store : stores) { store->find_tablet_in_trash(tablet_id, &tablet_paths); } diff --git a/be/src/http/action/snapshot_action.cpp b/be/src/http/action/snapshot_action.cpp index 1b79ea5645970e..a294eacb511fc3 100644 --- a/be/src/http/action/snapshot_action.cpp +++ b/be/src/http/action/snapshot_action.cpp @@ -32,7 +32,8 @@ #include "http/http_status.h" #include "runtime/exec_env.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" +#include "olap/snapshot_manager.h" namespace doris { @@ -104,7 +105,7 @@ int64_t SnapshotAction::make_snapshot(int64_t tablet_id, int32_t schema_hash, request.schema_hash = schema_hash; OLAPStatus res = OLAPStatus::OLAP_SUCCESS; - res = _exec_env->olap_engine()->make_snapshot(request, snapshot_path); + res = SnapshotManager::instance()->make_snapshot(request, snapshot_path); if (res != OLAPStatus::OLAP_SUCCESS) { LOG(WARNING) << "make snapshot failed. 
status: " << res << ", signature: " << tablet_id; diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 8af95703b49d91..07a0a29502a684 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -21,22 +21,18 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/olap") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap") +add_subdirectory(rowset) + add_library(Olap STATIC aggregate_func.cpp base_compaction.cpp - bit_field_reader.cpp - bit_field_writer.cpp bloom_filter.hpp bloom_filter_reader.cpp bloom_filter_writer.cpp byte_buffer.cpp - column_data.cpp - column_reader.cpp - column_writer.cpp comparison_predicate.cpp compress.cpp cumulative_compaction.cpp - data_writer.cpp delete_handler.cpp delta_writer.cpp field.cpp @@ -52,34 +48,31 @@ add_library(Olap STATIC new_status.cpp null_predicate.cpp olap_cond.cpp - olap_engine.cpp - olap_header.cpp - olap_header_manager.cpp olap_index.cpp olap_meta.cpp olap_server.cpp - olap_snapshot.cpp - olap_table.cpp options.cpp out_stream.cpp push_handler.cpp reader.cpp row_block.cpp row_cursor.cpp - segment_group.cpp - run_length_byte_reader.cpp - run_length_byte_writer.cpp - run_length_integer_reader.cpp - run_length_integer_writer.cpp + rowset_graph.cpp schema_change.cpp - segment_reader.cpp - segment_writer.cpp serialize.cpp - store.cpp + storage_engine.cpp + data_dir.cpp + snapshot_manager.cpp stream_index_common.cpp stream_index_reader.cpp stream_index_writer.cpp stream_name.cpp + tablet.cpp + tablet_manager.cpp + tablet_meta.cpp + tablet_meta_manager.cpp + tablet_schema.cpp + txn_manager.cpp types.cpp utils.cpp wrapper_field.cpp @@ -88,4 +81,13 @@ add_library(Olap STATIC rowset/segment_v2/column_writer.cpp rowset/segment_v2/encoding_info.cpp rowset/segment_v2/ordinal_page_index.cpp + rowset_factory.cpp + task/engine_batch_load_task.cpp + task/engine_checksum_task.cpp + task/engine_clear_alter_task.cpp + task/engine_clone_task.cpp + task/engine_schema_change_task.cpp + task/engine_storage_migration_task.cpp + task/engine_publish_version_task.cpp + olap_snapshot_converter.cpp ) diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp index 9d17aabf4c366e..974255fa606f2b 100644 --- a/be/src/olap/aggregate_func.cpp +++ b/be/src/olap/aggregate_func.cpp @@ -142,12 +142,12 @@ AggregateFuncResolver::~AggregateFuncResolver() {} AggregateFunc get_aggregate_func(const FieldAggregationMethod agg_method, const FieldType field_type) { - return AggregateFuncResolver::get_instance()->get_aggregate_func(agg_method, field_type); + return AggregateFuncResolver::instance()->get_aggregate_func(agg_method, field_type); } FinalizeFunc get_finalize_func(const FieldAggregationMethod agg_method, const FieldType field_type) { - return AggregateFuncResolver::get_instance()->get_finalize_func(agg_method, field_type); + return AggregateFuncResolver::instance()->get_finalize_func(agg_method, field_type); } } // namespace doris diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 4fe5da9608e50d..e04e2b59850a64 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -18,7 +18,6 @@ #ifndef DORIS_BE_SRC_OLAP_AGGREGATE_FUNC_H #define DORIS_BE_SRC_OLAP_AGGREGATE_FUNC_H -#include "olap/field_info.h" #include "olap/hll.h" #include "olap/types.h" #include "util/arena.h" diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index ea98c00080a992..19b484bf197ade 100644 --- a/be/src/olap/base_compaction.cpp +++ 
b/be/src/olap/base_compaction.cpp @@ -25,11 +25,11 @@ #include "olap/delete_handler.h" #include "olap/merger.h" -#include "olap/column_data.h" -#include "olap/olap_engine.h" -#include "olap/olap_header.h" -#include "olap/segment_group.h" -#include "olap/olap_table.h" +#include "olap/rowset/column_data.h" +#include "olap/storage_engine.h" +#include "olap/tablet_meta.h" +#include "olap/rowset/segment_group.h" +#include "olap/tablet.h" #include "olap/utils.h" #include "util/doris_metrics.h" @@ -40,20 +40,20 @@ using std::vector; namespace doris { -OLAPStatus BaseCompaction::init(OLAPTablePtr table, bool is_manual_trigger) { +OLAPStatus BaseCompaction::init(TabletSharedPtr tablet, bool is_manual_trigger) { // 表在首次查询或PUSH等操作时,会被加载到内存 // 如果表没有被加载,表明该表上目前没有任何操作,所以不进行BE操作 - if (!table->is_loaded()) { + if (!tablet->init_succeeded()) { return OLAP_ERR_INPUT_PARAMETER_ERROR; } - LOG(INFO) << "init base compaction handler. [table=" << table->full_name() << "]"; + LOG(INFO) << "init base compaction handler. [tablet=" << tablet->full_name() << "]"; - _table = table; + _tablet = tablet; // 1. 尝试取得base compaction的锁 if (!_try_base_compaction_lock()) { - LOG(WARNING) << "another base compaction is running. table=" << table->full_name(); + LOG(WARNING) << "another base compaction is running. tablet=" << tablet->full_name(); return OLAP_ERR_BE_TRY_BE_LOCK_ERROR; } @@ -83,7 +83,7 @@ OLAPStatus BaseCompaction::init(OLAPTablePtr table, bool is_manual_trigger) { } OLAPStatus BaseCompaction::run() { - LOG(INFO) << "start base compaction. tablet=" << _table->full_name() + LOG(INFO) << "start base compaction. tablet=" << _tablet->full_name() << ", old_base_version=" << _old_base_version.second << ", new_base_version=" << _new_base_version.second; @@ -92,80 +92,109 @@ OLAPStatus BaseCompaction::run() { // 1. 计算新base的version hash VersionHash new_base_version_hash; - res = _table->compute_all_versions_hash(_need_merged_versions, &new_base_version_hash); + res = _tablet->compute_all_versions_hash(_need_merged_versions, &new_base_version_hash); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to calculate new base version hash.[table=%s; new_base_version=%d]", - _table->full_name().c_str(), - _new_base_version.second); + LOG(WARNING) << "fail to calculate new base version hash. tablet=" << _tablet->full_name() + << ", new_base_version=" << _new_base_version.second; _garbage_collection(); return res; } - VLOG(10) << "new_base_version_hash" << new_base_version_hash; + VLOG(10) << "new_base_version_hash:" << new_base_version_hash; // 2. 获取生成新base需要的data sources - vector base_data_sources; - _table->acquire_data_sources_by_versions(_need_merged_versions, &base_data_sources); - if (base_data_sources.empty()) { - OLAP_LOG_WARNING("fail to acquire need data sources. [table=%s; version=%d]", - _table->full_name().c_str(), - _new_base_version.second); + vector rowsets; + res = _tablet->capture_consistent_rowsets(_need_merged_versions, &rowsets); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to acquire need data sources. 
tablet=" << _tablet->full_name() + << ", version=" << _new_base_version.second; _garbage_collection(); - return OLAP_ERR_BE_ACQUIRE_DATA_SOURCES_ERROR; + return res; } { DorisMetrics::base_compaction_deltas_total.increment(_need_merged_versions.size()); int64_t merge_bytes = 0; - for (ColumnData* i_data : base_data_sources) { - merge_bytes += i_data->segment_group()->data_size(); + for (auto& rowset : rowsets) { + merge_bytes += rowset->data_disk_size(); } DorisMetrics::base_compaction_bytes_total.increment(merge_bytes); } - // 保存生成base文件时候累积的行数 - uint64_t row_count = 0; - // 3. 执行base compaction // 执行过程可能会持续比较长时间 stage_watch.reset(); - res = _do_base_compaction(new_base_version_hash, - &base_data_sources, - &row_count); + RowsetId rowset_id = 0; + RETURN_NOT_OK(_tablet->next_rowset_id(&rowset_id)); + RowsetWriterContext context; + context.rowset_id = rowset_id; + context.tablet_uid = _tablet->tablet_uid(); + context.tablet_id = _tablet->tablet_id(); + context.partition_id = _tablet->partition_id(); + context.tablet_schema_hash = _tablet->schema_hash(); + context.rowset_type = ALPHA_ROWSET; + context.rowset_path_prefix = _tablet->tablet_path(); + context.tablet_schema = &(_tablet->tablet_schema()); + context.rowset_state = VISIBLE; + context.data_dir = _tablet->data_dir(); + context.version = _new_base_version; + context.version_hash = new_base_version_hash; + + _rs_writer.reset(new (std::nothrow)AlphaRowsetWriter()); + if (_rs_writer == nullptr) { + LOG(WARNING) << "fail to new rowset."; + _garbage_collection(); + return OLAP_ERR_MALLOC_ERROR; + } + RETURN_NOT_OK(_rs_writer->init(context)); + res = _do_base_compaction(new_base_version_hash, rowsets); + _tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(_rs_writer->rowset_id())); // 释放不再使用的ColumnData对象 - _table->release_data_sources(&base_data_sources); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to do base version. [table=%s; version=%d]", - _table->full_name().c_str(), - _new_base_version.second); + LOG(WARNING) << "fail to do base version. tablet=" << _tablet->full_name() + << ", version=" << _new_base_version.second; _garbage_collection(); return res; } + // validate that delete action is right + // if error happened, sleep 1 hour. Report a fatal log every 1 minute if (_validate_delete_file_action() != OLAP_SUCCESS) { LOG(WARNING) << "failed to do base compaction. delete action has error."; _garbage_collection(); return OLAP_ERR_BE_ERROR_DELETE_ACTION; } - - VLOG(3) << "elapsed time of doing base compaction:" << stage_watch.get_elapse_time_us(); - // 4. make new versions visable. // If success, remove files belong to old versions; // If fail, gc files belong to new versions. - vector unused_olap_indices; - res = _update_header(row_count, &unused_olap_indices); + vector unused_rowsets; + vector unused_versions; + _get_unused_versions(&unused_versions); + res = _tablet->capture_consistent_rowsets(unused_versions, &unused_rowsets); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to update header. table=" << _table->full_name() + LOG(WARNING) << "fail to capture consistent rowsets. tablet=" << _tablet->full_name() + << ", version=" << _new_base_version.second; + _garbage_collection(); + return res; + } + + res = _update_header(unused_rowsets); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to update header. 
tablet=" << _tablet->full_name() << ", version=" << _new_base_version.first << "-" << _new_base_version.second; _garbage_collection(); return res; } - _delete_old_files(&unused_olap_indices); + _delete_old_files(&unused_rowsets); _release_base_compaction_lock(); + LOG(INFO) << "succeed to do base compaction. tablet=" << _tablet->full_name() + << ", base_version=" << _new_base_version.first << "-" << _new_base_version.second + << ". elapsed time of doing base compaction" + << ", time=" << stage_watch.get_elapse_second() << "s"; + return OLAP_SUCCESS; } @@ -175,10 +204,10 @@ static bool version_comparator(const Version& lhs, const Version& rhs) { bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, vector* candidate_versions) { - ReadLock rdlock(_table->get_header_lock_ptr()); - int32_t cumulative_layer_point = _table->cumulative_layer_point(); + ReadLock rdlock(_tablet->get_header_lock_ptr()); + int64_t cumulative_layer_point = _tablet->cumulative_layer_point(); if (cumulative_layer_point == -1) { - LOG(FATAL) << "tablet has an unreasonable cumulative layer point. [tablet='" << _table->full_name() + LOG(FATAL) << "tablet has an unreasonable cumulative layer point. [tablet='" << _tablet->full_name() << "' cumulative_layer_point=" << cumulative_layer_point << "]"; return false; } @@ -187,10 +216,8 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, --cumulative_layer_point; vector path_versions; - if (OLAP_SUCCESS != _table->select_versions_to_span(Version(0, cumulative_layer_point), - &path_versions)) { - OLAP_LOG_WARNING("fail to select shortest version path. [start=%d end=%d]", - 0, cumulative_layer_point); + if (OLAP_SUCCESS != _tablet->capture_consistent_versions(Version(0, cumulative_layer_point), &path_versions)) { + LOG(WARNING) << "fail to select shortest version path. start=0, end=" << cumulative_layer_point; return false; } @@ -203,8 +230,8 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, // base文件 if (temp.first == 0) { _old_base_version = temp; - base_size = _table->get_version_data_size(temp); - base_creation_time = _table->get_delta(index)->creation_time(); + base_size = _tablet->get_rowset_size_by_version(temp); + base_creation_time = _tablet->get_rowset_by_version(temp)->creation_time(); continue; } @@ -218,7 +245,7 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, // 只有1个base文件和1个delta文件 if (base_compaction_layer_point == -1) { VLOG(3) << "can't do base compaction: no cumulative files." - << "table=" << _table->full_name() + << "tablet=" << _tablet->full_name() << ", base_version=0-" << _old_base_version.second << ", cumulative_layer_point=" << cumulative_layer_point + 1; return false; @@ -227,15 +254,14 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, // 只有1个cumulative文件 if (base_compaction_layer_point == _old_base_version.second) { VLOG(3) << "can't do base compaction: only one cumulative file." - << "table=" << _table->full_name() + << "tablet=" << _tablet->full_name() << ", base_version=0-" << _old_base_version.second << ", cumulative_layer_point=" << cumulative_layer_point + 1; return false; } // 使用最短路径算法,选择可合并的cumulative版本 - if (OLAP_SUCCESS != _table->select_versions_to_span(_new_base_version, - candidate_versions)) { + if (OLAP_SUCCESS != _tablet->capture_consistent_versions(_new_base_version, candidate_versions)) { LOG(WARNING) << "fail to select shortest version path." 
<< "start=" << _new_base_version.first << ", end=" << _new_base_version.second; @@ -247,7 +273,7 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, // 如果是手动执行START_BASE_COMPACTION命令,则不检查base compaction policy, // 也不考虑删除版本过期问题, 只要有可以合并的cumulative,就执行base compaction if (is_manual_trigger) { - VLOG(3) << "manual trigger base compaction. table=" << _table->full_name(); + VLOG(3) << "manual trigger base compaction. tablet=" << _tablet->full_name(); return true; } @@ -261,7 +287,7 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, continue; } // cumulative文件 - cumulative_total_size += _table->get_version_data_size(temp); + cumulative_total_size += _tablet->get_rowset_size_by_version(temp); } // 检查是否满足base compaction的触发条件 @@ -271,7 +297,7 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, = config::base_compaction_num_cumulative_deltas; // candidate_versions中包含base文件,所以这里减1 if (candidate_versions->size() - 1 >= base_compaction_num_cumulative_deltas) { - LOG(INFO) << "satisfy the base compaction policy. table="<< _table->full_name() + LOG(INFO) << "satisfy the base compaction policy. tablet="<< _tablet->full_name() << ", num_cumulative_deltas=" << candidate_versions->size() - 1 << ", base_compaction_num_cumulative_deltas=" << base_compaction_num_cumulative_deltas; return true; @@ -281,7 +307,7 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, const double base_cumulative_delta_ratio = config::base_cumulative_delta_ratio; double cumulative_base_ratio = static_cast(cumulative_total_size) / base_size; if (cumulative_base_ratio > base_cumulative_delta_ratio) { - LOG(INFO) << "satisfy the base compaction policy. table=" << _table->full_name() + LOG(INFO) << "satisfy the base compaction policy. tablet=" << _tablet->full_name() << ", cumualtive_total_size=" << cumulative_total_size << ", base_size=" << base_size << ", cumulative_base_ratio=" << cumulative_base_ratio @@ -293,13 +319,13 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, const uint32_t interval_since_last_operation = config::base_compaction_interval_seconds_since_last_operation; int64_t interval_since_last_be = time(NULL) - base_creation_time; if (interval_since_last_be > interval_since_last_operation) { - LOG(INFO) << "satisfy the base compaction policy. table=" << _table->full_name() + LOG(INFO) << "satisfy the base compaction policy. tablet=" << _tablet->full_name() << ", interval_since_last_be=" << interval_since_last_be << ", policy_interval=" << interval_since_last_operation; return true; } - VLOG(3) << "don't satisfy the base compaction policy. table=" << _table->full_name() + VLOG(3) << "don't satisfy the base compaction policy. tablet=" << _tablet->full_name() << ", cumulative_files_number=" << candidate_versions->size() - 1 << ", cumulative_base_ratio=" << cumulative_base_ratio << ", interval_since_last_be=" << interval_since_last_be; @@ -308,20 +334,19 @@ bool BaseCompaction::_check_whether_satisfy_policy(bool is_manual_trigger, } OLAPStatus BaseCompaction::_do_base_compaction(VersionHash new_base_version_hash, - vector* base_data_sources, - uint64_t* row_count) { + const vector& rowsets) { OlapStopWatch watch; - // 1. 
生成新base文件对应的olap index - SegmentGroup* new_base = new (std::nothrow) SegmentGroup(_table.get(), - _new_base_version, - new_base_version_hash, - false, 0, 0); - if (new_base == NULL) { - OLAP_LOG_WARNING("fail to new SegmentGroup."); - return OLAP_ERR_MALLOC_ERROR; + vector rs_readers; + for (auto& rowset : rowsets) { + RowsetReaderSharedPtr rs_reader(rowset->create_reader()); + if (rs_reader == nullptr) { + LOG(WARNING) << "rowset create reader failed. rowset:" << rowset->rowset_id(); + return OLAP_ERR_ROWSET_CREATE_READER; + } + rs_readers.push_back(rs_reader); } - LOG(INFO) << "start merge new base. tablet=" << _table->full_name() + LOG(INFO) << "start merge new base. tablet=" << _tablet->full_name() << ", version=" << _new_base_version.second; // 2. 执行base compaction的merge // 注意:无论是行列存,还是列存,在执行merge时都使用Merger类,不能使用MassiveMerger。 @@ -335,55 +360,34 @@ OLAPStatus BaseCompaction::_do_base_compaction(VersionHash new_base_version_hash uint64_t merged_rows = 0; uint64_t filted_rows = 0; OLAPStatus res = OLAP_SUCCESS; - if (_table->data_file_type() == COLUMN_ORIENTED_FILE) { - _table->obtain_header_rdlock(); - _table->release_header_lock(); - - Merger merger(_table, new_base, READER_BASE_COMPACTION); - res = merger.merge(*base_data_sources, &merged_rows, &filted_rows); - if (res == OLAP_SUCCESS) { - *row_count = merger.row_count(); - } - } else { - OLAP_LOG_WARNING("unknown data file type. [type=%s]", - DataFileType_Name(_table->data_file_type()).c_str()); - res = OLAP_ERR_DATA_FILE_TYPE_ERROR; - } + Merger merger(_tablet, _rs_writer, READER_BASE_COMPACTION); + res = merger.merge(rs_readers, &merged_rows, &filted_rows); // 3. 如果merge失败,执行清理工作,返回错误码退出 if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to make new base version. [table='%s' version='%d.%d' res=%d]", - _table->full_name().c_str(), - _new_base_version.first, - _new_base_version.second, - res); - - new_base->delete_all_files(); - SAFE_DELETE(new_base); - + LOG(WARNING) << "fail to make new base version. res=" << res + << ", tablet=" << _tablet->full_name() + << ", version=" << _new_base_version.first + << "-" << _new_base_version.second; return OLAP_ERR_BE_MERGE_ERROR; } + RowsetSharedPtr new_base = _rs_writer->build(); + if (new_base == nullptr) { + LOG(WARNING) << "rowset writer build failed. writer version:" + << _rs_writer->version().first << "-" << _rs_writer->version().second; + return OLAP_ERR_MALLOC_ERROR; + } // 4. 如果merge成功,则将新base文件对应的olap index载入 - _new_olap_indices.push_back(new_base); + _new_rowsets.push_back(new_base); - VLOG(10) << "merge new base success, start load index. tablet=" << _table->full_name() + VLOG(10) << "merge new base success, start load index. tablet=" << _tablet->full_name() << ", version=" << _new_base_version.second; - res = new_base->load(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load index. [version='%d-%d' version_hash=%ld table='%s']", - new_base->version().first, - new_base->version().second, - new_base->version_hash(), - _table->full_name().c_str()); - return res; - } - // Check row num changes uint64_t source_rows = 0; - for (ColumnData* i_data : *base_data_sources) { - source_rows += i_data->segment_group()->num_rows(); + for (auto& rowset : rowsets) { + source_rows += rowset->num_rows(); } bool row_nums_check = config::row_nums_check; if (row_nums_check) { @@ -405,69 +409,57 @@ OLAPStatus BaseCompaction::_do_base_compaction(VersionHash new_base_version_hash << ", time_us=" << watch.get_elapse_time_us(); } - LOG(INFO) << "succeed to do base compaction. 
table=" << _table->full_name() - << ", base_version=" << _new_base_version.first << "-" << _new_base_version.second; return OLAP_SUCCESS; } -OLAPStatus BaseCompaction::_update_header(uint64_t row_count, vector* unused_olap_indices) { - WriteLock wrlock(_table->get_header_lock_ptr()); - vector unused_versions; - _get_unused_versions(&unused_versions); +OLAPStatus BaseCompaction::_update_header(const vector& unused_rowsets) { + WriteLock wrlock(_tablet->get_header_lock_ptr()); OLAPStatus res = OLAP_SUCCESS; - // 由于在replace_data_sources中可能会发生很小概率的非事务性失败, 因此这里定位FATAL错误 - res = _table->replace_data_sources(&unused_versions, - &_new_olap_indices, - unused_olap_indices); + // 由于在modify_rowsets中可能会发生很小概率的非事务性失败, 因此这里定位FATAL错误 + res = _tablet->modify_rowsets(_new_rowsets, unused_rowsets); if (res != OLAP_SUCCESS) { LOG(FATAL) << "fail to replace data sources. res" << res - << ", tablet=" << _table->full_name() + << ", tablet=" << _tablet->full_name() << ", new_base_version=" << _new_base_version.second << ", old_base_verison=" << _old_base_version.second; return res; } - LOG(INFO) << "BE remove delete conditions. removed_version=" << _new_base_version.second; - - // Base Compaction完成之后,需要删除header中版本号小于等于新base文件版本号的删除条件 - DeleteConditionHandler cond_handler; - cond_handler.delete_cond(_table, _new_base_version.second, true); - // 如果保存Header失败, 所有新增的信息会在下次启动时丢失, 属于严重错误 // 暂时没办法做很好的处理,报FATAL - res = _table->save_header(); + res = _tablet->save_meta(); if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. res=" << res - << ", tablet=" << _table->full_name() + LOG(FATAL) << "fail to save tablet meta. res=" << res + << ", tablet=" << _tablet->full_name() << ", new_base_version=" << _new_base_version.second << ", old_base_version=" << _old_base_version.second; return OLAP_ERR_BE_SAVE_HEADER_ERROR; } - _new_olap_indices.clear(); + _new_rowsets.clear(); return OLAP_SUCCESS; } -void BaseCompaction::_delete_old_files(vector* unused_indices) { +void BaseCompaction::_delete_old_files(vector* unused_indices) { if (!unused_indices->empty()) { - OLAPEngine* unused_index = OLAPEngine::get_instance(); + StorageEngine* storage_engine = StorageEngine::instance(); - for (vector::iterator it = unused_indices->begin(); + for (vector::iterator it = unused_indices->begin(); it != unused_indices->end(); ++it) { - unused_index->add_unused_index(*it); + storage_engine->add_unused_rowset(*it); } } } void BaseCompaction::_garbage_collection() { // 清理掉已生成的版本文件 - for (vector::iterator it = _new_olap_indices.begin(); - it != _new_olap_indices.end(); ++it) { - (*it)->delete_all_files(); - SAFE_DELETE(*it); + StorageEngine* storage_engine = StorageEngine::instance(); + for (vector::iterator it = _new_rowsets.begin(); + it != _new_rowsets.end(); ++it) { + storage_engine->add_unused_rowset(*it); } - _new_olap_indices.clear(); + _new_rowsets.clear(); _release_base_compaction_lock(); } @@ -475,8 +467,7 @@ void BaseCompaction::_garbage_collection() { bool BaseCompaction::_validate_need_merged_versions( const vector& candidate_versions) { if (candidate_versions.size() <= 1) { - OLAP_LOG_WARNING("unenough versions need to be merged. [size=%lu]", - candidate_versions.size()); + LOG(WARNING) << "unenough versions need to be merged. 
size=" << candidate_versions.size(); return false; } @@ -486,10 +477,11 @@ bool BaseCompaction::_validate_need_merged_versions( Version previous_version = candidate_versions[index - 1]; Version current_version = candidate_versions[index]; if (current_version.first != previous_version.second + 1) { - OLAP_LOG_WARNING("wrong need merged version. " - "previous_version=%d-%d; current_version=%d-%d", - previous_version.first, previous_version.second, - current_version.first, current_version.second); + LOG(WARNING) << "wrong need merged version. " + << "previous_version=" << previous_version.first + << "-" << previous_version.second + << ", current_version=" << current_version.first + << "-" << current_version.second; return false; } } @@ -498,11 +490,11 @@ bool BaseCompaction::_validate_need_merged_versions( if (_new_base_version.first != 0 || _new_base_version.first != candidate_versions.begin()->first || _new_base_version.second != candidate_versions.rbegin()->second) { - OLAP_LOG_WARNING("new_base_version is wrong. " - "[new_base_version=%d-%d; vector_version=%d-%d]", - _new_base_version.first, _new_base_version.second, - candidate_versions.begin()->first, - candidate_versions.rbegin()->second); + LOG(WARNING) << "new_base_version is wrong." + << " new_base_version=" << _new_base_version.first + << "-" << _new_base_version.second + << ", vector_version=" << candidate_versions.begin()->first + << "-" << candidate_versions.rbegin()->second; return false; } @@ -512,19 +504,22 @@ bool BaseCompaction::_validate_need_merged_versions( OLAPStatus BaseCompaction::_validate_delete_file_action() { // 1. acquire the latest version to make sure all is right after deleting files - ReadLock rdlock(_table->get_header_lock_ptr()); - const PDelta* lastest_version = _table->lastest_version(); - Version test_version = Version(0, lastest_version->end_version()); - vector test_sources; - _table->acquire_data_sources(test_version, &test_sources); + ReadLock rdlock(_tablet->get_header_lock_ptr()); + const RowsetSharedPtr rowset = _tablet->rowset_with_max_version(); + if (rowset == nullptr) { + LOG(INFO) << "version is -1 when validate_delete_file_action"; + return OLAP_ERR_BE_ERROR_DELETE_ACTION; + } + Version spec_version = Version(0, rowset->end_version()); + vector rs_readers; + _tablet->capture_rs_readers(spec_version, &rs_readers); - if (test_sources.size() == 0) { + if (rs_readers.empty()) { LOG(INFO) << "acquire data sources failed. 
version=" - << test_version.first << "-" << test_version.second; + << spec_version.first << "-" << spec_version.second; return OLAP_ERR_BE_ERROR_DELETE_ACTION; } - _table->release_data_sources(&test_sources); VLOG(3) << "delete file action is OK"; return OLAP_SUCCESS; diff --git a/be/src/olap/base_compaction.h b/be/src/olap/base_compaction.h index f0fc4510b721e4..331b3c400e544f 100644 --- a/be/src/olap/base_compaction.h +++ b/be/src/olap/base_compaction.h @@ -23,12 +23,14 @@ #include "olap/olap_common.h" #include "olap/olap_define.h" -#include "olap/olap_table.h" +#include "olap/tablet.h" +#include "rowset/rowset_id_generator.h" +#include "rowset/alpha_rowset_writer.h" namespace doris { -class ColumnData; -class SegmentGroup; +class Rowset; +class RowsetReader; // @brief 实现对START_BASE_COMPACTION命令的处理逻辑,并返回处理结果 class BaseCompaction { @@ -36,7 +38,8 @@ class BaseCompaction { BaseCompaction() : _new_base_version(0, 0), _old_base_version(0, 0), - _base_compaction_locked(false) {} + _base_compaction_locked(false), + _rs_writer(nullptr) {} virtual ~BaseCompaction() { _release_base_compaction_lock(); @@ -47,7 +50,7 @@ class BaseCompaction { // 2. 如果满足,计算需要合并哪些版本 // // 输入参数: - // - table: 待执行BE的OLAPTable的智能指针 + // - tablet: 待执行BE的Tablet的智能指针 // - is_manual_trigger // - 如果为true,则是手动执行START_BASE_COMPACTION命令 // - 如果为false,则是根据BE策略来执行 @@ -55,7 +58,7 @@ class BaseCompaction { // 返回值: // - 如果init执行成功,即可以执行BE,则返回OLAP_SUCCESS; // - 其它情况下,返回相应的错误码 - OLAPStatus init(OLAPTablePtr table, bool is_manual_trigger = false); + OLAPStatus init(TabletSharedPtr tablet, bool is_manual_trigger = false); // 执行BaseCompaction, 可能会持续很长时间 // @@ -82,35 +85,33 @@ class BaseCompaction { // // 输入参数: // - new_base_version_hash: 新Base的VersionHash - // - base_data_sources: 生成新Base需要的ColumnData* + // - rs_readers : 生成新Base需要的RowsetReaders* // - row_count: 生成Base过程中产生的row_count // // 返回值: // - 如果执行成功,则返回OLAP_SUCCESS; // - 其它情况下,返回相应的错误码 OLAPStatus _do_base_compaction(VersionHash new_base_version_hash, - std::vector* base_data_sources, - uint64_t* row_count); + const std::vector& rowsets); // 更新Header使得修改对外可见 - // 输出参数: - // - unused_olap_indices: 需要被物理删除的SegmentGroup* + // 输入参数: + // - unused_rowsets: 需要被物理删除的Rowset* // // 返回值: // - 如果执行成功,则返回OLAP_SUCCESS; // - 其它情况下,返回相应的错误码 - OLAPStatus _update_header(uint64_t row_count, - std::vector* unused_olap_indices); + OLAPStatus _update_header(const std::vector& unused_rowsets); - // 删除不再使用的SegmentGroup文件 + // 删除不再使用的Rowset // // 输入参数: - // - unused_olap_indices: 需要被物理删除的SegmentGroup* + // - unused_rowsets: 需要被物理删除的Rowset* // // 返回值: // - 如果执行成功,则返回OLAP_SUCCESS; // - 其它情况下,返回相应的错误码 - void _delete_old_files(std::vector* unused_indices); + void _delete_old_files(std::vector* unused_indices); // 其它函数执行失败时,调用该函数进行清理工作 void _garbage_collection(); @@ -133,7 +134,7 @@ class BaseCompaction { unused_versions->clear(); std::vector all_versions; - _table->list_versions(&all_versions); + _tablet->list_versions(&all_versions); for (std::vector::const_iterator iter = all_versions.begin(); iter != all_versions.end(); ++iter) { if (iter->first <= _new_base_version.second) { @@ -148,7 +149,7 @@ class BaseCompaction { } bool _try_base_compaction_lock() { - if (_table->try_base_compaction_lock()) { + if (_tablet->try_base_compaction_lock()) { _base_compaction_locked = true; return true; } @@ -158,13 +159,13 @@ class BaseCompaction { void _release_base_compaction_lock() { if (_base_compaction_locked) { - _table->release_base_compaction_lock(); + _tablet->release_base_compaction_lock(); 
_base_compaction_locked = false; } } // 需要进行操作的Table指针 - OLAPTablePtr _table; + TabletSharedPtr _tablet; // 新base的version Version _new_base_version; // 现有base的version @@ -173,10 +174,10 @@ class BaseCompaction { Version _latest_cumulative; // 在此次base compaction执行过程中,将被合并的cumulative文件版本 std::vector _need_merged_versions; - // 需要新增的版本对应的SegmentGroup - std::vector _new_olap_indices; - + // 需要新增的版本对应Rowset的 + std::vector _new_rowsets; bool _base_compaction_locked; + RowsetWriterSharedPtr _rs_writer; DISALLOW_COPY_AND_ASSIGN(BaseCompaction); }; diff --git a/be/src/olap/column_mapping.h b/be/src/olap/column_mapping.h index b5ec1876f57ec4..1cebe4eab3e544 100644 --- a/be/src/olap/column_mapping.h +++ b/be/src/olap/column_mapping.h @@ -23,7 +23,7 @@ namespace doris { struct ColumnMapping { - ColumnMapping() : ref_column(-1), default_value(NULL) {} + ColumnMapping() : ref_column(-1), default_value(nullptr) {} virtual ~ColumnMapping() {} // <0: use default value @@ -33,5 +33,7 @@ struct ColumnMapping { WrapperField* default_value; }; +typedef std::vector SchemaMapping; + } // namespace doris -#endif // DORIS_BE_SRC_COLUMN_MAPPING_H \ No newline at end of file +#endif // DORIS_BE_SRC_COLUMN_MAPPING_H diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 6a208bb13fd1a9..db9b3757d9fb4f 100755 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -21,62 +21,60 @@ #include #include -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "util/doris_metrics.h" +#include "olap/rowset/alpha_rowset_writer.h" using std::list; using std::nothrow; using std::sort; using std::vector; - namespace doris { -OLAPStatus CumulativeCompaction::init(OLAPTablePtr table) { - LOG(INFO) << "init cumulative compaction handler. [table=" << table->full_name() << "]"; +OLAPStatus CumulativeCompaction::init(TabletSharedPtr tablet) { + LOG(INFO) << "init cumulative compaction handler. tablet=" << tablet->full_name(); if (_is_init) { - OLAP_LOG_WARNING("cumulative handler has been inited.[table=%s]", - table->full_name().c_str()); + LOG(WARNING) << "cumulative handler has been inited. tablet=" << tablet->full_name(); return OLAP_ERR_CUMULATIVE_REPEAT_INIT; } - if (!table->is_loaded()) { + if (!tablet->init_succeeded()) { return OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS; } - _table = table; + _tablet = tablet; _max_delta_file_size = config::cumulative_compaction_budgeted_bytes; - if (!_table->try_cumulative_lock()) { - OLAP_LOG_WARNING("another cumulative is running. [table=%s]", - _table->full_name().c_str()); + if (!_tablet->try_cumulative_lock()) { + LOG(INFO) << "skip compaction, because another cumulative is running. tablet=" << _tablet->full_name(); return OLAP_ERR_CE_TRY_CE_LOCK_ERROR; } - _obtain_header_rdlock(); - _old_cumulative_layer_point = _table->cumulative_layer_point(); - _release_header_lock(); + _tablet->obtain_header_rdlock(); + _old_cumulative_layer_point = _tablet->cumulative_layer_point(); + _tablet->release_header_lock(); // 如果为-1,则该table之前没有设置过cumulative layer point // 我们在这里设置一下 if (_old_cumulative_layer_point == -1) { - LOG(INFO) << "tablet has an unreasonable cumulative layer point. [tablet='" << _table->full_name() - << "' cumulative_layer_point=" << _old_cumulative_layer_point << "]"; - _table->release_cumulative_lock(); + LOG(INFO) << "tablet has an unreasonable cumulative layer point. 
tablet=" << _tablet->full_name() + << ", cumulative_layer_point=" << _old_cumulative_layer_point; + _tablet->release_cumulative_lock(); return OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS; } - _obtain_header_wrlock(); + _tablet->obtain_header_wrlock(); OLAPStatus res = _calculate_need_merged_versions(); - _release_header_lock(); + _tablet->release_header_lock(); if (res != OLAP_SUCCESS) { - _table->release_cumulative_lock(); + _tablet->release_cumulative_lock(); LOG(INFO) << "no suitable delta versions. don't do cumulative compaction now."; return res; } if (!_validate_need_merged_versions()) { - _table->release_cumulative_lock(); + _tablet->release_cumulative_lock(); LOG(FATAL) << "error! invalid need merged versions."; return OLAP_ERR_CUMULATIVE_INVALID_NEED_MERGED_VERSIONS; } @@ -84,105 +82,114 @@ OLAPStatus CumulativeCompaction::init(OLAPTablePtr table) { _is_init = true; _cumulative_version = Version(_need_merged_versions.begin()->first, _need_merged_versions.rbegin()->first); - + _rs_writer.reset(new (std::nothrow)AlphaRowsetWriter()); return OLAP_SUCCESS; } OLAPStatus CumulativeCompaction::run() { if (!_is_init) { - _table->release_cumulative_lock(); - OLAP_LOG_WARNING("cumulative handler is not inited."); + _tablet->release_cumulative_lock(); + LOG(WARNING) << "cumulative handler is not inited."; return OLAP_ERR_NOT_INITED; } // 0. 准备工作 - LOG(INFO) << "start cumulative compaction. tablet=" << _table->full_name() + LOG(INFO) << "start cumulative compaction. tablet=" << _tablet->full_name() << ", cumulative_version=" << _cumulative_version.first << "-" << _cumulative_version.second; OlapStopWatch watch; // 1. 计算新的cumulative文件的version hash OLAPStatus res = OLAP_SUCCESS; - res = _table->compute_all_versions_hash(_need_merged_versions, &_cumulative_version_hash); + res = _tablet->compute_all_versions_hash(_need_merged_versions, &_cumulative_version_hash); if (res != OLAP_SUCCESS) { - _table->release_cumulative_lock(); - OLAP_LOG_WARNING("failed to computer cumulative version hash. " - "[table=%s; cumulative_version=%d-%d]", - _table->full_name().c_str(), - _cumulative_version.first, - _cumulative_version.second); + _tablet->release_cumulative_lock(); + LOG(WARNING) << "failed to computer cumulative version hash." + << " tablet=" << _tablet->full_name() + << ", cumulative_version=" << _cumulative_version.first + << "-" << _cumulative_version.second; return res; } // 2. 获取待合并的delta文件对应的data文件 - _table->acquire_data_sources_by_versions(_need_merged_versions, &_data_source); - if (_data_source.size() == 0) { - _table->release_cumulative_lock(); - OLAP_LOG_WARNING("failed to acquire data source. [table=%s]", - _table->full_name().c_str()); - return OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE; + res = _tablet->capture_consistent_rowsets(_need_merged_versions, &_rowsets); + if (res != OLAP_SUCCESS) { + _tablet->release_cumulative_lock(); + LOG(WARNING) << "fail to capture consistent rowsets. tablet=" << _tablet->full_name() + << ", version=" << _cumulative_version.first + << "-" << _cumulative_version.second; + return res; } { DorisMetrics::cumulative_compaction_deltas_total.increment(_need_merged_versions.size()); int64_t merge_bytes = 0; - for (ColumnData* i_data : _data_source) { - merge_bytes += i_data->segment_group()->data_size(); + for (auto& rowset : _rowsets) { + merge_bytes += rowset->data_disk_size(); } DorisMetrics::cumulative_compaction_bytes_total.increment(merge_bytes); } do { // 3. 
生成新cumulative文件对应的olap index - _new_segment_group = new (nothrow) SegmentGroup(_table.get(), - _cumulative_version, - _cumulative_version_hash, - false, 0, 0); - if (_new_segment_group == NULL) { - OLAP_LOG_WARNING("failed to malloc new cumulative olap index. " - "[table=%s; cumulative_version=%d-%d]", - _table->full_name().c_str(), - _cumulative_version.first, - _cumulative_version.second); - break; - } + RowsetId rowset_id = 0; + RETURN_NOT_OK(_tablet->next_rowset_id(&rowset_id)); + RowsetWriterContext context; + context.rowset_id = rowset_id; + context.tablet_uid = _tablet->tablet_uid(); + context.tablet_id = _tablet->tablet_id(); + context.partition_id = _tablet->partition_id(); + context.tablet_schema_hash = _tablet->schema_hash(); + context.rowset_type = ALPHA_ROWSET; + context.rowset_path_prefix = _tablet->tablet_path(); + context.tablet_schema = &(_tablet->tablet_schema()); + context.rowset_state = VISIBLE; + context.data_dir = _tablet->data_dir(); + context.version = _cumulative_version; + context.version_hash = _cumulative_version_hash; + _rs_writer->init(context); // 4. 执行cumulative compaction合并过程 + for (auto& rowset : _rowsets) { + RowsetReaderSharedPtr rs_reader(rowset->create_reader()); + if (rs_reader == nullptr) { + LOG(WARNING) << "rowset create reader failed. rowset:" << rowset->rowset_id(); + _tablet->release_cumulative_lock(); + return OLAP_ERR_ROWSET_CREATE_READER; + } + _rs_readers.push_back(rs_reader); + } res = _do_cumulative_compaction(); + _tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(_rs_writer->rowset_id())); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to do cumulative compaction. " - "[table=%s; cumulative_version=%d-%d]", - _table->full_name().c_str(), - _cumulative_version.first, - _cumulative_version.second); + LOG(WARNING) << "failed to do cumulative compaction." + << ", tablet=" << _tablet->full_name() + << ", cumulative_version=" << _cumulative_version.first + << "-" << _cumulative_version.second; break; } } while (0); // 5. 如果出现错误,执行清理工作 - if (res != OLAP_SUCCESS && _new_segment_group != NULL) { - _new_segment_group->delete_all_files(); - SAFE_DELETE(_new_segment_group); - } - - if (_data_source.size() != 0) { - _table->release_data_sources(&_data_source); + if (res != OLAP_SUCCESS) { + StorageEngine::instance()->add_unused_rowset(_rowset); } - _table->release_cumulative_lock(); + _tablet->release_cumulative_lock(); - VLOG(10) << "elapsed time of doing cumulative compaction. " - << "time=" << watch.get_elapse_time_us(); + LOG(INFO) << "succeed to do cumulative compaction. tablet=" << _tablet->full_name() + << ", cumulative_version=" << _cumulative_version.first + << "-" << _cumulative_version.second + << ". elapsed time of doing cumulative compaction" + << ", time=" << watch.get_elapse_second() << "s"; return res; } OLAPStatus CumulativeCompaction::_calculate_need_merged_versions() { - OLAPStatus res = OLAP_SUCCESS; - Versions delta_versions; - res = _get_delta_versions(&delta_versions); + OLAPStatus res = _get_delta_versions(&delta_versions); if (res != OLAP_SUCCESS) { - LOG(INFO) << "failed to get delta versions."; + LOG(INFO) << "failed to get delta versions. 
res=" << res; return res; } @@ -207,11 +214,11 @@ OLAPStatus CumulativeCompaction::_calculate_need_merged_versions() { } Version delta = delta_versions[index]; - size_t delta_size = _table->get_version_data_size(delta); + size_t delta_size = _tablet->get_rowset_size_by_version(delta); // 如果遇到大的delta文件,或delete版本文件,则: if (delta_size >= _max_delta_file_size - || _table->is_delete_data_version(delta) - || _table->is_load_delete_version(delta)) { + || _tablet->version_for_delete_predicate(delta) + || _tablet->version_for_load_deletion(delta)) { // 1) 如果need_merged_versions为空,表示这2类文件在区间的开头,直接跳过 if (need_merged_versions.empty()) { continue; @@ -235,7 +242,7 @@ OLAPStatus CumulativeCompaction::_calculate_need_merged_versions() { if (need_merged_versions.size() == 1 || total_size == 0) { // 如果区间末尾是较大的delta版, 则与它合并 if (index < delta_number - && _table->get_version_data_size(delta_versions[index]) >= + && _tablet->get_rowset_size_by_version(delta_versions[index]) >= _max_delta_file_size) { need_merged_versions.push_back(delta_versions[index]); ++index; @@ -277,8 +284,8 @@ OLAPStatus CumulativeCompaction::_calculate_need_merged_versions() { // 如果不设置新的cumulative_layer_point, 则下次执行cumulative compaction时,扫描的文件和这次 // 扫描的文件相同,依然找不到可以合并的delta文件, 无法执行合并过程。 // 依此类推,就进入了死循环状态,永远不会进行cumulative compaction - _table->set_cumulative_layer_point(delta_versions[index].first); - _table->save_header(); + _tablet->set_cumulative_layer_point(delta_versions[index].first); + _tablet->save_meta(); return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS; } @@ -290,7 +297,7 @@ OLAPStatus CumulativeCompaction::_get_delta_versions(Versions* delta_versions) { delta_versions->clear(); Versions all_versions; - _table->list_versions(&all_versions); + _tablet->list_versions(&all_versions); for (Versions::const_iterator version = all_versions.begin(); version != all_versions.end(); ++version) { @@ -308,8 +315,8 @@ OLAPStatus CumulativeCompaction::_get_delta_versions(Versions* delta_versions) { // 因为我们总是保留最新的delta不合并到cumulative文件里,所以此时应该返回错误,不进行cumulative if (delta_versions->size() == 1) { delta_versions->clear(); - VLOG(10) << "only one delta version. no new delta versions. " - << ", cumulative_point=" << _old_cumulative_layer_point; + VLOG(10) << "only one delta version. no new delta versions." + << " cumulative_point=" << _old_cumulative_layer_point; return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS; } @@ -317,13 +324,13 @@ OLAPStatus CumulativeCompaction::_get_delta_versions(Versions* delta_versions) { // can't do cumulative expansion if there has a hole Versions versions_path; - OLAPStatus select_status = _table->select_versions_to_span( + OLAPStatus select_status = _tablet->capture_consistent_versions( Version(delta_versions->front().first, delta_versions->back().second), &versions_path); if (select_status != OLAP_SUCCESS) { - OLAP_LOG_WARNING("can't do cumulative expansion if fail to select shortest version path. " - "[table=%s start=%d; end=%d]", - _table->full_name().c_str(), - delta_versions->front().first, delta_versions->back().second); + LOG(WARNING) << "can't do cumulative expansion if fail to select shortest version path." 
+ << " tablet=" << _tablet->full_name() + << ", start=" << delta_versions->front().first + << ", end=" << delta_versions->back().second; return select_status; } @@ -333,10 +340,9 @@ OLAPStatus CumulativeCompaction::_get_delta_versions(Versions* delta_versions) { bool CumulativeCompaction::_find_previous_version(const Version current_version, Version* previous_version) { Versions all_versions; - if (OLAP_SUCCESS != _table->select_versions_to_span(Version(0, current_version.second), - &all_versions)) { - OLAP_LOG_WARNING("fail to select shortest version path. [start=%d; end=%d]", - 0, current_version.second); + if (OLAP_SUCCESS != _tablet->capture_consistent_versions(Version(0, current_version.second), + &all_versions)) { + LOG(WARNING) << "fail to select shortest version path. start=0, end=" << current_version.second; return false; } @@ -349,12 +355,12 @@ bool CumulativeCompaction::_find_previous_version(const Version current_version, } if (version->second == current_version.first - 1) { - if (_table->is_delete_data_version(*version) - || _table->is_load_delete_version(*version)) { + if (_tablet->version_for_delete_predicate(*version) + || _tablet->version_for_load_deletion(*version)) { return false; } - size_t data_size = _table->get_version_data_size(*version); + size_t data_size = _tablet->get_rowset_size_by_version(*version); if (data_size >= _max_delta_file_size) { return false; } @@ -370,65 +376,69 @@ bool CumulativeCompaction::_find_previous_version(const Version current_version, OLAPStatus CumulativeCompaction::_do_cumulative_compaction() { OLAPStatus res = OLAP_SUCCESS; OlapStopWatch watch; - Merger merger(_table, _new_segment_group, READER_CUMULATIVE_COMPACTION); + Merger merger(_tablet, _rs_writer, READER_CUMULATIVE_COMPACTION); // 1. merge delta files into new cumulative file uint64_t merged_rows = 0; uint64_t filted_rows = 0; - res = merger.merge(_data_source, &merged_rows, &filted_rows); + res = merger.merge(_rs_readers, &merged_rows, &filted_rows); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to do cumulative merge. [table=%s; cumulative_version=%d-%d]", - _table->full_name().c_str(), - _cumulative_version.first, - _cumulative_version.second); + LOG(WARNING) << "failed to do cumulative merge." + << " tablet=" << _tablet->full_name() + << ", cumulative_version=" << _cumulative_version.first + << "-" << _cumulative_version.second; return res; } - // 2. load new cumulative file - res = _new_segment_group->load(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to load cumulative index. [table=%s; cumulative_version=%d-%d]", - _table->full_name().c_str(), - _cumulative_version.first, - _cumulative_version.second); - return res; + _rowset = _rs_writer->build(); + if (_rowset == nullptr) { + LOG(WARNING) << "rowset writer build failed. writer version:" + << _rs_writer->version().first << "-" << _rs_writer->version().second; + return OLAP_ERR_MALLOC_ERROR; } - // Check row num changes + // 2. Check row num changes uint64_t source_rows = 0; - for (ColumnData* i_data : _data_source) { - source_rows += i_data->segment_group()->num_rows(); + for (auto rowset : _rowsets) { + source_rows += rowset->num_rows(); } bool row_nums_check = config::row_nums_check; if (row_nums_check) { - if (source_rows != _new_segment_group->num_rows() + merged_rows + filted_rows) { + if (source_rows != _rowset->num_rows() + merged_rows + filted_rows) { LOG(FATAL) << "fail to check row num! 
" << "source_rows=" << source_rows << ", merged_rows=" << merged_rows << ", filted_rows=" << filted_rows - << ", new_index_rows=" << _new_segment_group->num_rows(); + << ", new_index_rows=" << _rowset->num_rows(); return OLAP_ERR_CHECK_LINES_ERROR; } } else { LOG(INFO) << "all row nums. source_rows=" << source_rows << ", merged_rows=" << merged_rows << ", filted_rows=" << filted_rows - << ", new_index_rows=" << _new_segment_group->num_rows() + << ", new_index_rows=" << _rowset->num_rows() << ", merged_version_num=" << _need_merged_versions.size() << ", time_us=" << watch.get_elapse_time_us(); } - // 3. add new cumulative file into table - vector unused_indices; - _obtain_header_wrlock(); - res = _update_header(&unused_indices); + // 3. add new cumulative file into tablet + vector unused_rowsets; + _tablet->obtain_header_wrlock(); + res = _tablet->capture_consistent_rowsets(_need_merged_versions, &unused_rowsets); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to update header for new cumulative." - "[table=%s; cumulative_version=%d-%d]", - _table->full_name().c_str(), - _cumulative_version.first, - _cumulative_version.second); - _release_header_lock(); + LOG(WARNING) << "fail to capture consistent rowsets. tablet=" << _tablet->full_name() + << ", version=" << _cumulative_version.first + << "-" << _cumulative_version.second; + _tablet->release_header_lock(); + return res; + } + res = _update_header(unused_rowsets); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to update header for new cumulative." + << "tablet=" << _tablet->full_name() + << ", cumulative_version=" << _cumulative_version.first + << "-" << _cumulative_version.second; + _tablet->release_header_lock(); return res; } @@ -436,61 +446,58 @@ OLAPStatus CumulativeCompaction::_do_cumulative_compaction() { res = _validate_delete_file_action(); if (res != OLAP_SUCCESS) { LOG(FATAL) << "delete action of cumulative compaction has error. roll back." - << "tablet=" << _table->full_name() - << ", cumulative_version=" << _cumulative_version.first + << "tablet=" << _tablet->full_name() + << ", cumulative_version=" << _cumulative_version.first << "-" << _cumulative_version.second; // if error happened, roll back - OLAPStatus ret = _roll_back(unused_indices); + OLAPStatus ret = _roll_back(unused_rowsets); if (ret != OLAP_SUCCESS) { - LOG(FATAL) << "roll back failed. [table=" << _table->full_name() << "]"; + LOG(FATAL) << "roll back failed. [tablet=" << _tablet->full_name() << "]"; } - _release_header_lock(); + _tablet->release_header_lock(); return res; } // 5. 如果合并成功,设置新的cumulative_layer_point - _table->set_cumulative_layer_point(_new_cumulative_layer_point); - _table->save_header(); - _release_header_lock(); + _tablet->set_cumulative_layer_point(_new_cumulative_layer_point); + _tablet->save_meta(); + _tablet->release_header_lock(); // 6. delete delta files which have been merged into new cumulative file - _delete_unused_delta_files(&unused_indices); + _delete_unused_rowsets(&unused_rowsets); - LOG(INFO) << "succeed to do cumulative compaction. 
tablet=" << _table->full_name() - << ", cumulative_version=" << _cumulative_version.first << "-" - << _cumulative_version.second; return res; } -OLAPStatus CumulativeCompaction::_update_header(vector* unused_indices) { - vector new_indices; - new_indices.push_back(_new_segment_group); +OLAPStatus CumulativeCompaction::_update_header(const vector& unused_rowsets) { + vector new_rowsets; + new_rowsets.push_back(_rowset); OLAPStatus res = OLAP_SUCCESS; - res = _table->replace_data_sources(&_need_merged_versions, &new_indices, unused_indices); + res = _tablet->modify_rowsets(new_rowsets, unused_rowsets); if (res != OLAP_SUCCESS) { LOG(FATAL) << "failed to replace data sources. res=" << res - << ", tablet=" << _table->full_name(); + << ", tablet=" << _tablet->full_name(); return res; } - res = _table->save_header(); + res = _tablet->save_meta(); if (res != OLAP_SUCCESS) { LOG(FATAL) << "failed to save header. res=" << res - << ", tablet=" << _table->full_name(); + << ", tablet=" << _tablet->full_name(); return res; } return res; } -void CumulativeCompaction::_delete_unused_delta_files(vector* unused_indices) { - if (!unused_indices->empty()) { - OLAPEngine* unused_index = OLAPEngine::get_instance(); +void CumulativeCompaction::_delete_unused_rowsets(vector* unused_rowsets) { + if (!unused_rowsets->empty()) { + StorageEngine* storage_engine = StorageEngine::instance(); - for (vector::iterator it = unused_indices->begin(); - it != unused_indices->end(); ++it) { - unused_index->add_unused_index(*it); + for (vector::iterator it = unused_rowsets->begin(); + it != unused_rowsets->end(); ++it) { + storage_engine->add_unused_rowset(*it); } } } @@ -502,10 +509,11 @@ bool CumulativeCompaction::_validate_need_merged_versions() { Version previous_version = _need_merged_versions[index - 1]; Version current_version = _need_merged_versions[index]; if (current_version.first != previous_version.second + 1) { - OLAP_LOG_WARNING("wrong need merged version. " - "previous_version=%d-%d; current_version=%d-%d", - previous_version.first, previous_version.second, - current_version.first, current_version.second); + LOG(WARNING) << "wrong need merged version. " + << "previous_version=" << previous_version.first + << "-" << previous_version.second + << ", current_version=" << current_version.first + << "-" << current_version.second; return false; } } @@ -515,40 +523,45 @@ bool CumulativeCompaction::_validate_need_merged_versions() { OLAPStatus CumulativeCompaction::_validate_delete_file_action() { // 1. acquire the new cumulative version to make sure that all is right after deleting files - Version test_version = Version(0, _cumulative_version.second); - vector test_sources; - _table->acquire_data_sources(test_version, &test_sources); - if (test_sources.size() == 0) { - OLAP_LOG_WARNING("acquire data source failed. [test_verison=%d-%d]", - test_version.first, test_version.second); + Version spec_version = Version(0, _cumulative_version.second); + vector rs_readers; + _tablet->capture_rs_readers(spec_version, &rs_readers); + if (rs_readers.empty()) { + LOG(WARNING) << "acquire data source failed. 
" + << "spec_verison=" << spec_version.first << "-" << spec_version.second; return OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION; } - _table->release_data_sources(&test_sources); return OLAP_SUCCESS; } -OLAPStatus CumulativeCompaction::_roll_back(const vector& old_olap_indices) { - vector need_remove_version; - need_remove_version.push_back(_cumulative_version); - // unused_indices will only contain new cumulative index +OLAPStatus CumulativeCompaction::_roll_back(vector& old_olap_indices) { + vector unused_rowsets; + OLAPStatus res = _tablet->capture_consistent_rowsets(_cumulative_version, &unused_rowsets); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to capture consistent rowsets. tablet=" << _tablet->full_name() + << ", version=" << _cumulative_version.first + << "-" << _cumulative_version.second; + return res; + } + + // unused_rowsets will only contain new cumulative index // we don't need to delete it here; we will delete new cumulative index in the end. - vector unused_indices; - OLAPStatus res = OLAP_SUCCESS; - res = _table->replace_data_sources(&need_remove_version, &old_olap_indices, &unused_indices); + res = OLAP_SUCCESS; + res = _tablet->modify_rowsets(old_olap_indices, unused_rowsets); if (res != OLAP_SUCCESS) { - LOG(FATAL) << "failed to replace data sources. [table=" << _table->full_name() << "]"; + LOG(FATAL) << "failed to replace data sources. [tablet=" << _tablet->full_name() << "]"; return res; } - res = _table->save_header(); + res = _tablet->save_meta(); if (res != OLAP_SUCCESS) { - LOG(FATAL) << "failed to save header. [table=" << _table->full_name() << "]"; + LOG(FATAL) << "failed to save header. [tablet=" << _tablet->full_name() << "]"; return res; } - return res; + return res; } } // namespace doris diff --git a/be/src/olap/cumulative_compaction.h b/be/src/olap/cumulative_compaction.h index c9a923e017185f..646deacb6b6f1e 100755 --- a/be/src/olap/cumulative_compaction.h +++ b/be/src/olap/cumulative_compaction.h @@ -24,23 +24,24 @@ #include #include "olap/merger.h" -#include "olap/column_data.h" #include "olap/olap_define.h" -#include "olap/olap_table.h" +#include "olap/tablet.h" +#include "olap/rowset/rowset_id_generator.h" +#include "olap/rowset/alpha_rowset_writer.h" namespace doris { -class SegmentGroup; +class Rowset; class CumulativeCompaction { public: CumulativeCompaction() : _is_init(false), - _header_locked(false), _old_cumulative_layer_point(0), _new_cumulative_layer_point(0), _max_delta_file_size(0), - _new_segment_group(NULL) {} + _rowset(nullptr), + _rs_writer(nullptr) {} ~CumulativeCompaction() {} @@ -49,12 +50,12 @@ class CumulativeCompaction { // - 计算可合并的delta文件 // // 输入参数: - // - table 待执行cumulative compaction的olap table + // - tablet 待执行cumulative compaction的tablet // // 返回值: // - 如果触发cumulative compaction,返回OLAP_SUCCESS // - 否则,返回对应错误码 - OLAPStatus init(OLAPTablePtr table); + OLAPStatus init(TabletSharedPtr tablet); // 执行cumulative compaction // @@ -102,21 +103,21 @@ class CumulativeCompaction { // - 如果不成功,返回相应错误码 OLAPStatus _do_cumulative_compaction(); - // 将合并得到的新cumulative文件载入table + // 将合并得到的新cumulative文件载入tablet // // 输出参数: - // - unused_indices: 返回不再使用的delta文件对应的olap index + // - unused_rowsets: 返回不再使用的delta文件对应的olap index // // 返回值: // - 如果成功,返回OLAP_SUCCESS // - 如果不成功,返回相应错误码 - OLAPStatus _update_header(std::vector* unused_indices); + OLAPStatus _update_header(const std::vector& unused_rowsets); // 删除不再使用的delta文件 // // 输入输出参数 - // - unused_indices: 待删除的不再使用的delta文件对应的olap index - void _delete_unused_delta_files(std::vector* 
unused_indices); + // - unused_rowsets: 待删除的不再使用的delta文件对应的olap index + void _delete_unused_rowsets(std::vector* unused_rowsets); // 验证得到的m_need_merged_versions是否正确 // @@ -133,46 +134,29 @@ class CumulativeCompaction { OLAPStatus _validate_delete_file_action(); // 恢复header头文件的文件版本和table的data source - OLAPStatus _roll_back(const std::vector& old_olap_indices); - - void _obtain_header_rdlock() { - _table->obtain_header_rdlock(); - _header_locked = true; - } - - void _obtain_header_wrlock() { - _table->obtain_header_wrlock(); - _header_locked = true; - } - - void _release_header_lock() { - if (_header_locked) { - _table->release_header_lock(); - _header_locked = false; - } - } + OLAPStatus _roll_back(std::vector& old_olap_indices); // CumulativeCompaction对象是否初始化 bool _is_init; - // header文件是否加锁 - bool _header_locked; // table现有的cumulative层的标识点 - int32_t _old_cumulative_layer_point; + int64_t _old_cumulative_layer_point; // 待cumulative compaction完成之后,新的cumulative层的标识点 - int32_t _new_cumulative_layer_point; + int64_t _new_cumulative_layer_point; // 一个cumulative文件大小的最大值 // 当delta文件的大小超过该值时,我们认为该delta文件是cumulative文件 size_t _max_delta_file_size; - // 待执行cumulative compaction的olap table - OLAPTablePtr _table; + // 待执行cumulative compaction的tablet + TabletSharedPtr _tablet; // 新cumulative文件的版本 Version _cumulative_version; // 新cumulative文件的version hash VersionHash _cumulative_version_hash; // 新cumulative文件对应的olap index - SegmentGroup* _new_segment_group; + RowsetSharedPtr _rowset; + RowsetWriterSharedPtr _rs_writer; // 可合并的delta文件的data文件 - std::vector _data_source; + std::vector _rowsets; + std::vector _rs_readers; // 可合并的delta文件的版本 std::vector _need_merged_versions; diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp new file mode 100755 index 00000000000000..ec94e13cb09451 --- /dev/null +++ b/be/src/olap/data_dir.cpp @@ -0,0 +1,1060 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
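+// DataDir represents a single storage root path configured on this BE.
+// It owns the cluster id file, the detected storage medium and capacity,
+// the RocksDB-backed OlapMeta, shard allocation, and the set of tablets
+// registered on this path. init() validates the path, reads or creates the
+// cluster id, and opens the meta store; load() first converts old-format
+// tablet headers when needed, then replays rowset and tablet metas:
+// COMMITTED rowsets are handed to the TxnManager, VISIBLE rowsets are
+// re-added to their tablets.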
+ +#include "olap/data_dir.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "olap/file_helper.h" +#include "olap/olap_define.h" +#include "olap/olap_snapshot_converter.h" +#include "olap/utils.h" // for check_dir_existed +#include "service/backend_options.h" +#include "util/file_utils.h" +#include "util/string_util.h" +#include "olap/tablet_meta_manager.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset_factory.h" + +namespace doris { + +static const char* const kMtabPath = "/etc/mtab"; +static const char* const kTestFilePath = "/.testfile"; + +DataDir::DataDir(const std::string& path, int64_t capacity_bytes, + TabletManager* tablet_manager, TxnManager* txn_manager) + : _path(path), + _capacity_bytes(capacity_bytes), + _tablet_manager(tablet_manager), + _txn_manager(txn_manager), + _cluster_id(-1), + _available_bytes(0), + _used_bytes(0), + _current_shard(0), + _is_used(false), + _to_be_deleted(false), + _test_file_read_buf(nullptr), + _test_file_write_buf(nullptr), + _meta(nullptr) { +} + +DataDir::~DataDir() { + free(_test_file_read_buf); + free(_test_file_write_buf); + delete _id_generator; + delete _meta; +} + +Status DataDir::init() { + _rand_seed = static_cast(time(NULL)); + if (posix_memalign((void**)&_test_file_write_buf, + DIRECT_IO_ALIGNMENT, + TEST_FILE_BUF_SIZE) != 0) { + LOG(WARNING) << "fail to allocate memory. size=" << TEST_FILE_BUF_SIZE; + return Status::InternalError("No memory"); + } + if (posix_memalign((void**)&_test_file_read_buf, + DIRECT_IO_ALIGNMENT, + TEST_FILE_BUF_SIZE) != 0) { + LOG(WARNING) << "fail to allocate memory. size=" << TEST_FILE_BUF_SIZE; + return Status::InternalError("No memory"); + } + RETURN_IF_ERROR(_check_path_exist()); + std::string align_tag_path = _path + ALIGN_TAG_PREFIX; + if (access(align_tag_path.c_str(), F_OK) == 0) { + LOG(WARNING) << "align tag was found, path=" << _path; + return Status::InternalError("invalid root path: "); + } + + RETURN_IF_ERROR(_init_cluster_id()); + RETURN_IF_ERROR(_init_extension_and_capacity()); + RETURN_IF_ERROR(_init_file_system()); + RETURN_IF_ERROR(_init_meta()); + + _id_generator = new RowsetIdGenerator(_meta); + auto res = _id_generator->init(); + if (res != OLAP_SUCCESS) { + return Status::InternalError("Id generator initialized failed."); + } + + _is_used = true; + return Status::OK(); +} + +Status DataDir::_check_path_exist() { + DIR* dirp = opendir(_path.c_str()); + if (dirp == nullptr) { + char buf[64]; + LOG(WARNING) << "opendir failed, path=" << _path + << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, 64); + return Status::InternalError("opendir failed"); + } + struct dirent dirent; + struct dirent* result = nullptr; + if (readdir_r(dirp, &dirent, &result) != 0) { + char buf[64]; + LOG(WARNING) << "readdir failed, path=" << _path + << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, 64); + closedir(dirp); + return Status::InternalError("readdir failed"); + } + // opendir and closedir should be called both or not. 
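+    // The entry returned by readdir_r is discarded; the call above only
+    // verifies that the directory is readable before the path is accepted.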
+ closedir(dirp); + return Status::OK(); +} + +Status DataDir::_init_cluster_id() { + std::string cluster_id_path = _path + CLUSTER_ID_PREFIX; + if (access(cluster_id_path.c_str(), F_OK) != 0) { + int fd = open(cluster_id_path.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + if (fd < 0 || close(fd) < 0) { + char errmsg[64]; + LOG(WARNING) << "fail to create file. [path='" << cluster_id_path + << "' err='" << strerror_r(errno, errmsg, 64) << "']"; + return Status::InternalError("invalid store path: create cluster id failed"); + } + } + + // obtain lock of all cluster id paths + FILE* fp = NULL; + fp = fopen(cluster_id_path.c_str(), "r+b"); + if (fp == NULL) { + LOG(WARNING) << "fail to open cluster id path. path=" << cluster_id_path; + return Status::InternalError("invalid store path: open cluster id failed"); + } + + int lock_res = flock(fp->_fileno, LOCK_EX | LOCK_NB); + if (lock_res < 0) { + LOG(WARNING) << "fail to lock file descriptor. path=" << cluster_id_path; + fclose(fp); + fp = NULL; + return Status::InternalError("invalid store path: flock cluster id failed"); + } + + // obtain cluster id of all root paths + auto st = _read_cluster_id(cluster_id_path, &_cluster_id); + fclose(fp); + return st; +} + +Status DataDir::_read_cluster_id(const std::string& path, int32_t* cluster_id) { + int32_t tmp_cluster_id = -1; + + std::fstream fs(path.c_str(), std::fstream::in); + if (!fs.is_open()) { + LOG(WARNING) << "fail to open cluster id path. [path='" << path << "']"; + return Status::InternalError("open file failed"); + } + + fs >> tmp_cluster_id; + fs.close(); + + if (tmp_cluster_id == -1 && (fs.rdstate() & std::fstream::eofbit) != 0) { + *cluster_id = -1; + } else if (tmp_cluster_id >= 0 && (fs.rdstate() & std::fstream::eofbit) != 0) { + *cluster_id = tmp_cluster_id; + } else { + OLAP_LOG_WARNING("fail to read cluster id from file. " + "[id=%d eofbit=%d failbit=%d badbit=%d]", + tmp_cluster_id, + fs.rdstate() & std::fstream::eofbit, + fs.rdstate() & std::fstream::failbit, + fs.rdstate() & std::fstream::badbit); + return Status::InternalError("cluster id file corrupt"); + } + return Status::OK(); +} + +Status DataDir::_init_extension_and_capacity() { + boost::filesystem::path boost_path = _path; + std::string extension = boost::filesystem::canonical(boost_path).extension().string(); + if (extension != "") { + if (boost::iequals(extension, ".ssd")) { + _storage_medium = TStorageMedium::SSD; + } else if (boost::iequals(extension, ".hdd")) { + _storage_medium = TStorageMedium::HDD; + } else { + LOG(WARNING) << "store path has wrong extension. path=" << _path; + return Status::InternalError("invalid sotre path: invalid extension"); + } + } else { + _storage_medium = TStorageMedium::HDD; + } + + int64_t disk_capacity = boost::filesystem::space(boost_path).capacity; + if (_capacity_bytes == -1) { + _capacity_bytes = disk_capacity; + } else if (_capacity_bytes > disk_capacity) { + LOG(WARNING) << "root path capacity should not larger than disk capacity. " + << "path=" << _path + << ", capacity_bytes=" << _capacity_bytes + << ", disk_capacity=" << disk_capacity; + return Status::InternalError("invalid store path: invalid capacity"); + } + + std::string data_path = _path + DATA_PREFIX; + if (!check_dir_existed(data_path) && create_dir(data_path) != OLAP_SUCCESS) { + LOG(WARNING) << "failed to create data root path. 
path=" << data_path; + return Status::InternalError("invalid store path: failed to create data directory"); + } + + return Status::OK(); +} + +Status DataDir::_init_file_system() { + struct stat s; + if (stat(_path.c_str(), &s) != 0) { + char errmsg[64]; + LOG(WARNING) << "stat failed, path=" << _path + << ", errno=" << errno << ", errmsg=" << strerror_r(errno, errmsg, 64); + return Status::InternalError("invalid store path: stat failed"); + } + + dev_t mount_device; + if ((s.st_mode & S_IFMT) == S_IFBLK) { + mount_device = s.st_rdev; + } else { + mount_device = s.st_dev; + } + + FILE* mount_tablet = nullptr; + if ((mount_tablet = setmntent(kMtabPath, "r")) == NULL) { + char errmsg[64]; + LOG(WARNING) << "setmntent failed, path=" << kMtabPath + << ", errno=" << errno << ", errmsg=" << strerror_r(errno, errmsg, 64); + return Status::InternalError("invalid store path: setmntent failed"); + } + + bool is_find = false; + struct mntent* mount_entry = NULL; + while ((mount_entry = getmntent(mount_tablet)) != NULL) { + if (strcmp(_path.c_str(), mount_entry->mnt_dir) == 0 + || strcmp(_path.c_str(), mount_entry->mnt_fsname) == 0) { + is_find = true; + break; + } + + if (stat(mount_entry->mnt_fsname, &s) == 0 && s.st_rdev == mount_device) { + is_find = true; + break; + } + + if (stat(mount_entry->mnt_dir, &s) == 0 && s.st_dev == mount_device) { + is_find = true; + break; + } + } + + endmntent(mount_tablet); + + if (!is_find) { + LOG(WARNING) << "fail to find file system, path=" << _path; + return Status::InternalError("invalid store path: find file system failed"); + } + + _file_system = mount_entry->mnt_fsname; + + return Status::OK(); +} + +Status DataDir::_init_meta() { + // init path hash + _path_hash = hash_of_path(BackendOptions::get_localhost(), _path); + LOG(INFO) << "path: " << _path << ", hash: " << _path_hash; + + // init meta + _meta = new(std::nothrow) OlapMeta(_path); + if (_meta == nullptr) { + LOG(WARNING) << "new olap meta failed"; + return Status::InternalError("new olap meta failed"); + } + OLAPStatus res = _meta->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "init meta failed"; + return Status::InternalError("init meta failed"); + } + return Status::OK(); +} + +Status DataDir::set_cluster_id(int32_t cluster_id) { + if (_cluster_id != -1) { + if (_cluster_id == cluster_id) { + return Status::OK(); + } + LOG(ERROR) << "going to set cluster id to already assigned store, cluster_id=" + << _cluster_id << ", new_cluster_id=" << cluster_id; + return Status::InternalError("going to set cluster id to already assigned store"); + } + return _write_cluster_id_to_path(_cluster_id_path(), cluster_id); +} + +Status DataDir::_write_cluster_id_to_path(const std::string& path, int32_t cluster_id) { + std::fstream fs(path.c_str(), std::fstream::out); + if (!fs.is_open()) { + LOG(WARNING) << "fail to open cluster id path. path=" << path; + return Status::InternalError("IO Error"); + } + fs << cluster_id; + fs.close(); + return Status::OK(); +} + +void DataDir::health_check() { + // check disk + if (_is_used) { + OLAPStatus res = OLAP_SUCCESS; + if ((res = _read_and_write_test_file()) != OLAP_SUCCESS) { + LOG(WARNING) << "store read/write test file occur IO Error. path=" << _path; + if (is_io_error(res)) { + _is_used = false; + } + } + } +} + +OLAPStatus DataDir::_read_and_write_test_file() { + std::string test_file = _path + kTestFilePath; + + if (access(test_file.c_str(), F_OK) == 0) { + if (remove(test_file.c_str()) != 0) { + char errmsg[64]; + LOG(WARNING) << "fail to delete test file. 
" + << "path=" << test_file + << ", errno=" << errno << ", err=" << strerror_r(errno, errmsg, 64); + return OLAP_ERR_IO_ERROR; + } + } else { + if (errno != ENOENT) { + char errmsg[64]; + LOG(WARNING) << "fail to access test file. " + << "path=" << test_file + << ", errno=" << errno << ", err=" << strerror_r(errno, errmsg, 64); + return OLAP_ERR_IO_ERROR; + } + } + + OLAPStatus res = OLAP_SUCCESS; + FileHandler file_handler; + if ((res = file_handler.open_with_mode(test_file.c_str(), + O_RDWR | O_CREAT | O_DIRECT, + S_IRUSR | S_IWUSR)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create test file. path=" << test_file; + return res; + } + + for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) { + int32_t tmp_value = rand_r(&_rand_seed); + _test_file_write_buf[i] = static_cast(tmp_value); + } + + if ((res = file_handler.pwrite(_test_file_write_buf, TEST_FILE_BUF_SIZE, SEEK_SET)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to write test file. [file_name=" << test_file << "]"; + return res; + } + + if ((res = file_handler.pread(_test_file_read_buf, TEST_FILE_BUF_SIZE, SEEK_SET)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to read test file. [file_name=" << test_file << "]"; + return res; + } + + if (memcmp(_test_file_write_buf, _test_file_read_buf, TEST_FILE_BUF_SIZE) != 0) { + OLAP_LOG_WARNING("the test file write_buf and read_buf not equal."); + return OLAP_ERR_TEST_FILE_ERROR; + } + + if ((res = file_handler.close()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to close test file. [file_name=" << test_file << "]"; + return res; + } + + if (remove(test_file.c_str()) != 0) { + char errmsg[64]; + VLOG(3) << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64) + << "' path='" << test_file << "']"; + return OLAP_ERR_IO_ERROR; + } + + return res; +} + +OLAPStatus DataDir::get_shard(uint64_t* shard) { + OLAPStatus res = OLAP_SUCCESS; + std::lock_guard l(_mutex); + + std::stringstream shard_path_stream; + uint32_t next_shard = _current_shard; + _current_shard = (_current_shard + 1) % MAX_SHARD_NUM; + shard_path_stream << _path << DATA_PREFIX << "/" << next_shard; + std::string shard_path = shard_path_stream.str(); + if (!check_dir_existed(shard_path)) { + res = create_dir(shard_path); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create path. 
[path='" << shard_path << "']"; + return res; + } + } + + *shard = next_shard; + return OLAP_SUCCESS; +} + +OLAPStatus DataDir::register_tablet(Tablet* tablet) { + std::lock_guard l(_mutex); + + TabletInfo tablet_info(tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid()); + _tablet_set.insert(tablet_info); + return OLAP_SUCCESS; +} + +OLAPStatus DataDir::deregister_tablet(Tablet* tablet) { + std::lock_guard l(_mutex); + + TabletInfo tablet_info(tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid()); + _tablet_set.erase(tablet_info); + return OLAP_SUCCESS; +} + +void DataDir::clear_tablets(std::vector* tablet_infos) { + for (auto& tablet : _tablet_set) { + tablet_infos->push_back(tablet); + } + _tablet_set.clear(); +} + +std::string DataDir::get_absolute_shard_path(const std::string& shard_string) { + return _path + DATA_PREFIX + "/" + shard_string; +} + +std::string DataDir::get_absolute_tablet_path(TabletMeta* tablet_meta, bool with_schema_hash) { + if (with_schema_hash) { + return _path + DATA_PREFIX + "/" + std::to_string(tablet_meta->shard_id()) + + "/" + std::to_string(tablet_meta->tablet_id()) + "/" + std::to_string(tablet_meta->schema_hash()); + + } else { + return _path + DATA_PREFIX + "/" + std::to_string(tablet_meta->shard_id()) + + "/" + std::to_string(tablet_meta->tablet_id()); + } +} + +std::string DataDir::get_absolute_tablet_path(TabletMetaPB* tablet_meta, bool with_schema_hash) { + if (with_schema_hash) { + return _path + DATA_PREFIX + "/" + std::to_string(tablet_meta->shard_id()) + + "/" + std::to_string(tablet_meta->tablet_id()) + + "/" + std::to_string(tablet_meta->schema_hash()); + + } else { + return _path + DATA_PREFIX + "/" + std::to_string(tablet_meta->shard_id()) + + "/" + std::to_string(tablet_meta->tablet_id()); + } +} + +std::string DataDir::get_absolute_tablet_path(OLAPHeaderMessage& olap_header_msg, bool with_schema_hash) { + if (with_schema_hash) { + return _path + DATA_PREFIX + "/" + std::to_string(olap_header_msg.shard()) + + "/" + std::to_string(olap_header_msg.tablet_id()) + "/" + std::to_string(olap_header_msg.schema_hash()); + + } else { + return _path + DATA_PREFIX + "/" + std::to_string(olap_header_msg.shard()) + + "/" + std::to_string(olap_header_msg.tablet_id()); + } +} + +void DataDir::find_tablet_in_trash(int64_t tablet_id, std::vector* paths) { + // path: /root_path/trash/time_label/tablet_id/schema_hash + std::string trash_path = _path + TRASH_PREFIX; + std::vector sub_dirs; + FileUtils::scan_dir(trash_path, &sub_dirs); + for (auto& sub_dir : sub_dirs) { + // sub dir is time_label + std::string sub_path = trash_path + "/" + sub_dir; + if (!FileUtils::is_dir(sub_path)) { + continue; + } + std::string tablet_path = sub_path + "/" + std::to_string(tablet_id); + bool exist = FileUtils::check_exist(tablet_path); + if (exist) { + paths->emplace_back(std::move(tablet_path)); + } + } +} + +std::string DataDir::get_root_path_from_schema_hash_path_in_trash( + const std::string& schema_hash_dir_in_trash) { + boost::filesystem::path schema_hash_path_in_trash(schema_hash_dir_in_trash); + return schema_hash_path_in_trash.parent_path().parent_path().parent_path().parent_path().string(); +} + +OLAPStatus DataDir::_clean_unfinished_converting_data() { + auto clean_unifinished_tablet_meta_func = [this](int64_t tablet_id, + int32_t schema_hash, const std::string& value) -> bool { + TabletMetaManager::remove(this, tablet_id, schema_hash, HEADER_PREFIX); + LOG(INFO) << "successfully clean temp tablet meta for tablet=" + << tablet_id << "." 
<< schema_hash + << "from data dir: " << _path; + return true; + }; + OLAPStatus clean_unfinished_meta_status = TabletMetaManager::traverse_headers(_meta, + clean_unifinished_tablet_meta_func, HEADER_PREFIX); + if (clean_unfinished_meta_status != OLAP_SUCCESS) { + // If failed to clean meta just skip the error, there will be useless metas in rocksdb column family + LOG(WARNING) << "there is failure when clean temp tablet meta from data dir=" << _path; + } else { + LOG(INFO) << "successfully clean temp tablet meta from data dir=" << _path; + } + auto clean_unifinished_rowset_meta_func = [this](TabletUid tablet_uid, RowsetId rowset_id, const std::string& value) -> bool { + RowsetMetaManager::remove(_meta, tablet_uid, rowset_id); + LOG(INFO) << "successfully clean temp rowset meta for rowset_id=" + << rowset_id << " from data dir=" << _path; + return true; + }; + OLAPStatus clean_unfinished_rowset_meta_status = RowsetMetaManager::traverse_rowset_metas(_meta, + clean_unifinished_rowset_meta_func); + if (clean_unfinished_rowset_meta_status != OLAP_SUCCESS) { + // If failed to clean meta just skip the error, there will be useless metas in rocksdb column family + LOG(FATAL) << "fail to clean temp rowset meta from data dir=" << _path; + } else { + LOG(INFO) << "success to clean temp rowset meta from data dir=" << _path; + } + return OLAP_SUCCESS; +} + +// convert old tablet and its files to new tablet meta and rowset format +// if any error occurred during converting, stop it and break. +OLAPStatus DataDir::_convert_old_tablet() { + auto convert_tablet_func = [this](int64_t tablet_id, + int32_t schema_hash, const std::string& value) -> bool { + OlapSnapshotConverter converter; + // convert olap header and files + OLAPHeaderMessage olap_header_msg; + TabletMetaPB tablet_meta_pb; + vector pending_rowsets; + bool parsed = olap_header_msg.ParseFromString(value); + if (!parsed) { + LOG(FATAL) << "convert olap header to tablet meta failed when load olap header tablet=" + << tablet_id << "." << schema_hash; + return false; + } + string old_data_path_prefix = get_absolute_tablet_path(olap_header_msg, true); + OLAPStatus status = converter.to_new_snapshot(olap_header_msg, old_data_path_prefix, + old_data_path_prefix, *this, &tablet_meta_pb, &pending_rowsets, true); + if (status != OLAP_SUCCESS) { + LOG(FATAL) << "convert olap header to tablet meta failed when convert header and files tablet=" + << tablet_id << "." << schema_hash; + return false; + } + + // write pending rowset to olap meta + for (auto& rowset_pb : pending_rowsets) { + string meta_binary; + rowset_pb.SerializeToString(&meta_binary); + status = RowsetMetaManager::save(_meta, rowset_pb.tablet_uid(), rowset_pb.rowset_id() , meta_binary); + if (status != OLAP_SUCCESS) { + LOG(FATAL) << "convert olap header to tablet meta failed when save rowset meta tablet=" + << tablet_id << "." << schema_hash; + return false; + } + } + + // write converted tablet meta to olap meta + string meta_binary; + tablet_meta_pb.SerializeToString(&meta_binary); + status = TabletMetaManager::save(this, tablet_meta_pb.tablet_id(), tablet_meta_pb.schema_hash(), meta_binary); + if (status != OLAP_SUCCESS) { + LOG(FATAL) << "convert olap header to tablet meta failed when save tablet meta tablet=" + << tablet_id << "." << schema_hash; + return false; + } else { + LOG(INFO) << "convert olap header to tablet meta successfully and save tablet meta to meta tablet=" + << tablet_id << "." 
<< schema_hash; + } + return true; + }; + OLAPStatus convert_tablet_status = TabletMetaManager::traverse_headers(_meta, + convert_tablet_func, OLD_HEADER_PREFIX); + if (convert_tablet_status != OLAP_SUCCESS) { + LOG(FATAL) << "there is failure when convert old tablet, data dir:" << _path; + return convert_tablet_status; + } else { + LOG(INFO) << "successfully convert old tablet, data dir: " << _path; + } + return OLAP_SUCCESS; +} + +OLAPStatus DataDir::remove_old_meta_and_files() { + // clean old meta(olap header message) + auto clean_old_meta_files_func = [this](int64_t tablet_id, + int32_t schema_hash, const std::string& value) -> bool { + // convert olap header and files + OLAPHeaderMessage olap_header_msg; + TabletMetaPB tablet_meta_pb; + vector pending_rowsets; + bool parsed = olap_header_msg.ParseFromString(value); + if (!parsed) { + LOG(FATAL) << "convert olap header to tablet meta failed when load olap header tablet=" + << tablet_id << "." << schema_hash; + return true; + } + OlapSnapshotConverter converter; + OLAPStatus status = converter.to_tablet_meta_pb(olap_header_msg, &tablet_meta_pb, &pending_rowsets); + if (status != OLAP_SUCCESS) { + LOG(FATAL) << "convert olap header to tablet meta failed when convert header and files tablet=" + << tablet_id << "." << schema_hash; + return true; + } + + TabletSchema tablet_schema; + tablet_schema.init_from_pb(tablet_meta_pb.schema()); + string data_path_prefix = get_absolute_tablet_path(&tablet_meta_pb, true); + + // convert visible pdelta file to rowsets and remove old files + for (auto& visible_rowset : tablet_meta_pb.rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(visible_rowset); + AlphaRowset rowset(&tablet_schema, data_path_prefix, this, alpha_rowset_meta); + if (rowset.init() != OLAP_SUCCESS) { + LOG(INFO) << "errors while init rowset. tablet_path=" << data_path_prefix; + return true; + } + std::vector old_files; + if (rowset.remove_old_files(&old_files) != OLAP_SUCCESS) { + LOG(INFO) << "errors while remove_old_files. tablet_path=" << data_path_prefix; + return true; + } + } + + // remove incremental dir and pending dir + std::string pending_delta_path = data_path_prefix + PENDING_DELTA_PREFIX; + if (check_dir_existed(pending_delta_path)) { + LOG(INFO) << "remove pending delta path:" << pending_delta_path; + if(remove_all_dir(pending_delta_path) != OLAP_SUCCESS) { + LOG(INFO) << "errors while remove pending delta path. tablet_path=" << data_path_prefix; + return true; + } + } + + std::string incremental_delta_path = data_path_prefix + INCREMENTAL_DELTA_PREFIX; + if (check_dir_existed(incremental_delta_path)) { + LOG(INFO) << "remove incremental delta path:" << incremental_delta_path; + if(remove_all_dir(incremental_delta_path) != OLAP_SUCCESS) { + LOG(INFO) << "errors while remove incremental delta path. tablet_path=" << data_path_prefix; + return true; + } + } + + TabletMetaManager::remove(this, tablet_id, schema_hash, OLD_HEADER_PREFIX); + LOG(INFO) << "successfully clean old tablet meta(olap header) for tablet=" + << tablet_id << "." 
<< schema_hash + << " tablet_path=" << data_path_prefix; + + return true; + }; + OLAPStatus clean_old_meta_files_status = TabletMetaManager::traverse_headers(_meta, + clean_old_meta_files_func, OLD_HEADER_PREFIX); + if (clean_old_meta_files_status != OLAP_SUCCESS) { + // If failed to clean meta just skip the error, there will be useless metas in rocksdb column family + LOG(WARNING) << "there is failure when clean old tablet meta(olap header) from data dir:" << _path; + } else { + LOG(INFO) << "successfully clean old tablet meta(olap header) from data dir: " << _path; + } + return OLAP_SUCCESS; +} + +bool DataDir::convert_old_data_success() { + return _convert_old_data_success; +} + +OLAPStatus DataDir::set_convert_finished() { + OLAPStatus res = _meta->set_tablet_convert_finished(); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "save convert flag failed after convert old tablet. dir=" << _path; + return res; + } + return OLAP_SUCCESS; +} + +// TODO(ygl): deal with rowsets and tablets when load failed +OLAPStatus DataDir::load() { + // check if this is an old data path + bool is_tablet_convert_finished = false; + OLAPStatus res = _meta->get_tablet_convert_finished(is_tablet_convert_finished); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "get convert flag from meta failed dir=" << _path; + return res; + } + _convert_old_data_success = false; + if (!is_tablet_convert_finished) { + _clean_unfinished_converting_data(); + res = _convert_old_tablet(); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "convert old tablet failed for dir = " << _path; + return res; + } + + _convert_old_data_success = true; + } else { + LOG(INFO) << "tablets have been converted, skip convert process"; + _convert_old_data_success = true; + } + + LOG(INFO) << "start to load tablets from " << _path; + // load rowset meta from meta env and create rowset + // COMMITTED: add to txn manager + // VISIBLE: add to tablet + // if one rowset load failed, then the total data dir will not be loaded + std::vector dir_rowset_metas; + LOG(INFO) << "begin loading rowset from meta"; + auto load_rowset_func = [this, &dir_rowset_metas](TabletUid tablet_uid, RowsetId rowset_id, + const std::string& meta_str) -> bool { + + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + bool parsed = rowset_meta->init(meta_str); + if (!parsed) { + LOG(WARNING) << "parse rowset meta string failed for rowset_id:" << rowset_id; + // return false will break meta iterator, return true to skip this error + return true; + } + dir_rowset_metas.push_back(rowset_meta); + return true; + }; + OLAPStatus load_rowset_status = RowsetMetaManager::traverse_rowset_metas(_meta, load_rowset_func); + + if (load_rowset_status != OLAP_SUCCESS) { + LOG(WARNING) << "errors when load rowset meta from meta env, skip this data dir:" << _path; + } else { + LOG(INFO) << "load rowset from meta finished, data dir: " << _path; + } + + // load tablet + // create tablet from tablet meta and add it to tablet mgr + LOG(INFO) << "begin loading tablet from meta"; + std::set tablet_ids; + auto load_tablet_func = [this, &tablet_ids](int64_t tablet_id, + int32_t schema_hash, const std::string& value) -> bool { + OLAPStatus status = _tablet_manager->load_tablet_from_meta( + this, tablet_id, schema_hash, value, false, false); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "load tablet from header failed. status:" << status + << ", tablet=" << tablet_id << "." 
<< schema_hash; + } else { + tablet_ids.insert(tablet_id); + } + return true; + }; + OLAPStatus load_tablet_status = TabletMetaManager::traverse_headers(_meta, load_tablet_func); + if (load_tablet_status != OLAP_SUCCESS) { + LOG(WARNING) << "there were failures when loading tablet headers, path:" << _path; + } else { + LOG(INFO) << "load tablet from meta finished, data dir: " << _path; + } + + // traverse rowsets + // 1. add committed rowset to txn map + // 2. add visible rowset to tablet + // ignore any errors when loading tablets or rowsets, because fe will repair them after the report + for (auto rowset_meta : dir_rowset_metas) { + TabletSharedPtr tablet = _tablet_manager->get_tablet( + rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash()); + // the tablet may have been dropped without dropping its related rowset meta + if (tablet == nullptr) { + LOG(WARNING) << "could not find tablet id: " << rowset_meta->tablet_id() + << ", schema hash: " << rowset_meta->tablet_schema_hash() + << ", for rowset: " << rowset_meta->rowset_id() + << ", skip this rowset"; + continue; + } + RowsetSharedPtr rowset; + OLAPStatus create_status = RowsetFactory::load_rowset(tablet->tablet_schema(), + tablet->tablet_path(), + tablet->data_dir(), + rowset_meta, &rowset); + if (create_status != OLAP_SUCCESS) { + LOG(WARNING) << "could not create rowset from rowset meta: " + << " rowset_id: " << rowset_meta->rowset_id() + << " rowset_type: " << rowset_meta->rowset_type() + << " rowset_state: " << rowset_meta->rowset_state(); + continue; + } + if (rowset_meta->rowset_state() == RowsetStatePB::COMMITTED + && rowset_meta->tablet_uid() == tablet->tablet_uid()) { + OLAPStatus commit_txn_status = _txn_manager->commit_txn( + _meta, + rowset_meta->partition_id(), rowset_meta->txn_id(), + rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash(), + rowset_meta->tablet_uid(), rowset_meta->load_id(), rowset, true); + if (commit_txn_status != OLAP_SUCCESS && commit_txn_status != OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { + LOG(WARNING) << "failed to add committed rowset: " << rowset_meta->rowset_id() + << " to tablet: " << rowset_meta->tablet_id() + << " for txn: " << rowset_meta->txn_id(); + } else { + LOG(INFO) << "successfully added committed rowset: " << rowset_meta->rowset_id() + << " to tablet: " << rowset_meta->tablet_id() + << " schema hash: " << rowset_meta->tablet_schema_hash() + << " for txn: " << rowset_meta->txn_id(); + } + } else if (rowset_meta->rowset_state() == RowsetStatePB::VISIBLE + && rowset_meta->tablet_uid() == tablet->tablet_uid()) { + // add visible rowset to tablet, it may be used in the future + // there should be only preparing rowsets in the meta env, because a visible + // rowset is currently persisted with the tablet meta + OLAPStatus publish_status = tablet->add_inc_rowset(rowset); + if (publish_status != OLAP_SUCCESS && publish_status != OLAP_ERR_PUSH_VERSION_ALREADY_EXIST) { + LOG(WARNING) << "add visible rowset to tablet failed. rowset_id:" << rowset->rowset_id() + << " tablet id: " << rowset_meta->tablet_id() + << " txn id:" << rowset_meta->txn_id() + << " start_version: " << rowset_meta->version().first + << " end_version: " << rowset_meta->version().second; + } else { + // it has been added into the tablet meta, so remove it from the meta env + RowsetMetaManager::remove(tablet->data_dir()->get_meta(), rowset_meta->tablet_uid(), rowset->rowset_id()); + LOG(INFO) << "successfully added visible rowset: " << rowset_meta->rowset_id() + << " to tablet: " << rowset_meta->tablet_id() + << " txn id:" << rowset_meta->txn_id() + << " start_version: " << 
rowset_meta->version().first + << " end_version: " << rowset_meta->version().second; + } + } else { + LOG(WARNING) << "found invalid rowset: " << rowset_meta->rowset_id() + << " with tablet id: " << rowset_meta->tablet_id() + << " tablet uid: " << rowset_meta->tablet_uid() + << " schema hash: " << rowset_meta->tablet_schema_hash() + << " txn: " << rowset_meta->txn_id() + << " current valid tablet uid: " << tablet->tablet_uid(); + } + } + return OLAP_SUCCESS; +} + +void DataDir::add_pending_ids(const std::string& id) { + WriteLock wr_lock(&_pending_path_mutex); + _pending_path_ids.insert(id); +} + +void DataDir::remove_pending_ids(const std::string& id) { + WriteLock wr_lock(&_pending_path_mutex); + _pending_path_ids.erase(id); +} + +// path consumer +void DataDir::perform_path_gc() { + // initialize the set of valid paths + // validate the paths in the data dir + std::unique_lock<std::mutex> lck(_check_path_mutex); + cv.wait(lck, [this]{return _all_check_paths.size() > 0;}); + LOG(INFO) << "start to do path gc."; + int counter = 0; + for (auto& path : _all_check_paths) { + ++counter; + if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { + usleep(config::path_gc_check_step_interval_ms * 1000); + } + TTabletId tablet_id = -1; + TSchemaHash schema_hash = -1; + bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, + &tablet_id, &schema_hash); + if (!is_valid) { + LOG(WARNING) << "unknown path:" << path; + continue; + } + if (tablet_id > 0 && schema_hash > 0) { + // tablet schema hash path or rowset file path + // the gc thread should get the tablet including deleted tablets, + // or it may delete rowset files before the tablet is garbage collected + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash, true); + if (tablet == nullptr) { + std::string tablet_path_id = TABLET_ID_PREFIX + std::to_string(tablet_id); + bool exist_in_pending = _check_pending_ids(tablet_path_id); + if (!exist_in_pending) { + _process_garbage_path(path); + } + } else { + bool valid = tablet->check_path(path); + if (!valid) { + RowsetId rowset_id = -1; + bool is_rowset_file = _tablet_manager->get_rowset_id_from_path(path, &rowset_id); + if (is_rowset_file) { + std::string rowset_path_id = ROWSET_ID_PREFIX + std::to_string(rowset_id); + bool exist_in_pending = _check_pending_ids(rowset_path_id); + if (!exist_in_pending) { + _process_garbage_path(path); + } + } + } + } + } else if (tablet_id > 0 && schema_hash <= 0) { + // tablet id path + if (!FileUtils::is_dir(path)) { + LOG(WARNING) << "unknown path:" << path; + continue; + } + bool exist = _tablet_manager->check_tablet_id_exist(tablet_id); + if (!exist) { + std::string tablet_path_id = TABLET_ID_PREFIX + std::to_string(tablet_id); + bool exist_in_pending = _check_pending_ids(tablet_path_id); + if (!exist_in_pending) { + _process_garbage_path(path); + } + } + } + } + _all_check_paths.clear(); + LOG(INFO) << "finished one time path gc."; +} + +void DataDir::perform_path_gc_by_rowsetid() { + // initialize the set of valid paths + // validate the paths in the data dir + std::unique_lock<std::mutex> lck(_check_path_mutex); + cv.wait(lck, [this]{return _all_check_paths.size() > 0;}); + LOG(INFO) << "start to do path gc by rowsetid."; + int counter = 0; + for (auto& path : _all_check_paths) { + ++counter; + if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { + usleep(config::path_gc_check_step_interval_ms * 1000); + } + TTabletId tablet_id = -1; + TSchemaHash schema_hash = -1; + bool is_valid = 
_tablet_manager->get_tablet_id_and_schema_hash_from_path(path, + &tablet_id, &schema_hash); + if (!is_valid) { + LOG(WARNING) << "unknown path:" << path; + continue; + } + if (tablet_id > 0 && schema_hash > 0) { + // tablet schema hash path or rowset file path + // gc thread should get tablet include deleted tablet + // or it will delete rowset file before tablet is garbage collected + RowsetId rowset_id = -1; + bool is_rowset_file = _tablet_manager->get_rowset_id_from_path(path, &rowset_id); + if (is_rowset_file) { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id, schema_hash); + if (tablet != nullptr) { + bool valid = tablet->check_rowset_id(rowset_id); + if (!valid) { + // if the rowset id is less than tablet's initial end rowset id + // and the rowsetid is not in unused_rowsets + // and the rowsetid is not in committed rowsets + // then delete the path. + if (rowset_id < tablet->initial_end_rowset_id() + && !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) + && !RowsetMetaManager::check_rowset_meta(_meta, tablet->tablet_uid(), rowset_id)) { + _process_garbage_path(path); + } + } + } + } + } + } + _all_check_paths.clear(); + LOG(INFO) << "finished one time path gc by rowsetid."; +} + +// path producer +void DataDir::perform_path_scan() { + { + std::unique_lock lck(_check_path_mutex); + if (_all_check_paths.size() > 0) { + LOG(INFO) << "_all_check_paths is not empty when path scan."; + return; + } + LOG(INFO) << "start to scan data dir path:" << _path; + std::set shards; + std::string data_path = _path + DATA_PREFIX; + if (dir_walk(data_path, &shards, nullptr) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to walk dir. [path=" << data_path << "]"; + return; + } + for (const auto& shard : shards) { + std::string shard_path = data_path + "/" + shard; + std::set tablet_ids; + if (dir_walk(shard_path, &tablet_ids, nullptr) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to walk dir. [path=" << shard_path << "]"; + continue; + } + for (const auto& tablet_id : tablet_ids) { + std::string tablet_id_path = shard_path + "/" + tablet_id; + _all_check_paths.insert(tablet_id_path); + std::set schema_hashes; + if (dir_walk(tablet_id_path, &schema_hashes, nullptr) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to walk dir. [path=" << tablet_id_path << "]"; + continue; + } + for (const auto& schema_hash : schema_hashes) { + std::string tablet_schema_hash_path = tablet_id_path + "/" + schema_hash; + _all_check_paths.insert(tablet_schema_hash_path); + std::set rowset_files; + if (dir_walk(tablet_schema_hash_path, nullptr, &rowset_files) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to walk dir. [path=" << tablet_schema_hash_path << "]"; + continue; + } + for (const auto& rowset_file : rowset_files) { + std::string rowset_file_path = tablet_schema_hash_path + "/" + rowset_file; + _all_check_paths.insert(rowset_file_path); + } + } + } + } + LOG(INFO) << "scan data dir path:" << _path << " finished. 
path size:" << _all_check_paths.size(); + } + cv.notify_one(); +} + +void DataDir::_process_garbage_path(const std::string& path) { + if (check_dir_existed(path)) { + LOG(INFO) << "collect garbage dir path:" << path; + OLAPStatus status = remove_all_dir(path); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "remove garbage dir path:" << path << " failed"; + } + } +} + +bool DataDir::_check_pending_ids(const std::string& id) { + ReadLock rd_lock(&_pending_path_mutex); + return _pending_path_ids.find(id) != _pending_path_ids.end(); +} + +void DataDir::_remove_check_paths_no_lock(const std::set& paths) { + for (const auto& path : paths) { + auto path_iter = _all_check_paths.find(path); + if (path_iter != _all_check_paths.end()) { + _all_check_paths.erase(path_iter); + } + } +} + +} // namespace doris diff --git a/be/src/olap/store.h b/be/src/olap/data_dir.h similarity index 50% rename from be/src/olap/store.h rename to be/src/olap/data_dir.h index 15be6bc122b5a7..44528279cf19ce 100644 --- a/be/src/olap/store.h +++ b/be/src/olap/data_dir.h @@ -21,33 +21,35 @@ #include #include #include +#include #include "common/status.h" #include "gen_cpp/Types_types.h" #include "olap/olap_common.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" +#include "olap/rowset/rowset_id_generator.h" namespace doris { -class OLAPRootPath; -class OLAPEngine; - -// A OlapStore used to manange data in same path. -// Now, After OlapStore was created, it will never be deleted for easy implementation. -class OlapStore { +// A DataDir used to manange data in same path. +// Now, After DataDir was created, it will never be deleted for easy implementation. +class DataDir { public: - OlapStore(const std::string& path, int64_t capacity_bytes = -1); - ~OlapStore(); + DataDir(const std::string& path, + int64_t capacity_bytes = -1, + TabletManager* tablet_manager = nullptr, + TxnManager* txn_manager = nullptr); + ~DataDir(); - Status load(); + Status init(); const std::string& path() const { return _path; } - const int64_t path_hash() const { return _path_hash; } + const size_t path_hash() const { return _path_hash; } bool is_used() const { return _is_used; } void set_is_used(bool is_used) { _is_used = is_used; } int32_t cluster_id() const { return _cluster_id; } - RootPathInfo to_root_path_info() { - RootPathInfo info; + DataDirInfo get_dir_info() { + DataDirInfo info; info.path = _path; info.path_hash = _path_hash; info.is_used = _is_used; @@ -55,36 +57,69 @@ class OlapStore { return info; } + // save a cluster_id file under data path to prevent + // invalid be config for example two be use the same + // data path Status set_cluster_id(int32_t cluster_id); void health_check(); OLAPStatus get_shard(uint64_t* shard); - OlapMeta* get_meta(); + + OlapMeta* get_meta() { return _meta; } bool is_ssd_disk() const { return _storage_medium == TStorageMedium::SSD; } + TStorageMedium::type storage_medium() const { return _storage_medium; } - OLAPStatus register_table(OLAPTable* table); - OLAPStatus deregister_table(OLAPTable* table); + OLAPStatus register_tablet(Tablet* tablet); + OLAPStatus deregister_tablet(Tablet* tablet); + void clear_tablets(std::vector* tablet_infos); - std::string get_tablet_schema_hash_path_from_header(OLAPHeader* header); + std::string get_absolute_tablet_path(TabletMeta* tablet_meta, bool with_schema_hash); - std::string get_tablet_path_from_header(OLAPHeader* header); + std::string get_absolute_tablet_path(OLAPHeaderMessage& olap_header_msg, bool with_schema_hash); - std::string 
get_shard_path_from_header(const std::string& shard_string); + std::string get_absolute_tablet_path(TabletMetaPB* tablet_meta, bool with_schema_hash); + + std::string get_absolute_shard_path(const std::string& shard_string); void find_tablet_in_trash(int64_t tablet_id, std::vector* paths); static std::string get_root_path_from_schema_hash_path_in_trash(const std::string& schema_hash_dir_in_trash); +/* + OLAPStatus next_id(RowsetId* id) { + return _id_generator->get_next_id(id); + } + + OLAPStatus set_next_id(RowsetId new_rowset_id) { + return _id_generator->set_next_id(new_rowset_id); + } +*/ + // load data from meta and data files + OLAPStatus load(); + + void add_pending_ids(const std::string& id); + + void remove_pending_ids(const std::string& id); + + // this function scans the paths in data dir to collect the paths to check + // this is a producer function. After scan, it will notify the perform_path_gc function to gc + void perform_path_scan(); + + // this function is a consumer function + // this function will collect garbage paths scaned by last function + void perform_path_gc(); - OLAPStatus load_tables(OLAPEngine* engine); - OLAPStatus check_none_row_oriented_table_in_store(OLAPEngine* engine); - OLAPStatus _check_none_row_oriented_table_in_store( - OLAPEngine* engine, TTabletId tablet_id, - TSchemaHash schema_hash, const std::string& header); + void perform_path_gc_by_rowsetid(); + + OLAPStatus remove_old_meta_and_files(); + + bool convert_old_data_success(); + + OLAPStatus set_convert_finished(); private: std::string _cluster_id_path() const { return _path + CLUSTER_ID_PREFIX; } @@ -98,23 +133,27 @@ class OlapStore { OLAPStatus _read_and_write_test_file(); Status _read_cluster_id(const std::string& path, int32_t* cluster_id); Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id); + OLAPStatus _clean_unfinished_converting_data(); + OLAPStatus _convert_old_tablet(); + + void _remove_check_paths_no_lock(const std::set& paths); + + void _process_garbage_path(const std::string& path); - OLAPStatus _load_table_from_header(OLAPEngine* engine, TTabletId tablet_id, - TSchemaHash schema_hash, const std::string& header); + void _remove_check_paths(const std::set& paths); - void _deal_with_header_error(TTabletId tablet_id, TSchemaHash schema_hash, int shard); + bool _check_pending_ids(const std::string& id); private: - friend class OLAPRootPath; - friend class OLAPEngine; - std::string _path; - int64_t _path_hash; - int32_t _cluster_id; + size_t _path_hash; uint32_t _rand_seed; std::string _file_system; int64_t _capacity_bytes; + TabletManager* _tablet_manager; + TxnManager* _txn_manager; + int32_t _cluster_id; int64_t _available_bytes; int64_t _used_bytes; uint64_t _current_shard; @@ -131,7 +170,18 @@ class OlapStore { static const uint32_t MAX_SHARD_NUM = 1024; char* _test_file_read_buf; char* _test_file_write_buf; - OlapMeta* _meta; + OlapMeta* _meta = nullptr; + RowsetIdGenerator* _id_generator = nullptr; + + std::set _all_check_paths; + std::mutex _check_path_mutex; + std::condition_variable cv; + + std::set _pending_path_ids; + RWMutex _pending_path_mutex; + + // used in convert process + bool _convert_old_data_success; }; -} +} // namespace doris diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 93b34d1d419c79..70e7146247ecff 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -45,53 +45,36 @@ using google::protobuf::RepeatedPtrField; namespace doris { -// 将删除条件存储到table的Header文件中, -// 
在存储之前会判断删除条件是否符合要求。主要判断以下2个方面: -// 1. 删除条件的版本要不是当前最大的delta版本号,要不是最大的delta版本号加1 -// 2. 删除条件中指定的列在table中存在,必须是key列,且不能是double,float类型 -OLAPStatus DeleteConditionHandler::store_cond( - OLAPTablePtr table, - const int32_t version, - const vector& conditions) { - if (conditions.size() == 0 || _check_version_valid(table, version) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("invalid parameters for store_cond. " - "[version=%d condition_size=%u]", - version, conditions.size()); +OLAPStatus DeleteConditionHandler::generate_delete_predicate( + const TabletSchema& schema, + const std::vector& conditions, + DeletePredicatePB* del_pred) { + if (conditions.size() == 0) { + LOG(WARNING) << "invalid parameters for store_cond." + << " condition_size=" << conditions.size(); return OLAP_ERR_DELETE_INVALID_PARAMETERS; } // 检查删除条件是否符合要求 for (const TCondition& condition : conditions) { - if (check_condition_valid(table, condition) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("invalid condition. [%s]", - ThriftDebugString(condition).c_str()); + if (check_condition_valid(schema, condition) != OLAP_SUCCESS) { + LOG(WARNING) << "invalid condition. condition=" << ThriftDebugString(condition); return OLAP_ERR_DELETE_INVALID_CONDITION; } } - int cond_index = _check_whether_condition_exist(table, version); - DeleteConditionMessage* del_cond = NULL; - - if (cond_index == -1) { // 删除条件不存在 - del_cond = table->add_delete_data_conditions(); - del_cond->set_version(version); - } else { // 删除条件已经存在 - del_cond = table->mutable_delete_data_conditions(cond_index); - del_cond->clear_sub_conditions(); - } - // 存储删除条件 for (const TCondition& condition : conditions) { - string condition_str = construct_sub_conditions(condition); - del_cond->add_sub_conditions(condition_str); - LOG(INFO) << "store one sub-delete condition." - << "condition=" << condition_str; + string condition_str = construct_sub_predicates(condition); + del_pred->add_sub_predicates(condition_str); + LOG(INFO) << "store one sub-delete condition. condition=" << condition_str; } + del_pred->set_version(-1); return OLAP_SUCCESS; } -string DeleteConditionHandler::construct_sub_conditions(const TCondition& condition) { +string DeleteConditionHandler::construct_sub_predicates(const TCondition& condition) { string op = condition.condition_op; if (op == "<") { op += "<"; @@ -107,84 +90,11 @@ string DeleteConditionHandler::construct_sub_conditions(const TCondition& condit return condition_str; } -// 删除指定版本号的删除条件;需要注意的是,如果table上没有任何删除条件,或者 -// 指定版本号的删除条件不存在,也会返回OLAP_SUCCESS。 -OLAPStatus DeleteConditionHandler::delete_cond(OLAPTablePtr table, - const int32_t version, - bool delete_smaller_version_conditions) { - if (version < 0) { - OLAP_LOG_WARNING("invalid parameters for delete_cond. [version=%d]", version); - return OLAP_ERR_DELETE_INVALID_PARAMETERS; - } - - del_cond_array* delete_conditions = table->mutable_delete_data_conditions(); - - if (delete_conditions->size() == 0) { - return OLAP_SUCCESS; - } - - int index = 0; - - while (index != delete_conditions->size()) { - // 1. 如果删除条件的版本号等于形参指定的版本号,则删除该版本的文件; - // 2. 
如果还指定了delete_smaller_version_conditions为true,则同时删除 - // 版本号小于指定版本号的删除条件;否则不删除。 - DeleteConditionMessage temp = delete_conditions->Get(index); - - if (temp.version() == version || - (temp.version() < version && delete_smaller_version_conditions)) { - // 将要移除的删除条件记录到log中 - string del_cond_str; - const RepeatedPtrField& sub_conditions = temp.sub_conditions(); - - for (int i = 0; i != sub_conditions.size(); ++i) { - del_cond_str += sub_conditions.Get(i) + ";"; - } - - LOG(INFO) << "delete one condition. version=" << temp.version() - << ", condition=" << del_cond_str; - - // 移除过滤条件 - // 因为pb没有提供直接删除数组特定元素的方法,所以用下面的删除方式;这种方式会改变存在 - // Header文件中的删除条件的顺序。因为我们不关心删除条件的顺序,所以对我们没影响 - delete_conditions->SwapElements(index, delete_conditions->size() - 1); - delete_conditions->RemoveLast(); - } else { - ++index; - } - } - - return OLAP_SUCCESS; -} - -OLAPStatus DeleteConditionHandler::log_conds(OLAPTablePtr table) { - LOG(INFO) << "display all delete condition. tablet=" << table->full_name(); - table->obtain_header_rdlock(); - const del_cond_array& delete_conditions = table->delete_data_conditions(); - - for (int index = 0; index != delete_conditions.size(); ++index) { - DeleteConditionMessage temp = delete_conditions.Get(index); - string del_cond_str; - const RepeatedPtrField& sub_conditions = temp.sub_conditions(); - - // 将属于一条删除条件的子条件重新拼接成一条删除条件;子条件之间用分号隔开 - for (int i = 0; i != sub_conditions.size(); ++i) { - del_cond_str += sub_conditions.Get(i) + ";"; - } - - LOG(INFO) << "condition item: version=" << temp.version() - << ", condition=" << del_cond_str; - } - - table->release_header_lock(); - return OLAP_SUCCESS; -} - OLAPStatus DeleteConditionHandler::check_condition_valid( - OLAPTablePtr table, + const TabletSchema& schema, const TCondition& cond) { // 检查指定列名的列是否存在 - int field_index = table->get_field_index(cond.column_name); + int field_index = _get_field_index(schema, cond.column_name); if (field_index < 0) { OLAP_LOG_WARNING("field is not existent. 
[field_index=%d]", field_index); @@ -192,12 +102,12 @@ OLAPStatus DeleteConditionHandler::check_condition_valid( } // 检查指定的列是不是key,是不是float或doulbe类型 - FieldInfo field_info = table->tablet_schema()[field_index]; + const TabletColumn& column = schema.column(field_index); - if (!field_info.is_key - || field_info.type == OLAP_FIELD_TYPE_DOUBLE - || field_info.type == OLAP_FIELD_TYPE_FLOAT) { - OLAP_LOG_WARNING("field is not key column, or its type is float or double."); + if (!column.is_key() + || column.type() == OLAP_FIELD_TYPE_DOUBLE + || column.type() == OLAP_FIELD_TYPE_FLOAT) { + LOG(WARNING) << "field is not key column, or its type is float or double."; return OLAP_ERR_DELETE_INVALID_CONDITION; } @@ -212,7 +122,7 @@ OLAPStatus DeleteConditionHandler::check_condition_valid( } const string& value_str = cond.condition_values[0]; - FieldType field_type = field_info.type; + FieldType field_type = column.type(); bool valid_condition = false; if ("IS" == cond.condition_op @@ -237,10 +147,10 @@ OLAPStatus DeleteConditionHandler::check_condition_valid( } else if (field_type == OLAP_FIELD_TYPE_UNSIGNED_BIGINT) { valid_condition = valid_unsigned_number(value_str); } else if (field_type == OLAP_FIELD_TYPE_DECIMAL) { - valid_condition = valid_decimal(value_str, field_info.precision, field_info.frac); + valid_condition = valid_decimal(value_str, column.precision(), column.frac()); } else if (field_type == OLAP_FIELD_TYPE_CHAR || field_type == OLAP_FIELD_TYPE_VARCHAR || field_type == OLAP_FIELD_TYPE_HLL) { - if (value_str.size() <= field_info.length) { + if (value_str.size() <= column.length()) { valid_condition = true; } } else if (field_type == OLAP_FIELD_TYPE_DATE || field_type == OLAP_FIELD_TYPE_DATETIME) { @@ -257,15 +167,13 @@ OLAPStatus DeleteConditionHandler::check_condition_valid( } } -OLAPStatus DeleteConditionHandler::_check_version_valid(OLAPTablePtr table, +OLAPStatus DeleteConditionHandler::_check_version_valid(std::vector* all_file_versions, const int32_t filter_version) { // 找到当前最大的delta文件版本号 - vector all_file_versions; - table->list_versions(&all_file_versions); int max_delta_version = -1; - vector::const_iterator version_iter = all_file_versions.begin(); + vector::const_iterator version_iter = all_file_versions->begin(); - for (; version_iter != all_file_versions.end(); ++version_iter) { + for (; version_iter != all_file_versions->end(); ++version_iter) { if (version_iter->second > max_delta_version) { max_delta_version = version_iter->second; } @@ -280,9 +188,7 @@ OLAPStatus DeleteConditionHandler::_check_version_valid(OLAPTablePtr table, } } -int DeleteConditionHandler::_check_whether_condition_exist(OLAPTablePtr table, int cond_version) { - const del_cond_array& delete_conditions = table->delete_data_conditions(); - +int DeleteConditionHandler::_check_whether_condition_exist(const DelPredicateArray& delete_conditions, int cond_version) { if (delete_conditions.size() == 0) { return -1; } @@ -290,7 +196,7 @@ int DeleteConditionHandler::_check_whether_condition_exist(OLAPTablePtr table, i int index = 0; while (index != delete_conditions.size()) { - DeleteConditionMessage temp = delete_conditions.Get(index); + DeletePredicatePB temp = delete_conditions.Get(index); if (temp.version() == cond_version) { return index; @@ -334,25 +240,20 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio return true; } -OLAPStatus DeleteHandler::init(OLAPTablePtr olap_table, int32_t version) { +OLAPStatus DeleteHandler::init(const TabletSchema& schema, + const 
DelPredicateArray& delete_conditions, int32_t version) { if (_is_inited) { OLAP_LOG_WARNING("reintialize delete handler."); return OLAP_ERR_INIT_FAILED; } - if (!olap_table) { - OLAP_LOG_WARNING("invalid parameters: invalid olap table."); - return OLAP_ERR_DELETE_INVALID_PARAMETERS; - } - if (version < 0) { OLAP_LOG_WARNING("invalid parameters. [version=%d]", version); return OLAP_ERR_DELETE_INVALID_PARAMETERS; } - const del_cond_array& delete_conditions = olap_table->delete_data_conditions(); - del_cond_array::const_iterator it = delete_conditions.begin(); + DelPredicateArray::const_iterator it = delete_conditions.begin(); for (; it != delete_conditions.end(); ++it) { // 跳过版本号大于version的过滤条件 @@ -370,13 +271,13 @@ OLAPStatus DeleteHandler::init(OLAPTablePtr olap_table, int32_t version) { return OLAP_ERR_MALLOC_ERROR; } - temp.del_cond->set_table(olap_table); + temp.del_cond->set_tablet_schema(&schema); - for (int i = 0; i != it->sub_conditions_size(); ++i) { + for (int i = 0; i != it->sub_predicates_size(); ++i) { TCondition condition; - if (!_parse_condition(it->sub_conditions(i), &condition)) { + if (!_parse_condition(it->sub_predicates(i), &condition)) { OLAP_LOG_WARNING("fail to parse condition. [condition=%s]", - it->sub_conditions(i).c_str()); + it->sub_predicates(i).c_str()); return OLAP_ERR_DELETE_INVALID_PARAMETERS; } diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index f78c16ac48cbd2..9e0f7689a34736 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -26,87 +26,48 @@ #include "olap/field.h" #include "olap/olap_cond.h" #include "olap/olap_define.h" -#include "olap/olap_table.h" #include "olap/row_cursor.h" namespace doris { -// 实现了删除条件的存储,移除和显示功能 -// * 存储删除条件: -// OLAPStatus res; -// DeleteConditionHandler cond_handler; -// res = cond_handler.store_cond(olap_table, condition_version, delete_condition); -// * 移除删除条件 -// res = cond_handler.delete_cond(olap_table, condition_version, true); -// 或者 -// res = cond_handler.delete_cond(olap_table, condition_version, false); -// * 将一个table上现存有的所有删除条件打印到log中 -// res = cond_handler.log_conds(olap_table); -// 注: -// * 在调用这个类存储和移除删除条件时,需要先对Header文件加写锁; -// 并在调用完成之后调用olap_table->save_header(),然后再释放Header文件的锁 -// * 在调用log_conds()的时候,只需要加读锁 +typedef google::protobuf::RepeatedPtrField DelPredicateArray; + class DeleteConditionHandler { public: - typedef google::protobuf::RepeatedPtrField del_cond_array; DeleteConditionHandler() {} ~DeleteConditionHandler() {} + // generated DeletePredicatePB by TCondition + OLAPStatus generate_delete_predicate(const TabletSchema& schema, + const std::vector& conditions, + DeletePredicatePB* del_pred); + // 检查cond表示的删除条件是否符合要求; // 如果不符合要求,返回OLAP_ERR_DELETE_INVALID_CONDITION;符合要求返回OLAP_SUCCESS - OLAPStatus check_condition_valid(OLAPTablePtr table, const TCondition& cond); - - // 存储指定版本号的删除条件到Header文件中。因此,调用之前需要对Header文件加写锁 - // - // 输入参数: - // * table:指定删除条件要作用的olap engine表;删除条件就存储在这个表的Header文件中 - // * version: 删除条件的版本 - // * del_condition: 用字符串形式表示的删除条件 - // 返回值: - // * OLAP_SUCCESS:调用成功 - // * OLAP_ERR_DELETE_INVALID_PARAMETERS:函数参数不符合要求 - // * OLAP_ERR_DELETE_INVALID_CONDITION:del_condition不符合要求 - OLAPStatus store_cond( - OLAPTablePtr table, - const int32_t version, - const std::vector& conditions); + OLAPStatus check_condition_valid(const TabletSchema& tablet_schema, const TCondition& cond); // construct sub condition from TCondition - std::string construct_sub_conditions(const TCondition& condition); + std::string construct_sub_predicates(const TCondition& 
condition); - // 从Header文件中移除特定版本号的删除条件。在调用之前需要对Header文件加写锁 - // - // 输入参数: - // * table:需要移除删除条件的olap engine表 - // * version:要移除的删除条件的版本 - // * delete_smaller_version_conditions: - // * 如果true,则移除小于等于指定版本号的删除条件; - // * 如果false,则只删除指定版本的删除条件 - // 返回值: - // * OLAP_SUCCESS: - // * 移除删除条件成功 - // * 这个表没有任何删除条件 - // * 这个表没有指定版本号的删除条件 - // * OLAP_ERR_DELETE_INVALID_PARAMETERS:函数参数不符合要求 - OLAPStatus delete_cond( - OLAPTablePtr table, const int32_t version, bool delete_smaller_version_conditions); - - // 将一个olap engine的表上存有的所有删除条件打印到log中。调用前只需要给Header文件加读锁 - // - // 输入参数: - // table: 要打印删除条件的olap engine表 - // 返回值: - // OLAP_SUCCESS:调用成功 - OLAPStatus log_conds(OLAPTablePtr table); private: // 检查指定的删除条件版本是否符合要求; // 如果不符合要求,返回OLAP_ERR_DELETE_INVALID_VERSION;符合要求返回OLAP_SUCCESS - OLAPStatus _check_version_valid(OLAPTablePtr table, const int32_t filter_version); + OLAPStatus _check_version_valid(std::vector* all_file_versions, const int32_t filter_version); // 检查指定版本的删除条件是否已经存在。如果存在,返回指定版本删除条件的数组下标;不存在返回-1 - int _check_whether_condition_exist(OLAPTablePtr, int cond_version); + int _check_whether_condition_exist(const DelPredicateArray& delete_conditions, int cond_version); + + int32_t _get_field_index(const TabletSchema& schema, const std::string& field_name) const { + for (int i = 0; i < schema.num_columns(); i++) { + if (schema.column(i).name() == field_name) { + return i; + } + } + LOG(WARNING) << "invalid field name. name='" << field_name; + return -1; + } }; // 表示一个删除条件 @@ -122,7 +83,7 @@ struct DeleteConditions { // 1. 使用一个版本号来初始化handler // OLAPStatus res; // DeleteHandler delete_handler; -// res = delete_handler.init(olap_table, condition_version); +// res = delete_handler.init(tablet, condition_version); // 2. 使用这个handler来判定一条数据是否符合删除条件 // bool filter_data; // filter_data = delete_handler.is_filter_data(data_version, row_cursor); @@ -135,7 +96,6 @@ struct DeleteConditions { class DeleteHandler { public: typedef std::vector::size_type cond_num_t; - typedef google::protobuf::RepeatedPtrField del_cond_array; DeleteHandler() : _is_inited(false) {} ~DeleteHandler() {} @@ -148,13 +108,14 @@ class DeleteHandler { // 调用前需要先对Header文件加读锁 // // 输入参数: - // * olap_table: 删除条件和数据所在的table + // * tablet: 删除条件和数据所在的tablet // * version: 要取出的删除条件版本号 // 返回值: // * OLAP_SUCCESS: 调用成功 // * OLAP_ERR_DELETE_INVALID_PARAMETERS: 参数不符合要求 // * OLAP_ERR_MALLOC_ERROR: 在填充_del_conds时,分配内存失败 - OLAPStatus init(OLAPTablePtr olap_table, int32_t version); + OLAPStatus init(const TabletSchema& schema, + const DelPredicateArray& delete_conditions, int32_t version); // 判定一条数据是否符合删除条件 // diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 400da13436695f..d593aef5921a54 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -18,7 +18,10 @@ #include "olap/delta_writer.h" #include "olap/schema.h" -#include "olap/segment_group.h" +#include "olap/data_dir.h" +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/rowset/rowset_id_generator.h" namespace doris { @@ -28,117 +31,130 @@ OLAPStatus DeltaWriter::open(WriteRequest* req, DeltaWriter** writer) { } DeltaWriter::DeltaWriter(WriteRequest* req) - : _req(*req), _table(nullptr), - _cur_segment_group(nullptr), _new_table(nullptr), - _writer(nullptr), _mem_table(nullptr), - _schema(nullptr), _field_infos(nullptr), - _segment_group_id(-1), _delta_written_success(false) {} + : _req(*req), _tablet(nullptr), + _cur_rowset(nullptr), _new_rowset(nullptr), _new_tablet(nullptr), + 
_rowset_writer(nullptr), _mem_table(nullptr), + _schema(nullptr), _tablet_schema(nullptr), + _delta_written_success(false) {} DeltaWriter::~DeltaWriter() { if (!_delta_written_success) { _garbage_collection(); } - for (SegmentGroup* segment_group : _segment_group_vec) { - segment_group->release(); - } - SAFE_DELETE(_writer); + SAFE_DELETE(_mem_table); SAFE_DELETE(_schema); + if (_rowset_writer != nullptr) { + _rowset_writer->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(_rowset_writer->rowset_id())); + } } void DeltaWriter::_garbage_collection() { - OLAPEngine::get_instance()->delete_transaction(_req.partition_id, _req.transaction_id, - _req.tablet_id, _req.schema_hash); - for (SegmentGroup* segment_group : _segment_group_vec) { - OLAPEngine::get_instance()->add_unused_index(segment_group); - } - if (_new_table != nullptr) { - OLAPEngine::get_instance()->delete_transaction(_req.partition_id, _req.transaction_id, - _new_table->tablet_id(), _new_table->schema_hash()); - for (SegmentGroup* segment_group : _new_segment_group_vec) { - OLAPEngine::get_instance()->add_unused_index(segment_group); + OLAPStatus rollback_status = OLAP_SUCCESS; + if (_tablet != nullptr) { + rollback_status = StorageEngine::instance()->txn_manager()->rollback_txn(_req.partition_id, + _req.txn_id,_req.tablet_id, _req.schema_hash, _tablet->tablet_uid()); + } + // has to check rollback status, because the rowset maybe committed in this thread and + // published in another thread, then rollback will failed + // when rollback failed should not delete rowset + if (rollback_status == OLAP_SUCCESS) { + StorageEngine::instance()->add_unused_rowset(_cur_rowset); + } + if (_new_tablet != nullptr) { + rollback_status = StorageEngine::instance()->txn_manager()->rollback_txn(_req.partition_id, _req.txn_id, + _new_tablet->tablet_id(), _new_tablet->schema_hash(), _new_tablet->tablet_uid()); + if (rollback_status == OLAP_SUCCESS) { + StorageEngine::instance()->add_unused_rowset(_new_rowset); } } } OLAPStatus DeltaWriter::init() { - _table = OLAPEngine::get_instance()->get_table(_req.tablet_id, _req.schema_hash); - if (_table == nullptr) { + _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_req.tablet_id, _req.schema_hash); + if (_tablet == nullptr) { LOG(WARNING) << "tablet_id: " << _req.tablet_id << ", " << "schema_hash: " << _req.schema_hash << " not found"; return OLAP_ERR_TABLE_NOT_FOUND; } - OLAPStatus lock_status = _table->try_migration_rdlock(); - if (lock_status != OLAP_SUCCESS) { - return lock_status; - } else { - OLAPStatus res = _init(); - _table->release_migration_lock(); - return res; - } -} -OLAPStatus DeltaWriter::_init() { { - MutexLock push_lock(_table->get_push_lock()); - RETURN_NOT_OK(OLAPEngine::get_instance()->add_transaction( - _req.partition_id, _req.transaction_id, - _req.tablet_id, _req.schema_hash, _req.load_id)); - //_segment_group_id = _table->current_pending_segment_group_id(_req.transaction_id); + ReadLock base_migration_rlock(_tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!base_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + MutexLock push_lock(_tablet->get_push_lock()); + RETURN_NOT_OK(StorageEngine::instance()->txn_manager()->prepare_txn( + _req.partition_id, _req.txn_id, + _req.tablet_id, _req.schema_hash, _tablet->tablet_uid(), _req.load_id)); if (_req.need_gen_rollup) { - TTabletId new_tablet_id; - TSchemaHash new_schema_hash; - _table->obtain_header_rdlock(); - bool is_schema_changing = - _table->get_schema_change_request(&new_tablet_id, 
&new_schema_hash, nullptr, nullptr); - _table->release_header_lock(); - - if (is_schema_changing) { - LOG(INFO) << "load with schema change." << "old_tablet_id: " << _table->tablet_id() << ", " - << "old_schema_hash: " << _table->schema_hash() << ", " - << "new_tablet_id: " << new_tablet_id << ", " - << "new_schema_hash: " << new_schema_hash << ", " - << "transaction_id: " << _req.transaction_id; - _new_table = OLAPEngine::get_instance()->get_table(new_tablet_id, new_schema_hash); - OLAPEngine::get_instance()->add_transaction( - _req.partition_id, _req.transaction_id, - new_tablet_id, new_schema_hash, _req.load_id); + AlterTabletTaskSharedPtr alter_task = _tablet->alter_task(); + if (alter_task != nullptr && alter_task->alter_state() != ALTER_FAILED) { + TTabletId new_tablet_id = alter_task->related_tablet_id(); + TSchemaHash new_schema_hash = alter_task->related_schema_hash(); + LOG(INFO) << "load with schema change." << "old_tablet_id: " << _tablet->tablet_id() << ", " + << "old_schema_hash: " << _tablet->schema_hash() << ", " + << "new_tablet_id: " << new_tablet_id << ", " + << "new_schema_hash: " << new_schema_hash << ", " + << "transaction_id: " << _req.txn_id; + _new_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(new_tablet_id, new_schema_hash); + if (_new_tablet == nullptr) { + LOG(WARNING) << "find alter task, but could not find new tablet tablet_id: " << new_tablet_id + << ", schema_hash: " << new_schema_hash; + return OLAP_ERR_TABLE_NOT_FOUND; + } + ReadLock new_migration_rlock(_new_tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!new_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + StorageEngine::instance()->txn_manager()->prepare_txn( + _req.partition_id, _req.txn_id, + new_tablet_id, new_schema_hash, _new_tablet->tablet_uid(), _req.load_id); } } - - // create pending data dir - std::string dir_path = _table->construct_pending_data_dir_path(); - if (!check_dir_existed(dir_path)) { - RETURN_NOT_OK(create_dirs(dir_path)); - } } - ++_segment_group_id; - _cur_segment_group = new SegmentGroup(_table.get(), false, _segment_group_id, 0, true, - _req.partition_id, _req.transaction_id); - DCHECK(_cur_segment_group != nullptr) << "failed to malloc SegmentGroup"; - _cur_segment_group->acquire(); - _cur_segment_group->set_load_id(_req.load_id); - _segment_group_vec.push_back(_cur_segment_group); - - // New Writer to write data into SegmentGroup - VLOG(3) << "init writer. 
table=" << _table->full_name() << ", " - << "block_row_size=" << _table->num_rows_per_row_block(); - _writer = ColumnDataWriter::create(_table, _cur_segment_group, true); - DCHECK(_writer != nullptr) << "memory error occur when creating writer"; + RowsetId rowset_id = 0; // get rowset_id from id generator + OLAPStatus status = _tablet->next_rowset_id(&rowset_id); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "generate rowset id failed, status:" << status; + return OLAP_ERR_ROWSET_GENERATE_ID_FAILED; + } + RowsetWriterContext writer_context; + writer_context.rowset_id = rowset_id; + writer_context.tablet_uid = _tablet->tablet_uid(); + writer_context.tablet_id = _req.tablet_id; + writer_context.partition_id = _req.partition_id; + writer_context.tablet_schema_hash = _req.schema_hash; + writer_context.rowset_type = ALPHA_ROWSET; + writer_context.rowset_path_prefix = _tablet->tablet_path(); + writer_context.tablet_schema = &(_tablet->tablet_schema()); + writer_context.rowset_state = PREPARED; + writer_context.data_dir = _tablet->data_dir(); + writer_context.txn_id = _req.txn_id; + writer_context.load_id = _req.load_id; + + // TODO: new RowsetBuilder according to tablet storage type + _rowset_writer.reset(new AlphaRowsetWriter()); + status = _rowset_writer->init(writer_context); + if (status != OLAP_SUCCESS) { + return OLAP_ERR_ROWSET_WRITER_INIT; + } const std::vector& slots = _req.tuple_desc->slots(); - for (auto& field_info : _table->tablet_schema()) { + const TabletSchema& schema = _tablet->tablet_schema(); + for (size_t col_id = 0; col_id < schema.num_columns(); ++col_id) { + const TabletColumn& column = schema.column(col_id); for (size_t i = 0; i < slots.size(); ++i) { - if (slots[i]->col_name() == field_info.name) { + if (slots[i]->col_name() == column.name()) { _col_ids.push_back(i); } } } - _field_infos = &(_table->tablet_schema()); - _schema = new Schema(*_field_infos), - _mem_table = new MemTable(_schema, _field_infos, &_col_ids, - _req.tuple_desc, _table->keys_type()); + _tablet_schema = &(_tablet->tablet_schema()); + _schema = new Schema(*_tablet_schema); + _mem_table = new MemTable(_schema, _tablet_schema, &_col_ids, + _req.tuple_desc, _tablet->keys_type()); _is_init = true; return OLAP_SUCCESS; } @@ -153,23 +169,11 @@ OLAPStatus DeltaWriter::write(Tuple* tuple) { _mem_table->insert(tuple); if (_mem_table->memory_usage() >= config::write_buffer_size) { - RETURN_NOT_OK(_mem_table->flush(_writer)); - - ++_segment_group_id; - _cur_segment_group = new SegmentGroup(_table.get(), false, _segment_group_id, 0, true, - _req.partition_id, _req.transaction_id); - DCHECK(_cur_segment_group != nullptr) << "failed to malloc SegmentGroup"; - _cur_segment_group->acquire(); - _cur_segment_group->set_load_id(_req.load_id); - _segment_group_vec.push_back(_cur_segment_group); - - SAFE_DELETE(_writer); - _writer = ColumnDataWriter::create(_table, _cur_segment_group, true); - DCHECK(_writer != nullptr) << "memory error occur when creating writer"; + RETURN_NOT_OK(_mem_table->flush(_rowset_writer)); SAFE_DELETE(_mem_table); - _mem_table = new MemTable(_schema, _field_infos, &_col_ids, - _req.tuple_desc, _table->keys_type()); + _mem_table = new MemTable(_schema, _tablet_schema, &_col_ids, + _req.tuple_desc, _tablet->keys_type()); } return OLAP_SUCCESS; } @@ -181,54 +185,56 @@ OLAPStatus DeltaWriter::close(google::protobuf::RepeatedPtrField* t return st; } } - RETURN_NOT_OK(_mem_table->close(_writer)); - - OLAPStatus res = _table->add_pending_version(_req.partition_id, _req.transaction_id, nullptr); + 
RETURN_NOT_OK(_mem_table->close(_rowset_writer)); + + OLAPStatus res = OLAP_SUCCESS; + // use rowset meta manager to save meta + _cur_rowset = _rowset_writer->build(); + if (_cur_rowset == nullptr) { + LOG(WARNING) << "fail to build rowset"; + return OLAP_ERR_MALLOC_ERROR; + } + res = StorageEngine::instance()->txn_manager()->commit_txn(_tablet->data_dir()->get_meta(), + _req.partition_id, _req.txn_id,_req.tablet_id, _req.schema_hash, _tablet->tablet_uid(), + _req.load_id, _cur_rowset, false); if (res != OLAP_SUCCESS && res != OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { + LOG(WARNING) << "commit txn: " << _req.txn_id + << " for rowset: " << _cur_rowset->rowset_id() + << " failed."; return res; } - //add pending data to tablet - for (SegmentGroup* segment_group : _segment_group_vec) { - RETURN_NOT_OK(_table->add_pending_segment_group(segment_group)); - RETURN_NOT_OK(segment_group->load()); - } - if (_new_table != nullptr) { - LOG(INFO) << "convert version for schema change. txn id: " << _req.transaction_id; - { - MutexLock push_lock(_new_table->get_push_lock()); - // create pending data dir - std::string dir_path = _new_table->construct_pending_data_dir_path(); - if (!check_dir_existed(dir_path)) { - RETURN_NOT_OK(create_dirs(dir_path)); - } - } + + if (_new_tablet != nullptr) { + LOG(INFO) << "convert version for schema change"; SchemaChangeHandler schema_change; - res = schema_change.schema_version_convert( - _table, _new_table, &_segment_group_vec, &_new_segment_group_vec); + res = schema_change.schema_version_convert(_tablet, _new_tablet, &_cur_rowset, &_new_rowset); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to convert delta for new table in schema change." - << "res: " << res << ", " << "new_table: " << _new_table->full_name(); - return res; + LOG(WARNING) << "failed to convert delta for new tablet in schema change." + << "res: " << res << ", " + << "new_tablet: " << _new_tablet->full_name(); + return res; } - res = _new_table->add_pending_version(_req.partition_id, _req.transaction_id, nullptr); + res = StorageEngine::instance()->txn_manager()->commit_txn(_new_tablet->data_dir()->get_meta(), + _req.partition_id, _req.txn_id, _new_tablet->tablet_id(), + _new_tablet->schema_hash(), _new_tablet->tablet_uid(), + _req.load_id, _new_rowset, false); + if (res != OLAP_SUCCESS && res != OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { + LOG(WARNING) << "save pending rowset failed. 
rowset_id:" + << _new_rowset->rowset_id(); return res; } - for (SegmentGroup* segment_group : _new_segment_group_vec) { - RETURN_NOT_OK(_new_table->add_pending_segment_group(segment_group)); - RETURN_NOT_OK(segment_group->load()); - } } #ifndef BE_TEST PTabletInfo* tablet_info = tablet_vec->Add(); - tablet_info->set_tablet_id(_table->tablet_id()); - tablet_info->set_schema_hash(_table->schema_hash()); - if (_new_table != nullptr) { + tablet_info->set_tablet_id(_tablet->tablet_id()); + tablet_info->set_schema_hash(_tablet->schema_hash()); + if (_new_tablet != nullptr) { tablet_info = tablet_vec->Add(); - tablet_info->set_tablet_id(_new_table->tablet_id()); - tablet_info->set_schema_hash(_new_table->schema_hash()); + tablet_info->set_tablet_id(_new_tablet->tablet_id()); + tablet_info->set_schema_hash(_new_tablet->schema_hash()); } #endif @@ -241,4 +247,4 @@ OLAPStatus DeltaWriter::cancel() { return OLAP_SUCCESS; } -} // namespace doris +} // namespace doris diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index 856c362dca3db3..16db33d4aab312 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -19,13 +19,13 @@ #define DORIS_BE_SRC_DELTA_WRITER_H #include "olap/memtable.h" -#include "olap/olap_engine.h" -#include "olap/olap_table.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" #include "olap/schema_change.h" -#include "olap/data_writer.h" #include "runtime/descriptors.h" #include "runtime/tuple.h" #include "gen_cpp/internal_service.pb.h" +#include "olap/rowset/rowset_writer.h" namespace doris { @@ -41,7 +41,7 @@ struct WriteRequest { int64_t tablet_id; int32_t schema_hash; WriteType write_type; - int64_t transaction_id; + int64_t txn_id; int64_t partition_id; PUniqueId load_id; bool need_gen_rollup; @@ -60,24 +60,22 @@ class DeltaWriter { OLAPStatus cancel(); int64_t partition_id() const { return _req.partition_id; } + private: void _garbage_collection(); - OLAPStatus _init(); - + +private: bool _is_init = false; WriteRequest _req; - OLAPTablePtr _table; - SegmentGroup* _cur_segment_group; - std::vector _segment_group_vec; - std::vector _new_segment_group_vec; - OLAPTablePtr _new_table; - ColumnDataWriter* _writer; + TabletSharedPtr _tablet; + RowsetSharedPtr _cur_rowset; + RowsetSharedPtr _new_rowset; + TabletSharedPtr _new_tablet; + RowsetWriterSharedPtr _rowset_writer; MemTable* _mem_table; Schema* _schema; - std::vector* _field_infos; + const TabletSchema* _tablet_schema; std::vector _col_ids; - - int32_t _segment_group_id; bool _delta_written_success; }; diff --git a/be/src/olap/field.cpp b/be/src/olap/field.cpp index 42562e71cb0d97..43e771cdf9ab98 100644 --- a/be/src/olap/field.cpp +++ b/be/src/olap/field.cpp @@ -28,48 +28,46 @@ using std::string; namespace doris { -Field* Field::create(const FieldInfo& field_info) { - Field* field = new Field(field_info); +Field* Field::create(const TabletColumn& column) { + Field* field = new Field(column); return field; } // 这个函数目前不支持字符串类型 Field* Field::create_by_type(const FieldType& type) { Field* field = NULL; - FieldInfo field_info; - field_info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - field_info.type = type; + TabletColumn column(OLAP_FIELD_AGGREGATION_NONE, type); if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_HLL) { field = NULL; } else { - field = new Field(field_info); + field = new Field(column); } return field; } -Field::Field(const FieldInfo& field_info) - : _type(field_info.type), - _index_size(field_info.index_length), 
+Field::Field(const TabletColumn& column) + : _type(column.type()), + _index_size(column.index_length()), _offset(0) { - _type_info = get_type_info(field_info.type); + _type_info = get_type_info(column.type()); if (_type == OLAP_FIELD_TYPE_CHAR || _type == OLAP_FIELD_TYPE_VARCHAR || _type == OLAP_FIELD_TYPE_HLL) { _size = sizeof(Slice); } else { /* - * the field_info.size and field_info.index_length is equal to zero, - * if field_info is generated by Field::create_by_type function. + * the column size and index_length is equal to zero, + * if column is generated by Field::create_by_type function. * ColumnStatistics use size but not index_size. */ _size = _type_info->size(); } - _index_size = field_info.index_length; - _aggregate_func = get_aggregate_func(field_info.aggregation, field_info.type); - _finalize_func = get_finalize_func(field_info.aggregation, field_info.type); + _index_size = column.index_length(); + _aggregate_func = get_aggregate_func(column.aggregation(), column.type()); + _finalize_func = get_finalize_func(column.aggregation(), column.type()); } } // namespace doris diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 7247d06f56be88..91e17772a9f719 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -21,9 +21,9 @@ #include #include "olap/aggregate_func.h" -#include "olap/field_info.h" #include "olap/olap_common.h" #include "olap/olap_define.h" +#include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" #include "runtime/mem_pool.h" @@ -40,10 +40,10 @@ class Field { // 使用FieldInfo创建一个Field对象的实例 // 根据类型的不同,使用不同的类模板参数或者子类 // 对于没有预料到的类型,会返回NULL - static Field* create(const FieldInfo& field_info); + static Field* create(const TabletColumn& column); static Field* create_by_type(const FieldType& type); - Field(const FieldInfo& field_info); + Field(const TabletColumn& column); inline void set_offset(size_t offset) { _offset = offset; } inline size_t get_offset() const { return _offset; } diff --git a/be/src/olap/file_helper.h b/be/src/olap/file_helper.h index 2a47306cf7e429..9130f374198cd6 100644 --- a/be/src/olap/file_helper.h +++ b/be/src/olap/file_helper.h @@ -351,9 +351,9 @@ OLAPStatus FileHeader::unserialize( if (OLAP_SUCCESS != file_handler->pread(&_fixed_file_header, _fixed_file_header_size, 0)) { char errmsg[64]; - LOG(WARNING) << "fail to load header structure from file. [file='" + LOG(WARNING) << "fail to load header structure from file. file=" << file_handler->file_name() - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; + << ", error=" << strerror_r(errno, errmsg, 64); return OLAP_ERR_IO_ERROR; } @@ -364,9 +364,9 @@ OLAPStatus FileHeader::unserialize( if (OLAP_SUCCESS != file_handler->pread(&tmp_header, sizeof(tmp_header), 0)) { char errmsg[64]; - LOG(WARNING) << "fail to load header structure from file. [file='" + LOG(WARNING) << "fail to load header structure from file. file=" << file_handler->file_name() - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; + << ", error=" << strerror_r(errno, errmsg, 64); return OLAP_ERR_IO_ERROR; } @@ -389,9 +389,9 @@ OLAPStatus FileHeader::unserialize( if (OLAP_SUCCESS != file_handler->pread(&_extra_fixed_header, sizeof(_extra_fixed_header), _fixed_file_header_size)) { char errmsg[64]; - LOG(WARNING) << "fail to load extra fixed header from file. [file='" + LOG(WARNING) << "fail to load extra fixed header from file. 
file=" << file_handler->file_name() - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; + << ", error=" << strerror_r(errno, errmsg, 64); return OLAP_ERR_IO_ERROR; } @@ -399,27 +399,27 @@ OLAPStatus FileHeader::unserialize( if (NULL == buf.get()) { char errmsg[64]; - LOG(WARNING) << "malloc protobuf buf error. [file='" + LOG(WARNING) << "malloc protobuf buf error. file=" << file_handler->file_name() - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; + << ", error=" << strerror_r(errno, errmsg, 64); return OLAP_ERR_MALLOC_ERROR; } if (OLAP_SUCCESS != file_handler->pread(buf.get(), _fixed_file_header.protobuf_length, _fixed_file_header_size + sizeof(_extra_fixed_header))) { char errmsg[64]; - LOG(WARNING) << "fail to load protobuf from file. [file='" + LOG(WARNING) << "fail to load protobuf from file. file=" << file_handler->file_name() - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; + << ", error=" << strerror_r(errno, errmsg, 64); return OLAP_ERR_IO_ERROR; } real_file_length = file_handler->length(); if (file_length() != static_cast(real_file_length)) { - LOG(WARNING) << "file length is not match. [file='" << file_handler->file_name() - << "' file_length=" << file_length() - << " real_file_length=" << real_file_length << "]"; + LOG(WARNING) << "file length is not match. file=" << file_handler->file_name() + << ", file_length=" << file_length() + << ", real_file_length=" << real_file_length; return OLAP_ERR_FILE_DATA_ERROR; } @@ -428,9 +428,9 @@ OLAPStatus FileHeader::unserialize( buf.get(), _fixed_file_header.protobuf_length); if (real_protobuf_checksum != _fixed_file_header.protobuf_checksum) { - LOG(WARNING) << "checksum is not match. [file='" << file_handler->file_name() - << "' expect=" << _fixed_file_header.protobuf_checksum - << " actual=" << real_protobuf_checksum << "]"; + LOG(WARNING) << "checksum is not match. file=" << file_handler->file_name() + << ", expect=" << _fixed_file_header.protobuf_checksum + << ", actual=" << real_protobuf_checksum; return OLAP_ERR_CHECKSUM_ERROR; } @@ -438,12 +438,12 @@ OLAPStatus FileHeader::unserialize( std::string protobuf_str(buf.get(), _fixed_file_header.protobuf_length); if (!_proto.ParseFromString(protobuf_str)) { - LOG(WARNING) << "fail to parse file content to protobuf object. [file='" - << file_handler->file_name() << "']"; + LOG(WARNING) << "fail to parse file content to protobuf object. file=" + << file_handler->file_name(); return OLAP_ERR_PARSE_PROTOBUF_ERROR; } } catch (...) { - LOG(WARNING) << "fail to load protobuf. [file='" << file_handler->file_name() << "']"; + LOG(WARNING) << "fail to load protobuf. 
file='" << file_handler->file_name(); return OLAP_ERR_PARSE_PROTOBUF_ERROR; } diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h index 01d841761609f6..adbfd968511322 100644 --- a/be/src/olap/hll.h +++ b/be/src/olap/hll.h @@ -23,9 +23,7 @@ #include #include -// #include "olap/field_info.h" #include "olap/olap_common.h" -// #include "olap/olap_define.h" namespace doris { diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index a74c42744f96ba..5d810e7fd027bd 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -224,7 +224,7 @@ void LRUCache::_lru_append(LRUHandle* list, LRUHandle* e) { Cache::Handle* LRUCache::lookup(const CacheKey& key, uint32_t hash) { MutexLock l(&_mutex); ++_lookup_count; - LRUHandle* e = _table.lookup(key, hash); + LRUHandle* e = _tablet.lookup(key, hash); if (e != NULL) { ++_hit_count; @@ -260,13 +260,13 @@ Cache::Handle* LRUCache::insert( e->in_cache = true; _lru_append(&_in_use, e); _usage += charge; - _finish_erase(_table.insert(e)); + _finish_erase(_tablet.insert(e)); } // else don't cache. (Tests use capacity_==0 to turn off caching.) while (_usage > _capacity && _lru.next != &_lru) { LRUHandle* old = _lru.next; assert(old->refs == 1); - bool erased = _finish_erase(_table.remove(old->key(), old->hash)); + bool erased = _finish_erase(_tablet.remove(old->key(), old->hash)); if (!erased) { // to avoid unused variable when compiled NDEBUG assert(erased); } @@ -276,7 +276,7 @@ Cache::Handle* LRUCache::insert( } // If e != NULL, finish removing *e from the cache; it has already been removed -// from the hash table. Return whether e != NULL. Requires mutex_ held. +// from the hash tablet. Return whether e != NULL. Requires mutex_ held. bool LRUCache::_finish_erase(LRUHandle* e) { if (e != NULL) { assert(e->in_cache); @@ -290,7 +290,7 @@ bool LRUCache::_finish_erase(LRUHandle* e) { void LRUCache::erase(const CacheKey& key, uint32_t hash) { MutexLock l(&_mutex); - _finish_erase(_table.remove(key, hash)); + _finish_erase(_tablet.remove(key, hash)); } int LRUCache::prune() { @@ -299,7 +299,7 @@ int LRUCache::prune() { while (_lru.next != &_lru) { LRUHandle* e = _lru.next; assert(e->refs == 1); - bool erased = _finish_erase(_table.remove(e->key(), e->hash)); + bool erased = _finish_erase(_tablet.remove(e->key(), e->hash)); if (!erased) { // to avoid unused variable when compiled NDEBUG assert(erased); } diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 42b240120fab38..cc251ccfb530f6 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -247,9 +247,9 @@ namespace doris { } } LRUHandle; - // We provide our own simple hash table since it removes a whole bunch + // We provide our own simple hash tablet since it removes a whole bunch // of porting hacks and is also faster than some of the built-in hash - // table implementations in some of the compiler/runtime combinations + // tablet implementations in some of the compiler/runtime combinations // we have tested. E.g., readrandom speeds up by ~5% over the g++ // 4.4.3's builtin hashtable. @@ -270,7 +270,7 @@ namespace doris { LRUHandle* remove(const CacheKey& key, uint32_t hash); private: - // The table consists of an array of buckets where each bucket is + // The tablet consists of an array of buckets where each bucket is // a linked list of cache entries that hash into the bucket. uint32_t _length; uint32_t _elems; @@ -343,7 +343,7 @@ namespace doris { // Entries are in use by clients, and have refs >= 2 and in_cache==true. 
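For reference, the comment above describes a leveldb-style handle table: an array of buckets, each holding a singly linked chain of cache entries whose hashes map to that bucket. A minimal illustrative sketch of such a bucket lookup follows (this is not the code in this patch; the Handle layout, the next_hash chain pointer, and the power-of-two bucket count are assumptions):

#include <cstdint>
#include <string>

struct Handle {
    Handle* next_hash;   // next entry in the same bucket's chain (assumed leveldb-style)
    uint32_t hash;       // cached hash of the key
    std::string key;
};

// Look up an entry by (key, hash): the hash selects the bucket, then the chain is
// walked; the cached hash is compared first so most mismatches are rejected without
// a full key comparison. Assumes num_buckets is a power of two.
inline Handle* bucket_lookup(Handle* const* buckets, uint32_t num_buckets,
                             const std::string& key, uint32_t hash) {
    Handle* e = buckets[hash & (num_buckets - 1)];
    while (e != nullptr && (e->hash != hash || e->key != key)) {
        e = e->next_hash;
    }
    return e;
}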
LRUHandle _in_use; - HandleTable _table; + HandleTable _tablet; uint64_t _lookup_count; // cache查找总次数 uint64_t _hit_count; // 命中cache的总次数 diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 5771414c48752b..4a1e495332a63f 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -18,18 +18,18 @@ #include "olap/memtable.h" #include "olap/hll.h" -#include "olap/data_writer.h" +#include "olap/rowset/column_data_writer.h" #include "olap/row_cursor.h" #include "util/runtime_profile.h" #include "util/debug_util.h" namespace doris { -MemTable::MemTable(Schema* schema, std::vector* field_infos, +MemTable::MemTable(Schema* schema, const TabletSchema* tablet_schema, std::vector* col_ids, TupleDescriptor* tuple_desc, KeysType keys_type) : _schema(schema), - _field_infos(field_infos), + _tablet_schema(tablet_schema), _tuple_desc(tuple_desc), _col_ids(col_ids), _keys_type(keys_type), @@ -72,7 +72,7 @@ void MemTable::insert(Tuple* tuple) { case TYPE_CHAR: { const StringValue* src = tuple->get_string_slot(slot->tuple_offset()); Slice* dest = (Slice*)(_tuple_buf + offset); - dest->size = (*_field_infos)[i].length; + dest->size = _tablet_schema->column(i).length(); dest->data = _arena.Allocate(dest->size); memcpy(dest->data, src->ptr, src->len); memset(dest->data + src->len, 0, dest->size - src->len); @@ -150,21 +150,20 @@ void MemTable::insert(Tuple* tuple) { } } -OLAPStatus MemTable::flush(ColumnDataWriter* writer) { +OLAPStatus MemTable::flush(RowsetWriterSharedPtr rowset_writer) { Table::Iterator it(_skip_list); for (it.SeekToFirst(); it.Valid(); it.Next()) { const char* row = it.key(); _schema->finalize(row); - RETURN_NOT_OK(writer->write(row)); - writer->next(row, _schema); + RETURN_NOT_OK(rowset_writer->add_row(row, _schema)); } - - RETURN_NOT_OK(writer->finalize()); + + RETURN_NOT_OK(rowset_writer->flush()); return OLAP_SUCCESS; } -OLAPStatus MemTable::close(ColumnDataWriter* writer) { - return flush(writer); +OLAPStatus MemTable::close(RowsetWriterSharedPtr rowset_writer) { + return flush(rowset_writer); } } // namespace doris diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 4c74557c02e811..40861a47da9996 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -23,25 +23,25 @@ #include "olap/schema.h" #include "olap/skiplist.h" #include "runtime/tuple.h" +#include "olap/rowset/rowset_writer.h" namespace doris { -class ColumnDataWriter; class RowCursor; class MemTable { public: - MemTable(Schema* schema, std::vector* field_infos, + MemTable(Schema* schema, const TabletSchema* tablet_schema, std::vector* col_ids, TupleDescriptor* tuple_desc, KeysType keys_type); ~MemTable(); size_t memory_usage(); void insert(Tuple* tuple); - OLAPStatus flush(ColumnDataWriter* writer); - OLAPStatus close(ColumnDataWriter* writer); + OLAPStatus flush(RowsetWriterSharedPtr rowset_writer); + OLAPStatus close(RowsetWriterSharedPtr rowset_writer); private: Schema* _schema; - std::vector* _field_infos; + const TabletSchema* _tablet_schema; TupleDescriptor* _tuple_desc; std::vector* _col_ids; KeysType _keys_type; diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index b77b9b6b9e2d13..e95a68a7476d07 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -20,13 +20,11 @@ #include #include -#include "olap/column_data.h" #include "olap/olap_define.h" -#include "olap/segment_group.h" -#include "olap/olap_table.h" +#include "olap/rowset/segment_group.h" +#include "olap/tablet.h" #include "olap/reader.h" #include "olap/row_cursor.h" -#include 
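// A sketch (illustration only, not part of this patch): both MemTable::flush() above and
// Merger::merge() below now push rows into a RowsetWriter and finish with flush(), instead
// of driving ColumnDataWriter directly. Only add_row(RowCursor*) and flush() are taken from
// the hunks; the free function and its callers are illustrative.
#include <vector>

#include "olap/olap_define.h"
#include "olap/row_cursor.h"
#include "olap/rowset/rowset_writer.h"

namespace doris {

OLAPStatus write_rows_sketch(RowsetWriterSharedPtr rowset_writer,
                             const std::vector<RowCursor*>& rows) {
    for (RowCursor* row : rows) {
        // append one row to the rowset currently being built
        RETURN_NOT_OK(rowset_writer->add_row(row));
    }
    // finalize segment data and the rowset meta
    RETURN_NOT_OK(rowset_writer->flush());
    return OLAP_SUCCESS;
}

} // namespace doris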
"olap/data_writer.h" using std::list; using std::string; @@ -35,60 +33,40 @@ using std::vector; namespace doris { -Merger::Merger(OLAPTablePtr table, SegmentGroup* segment_group, ReaderType type) : - _table(table), - _segment_group(segment_group), +Merger::Merger(TabletSharedPtr tablet, RowsetWriterSharedPtr writer, ReaderType type) : + _tablet(tablet), + _rs_writer(writer), _reader_type(type), _row_count(0) {} -OLAPStatus Merger::merge(const vector& olap_data_arr, +OLAPStatus Merger::merge(const vector& rs_readers, uint64_t* merged_rows, uint64_t* filted_rows) { // Create and initiate reader for scanning and multi-merging specified // OLAPDatas. Reader reader; ReaderParams reader_params; - reader_params.olap_table = _table; + reader_params.tablet = _tablet; reader_params.reader_type = _reader_type; - reader_params.olap_data_arr = olap_data_arr; - - if (_reader_type == READER_BASE_COMPACTION) { - reader_params.version = _segment_group->version(); - } + reader_params.rs_readers = rs_readers; + reader_params.version = _rs_writer->version(); if (OLAP_SUCCESS != reader.init(reader_params)) { - OLAP_LOG_WARNING("fail to initiate reader. [table='%s']", - _table->full_name().c_str()); + LOG(WARNING) << "fail to initiate reader. tablet=" << _tablet->full_name(); return OLAP_ERR_INIT_FAILED; } - // create and initiate writer for generating new index and data files. - unique_ptr writer(ColumnDataWriter::create(_table, _segment_group, false)); - - if (NULL == writer) { - OLAP_LOG_WARNING("fail to allocate writer."); - return OLAP_ERR_MALLOC_ERROR; - } - bool has_error = false; RowCursor row_cursor; - if (OLAP_SUCCESS != row_cursor.init(_table->tablet_schema())) { - OLAP_LOG_WARNING("fail to init row cursor."); + if (OLAP_SUCCESS != row_cursor.init(_tablet->tablet_schema())) { + LOG(WARNING) << "fail to init row cursor."; has_error = true; } bool eof = false; // The following procedure would last for long time, half of one day, etc. while (!has_error) { - // Attach row cursor to the memory position of the row block being - // written in writer. - if (OLAP_SUCCESS != writer->attached_by(&row_cursor)) { - OLAP_LOG_WARNING("attach row failed. [table='%s']", - _table->full_name().c_str()); - has_error = true; - break; - } - row_cursor.allocate_memory_for_string_type(_table->tablet_schema(), writer->mem_pool()); + row_cursor.allocate_memory_for_string_type(_tablet->tablet_schema(), _rs_writer->mem_pool()); // Read one row into row_cursor OLAPStatus res = reader.next_row_with_aggregation(&row_cursor, &eof); @@ -96,30 +74,30 @@ OLAPStatus Merger::merge(const vector& olap_data_arr, VLOG(3) << "reader read to the end."; break; } else if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("reader read failed."); + LOG(WARNING) << "reader read failed."; + has_error = true; + break; + } + + if (OLAP_SUCCESS != _rs_writer->add_row(&row_cursor)) { + LOG(WARNING) << "add row to builder failed. tablet=" << _tablet->full_name(); has_error = true; break; } // Goto next row position in the row block being written - writer->next(row_cursor); ++_row_count; } - if (has_error) { - LOG(WARNING) << "compaction failed."; - return OLAP_ERR_OTHER_ERROR; - } - - if (OLAP_SUCCESS != writer->finalize()) { - OLAP_LOG_WARNING("fail to finalize writer. [table='%s']", - _table->full_name().c_str()); + if (_rs_writer->flush() != OLAP_SUCCESS) { + LOG(WARNING) << "fail to finalize writer. 
" + << "tablet=" << _tablet->full_name(); has_error = true; } if (!has_error) { *merged_rows = reader.merged_rows(); - *filted_rows = reader.filted_rows(); + *filted_rows = reader.filtered_rows(); } return has_error ? OLAP_ERR_OTHER_ERROR : OLAP_SUCCESS; diff --git a/be/src/olap/merger.h b/be/src/olap/merger.h index e47469fd4be706..946a2086455cbf 100644 --- a/be/src/olap/merger.h +++ b/be/src/olap/merger.h @@ -19,7 +19,8 @@ #define DORIS_BE_SRC_OLAP_MERGER_H #include "olap/olap_define.h" -#include "olap/olap_table.h" +#include "olap/tablet.h" +#include "olap/rowset/rowset_writer.h" namespace doris { @@ -29,14 +30,14 @@ class ColumnData; class Merger { public: // parameter index is created by caller, and it is empty. - Merger(OLAPTablePtr table, SegmentGroup* index, ReaderType type); + Merger(TabletSharedPtr tablet, RowsetWriterSharedPtr writer, ReaderType type); virtual ~Merger() {}; // @brief read from multiple OLAPData and SegmentGroup, then write into single OLAPData and SegmentGroup // @return OLAPStatus: OLAP_SUCCESS or FAIL // @note it will take long time to finish. - OLAPStatus merge(const std::vector& olap_data_arr, + OLAPStatus merge(const std::vector& rs_readers, uint64_t* merged_rows, uint64_t* filted_rows); // 获取在做merge过程中累积的行数 @@ -44,11 +45,10 @@ class Merger { return _row_count; } private: - OLAPTablePtr _table; - SegmentGroup* _segment_group; + TabletSharedPtr _tablet; + RowsetWriterSharedPtr _rs_writer; ReaderType _reader_type; uint64_t _row_count; - Version _simple_merge_version; DISALLOW_COPY_AND_ASSIGN(Merger); }; diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 508b739a99c088..ecc1fdfbcac5a8 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -28,9 +28,12 @@ #include #include #include +#include #include "gen_cpp/Types_types.h" #include "olap/olap_define.h" +#include "util/hash_util.hpp" +#include "util/uid_util.h" namespace doris { @@ -39,30 +42,57 @@ typedef int64_t VersionHash; typedef __int128 int128_t; typedef unsigned __int128 uint128_t; +typedef UniqueId TabletUid; + +enum CompactionType { + BASE_COMPACTION = 1, + CUMULATIVE_COMPACTION = 2 +}; + +struct DataDirInfo { + DataDirInfo(): + capacity(1), + available(0), + data_used_capacity(0), + is_used(false) { } + + std::string path; + int64_t path_hash; + int64_t capacity; // 总空间,单位字节 + int64_t available; // 可用空间,单位字节 + int64_t data_used_capacity; + bool is_used; // 是否可用标识 + TStorageMedium::type storage_medium; // 存储介质类型:SSD|HDD +}; + struct TabletInfo { TabletInfo( TTabletId in_tablet_id, - TSchemaHash in_schema_hash) : + TSchemaHash in_schema_hash, + UniqueId in_uid) : tablet_id(in_tablet_id), - schema_hash(in_schema_hash) {} + schema_hash(in_schema_hash), + tablet_uid(in_uid) {} bool operator<(const TabletInfo& right) const { if (tablet_id != right.tablet_id) { return tablet_id < right.tablet_id; - } else { + } else if (schema_hash != right.schema_hash) { return schema_hash < right.schema_hash; + } else { + return tablet_uid < right.tablet_uid; } } std::string to_string() const { std::stringstream ss; - ss << "." << tablet_id - << "." << schema_hash; + ss << tablet_id << "." << schema_hash << "." 
<< tablet_uid.to_string(); return ss.str(); } TTabletId tablet_id; TSchemaHash schema_hash; + UniqueId tablet_uid; }; enum RangeCondition { @@ -137,18 +167,6 @@ enum HllDataType { HLL_DATA_NONE }; -enum AlterTabletType { - ALTER_TABLET_SCHEMA_CHANGE = 1, // add/drop/alter column - ALTER_TABLET_CREATE_ROLLUP_TABLE= 2, // split one table to several sub tables -}; - -enum AlterTableStatus { - ALTER_TABLE_WAITING = 0, - ALTER_TABLE_RUNNING = 1, - ALTER_TABLE_FINISHED = 2, - ALTER_TABLE_FAILED = 3, -}; - enum PushType { PUSH_NORMAL = 1, PUSH_FOR_DELETE = 2, @@ -168,47 +186,26 @@ enum ReaderType { typedef std::pair Version; typedef std::vector Versions; + +// used for hash-struct of hash_map. +struct HashOfVersion { + size_t operator()(const Version& version) const { + size_t seed = 0; + seed = HashUtil::hash64(&version.first, sizeof(version.first), seed); + seed = HashUtil::hash64(&version.second, sizeof(version.second), seed); + return seed; + } +}; + // It is used to represent Graph vertex. struct Vertex { - int value; - std::list* edges; + int64_t value; + std::list* edges; }; class Field; class WrapperField; using KeyRange = std::pair; -struct SegmentGroupEntity { - SegmentGroupEntity(int32_t segment_group_id, int32_t num_segments, - int64_t num_rows, size_t data_size, size_t index_size, - bool empty, const std::vector* column_statistics) - : segment_group_id(segment_group_id), num_segments(num_segments), num_rows(num_rows), - data_size(data_size), index_size(index_size), empty(empty) - { - if (column_statistics != nullptr) { - key_ranges = *column_statistics; - } - } - - int32_t segment_group_id; - int32_t num_segments; - int64_t num_rows; - size_t data_size; - size_t index_size; - bool empty; - std::vector key_ranges; -}; - -struct VersionEntity { - VersionEntity(Version v, VersionHash version_hash) - : version(v), version_hash(version_hash) { } - void add_segment_group_entity(const SegmentGroupEntity& segment_group_entity) { - segment_group_vec.push_back(segment_group_entity); - } - - Version version; - VersionHash version_hash; - std::vector segment_group_vec; -}; // ReaderStatistics used to collect statistics when scan data from storage struct OlapReaderStatistics { @@ -243,6 +240,8 @@ typedef std::set UniqueIdSet; // Column unique Id -> column id map typedef std::map UniqueIdToColumnIdMap; +typedef int64_t RowsetId; + } // namespace doris #endif // DORIS_BE_SRC_OLAP_OLAP_COMMON_H diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp index 0b94a0a6340fbb..8e72bbf6fa8572 100644 --- a/be/src/olap/olap_cond.cpp +++ b/be/src/olap/olap_cond.cpp @@ -22,6 +22,7 @@ #include #include +#include "olap/olap_common.h" #include "olap/olap_define.h" #include "olap/utils.h" #include "olap/wrapper_field.h" @@ -51,7 +52,7 @@ using doris::ColumnStatistics; // 1. 对行的过滤在DeleteHandler。 // 这部分直接调用delete_condition_eval实现,内部调用eval函数,因为对row的过滤不涉及部分过滤这种状态。 // 2. 过滤block是在SegmentReader里面,直接调用del_eval -// 3. 过滤version实在Reader里面,调用delta_pruning_filter +// 3. 
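// A sketch (illustration only, not part of this patch): HashOfVersion above lets a Version
// (a std::pair of start/end version) serve as an unordered_map key, e.g. for per-version
// bookkeeping. The map contents here are purely illustrative.
#include <cstdint>
#include <unordered_map>

#include "olap/olap_common.h"

namespace doris {

void hash_of_version_sketch() {
    std::unordered_map<Version, int64_t, HashOfVersion> rows_per_version;
    rows_per_version[Version(0, 10)] = 1000;   // cumulative versions [0, 10]
    rows_per_version[Version(11, 11)] = 50;    // singleton version 11

    auto it = rows_per_version.find(Version(0, 10));
    if (it != rows_per_version.end()) {
        // found: it->second == 1000
    }
}

} // namespace doris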
过滤version实在Reader里面,调用rowset_pruning_filter namespace doris { @@ -123,7 +124,7 @@ Cond::~Cond() { } } -OLAPStatus Cond::init(const TCondition& tcond, const FieldInfo& fi) { +OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { // Parse op type op = parse_op_type(tcond.condition_op); if (op == OP_NULL || (op != OP_IN && tcond.condition_values.size() != 1)) { @@ -134,7 +135,7 @@ OLAPStatus Cond::init(const TCondition& tcond, const FieldInfo& fi) { if (op == OP_IS) { // 'is null' or 'is not null' auto operand = tcond.condition_values.begin(); - std::unique_ptr f(WrapperField::create(fi, operand->length())); + std::unique_ptr f(WrapperField::create(column, operand->length())); if (f == nullptr) { OLAP_LOG_WARNING("Create field failed. [name=%s, operand=%s, op_type=%d]", tcond.column_name.c_str(), operand->c_str(), op); @@ -148,7 +149,7 @@ OLAPStatus Cond::init(const TCondition& tcond, const FieldInfo& fi) { operand_field = f.release(); } else if (op != OP_IN) { auto operand = tcond.condition_values.begin(); - std::unique_ptr f(WrapperField::create(fi, operand->length())); + std::unique_ptr f(WrapperField::create(column, operand->length())); if (f == nullptr) { OLAP_LOG_WARNING("Create field failed. [name=%s, operand=%s, op_type=%d]", tcond.column_name.c_str(), operand->c_str(), op); @@ -163,7 +164,7 @@ OLAPStatus Cond::init(const TCondition& tcond, const FieldInfo& fi) { operand_field = f.release(); } else { for (auto& operand : tcond.condition_values) { - std::unique_ptr f(WrapperField::create(fi, operand.length())); + std::unique_ptr f(WrapperField::create(column, operand.length())); if (f == NULL) { OLAP_LOG_WARNING("Create field failed. [name=%s, operand=%s, op_type=%d]", tcond.column_name.c_str(), operand.c_str(), op); @@ -472,9 +473,9 @@ CondColumn::~CondColumn() { } // PRECONDITION 1. index is valid; 2. at least has one operand -OLAPStatus CondColumn::add_cond(const TCondition& tcond, const FieldInfo& fi) { +OLAPStatus CondColumn::add_cond(const TCondition& tcond, const TabletColumn& column) { std::unique_ptr cond(new Cond()); - auto res = cond->init(tcond, fi); + auto res = cond->init(tcond, column); if (res != OLAP_SUCCESS) { return res; } @@ -555,30 +556,30 @@ bool CondColumn::eval(const BloomFilter& bf) const { } OLAPStatus Conditions::append_condition(const TCondition& tcond) { - int32_t index = _table->get_field_index(tcond.column_name); + int32_t index = _get_field_index(tcond.column_name); if (index < 0) { - OLAP_LOG_WARNING("fail to get field index, name is invalid. [index=%d; field_name=%s]", - index, - tcond.column_name.c_str()); + LOG(WARNING) << "fail to get field index, name is invalid. 
index=" << index + << ", field_name=" << tcond.column_name; return OLAP_ERR_INPUT_PARAMETER_ERROR; } // Skip column which is non-key, or whose type is string or float - const FieldInfo& fi = _table->tablet_schema()[index]; - if (fi.type == OLAP_FIELD_TYPE_DOUBLE || fi.type == OLAP_FIELD_TYPE_FLOAT) { + const TabletColumn& column = _schema->column(index); + if (column.type() == OLAP_FIELD_TYPE_DOUBLE + || column.type() == OLAP_FIELD_TYPE_FLOAT) { return OLAP_SUCCESS; } CondColumn* cond_col = nullptr; auto it = _columns.find(index); if (it == _columns.end()) { - cond_col = new CondColumn(_table, index); + cond_col = new CondColumn(*_schema, index); _columns[index] = cond_col; } else { cond_col = it->second; } - return cond_col->add_cond(tcond, fi); + return cond_col->add_cond(tcond, column); } bool Conditions::delete_conditions_eval(const RowCursor& row) const { @@ -594,32 +595,31 @@ bool Conditions::delete_conditions_eval(const RowCursor& row) const { } VLOG(3) << "Row meets the delete conditions. " - << "condition_count=" << _columns.size() + << "condition_count=" << _columns.size() << ", row=" << row.to_string(); return true; } -bool Conditions::delta_pruning_filter( - const std::vector>& column_statistics) const { +bool Conditions::rowset_pruning_filter(const std::vector& zone_maps) const { //通过所有列上的删除条件对version进行过滤 for (auto& cond_it : _columns) { - if (cond_it.second->is_key() && cond_it.first > column_statistics.size()) { - OLAP_LOG_WARNING("where condition not equal column statistics size." - "[cond_id=%d, column_statistics_size=%lu]", - cond_it.first, - column_statistics.size()); + if (cond_it.second->is_key() && cond_it.first > zone_maps.size()) { + LOG(WARNING) << "where condition not equal zone maps size. " + << "cond_id=" << cond_it.first + << ", zone_map_size=" << zone_maps.size(); return false; } - if (cond_it.second->is_key() && !cond_it.second->eval(column_statistics[cond_it.first])) { + if (cond_it.second->is_key() && !cond_it.second->eval(zone_maps[cond_it.first])) { return true; } } return false; } -int Conditions::delete_pruning_filter( - const std::vector>& col_stat) const { - +int Conditions::delete_pruning_filter(const std::vector& zone_maps) const { + if (_columns.empty()) { + return DEL_NOT_SATISFIED; + } //通过所有列上的删除条件对version进行过滤 /* * the relationship between condcolumn A and B is A & B. @@ -635,16 +635,15 @@ int Conditions::delete_pruning_filter( * this is base on the assumption that the delete condition * is only about key field, not about value field. */ - if (cond_it.second->is_key() && cond_it.first > col_stat.size()) { - OLAP_LOG_WARNING("where condition not equal column statistics size." - "[cond_id=%d, column_statistics_size=%lu]", - cond_it.first, - col_stat.size()); + if (cond_it.second->is_key() && cond_it.first > zone_maps.size()) { + LOG(WARNING) << "where condition not equal column statistics size. " + << "cond_id=" << cond_it.first + << ", zone_map_size=" << zone_maps.size(); del_partial_satisfied = true; continue; } - int del_ret = cond_it.second->del_eval(col_stat[cond_it.first]); + int del_ret = cond_it.second->del_eval(zone_maps[cond_it.first]); if (DEL_SATISFIED == del_ret) { continue; } else if (DEL_PARTIAL_SATISFIED == del_ret) { @@ -655,8 +654,8 @@ int Conditions::delete_pruning_filter( } } - if (true == del_not_satisfied || 0 == _columns.size()) { - // if the size of condcolumn vector is zero, + if (del_not_satisfied) { + // if the size of condcolumn vector is zero, // the delete condtion is not satisfied. 
ret = DEL_NOT_SATISFIED; } else if (true == del_partial_satisfied) { diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h index 8d923e9a0753b0..ebd5df54e21d83 100644 --- a/be/src/olap/olap_cond.h +++ b/be/src/olap/olap_cond.h @@ -24,11 +24,11 @@ #include #include +#include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/column_data_file.pb.h" #include "olap/bloom_filter.hpp" #include "olap/stream_index_common.h" #include "olap/field.h" -#include "olap/olap_table.h" #include "olap/row_cursor.h" namespace doris { @@ -67,17 +67,17 @@ struct Cond { Cond(); ~Cond(); - OLAPStatus init(const TCondition& tcond, const FieldInfo& fi); - + OLAPStatus init(const TCondition& tcond, const TabletColumn& column); + // 用一行数据的指定列同条件进行比较,如果符合过滤条件, // 即按照此条件,行应被过滤掉,则返回true,否则返回false bool eval(char* right) const; - - bool eval(const std::pair& statistic) const; - int del_eval(const std::pair& stat) const; + + bool eval(const KeyRange& statistic) const; + int del_eval(const KeyRange& stat) const; bool eval(const BloomFilter& bf) const; - + CondOp op; // valid when op is not OP_IN WrapperField* operand_field; @@ -89,16 +89,16 @@ struct Cond { // 所有归属于同一列上的条件二元组,聚合在一个CondColumn上 class CondColumn { public: - CondColumn(OLAPTablePtr table, int32_t index) : _col_index(index), _table(table) { + CondColumn(const TabletSchema& tablet_schema, int32_t index) : _col_index(index) { _conds.clear(); - _is_key = _table->tablet_schema()[_col_index].is_key; + _is_key = tablet_schema.column(_col_index).is_key(); } ~CondColumn(); // Convert condition's operand from string to Field*, and append this condition to _conds // return true if success, otherwise return false bool add_condition(Cond* condition); - OLAPStatus add_cond(const TCondition& tcond, const FieldInfo& fi); + OLAPStatus add_cond(const TCondition& tcond, const TabletColumn& column); // 对一行数据中的指定列,用所有过滤条件进行比较,如果所有条件都满足,则过滤此行 bool eval(const RowCursor& row) const; @@ -120,7 +120,6 @@ class CondColumn { bool _is_key; int32_t _col_index; std::vector _conds; - OLAPTablePtr _table; }; // 一次请求所关联的条件 @@ -139,11 +138,8 @@ class Conditions { _columns.clear(); } - void set_table(OLAPTablePtr table) { - long do_not_remove_me_until_you_want_a_heart_attacking = table.use_count(); - OLAP_UNUSED_ARG(do_not_remove_me_until_you_want_a_heart_attacking); - - _table = table; + void set_tablet_schema(const TabletSchema* schema) { + _schema = schema; } // 如果成功,则_columns中增加一项,如果失败则无视此condition,同时输出日志 @@ -154,17 +150,26 @@ class Conditions { bool delete_conditions_eval(const RowCursor& row) const; - bool delta_pruning_filter( - const std::vector>& column_statistics) const; - int delete_pruning_filter( - const std::vector>& column_statistics) const; + bool rowset_pruning_filter(const std::vector& zone_maps) const; + int delete_pruning_filter(const std::vector& zone_maps) const; const CondColumns& columns() const { return _columns; } private: - OLAPTablePtr _table; // ref to OLAPTable to access schema + int32_t _get_field_index(const std::string& field_name) const { + for (int i = 0; i < _schema->num_columns(); i++) { + if (_schema->column(i).name() == field_name) { + return i; + } + } + LOG(WARNING) << "invalid field name. 
[name='" << field_name << "']"; + return -1; + } + +private: + const TabletSchema* _schema; CondColumns _columns; // list of condition column }; diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index c8be0e9a78906d..c8b8c62ef7f829 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -51,6 +51,8 @@ static constexpr uint32_t OLAP_COMPACTION_DEFAULT_CANDIDATE_SIZE = 10; // the max length supported for string type static const uint16_t OLAP_STRING_MAX_LENGTH = 65535; +static const int32_t PREFERRED_SNAPSHOT_VERSION = 2; + // the max bytes for stored string length using StringOffsetType = uint32_t; using StringLengthType = uint16_t; @@ -159,7 +161,10 @@ enum OLAPStatus { OLAP_ERR_INVALID_CLUSTER_INFO = -225, OLAP_ERR_TRANSACTION_NOT_EXIST = -226, OLAP_ERR_DISK_FAILURE = -227, - OLAP_ERR_LZO_DISABLED = -228, + OLAP_ERR_TRANSACTION_ALREADY_COMMITTED = -228, + OLAP_ERR_TRANSACTION_ALREADY_VISIBLE = -229, + OLAP_ERR_VERSION_ALREADY_MERGED = -230, + OLAP_ERR_LZO_DISABLED = -231, // CommandExecutor // [-300, -400) @@ -171,21 +176,24 @@ enum OLAPStatus { OLAP_ERR_CE_TABLET_ID_EXIST = -305, OLAP_ERR_CE_TRY_CE_LOCK_ERROR = -306, - // OLAPTable + // Tablet // [-400, -500) OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR = -400, OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR = -401, OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR = -402, OLAP_ERR_TABLE_INDEX_FIND_ERROR = -403, OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR = -404, + OLAP_ERR_TABLE_CREATE_META_ERROR = -405, + OLAP_ERR_TABLE_ALREADY_DELETED_ERROR = -406, - // OLAPEngine + // StorageEngine // [-500, -600) OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE = -500, OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE = -501, OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR = -502, OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR = -503, OLAP_ERR_DELETE_VERSION_ERROR = -504, + OLAP_ERR_GC_SCAN_PATH_ERROR = -505, // FetchHandler // [-600, -700) @@ -203,7 +211,7 @@ enum OLAPStatus { // [-700, -800) OLAP_ERR_READER_IS_UNINITIALIZED = -700, OLAP_ERR_READER_GET_ITERATOR_ERROR = -701, - OLAP_ERR_READER_ACQUIRE_DATA_ERROR = -702, + OLAP_ERR_CAPTURE_ROWSET_READER_ERROR = -702, OLAP_ERR_READER_READING_ERROR = -703, // BaseCompaction @@ -212,7 +220,7 @@ enum OLAPStatus { OLAP_ERR_BE_REPLACE_VERSIONS_ERROR = -801, OLAP_ERR_BE_MERGE_ERROR = -802, OLAP_ERR_BE_COMPUTE_VERSION_HASH_ERROR = -803, - OLAP_ERR_BE_ACQUIRE_DATA_SOURCES_ERROR = -804, + OLAP_ERR_CAPTURE_ROWSET_ERROR = -804, OLAP_ERR_BE_SAVE_HEADER_ERROR = -805, OLAP_ERR_BE_INIT_OLAP_DATA = -806, OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS = -807, @@ -235,6 +243,10 @@ enum OLAPStatus { OLAP_ERR_PUSH_TABLE_NOT_EXIST = -909, OLAP_ERR_PUSH_INPUT_DATA_ERROR = -910, OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST = -911, + // only support realtime push api, batch process is deprecated and is removed + OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED = -912, + OLAP_ERR_PUSH_COMMIT_ROWSET = -913, + OLAP_ERR_PUSH_ROWSET_NOT_FOUND = -914, // SegmentGroup // [-1000, -1100) @@ -262,7 +274,7 @@ enum OLAPStatus { OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION = -1301, OLAP_ERR_ROWBLOCK_READ_INFO_ERROR = -1302, - // OLAPHeader + // TabletMeta // [-1400, -1500) OLAP_ERR_HEADER_ADD_VERSION = -1400, OLAP_ERR_HEADER_DELETE_VERSION = -1401, @@ -279,7 +291,7 @@ enum OLAPStatus { OLAP_ERR_HEADER_PB_PARSE_FAILED = -1412, OLAP_ERR_HEADER_HAS_PENDING_DATA = -1413, - // OLAPTableSchema + // TabletSchema // [-1500, -1600) OLAP_ERR_SCHEMA_SCHEMA_INVALID = -1500, OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID = -1501, @@ -333,6 +345,18 @@ enum OLAPStatus { OLAP_ERR_META_ITERATOR = -3005, OLAP_ERR_META_DELETE = -3006, 
OLAP_ERR_META_ALREADY_EXIST = -3007, + + // Rowset + // [-3100, -3200) + OLAP_ERR_ROWSET_WRITER_INIT = -3100, + OLAP_ERR_ROWSET_SAVE_FAILED = -3101, + OLAP_ERR_ROWSET_GENERATE_ID_FAILED = -3102, + OLAP_ERR_ROWSET_DELETE_SEGMENT_GROUP_FILE_FAILED = -3103, + OLAP_ERR_ROWSET_BUILDER_INIT = -3104, + OLAP_ERR_ROWSET_TYPE_NOT_FOUND = -3105, + OLAP_ERR_ROWSET_ALREADY_EXIST = -3106, + OLAP_ERR_ROWSET_CREATE_READER = -3107, + OLAP_ERR_ROWSET_INVALID = -3108 }; enum ColumnFamilyIndex { @@ -348,17 +372,22 @@ static const char* const HINIS_KEY_GROUP_SEPARATOR = "&"; static const std::string DEFAULT_COLUMN_FAMILY = "default"; static const std::string DORIS_COLUMN_FAMILY = "doris"; static const std::string META_COLUMN_FAMILY = "meta"; -static const std::string IS_HEADER_CONVERTED = "is_header_converted"; +static const std::string END_ROWSET_ID = "end_rowset_id"; static const std::string CONVERTED_FLAG = "true"; +static const std::string TABLET_CONVERT_FINISHED = "tablet_convert_finished"; const std::string TABLET_ID_KEY = "tablet_id"; const std::string TABLET_SCHEMA_HASH_KEY = "schema_hash"; +const std::string TABLET_ID_PREFIX = "t_"; +const std::string ROWSET_ID_PREFIX = "s_"; +#ifndef RETURN_NOT_OK #define RETURN_NOT_OK(s) do { \ OLAPStatus _s = (s); \ if (_s != OLAP_SUCCESS) { \ return _s; \ } \ } while (0); +#endif // Declare copy constructor and equal operator as private #ifndef DISALLOW_COPY_AND_ASSIGN @@ -373,7 +402,7 @@ const std::string TABLET_SCHEMA_HASH_KEY = "schema_hash"; // thread-safe(gcc only) method for obtaining singleton #define DECLARE_SINGLETON(classname) \ public: \ - static classname *get_instance() { \ + static classname *instance() { \ classname *p_instance = NULL; \ try { \ static classname s_instance; \ diff --git a/be/src/olap/olap_engine.cpp b/be/src/olap/olap_engine.cpp deleted file mode 100644 index 656fd437bb3728..00000000000000 --- a/be/src/olap/olap_engine.cpp +++ /dev/null @@ -1,3174 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
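// A sketch (illustration only, not part of this patch): RETURN_NOT_OK, now wrapped in
// #ifndef above, early-returns any status other than OLAP_SUCCESS, so fallible steps can be
// chained without nested error checks; callers of DECLARE_SINGLETON classes likewise switch
// from get_instance() to instance() with this change. load_meta()/save_meta() below are
// hypothetical helpers declared only for the sketch.
#include "olap/olap_define.h"

namespace doris {

OLAPStatus load_meta();   // hypothetical helper
OLAPStatus save_meta();   // hypothetical helper

OLAPStatus load_and_resave_sketch() {
    RETURN_NOT_OK(load_meta());   // returns load_meta()'s status immediately on failure
    RETURN_NOT_OK(save_meta());   // only reached when the load succeeded
    return OLAP_SUCCESS;
}

} // namespace doris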
- -#include "olap/olap_engine.h" - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "olap/base_compaction.h" -#include "olap/cumulative_compaction.h" -#include "olap/lru_cache.h" -#include "olap/olap_header.h" -#include "olap/olap_header_manager.h" -#include "olap/push_handler.h" -#include "olap/reader.h" -#include "olap/schema_change.h" -#include "olap/store.h" -#include "olap/utils.h" -#include "olap/data_writer.h" -#include "util/time.h" -#include "util/doris_metrics.h" -#include "util/pretty_printer.h" - -using apache::thrift::ThriftDebugString; -using boost::filesystem::canonical; -using boost::filesystem::directory_iterator; -using boost::filesystem::path; -using boost::filesystem::recursive_directory_iterator; -using std::back_inserter; -using std::copy; -using std::inserter; -using std::list; -using std::map; -using std::nothrow; -using std::pair; -using std::priority_queue; -using std::set; -using std::set_difference; -using std::string; -using std::stringstream; -using std::vector; - -namespace doris { - -OLAPEngine* OLAPEngine::_s_instance = nullptr; -const std::string HTTP_REQUEST_PREFIX = "/api/_tablet/_download?"; -const std::string HTTP_REQUEST_TOKEN_PARAM = "token="; -const std::string HTTP_REQUEST_FILE_PARAM = "&file="; - -const uint32_t DOWNLOAD_FILE_MAX_RETRY = 3; -const uint32_t LIST_REMOTE_FILE_TIMEOUT = 15; - -bool _sort_table_by_create_time(const OLAPTablePtr& a, const OLAPTablePtr& b) { - return a->creation_time() < b->creation_time(); -} - -static Status _validate_options(const EngineOptions& options) { - if (options.store_paths.empty()) { - return Status::InternalError("store paths is empty");; - } - return Status::OK(); -} - -Status OLAPEngine::open(const EngineOptions& options, OLAPEngine** engine_ptr) { - RETURN_IF_ERROR(_validate_options(options)); - std::unique_ptr engine(new OLAPEngine(options)); - auto st = engine->open(); - if (st != OLAP_SUCCESS) { - LOG(WARNING) << "engine open failed, res=" << st; - return Status::InternalError("open engine failed"); - } - st = engine->_start_bg_worker(); - if (st != OLAP_SUCCESS) { - LOG(WARNING) << "engine start background failed, res=" << st; - return Status::InternalError("open engine failed"); - } - *engine_ptr = engine.release(); - return Status::OK(); -} - -OLAPEngine::OLAPEngine(const EngineOptions& options) - : _options(options), - _available_storage_medium_type_count(0), - _effective_cluster_id(-1), - _is_all_cluster_id_exist(true), - _is_drop_tables(false), - _global_table_id(0), - _index_stream_lru_cache(NULL), - _tablet_stat_cache_update_time_ms(0), - _snapshot_base_id(0), - _is_report_disk_state_already(false), - _is_report_olap_table_already(false) { - if (_s_instance == nullptr) { - _s_instance = this; - } -} - -OLAPEngine::~OLAPEngine() { - clear(); -} - -OLAPStatus OLAPEngine::_load_store(OlapStore* store) { - std::string store_path = store->path(); - LOG(INFO) <<"start to load tablets from store_path:" << store_path; - - bool is_header_converted = false; - OLAPStatus res = OlapHeaderManager::get_header_converted(store, is_header_converted); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "get convert flag from meta failed"; - return res; - } - if (is_header_converted) { - LOG(INFO) << "load header from meta"; - OLAPStatus s = store->load_tables(this); - LOG(INFO) << "load header from meta finished"; - if (s != OLAP_SUCCESS) { - LOG(WARNING) << "there is failure when loading table headers, path:" << 
store_path; - return s; - } else { - return OLAP_SUCCESS; - } - } - - // compatible for old header load method - // walk all directory to load header file - LOG(INFO) << "load headers from header files"; - - // get all shards - set shards; - if (dir_walk(store_path + DATA_PREFIX, &shards, NULL) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to walk dir. [root=" << store_path << "]"; - return OLAP_ERR_INIT_FAILED; - } - - for (const auto& shard : shards) { - // get all tablets - set tablets; - string one_shard_path = store_path + DATA_PREFIX + '/' + shard; - if (dir_walk(one_shard_path, &tablets, NULL) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to walk dir. [root=" << one_shard_path << "]"; - continue; - } - - for (const auto& tablet : tablets) { - // 遍历table目录寻找此table的所有indexedRollupTable,注意不是SegmentGroup,而是OLAPTable - set schema_hashes; - string one_tablet_path = one_shard_path + '/' + tablet; - if (dir_walk(one_tablet_path, &schema_hashes, NULL) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to walk dir. [root=" << one_tablet_path << "]"; - continue; - } - - for (const auto& schema_hash : schema_hashes) { - TTabletId tablet_id = strtoul(tablet.c_str(), NULL, 10); - TSchemaHash tablet_schema_hash = strtoul(schema_hash.c_str(), NULL, 10); - - // 遍历schema_hash目录寻找此index的所有schema - // 加载失败依然加载下一个Table - if (load_one_tablet( - store, - tablet_id, - tablet_schema_hash, - one_tablet_path + '/' + schema_hash) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load one table, but continue. [path='%s']", - (one_tablet_path + '/' + schema_hash).c_str()); - } - } - } - } - res = OlapHeaderManager::set_converted_flag(store); - LOG(INFO) << "load header from header files finished"; - return res; -} - -OLAPStatus OLAPEngine::load_one_tablet( - OlapStore* store, TTabletId tablet_id, SchemaHash schema_hash, - const string& schema_hash_path, bool force) { - stringstream header_name_stream; - header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr"; - string header_path = header_name_stream.str(); - path boost_schema_hash_path(schema_hash_path); - - if (access(header_path.c_str(), F_OK) != 0) { - LOG(WARNING) << "fail to find header file. [header_path=" << header_path << "]"; - move_to_trash(boost_schema_hash_path, boost_schema_hash_path); - return OLAP_ERR_FILE_NOT_EXIST; - } - - auto olap_table = OLAPTable::create_from_header_file( - tablet_id, schema_hash, header_path, store); - if (olap_table == NULL) { - LOG(WARNING) << "fail to load table. [header_path=" << header_path << "]"; - move_to_trash(boost_schema_hash_path, boost_schema_hash_path); - return OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; - } - - if (olap_table->lastest_version() == NULL && !olap_table->is_schema_changing()) { - OLAP_LOG_WARNING("tablet not in schema change state without delta is invalid. " - "[header_path=%s]", - header_path.c_str()); - move_to_trash(boost_schema_hash_path, boost_schema_hash_path); - return OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; - } - - // 这里不需要SAFE_DELETE(olap_table),因为olap_table指针已经在add_table中托管到smart pointer中 - OLAPStatus res = OLAP_SUCCESS; - string table_name = olap_table->full_name(); - res = add_table(tablet_id, schema_hash, olap_table, force); - if (res != OLAP_SUCCESS) { - // 插入已经存在的table时返回成功 - if (res == OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE) { - return OLAP_SUCCESS; - } - - LOG(WARNING) << "failed to add table. 
[table=" << table_name << "]"; - return OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; - } - - if (register_table_into_root_path(olap_table.get()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to register table into root path. [root_path=%s]", - schema_hash_path.c_str()); - - if (OLAPEngine::get_instance()->drop_table(tablet_id, schema_hash) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to drop table when create table failed. " - "[tablet=%ld schema_hash=%d]", - tablet_id, schema_hash); - } - - return OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; - } - - // load pending data (for realtime push), will add transaction relationship into engine - olap_table->load_pending_data(); - - VLOG(3) << "succeed to add table. tablet=" << olap_table->full_name() - << ", path=" << schema_hash_path; - return OLAP_SUCCESS; -} - -void OLAPEngine::check_none_row_oriented_table(const std::vector& stores) { - for (auto store : stores) { - auto res = _check_none_row_oriented_table_in_store(store); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "io error when init load tables. res=" << res - << ", store=" << store->path(); - } - } -} - -OLAPStatus OLAPEngine::_check_none_row_oriented_table_in_store(OlapStore* store) { - std::string store_path = store->path(); - LOG(INFO) <<"start to load tablets from store_path:" << store_path; - - bool is_header_converted = false; - OLAPStatus res = OlapHeaderManager::get_header_converted(store, is_header_converted); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "get convert flag from meta failed"; - return res; - } - if (is_header_converted) { - OLAPStatus s = store->check_none_row_oriented_table_in_store(this); - if (s != OLAP_SUCCESS) { - LOG(WARNING) << "there is failure when loading table headers, path:" << store_path; - return s; - } else { - return OLAP_SUCCESS; - } - } - - // compatible for old header load method - // walk all directory to load header file - LOG(INFO) << "check has none row-oriented table from header files"; - - // get all shards - set shards; - if (dir_walk(store_path + DATA_PREFIX, &shards, NULL) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to walk dir. [root=" << store_path << "]"; - return OLAP_ERR_INIT_FAILED; - } - - for (const auto& shard : shards) { - // get all tablets - set tablets; - string one_shard_path = store_path + DATA_PREFIX + '/' + shard; - if (dir_walk(one_shard_path, &tablets, NULL) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to walk dir. [root=" << one_shard_path << "]"; - continue; - } - - for (const auto& tablet : tablets) { - // 遍历table目录寻找此table的所有indexedRollupTable,注意不是SegmentGroup,而是OLAPTable - set schema_hashes; - string one_tablet_path = one_shard_path + '/' + tablet; - if (dir_walk(one_tablet_path, &schema_hashes, NULL) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to walk dir. [root=" << one_tablet_path << "]"; - continue; - } - - for (const auto& schema_hash : schema_hashes) { - TTabletId tablet_id = strtoul(tablet.c_str(), NULL, 10); - TSchemaHash tablet_schema_hash = strtoul(schema_hash.c_str(), NULL, 10); - - // 遍历schema_hash目录寻找此index的所有schema - // 加载失败依然加载下一个Table - if (check_none_row_oriented_table_in_path( - store, - tablet_id, - tablet_schema_hash, - one_tablet_path + '/' + schema_hash) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load one table, but continue. 
[path='%s']", - (one_tablet_path + '/' + schema_hash).c_str()); - } - } - } - } - return res; -} - -OLAPStatus OLAPEngine::check_none_row_oriented_table_in_path( - OlapStore* store, TTabletId tablet_id, - SchemaHash schema_hash, const string& schema_hash_path) { - stringstream header_name_stream; - header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr"; - string header_path = header_name_stream.str(); - path boost_schema_hash_path(schema_hash_path); - - if (access(header_path.c_str(), F_OK) != 0) { - LOG(WARNING) << "fail to find header file. [header_path=" << header_path << "]"; - move_to_trash(boost_schema_hash_path, boost_schema_hash_path); - return OLAP_ERR_FILE_NOT_EXIST; - } - - auto olap_table = OLAPTable::create_from_header_file_for_check( - tablet_id, schema_hash, header_path); - if (olap_table == NULL) { - LOG(WARNING) << "fail to load table. [header_path=" << header_path << "]"; - move_to_trash(boost_schema_hash_path, boost_schema_hash_path); - return OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; - } - - LOG(INFO) << "data_file_type:" << olap_table->data_file_type(); - if (olap_table->data_file_type() == OLAP_DATA_FILE) { - LOG(FATAL) << "Not support row-oriented table any more. Please convert it to column-oriented table." - << "tablet=" << olap_table->full_name(); - } - - return OLAP_SUCCESS; -} - -void OLAPEngine::load_stores(const std::vector& stores) { - std::vector threads; - for (auto store : stores) { - threads.emplace_back([this, store] { - auto res = _load_store(store); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "io error when init load tables. res=" << res - << ", store=" << store->path(); - } - }); - } - for (auto& thread : threads) { - thread.join(); - } -} - -OLAPStatus OLAPEngine::open() { - // init store_map - for (auto& path : _options.store_paths) { - OlapStore* store = new OlapStore(path.path, path.capacity_bytes); - auto st = store->load(); - if (!st.ok()) { - LOG(WARNING) << "Store load failed, path=" << path.path; - return OLAP_ERR_INVALID_ROOT_PATH; - } - _store_map.emplace(path.path, store); - } - _effective_cluster_id = config::cluster_id; - auto res = check_all_root_path_cluster_id(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to check cluster info. 
res=" << res; - return res; - } - - _update_storage_medium_type_count(); - - auto cache = new_lru_cache(config::file_descriptor_cache_capacity); - if (cache == nullptr) { - OLAP_LOG_WARNING("failed to init file descriptor LRUCache"); - _tablet_map.clear(); - return OLAP_ERR_INIT_FAILED; - } - FileHandler::set_fd_cache(cache); - - // 初始化LRUCache - // cache大小可通过配置文件配置 - _index_stream_lru_cache = new_lru_cache(config::index_stream_cache_capacity); - if (_index_stream_lru_cache == NULL) { - OLAP_LOG_WARNING("failed to init index stream LRUCache"); - _tablet_map.clear(); - return OLAP_ERR_INIT_FAILED; - } - - // 初始化CE调度器 - int32_t cumulative_compaction_num_threads = config::cumulative_compaction_num_threads; - int32_t base_compaction_num_threads = config::base_compaction_num_threads; - uint32_t file_system_num = get_file_system_count(); - _max_cumulative_compaction_task_per_disk = (cumulative_compaction_num_threads + file_system_num - 1) / file_system_num; - _max_base_compaction_task_per_disk = (base_compaction_num_threads + file_system_num - 1) / file_system_num; - - auto stores = get_stores(); - check_none_row_oriented_table(stores); - load_stores(stores); - // 取消未完成的SchemaChange任务 - _cancel_unfinished_schema_change(); - - return OLAP_SUCCESS; -} - -void OLAPEngine::_update_storage_medium_type_count() { - set available_storage_medium_types; - - std::lock_guard l(_store_lock); - for (auto& it : _store_map) { - if (it.second->is_used()) { - available_storage_medium_types.insert(it.second->storage_medium()); - } - } - - _available_storage_medium_type_count = available_storage_medium_types.size(); -} - - -OLAPStatus OLAPEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id) { - OLAPStatus res = OLAP_SUCCESS; - - if (cluster_id == -1 && _effective_cluster_id == -1) { - // maybe this is a new cluster, cluster id will get from heartbeate - return res; - } else if (cluster_id != -1 && _effective_cluster_id == -1) { - _effective_cluster_id = cluster_id; - } else if (cluster_id == -1 && _effective_cluster_id != -1) { - // _effective_cluster_id is the right effective cluster id - return res; - } else { - if (cluster_id != _effective_cluster_id) { - OLAP_LOG_WARNING("multiple cluster ids is not equal. 
[id1=%d id2=%d]", - _effective_cluster_id, cluster_id); - return OLAP_ERR_INVALID_CLUSTER_INFO; - } - } - - return res; -} - -void OLAPEngine::set_store_used_flag(const string& path, bool is_used) { - std::lock_guard l(_store_lock); - auto it = _store_map.find(path); - if (it == _store_map.end()) { - LOG(WARNING) << "store not exist, path=" << path; - } - - it->second->set_is_used(is_used); - _update_storage_medium_type_count(); -} - -void OLAPEngine::get_all_available_root_path(std::vector* available_paths) { - available_paths->clear(); - std::lock_guard l(_store_lock); - for (auto& it : _store_map) { - if (it.second->is_used()) { - available_paths->push_back(it.first); - } - } -} - -template -std::vector OLAPEngine::get_stores() { - std::vector stores; - stores.reserve(_store_map.size()); - - std::lock_guard l(_store_lock); - if (include_unused) { - for (auto& it : _store_map) { - stores.push_back(it.second); - } - } else { - for (auto& it : _store_map) { - if (it.second->is_used()) { - stores.push_back(it.second); - } - } - } - return stores; -} - -template std::vector OLAPEngine::get_stores(); -template std::vector OLAPEngine::get_stores(); - -OLAPStatus OLAPEngine::get_all_root_path_info(vector* root_paths_info) { - OLAPStatus res = OLAP_SUCCESS; - root_paths_info->clear(); - - MonotonicStopWatch timer; - timer.start(); - int tablet_counter = 0; - - // get all root path info and construct a path map. - // path -> RootPathInfo - std::map path_map; - { - std::lock_guard l(_store_lock); - for (auto& it : _store_map) { - std::string path = it.first; - path_map.emplace(path, it.second->to_root_path_info()); - // if this path is not used, init it's info - if (!path_map[path].is_used) { - path_map[path].capacity = 1; - path_map[path].data_used_capacity = 0; - path_map[path].available = 0; - path_map[path].storage_medium = TStorageMedium::HDD; - } else { - path_map[path].storage_medium = it.second->storage_medium(); - } - } - } - - // for each tablet, get it's data size, and accumulate the path 'data_used_capacity' - // which the tablet belongs to. - _tablet_map_lock.rdlock(); - for (auto& entry : _tablet_map) { - TableInstances& instance = entry.second; - for (auto& tablet : instance.table_arr) { - ++tablet_counter; - int64_t data_size = tablet->get_data_size(); - auto find = path_map.find(tablet->storage_root_path_name()); - if (find == path_map.end()) { - continue; - } - if (find->second.is_used) { - find->second.data_used_capacity += data_size; - } - } - } - _tablet_map_lock.unlock(); - - // add path info to root_paths_info - for (auto& entry : path_map) { - root_paths_info->emplace_back(entry.second); - } - - // get available capacity of each path - for (auto& info: *root_paths_info) { - if (info.is_used) { - _get_path_available_capacity(info.path, &info.available); - } - } - timer.stop(); - LOG(INFO) << "get root path info cost: " << timer.elapsed_time() / 1000000 - << " ms. 
tablet counter: " << tablet_counter; - - return res; -} - -OLAPStatus OLAPEngine::register_table_into_root_path(OLAPTable* olap_table) { - return olap_table->store()->register_table(olap_table); -} - -void OLAPEngine::start_disk_stat_monitor() { - for (auto& it : _store_map) { - it.second->health_check(); - } - _update_storage_medium_type_count(); - _delete_tables_on_unused_root_path(); - - // if drop tables - // notify disk_state_worker_thread and olap_table_worker_thread until they received - if (_is_drop_tables) { - report_notify(true); - - bool is_report_disk_state_expected = true; - bool is_report_olap_table_expected = true; - bool is_report_disk_state_exchanged = - _is_report_disk_state_already.compare_exchange_strong(is_report_disk_state_expected, false); - bool is_report_olap_table_exchanged = - _is_report_olap_table_already.compare_exchange_strong(is_report_olap_table_expected, false); - if (is_report_disk_state_exchanged && is_report_olap_table_exchanged) { - _is_drop_tables = false; - } - } -} - -bool OLAPEngine::_used_disk_not_enough(uint32_t unused_num, uint32_t total_num) { - return ((total_num == 0) || (unused_num * 100 / total_num > _min_percentage_of_error_disk)); -} - -OLAPStatus OLAPEngine::check_all_root_path_cluster_id() { - int32_t cluster_id = -1; - for (auto& it : _store_map) { - int32_t tmp_cluster_id = it.second->cluster_id(); - if (tmp_cluster_id == -1) { - _is_all_cluster_id_exist = false; - } else if (tmp_cluster_id == cluster_id) { - // both hava right cluster id, do nothing - } else if (cluster_id == -1) { - cluster_id = tmp_cluster_id; - } else { - LOG(WARNING) << "multiple cluster ids is not equal. one=" << cluster_id - << ", other=" << tmp_cluster_id; - return OLAP_ERR_INVALID_CLUSTER_INFO; - } - } - - // judge and get effective cluster id - OLAPStatus res = OLAP_SUCCESS; - res = _judge_and_update_effective_cluster_id(cluster_id); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to judge and update effective cluster id. 
[res=%d]", res); - return res; - } - - // write cluster id into cluster_id_path if get effective cluster id success - if (_effective_cluster_id != -1 && !_is_all_cluster_id_exist) { - set_cluster_id(_effective_cluster_id); - } - - return res; -} - -Status OLAPEngine::set_cluster_id(int32_t cluster_id) { - std::lock_guard l(_store_lock); - for (auto& it : _store_map) { - RETURN_IF_ERROR(it.second->set_cluster_id(cluster_id)); - } - _effective_cluster_id = cluster_id; - _is_all_cluster_id_exist = true; - return Status::OK(); -} - -std::vector OLAPEngine::get_stores_for_create_table( - TStorageMedium::type storage_medium) { - std::vector stores; - { - std::lock_guard l(_store_lock); - for (auto& it : _store_map) { - if (it.second->is_used()) { - if (_available_storage_medium_type_count == 1 - || it.second->storage_medium() == storage_medium) { - stores.push_back(it.second); - } - } - } - } - - std::random_device rd; - srand(rd()); - std::random_shuffle(stores.begin(), stores.end()); - return stores; -} - -OlapStore* OLAPEngine::get_store(const std::string& path) { - std::lock_guard l(_store_lock); - auto it = _store_map.find(path); - if (it == std::end(_store_map)) { - return nullptr; - } - return it->second; -} - -OlapStore* OLAPEngine::get_store(int64_t path_hash) { - std::lock_guard l(_store_lock); - for (auto& it : _store_map) { - if (it.second->is_used()) { - if (it.second->path_hash() == path_hash) { - return it.second; - } - } - } - return nullptr; -} - -void OLAPEngine::_delete_tables_on_unused_root_path() { - vector tablet_info_vec; - uint32_t unused_root_path_num = 0; - uint32_t total_root_path_num = 0; - - std::lock_guard l(_store_lock); - - for (auto& it : _store_map) { - total_root_path_num++; - if (it.second->is_used()) { - continue; - } - for (auto& tablet : it.second->_tablet_set) { - tablet_info_vec.push_back(tablet); - } - it.second->_tablet_set.clear(); - } - - if (_used_disk_not_enough(unused_root_path_num, total_root_path_num)) { - LOG(FATAL) << "engine stop running, because more than " << _min_percentage_of_error_disk - << " disks error. total_disks=" << total_root_path_num - << ", error_disks=" << unused_root_path_num; - exit(0); - } - - if (!tablet_info_vec.empty()) { - _is_drop_tables = true; - } - - OLAPEngine::get_instance()->drop_tables_on_error_root_path(tablet_info_vec); -} - -OLAPStatus OLAPEngine::_get_path_available_capacity( - const string& root_path, - int64_t* disk_available) { - OLAPStatus res = OLAP_SUCCESS; - - try { - boost::filesystem::path path_name(root_path); - boost::filesystem::space_info path_info = boost::filesystem::space(path_name); - *disk_available = path_info.available; - } catch (boost::filesystem::filesystem_error& e) { - LOG(WARNING) << "get space info failed. path: " << root_path << " erro:" << e.what(); - return OLAP_ERR_STL_ERROR; - } - - return res; -} - -OLAPStatus OLAPEngine::clear() { - // 删除lru中所有内容,其实进程退出这么做本身意义不大,但对单测和更容易发现问题还是有很大意义的 - delete FileHandler::get_fd_cache(); - FileHandler::set_fd_cache(nullptr); - SAFE_DELETE(_index_stream_lru_cache); - - _tablet_map.clear(); - _transaction_tablet_map.clear(); - _global_table_id = 0; - - return OLAP_SUCCESS; -} - -OLAPTablePtr OLAPEngine::_get_table_with_no_lock(TTabletId tablet_id, SchemaHash schema_hash) { - VLOG(3) << "begin to get olap table. 
tablet_id=" << tablet_id; - tablet_map_t::iterator it = _tablet_map.find(tablet_id); - if (it != _tablet_map.end()) { - for (OLAPTablePtr table : it->second.table_arr) { - if (table->equal(tablet_id, schema_hash)) { - VLOG(3) << "get olap table success. tablet_id=" << tablet_id; - return table; - } - } - } - - VLOG(3) << "fail to get olap table. tablet_id=" << tablet_id; - // Return empty olap_table if fail - OLAPTablePtr olap_table; - return olap_table; -} - -OLAPTablePtr OLAPEngine::get_table(TTabletId tablet_id, SchemaHash schema_hash, bool load_table, std::string* err) { - _tablet_map_lock.rdlock(); - OLAPTablePtr olap_table; - olap_table = _get_table_with_no_lock(tablet_id, schema_hash); - _tablet_map_lock.unlock(); - - if (olap_table.get() != NULL) { - if (!olap_table->is_used()) { - OLAP_LOG_WARNING("olap table cannot be used. [table=%ld]", tablet_id); - if (err != nullptr) { *err = "tablet cannot be used"; } - olap_table.reset(); - } else if (load_table && !olap_table->is_loaded()) { - OLAPStatus ost = olap_table->load(); - if (ost != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load olap table. [table=%ld]", tablet_id); - if (err != nullptr) { *err = "load tablet failed"; } - olap_table.reset(); - } - } - } else if (err != nullptr) { - *err = "tablet does not exist"; - } - - return olap_table; -} - -OLAPStatus OLAPEngine::get_tables_by_id( - TTabletId tablet_id, - list* table_list) { - OLAPStatus res = OLAP_SUCCESS; - VLOG(3) << "begin to get tables by id. tablet_id=" << tablet_id; - - _tablet_map_lock.rdlock(); - tablet_map_t::iterator it = _tablet_map.find(tablet_id); - if (it != _tablet_map.end()) { - for (OLAPTablePtr olap_table : it->second.table_arr) { - table_list->push_back(olap_table); - } - } - _tablet_map_lock.unlock(); - - if (table_list->size() == 0) { - OLAP_LOG_WARNING("there is no tablet with specified id. [table=%ld]", tablet_id); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - for (std::list::iterator it = table_list->begin(); - it != table_list->end();) { - if (!(*it)->is_loaded()) { - if ((*it)->load() != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load table. [table='%s']", - (*it)->full_name().c_str()); - it = table_list->erase(it); - continue; - } - } else if ((*it)->is_used()) { - LOG(WARNING) << "table is bad: " << (*it)->full_name().c_str(); - it = table_list->erase(it); - continue; - } - ++it; - } - - VLOG(3) << "success to get tables by id. table_num=" << table_list->size(); - return res; -} - -bool OLAPEngine::check_tablet_id_exist(TTabletId tablet_id) { - bool is_exist = false; - _tablet_map_lock.rdlock(); - - tablet_map_t::iterator it = _tablet_map.find(tablet_id); - if (it != _tablet_map.end() && it->second.table_arr.size() != 0) { - is_exist = true; - } - - _tablet_map_lock.unlock(); - return is_exist; -} - -OLAPStatus OLAPEngine::add_table(TTabletId tablet_id, SchemaHash schema_hash, - const OLAPTablePtr& table, bool force) { - OLAPStatus res = OLAP_SUCCESS; - VLOG(3) << "begin to add olap table to OLAPEngine. 
" - << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash - << ", force=" << force; - _tablet_map_lock.wrlock(); - - table->set_id(_global_table_id++); - - OLAPTablePtr table_item; - for (OLAPTablePtr item : _tablet_map[tablet_id].table_arr) { - if (item->equal(tablet_id, schema_hash)) { - table_item = item; - break; - } - } - - if (table_item.get() == NULL) { - _tablet_map[tablet_id].table_arr.push_back(table); - _tablet_map[tablet_id].table_arr.sort(_sort_table_by_create_time); - _tablet_map_lock.unlock(); - - return res; - } - _tablet_map_lock.unlock(); - - if (!force) { - if (table_item->tablet_path() == table->tablet_path()) { - LOG(WARNING) << "add the same tablet twice! tablet_id=" - << tablet_id << " schema_hash=" << tablet_id; - return OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE; - } - } - - table_item->obtain_header_rdlock(); - int64_t old_time = table_item->lastest_version()->creation_time(); - int64_t new_time = table->lastest_version()->creation_time(); - int32_t old_version = table_item->lastest_version()->end_version(); - int32_t new_version = table->lastest_version()->end_version(); - table_item->release_header_lock(); - - /* - * In restore process, we replace all origin files in tablet dir with - * the downloaded snapshot files. Than we try to reload tablet header. - * force == true means we forcibly replace the OLAPTable in _tablet_map - * with the new one. But if we do so, the files in the tablet dir will be - * dropped when the origin OLAPTable deconstruct. - * So we set keep_files == true to not delete files when the - * origin OLAPTable deconstruct. - */ - bool keep_files = force ? true : false; - if (force || (new_version > old_version - || (new_version == old_version && new_time > old_time))) { - drop_table(tablet_id, schema_hash, keep_files); - _tablet_map_lock.wrlock(); - _tablet_map[tablet_id].table_arr.push_back(table); - _tablet_map[tablet_id].table_arr.sort(_sort_table_by_create_time); - _tablet_map_lock.unlock(); - } else { - table->mark_dropped(); - res = OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE; - } - LOG(WARNING) << "add duplicated table. force=" << force << ", res=" << res - << ", tablet_id=" << tablet_id << ", schema_hash=" << schema_hash - << ", old_version=" << old_version << ", new_version=" << new_version - << ", old_time=" << old_time << ", new_time=" << new_time - << ", old_tablet_path=" << table_item->tablet_path() - << ", new_tablet_path=" << table->tablet_path(); - - return res; -} - -OLAPStatus OLAPEngine::add_transaction( - TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, const PUniqueId& load_id) { - - pair key(partition_id, transaction_id); - TabletInfo tablet_info(tablet_id, schema_hash); - WriteLock wrlock(&_transaction_tablet_map_lock); - auto it = _transaction_tablet_map.find(key); - if (it != _transaction_tablet_map.end()) { - auto load_info = it->second.find(tablet_info); - if (load_info != it->second.end()) { - for (PUniqueId& pid : load_info->second) { - if (pid.hi() == load_id.hi() && pid.lo() == load_id.lo()) { - LOG(WARNING) << "find transaction exists when add to engine." - << "partition_id: " << key.first - << ", transaction_id: " << key.second - << ", table: " << tablet_info.to_string(); - return OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST; - } - } - } - } - - _transaction_tablet_map[key][tablet_info].push_back(load_id); - VLOG(3) << "add transaction to engine successfully." 
- << "partition_id: " << key.first - << ", transaction_id: " << key.second - << ", table: " << tablet_info.to_string(); - return OLAP_SUCCESS; -} - -void OLAPEngine::delete_transaction( - TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, bool delete_from_tablet) { - - pair key(partition_id, transaction_id); - TabletInfo tablet_info(tablet_id, schema_hash); - WriteLock wrlock(&_transaction_tablet_map_lock); - - auto it = _transaction_tablet_map.find(key); - if (it != _transaction_tablet_map.end()) { - VLOG(3) << "delete transaction to engine successfully." - << ",partition_id: " << key.first - << ", transaction_id: " << key.second - << ", table: " << tablet_info.to_string(); - it->second.erase(tablet_info); - if (it->second.empty()) { - _transaction_tablet_map.erase(it); - } - - // delete transaction from tablet - if (delete_from_tablet) { - OLAPTablePtr tablet = get_table(tablet_info.tablet_id, tablet_info.schema_hash); - if (tablet.get() != nullptr) { - tablet->delete_pending_data(transaction_id); - } - } - } -} - -void OLAPEngine::get_transactions_by_tablet(OLAPTablePtr tablet, int64_t* partition_id, - set* transaction_ids) { - if (tablet.get() == nullptr || partition_id == nullptr || transaction_ids == nullptr) { - OLAP_LOG_WARNING("parameter is null when get transactions by tablet"); - return; - } - - TabletInfo tablet_info(tablet->tablet_id(), tablet->schema_hash()); - ReadLock rdlock(&_transaction_tablet_map_lock); - for (auto& it : _transaction_tablet_map) { - if (it.second.find(tablet_info) != it.second.end()) { - *partition_id = it.first.first; - transaction_ids->insert(it.first.second); - VLOG(3) << "find transaction on tablet." - << "partition_id: " << it.first.first - << ", transaction_id: " << it.first.second - << ", table: " << tablet_info.to_string(); - } - } -} - -bool OLAPEngine::has_transaction(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash) { - pair key(partition_id, transaction_id); - TabletInfo tablet_info(tablet_id, schema_hash); - - _transaction_tablet_map_lock.rdlock(); - auto it = _transaction_tablet_map.find(key); - bool found = it != _transaction_tablet_map.end() - && it->second.find(tablet_info) != it->second.end(); - _transaction_tablet_map_lock.unlock(); - - return found; -} - -OLAPStatus OLAPEngine::publish_version(const TPublishVersionRequest& publish_version_req, - vector* error_tablet_ids) { - LOG(INFO) << "begin to process publish version. transaction_id=" - << publish_version_req.transaction_id; - - int64_t transaction_id = publish_version_req.transaction_id; - OLAPStatus res = OLAP_SUCCESS; - - // each partition - for (const TPartitionVersionInfo& partitionVersionInfo - : publish_version_req.partition_version_infos) { - - int64_t partition_id = partitionVersionInfo.partition_id; - pair key(partition_id, transaction_id); - - _transaction_tablet_map_lock.rdlock(); - auto it = _transaction_tablet_map.find(key); - if (it == _transaction_tablet_map.end()) { - OLAP_LOG_WARNING("no tablet to publish version. 
[partition_id=%ld transaction_id=%ld]", - partition_id, transaction_id); - _transaction_tablet_map_lock.unlock(); - continue; - } - std::map> load_info_map = it->second; - _transaction_tablet_map_lock.unlock(); - - Version version(partitionVersionInfo.version, partitionVersionInfo.version); - VersionHash version_hash = partitionVersionInfo.version_hash; - - // each tablet - for (auto& load_info : load_info_map) { - const TabletInfo& tablet_info = load_info.first; - VLOG(3) << "begin to publish version on tablet. " - << "tablet_id=" << tablet_info.tablet_id - << ", schema_hash=" << tablet_info.schema_hash - << ", version=" << version.first - << ", version_hash=" << version_hash - << ", transaction_id=" << transaction_id; - OLAPTablePtr tablet = get_table(tablet_info.tablet_id, tablet_info.schema_hash); - - if (tablet.get() == NULL) { - OLAP_LOG_WARNING("can't get table when publish version. [tablet_id=%ld schema_hash=%d]", - tablet_info.tablet_id, tablet_info.schema_hash); - error_tablet_ids->push_back(tablet_info.tablet_id); - res = OLAP_ERR_PUSH_TABLE_NOT_EXIST; - continue; - } - - - // publish version - OLAPStatus publish_status = tablet->publish_version( - transaction_id, version, version_hash); - - // if data existed, delete transaction from engine and tablet - if (publish_status == OLAP_ERR_PUSH_VERSION_ALREADY_EXIST) { - OLAP_LOG_WARNING("can't publish version on tablet since data existed. " - "[table=%s transaction_id=%ld version=%d]", - tablet->full_name().c_str(), transaction_id, version.first); - delete_transaction(partition_id, transaction_id, - tablet->tablet_id(), tablet->schema_hash()); - - // if publish successfully, delete transaction from engine - } else if (publish_status == OLAP_SUCCESS) { - LOG(INFO) << "publish version successfully on tablet. tablet=" << tablet->full_name() - << ", transaction_id=" << transaction_id << ", version=" << version.first; - _transaction_tablet_map_lock.wrlock(); - auto it2 = _transaction_tablet_map.find(key); - if (it2 != _transaction_tablet_map.end()) { - VLOG(3) << "delete transaction from engine. table=" << tablet->full_name() - << "transaction_id: " << transaction_id; - it2->second.erase(tablet_info); - if (it2->second.empty()) { - _transaction_tablet_map.erase(it2); - } - } - _transaction_tablet_map_lock.unlock(); - - } else { - OLAP_LOG_WARNING("fail to publish version on tablet. " - "[table=%s transaction_id=%ld version=%d res=%d]", - tablet->full_name().c_str(), transaction_id, - version.first, publish_status); - error_tablet_ids->push_back(tablet->tablet_id()); - res = publish_status; - } - } - } - - LOG(INFO) << "finish to publish version on transaction." - << "transaction_id=" << transaction_id - << ", error_tablet_size=" << error_tablet_ids->size(); - return res; -} - -void OLAPEngine::clear_transaction_task(const TTransactionId transaction_id, - const vector partition_ids) { - LOG(INFO) << "begin to clear transaction task. transaction_id=" << transaction_id; - - // each partition - for (const TPartitionId& partition_id : partition_ids) { - - // get tablets in this transaction - pair key(partition_id, transaction_id); - - _transaction_tablet_map_lock.rdlock(); - auto it = _transaction_tablet_map.find(key); - if (it == _transaction_tablet_map.end()) { - OLAP_LOG_WARNING("no tablet to clear transaction. 
[partition_id=%ld transaction_id=%ld]", - partition_id, transaction_id); - _transaction_tablet_map_lock.unlock(); - continue; - } - std::map> load_info_map = it->second; - _transaction_tablet_map_lock.unlock(); - - // each tablet - for (auto& load_info : load_info_map) { - const TabletInfo& tablet_info = load_info.first; - delete_transaction(partition_id, transaction_id, - tablet_info.tablet_id, tablet_info.schema_hash); - } - } - - LOG(INFO) << "finish to clear transaction task. transaction_id=" << transaction_id; -} - -OLAPStatus OLAPEngine::clone_incremental_data(OLAPTablePtr tablet, OLAPHeader& clone_header, - int64_t committed_version) { - LOG(INFO) << "begin to incremental clone. tablet=" << tablet->full_name() - << ", committed_version=" << committed_version; - - // calculate missing version again - vector missing_versions; - tablet->get_missing_versions_with_header_locked(committed_version, &missing_versions); - - // add least complete version - // prevent lastest version not replaced (if need to rewrite) when restart - const PDelta* least_complete_version = tablet->least_complete_version(missing_versions); - - vector versions_to_delete; - vector versions_to_clone; - - // it's not a merged version in principle - if (least_complete_version != NULL && - least_complete_version->start_version() == least_complete_version->end_version()) { - - Version version(least_complete_version->start_version(), least_complete_version->end_version()); - const PDelta* clone_src_version = clone_header.get_incremental_version(version); - - // if least complete version not found in clone src, return error - if (clone_src_version == nullptr) { - OLAP_LOG_WARNING("failed to find least complete version in clone header. " - "[clone_header_file=%s least_complete_version=%d-%d]", - clone_header.file_name().c_str(), - least_complete_version->start_version(), least_complete_version->end_version()); - return OLAP_ERR_VERSION_NOT_EXIST; - - // if least complete version_hash in clone src is different, clone it - } else if (clone_src_version->version_hash() != least_complete_version->version_hash()) { - versions_to_clone.push_back(clone_src_version); - versions_to_delete.push_back(Version( - least_complete_version->start_version(), - least_complete_version->end_version())); - - VLOG(3) << "least complete version_hash in clone src is different, replace it. " - << "tablet=" << tablet->full_name() - << ", least_complete_version=" << least_complete_version->start_version() - << "-" << least_complete_version->end_version() - << ", local_hash=" << least_complete_version->version_hash() - << ", clone_hash=" << clone_src_version->version_hash(); - } - } - - VLOG(3) << "get missing versions again when incremental clone. " - << "tablet=" << tablet->full_name() - << ", committed_version=" << committed_version - << ", missing_versions_size=" << missing_versions.size(); - - // check missing versions exist in clone src - for (Version version : missing_versions) { - const PDelta* clone_src_version = clone_header.get_incremental_version(version); - if (clone_src_version == NULL) { - LOG(WARNING) << "missing version not found in clone src." - << "clone_header_file=" << clone_header.file_name() - << ", missing_version=" << version.first << "-" << version.second; - return OLAP_ERR_VERSION_NOT_EXIST; - } - - versions_to_clone.push_back(clone_src_version); - } - - // clone_data to tablet - OLAPStatus clone_res = tablet->clone_data(clone_header, versions_to_clone, versions_to_delete); - LOG(INFO) << "finish to incremental clone. 
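The incremental-clone planning removed above boils down to: recompute the local version holes, then require that every hole is available as an incremental version in the clone source header, otherwise give up with a version-not-exist error. A simplified sketch of that decision, with Version as a plain [start, end] pair and the hash-mismatch special case left out:

#include <cstdint>
#include <map>
#include <optional>
#include <utility>
#include <vector>

using Version = std::pair<int32_t, int32_t>;

// Returns the versions that must be fetched from the clone source, or
// std::nullopt when some missing version is not available there (the
// OLAP_ERR_VERSION_NOT_EXIST case in the removed code).
std::optional<std::vector<Version>> plan_incremental_clone(
        const std::vector<Version>& missing_versions,
        const std::map<Version, int64_t /*version_hash*/>& clone_src_versions) {
    std::vector<Version> to_clone;
    for (const Version& v : missing_versions) {
        auto it = clone_src_versions.find(v);
        if (it == clone_src_versions.end()) {
            return std::nullopt;   // source cannot fill this hole: caller falls back to full clone
        }
        to_clone.push_back(v);
    }
    return to_clone;
}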
[table=" << tablet->full_name() << " res=" << clone_res << "]"; - return clone_res; -} - -OLAPStatus OLAPEngine::clone_full_data(OLAPTablePtr tablet, OLAPHeader& clone_header) { - Version clone_latest_version = clone_header.get_latest_version(); - LOG(INFO) << "begin to full clone. table=" << tablet->full_name() << "," - << "clone_latest_version=" << clone_latest_version.first << "-" << clone_latest_version.second; - vector versions_to_delete; - - // check local versions - for (int i = 0; i < tablet->file_delta_size(); i++) { - Version local_version(tablet->get_delta(i)->start_version(), - tablet->get_delta(i)->end_version()); - VersionHash local_version_hash = tablet->get_delta(i)->version_hash(); - LOG(INFO) << "check local delta when full clone." - << "table=" << tablet->full_name() - << ", local_version=" << local_version.first << "-" << local_version.second; - - // if local version cross src latest, clone failed - if (local_version.first <= clone_latest_version.second - && local_version.second > clone_latest_version.second) { - LOG(WARNING) << "stop to full clone, version cross src latest." - << "table=" << tablet->full_name() - << ", local_version=" << local_version.first << "-" << local_version.second; - return OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR; - - } else if (local_version.second <= clone_latest_version.second) { - // if local version smaller than src, check if existed in src, will not clone it - bool existed_in_src = false; - - // if delta labeled with local_version is same with the specified version in clone header, - // there is no necessity to clone it. - for (int j = 0; j < clone_header.file_delta_size(); ++j) { - if (clone_header.get_delta(j)->start_version() == local_version.first - && clone_header.get_delta(j)->end_version() == local_version.second - && clone_header.get_delta(j)->version_hash() == local_version_hash) { - existed_in_src = true; - LOG(INFO) << "Delta has already existed in local header, no need to clone." - << "table=" << tablet->full_name() - << ", version='" << local_version.first<< "-" << local_version.second - << ", version_hash=" << local_version_hash; - - OLAPStatus delete_res = clone_header.delete_version(local_version); - if (delete_res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to delete existed version from clone src when full clone. " - << "clone_header_file=" << clone_header.file_name() - << "version=" << local_version.first << "-" << local_version.second; - return delete_res; - } - break; - } - } - - // Delta labeled in local_version is not existed in clone header, - // some overlapping delta will be cloned to replace it. - // And also, the specified delta should deleted from local header. - if (!existed_in_src) { - versions_to_delete.push_back(local_version); - LOG(INFO) << "Delete delta not included by the clone header, should delete it from local header." - << "table=" << tablet->full_name() << "," - << ", version=" << local_version.first<< "-" << local_version.second - << ", version_hash=" << local_version_hash; - } - } - } - vector clone_deltas; - for (int i = 0; i < clone_header.file_delta_size(); ++i) { - clone_deltas.push_back(clone_header.get_delta(i)); - LOG(INFO) << "Delta to clone." 
- << "table=" << tablet->full_name() - << ", version=" << clone_header.get_delta(i)->start_version() << "-" - << clone_header.get_delta(i)->end_version() - << ", version_hash=" << clone_header.get_delta(i)->version_hash(); - } - - // clone_data to tablet - OLAPStatus clone_res = tablet->clone_data(clone_header, clone_deltas, versions_to_delete); - LOG(INFO) << "finish to full clone. [table=" << tablet->full_name() << ", res=" << clone_res << "]"; - return clone_res; -} - -// Drop table specified, the main logical is as follows: -// 1. table not in schema change: -// drop specified table directly; -// 2. table in schema change: -// a. schema change not finished && dropped table is base : -// base table cannot be dropped; -// b. other cases: -// drop specified table and clear schema change info. -OLAPStatus OLAPEngine::drop_table( - TTabletId tablet_id, SchemaHash schema_hash, bool keep_files) { - LOG(INFO) << "begin to process drop table." - << "table=" << tablet_id << ", schema_hash=" << schema_hash; - DorisMetrics::drop_tablet_requests_total.increment(1); - - OLAPStatus res = OLAP_SUCCESS; - - // Get table which need to be droped - _tablet_map_lock.rdlock(); - OLAPTablePtr dropped_table = _get_table_with_no_lock(tablet_id, schema_hash); - _tablet_map_lock.unlock(); - if (dropped_table.get() == NULL) { - OLAP_LOG_WARNING("fail to drop not existed table. [tablet_id=%ld schema_hash=%d]", - tablet_id, schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - // Try to get schema change info - AlterTabletType type; - TTabletId related_tablet_id; - TSchemaHash related_schema_hash; - vector schema_change_versions; - dropped_table->obtain_header_rdlock(); - bool ret = dropped_table->get_schema_change_request( - &related_tablet_id, &related_schema_hash, &schema_change_versions, &type); - dropped_table->release_header_lock(); - - // Drop table directly when not in schema change - if (!ret) { - return _drop_table_directly(tablet_id, schema_hash, keep_files); - } - - // Check table is in schema change or not, is base table or not - bool is_schema_change_finished = true; - if (schema_change_versions.size() != 0) { - is_schema_change_finished = false; - } - - bool is_drop_base_table = false; - _tablet_map_lock.rdlock(); - OLAPTablePtr related_table = _get_table_with_no_lock( - related_tablet_id, related_schema_hash); - _tablet_map_lock.unlock(); - if (related_table.get() == NULL) { - OLAP_LOG_WARNING("drop table directly when related table not found. " - "[tablet_id=%ld schema_hash=%d]", - related_tablet_id, related_schema_hash); - return _drop_table_directly(tablet_id, schema_hash, keep_files); - } - - if (dropped_table->creation_time() < related_table->creation_time()) { - is_drop_base_table = true; - } - - if (is_drop_base_table && !is_schema_change_finished) { - OLAP_LOG_WARNING("base table in schema change cannot be droped. [table=%s]", - dropped_table->full_name().c_str()); - return OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED; - } - - // Drop specified table and clear schema change info - _tablet_map_lock.wrlock(); - related_table->obtain_header_wrlock(); - related_table->clear_schema_change_request(); - res = related_table->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save table header. 
res=" << res - << ", tablet=" << related_table->full_name(); - } - - res = _drop_table_directly_unlocked(tablet_id, schema_hash, keep_files); - related_table->release_header_lock(); - _tablet_map_lock.unlock(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to drop table which in schema change. [table=%s]", - dropped_table->full_name().c_str()); - return res; - } - - LOG(INFO) << "finish to drop tablet. res=" << res; - return res; -} - -OLAPStatus OLAPEngine::_drop_table_directly( - TTabletId tablet_id, SchemaHash schema_hash, bool keep_files) { - _tablet_map_lock.wrlock(); - OLAPStatus res = _drop_table_directly_unlocked(tablet_id, schema_hash, keep_files); - _tablet_map_lock.unlock(); - return res; -} - -OLAPStatus OLAPEngine::_drop_table_directly_unlocked( - TTabletId tablet_id, SchemaHash schema_hash, bool keep_files) { - OLAPStatus res = OLAP_SUCCESS; - - OLAPTablePtr dropped_table = _get_table_with_no_lock(tablet_id, schema_hash); - if (dropped_table.get() == NULL) { - OLAP_LOG_WARNING("fail to drop not existed table. [tablet_id=%ld schema_hash=%d]", - tablet_id, schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - for (list::iterator it = _tablet_map[tablet_id].table_arr.begin(); - it != _tablet_map[tablet_id].table_arr.end();) { - if ((*it)->equal(tablet_id, schema_hash)) { - if (!keep_files) { - (*it)->mark_dropped(); - } - it = _tablet_map[tablet_id].table_arr.erase(it); - } else { - ++it; - } - } - - if (_tablet_map[tablet_id].table_arr.empty()) { - _tablet_map.erase(tablet_id); - } - - res = dropped_table->store()->deregister_table(dropped_table.get()); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to unregister from root path. [res=%d table=%ld]", - res, tablet_id); - } - - return res; -} - -OLAPStatus OLAPEngine::drop_tables_on_error_root_path( - const vector& tablet_info_vec) { - OLAPStatus res = OLAP_SUCCESS; - - _tablet_map_lock.wrlock(); - - for (const TabletInfo& tablet_info : tablet_info_vec) { - TTabletId tablet_id = tablet_info.tablet_id; - TSchemaHash schema_hash = tablet_info.schema_hash; - VLOG(3) << "drop_table begin. tablet_id=" << tablet_id - << ", schema_hash=" << schema_hash; - OLAPTablePtr dropped_table = _get_table_with_no_lock(tablet_id, schema_hash); - if (dropped_table.get() == NULL) { - OLAP_LOG_WARNING("dropping table not exist. 
[table=%ld schema_hash=%d]", - tablet_id, schema_hash); - continue; - } else { - for (list::iterator it = _tablet_map[tablet_id].table_arr.begin(); - it != _tablet_map[tablet_id].table_arr.end();) { - if ((*it)->equal(tablet_id, schema_hash)) { - it = _tablet_map[tablet_id].table_arr.erase(it); - } else { - ++it; - } - } - - if (_tablet_map[tablet_id].table_arr.empty()) { - _tablet_map.erase(tablet_id); - } - } - } - - _tablet_map_lock.unlock(); - - return res; -} - -OLAPTablePtr OLAPEngine::create_table( - const TCreateTabletReq& request, const string* ref_root_path, - const bool is_schema_change_table, const OLAPTablePtr ref_olap_table) { - // Get all available stores, use ref_root_path if the caller specified - std::vector stores; - if (ref_root_path == nullptr) { - stores = get_stores_for_create_table(request.storage_medium); - if (stores.empty()) { - LOG(WARNING) << "there is no available disk that can be used to create table."; - return nullptr; - } - } else { - stores.push_back(ref_olap_table->store()); - } - - OLAPTablePtr olap_table; - // Try to create table on each of all_available_root_path, util success - for (auto& store : stores) { - OLAPHeader* header = new OLAPHeader(); - OLAPStatus res = _create_new_table_header(request, store, is_schema_change_table, ref_olap_table, header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to create table header. [res=" << res << " root=" << store->path(); - break; - } - - olap_table = OLAPTable::create_from_header(header, store); - if (olap_table == nullptr) { - LOG(WARNING) << "fail to load olap table from header. root_path:%s" << store->path(); - break; - } - - // commit header finally - res = OlapHeaderManager::save(store, request.tablet_id, request.tablet_schema.schema_hash, header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to save header. [res=" << res << " root=" << store->path(); - break; - } - break; - } - - return olap_table; -} - -OLAPStatus OLAPEngine::create_init_version(TTabletId tablet_id, SchemaHash schema_hash, - Version version, VersionHash version_hash) { - VLOG(3) << "begin to create init version. " - << "begin=" << version.first << ", end=" << version.second; - OLAPTablePtr table; - ColumnDataWriter* writer = NULL; - SegmentGroup* new_segment_group = NULL; - OLAPStatus res = OLAP_SUCCESS; - std::vector index_vec; - - do { - if (version.first > version.second) { - OLAP_LOG_WARNING("begin should not larger than end. [begin=%d end=%d]", - version.first, version.second); - res = OLAP_ERR_INPUT_PARAMETER_ERROR; - break; - } - - // Get olap table and generate new index - table = get_table(tablet_id, schema_hash); - if (table.get() == NULL) { - OLAP_LOG_WARNING("fail to find table. [table=%ld]", tablet_id); - res = OLAP_ERR_TABLE_NOT_FOUND; - break; - } - - new_segment_group = new(nothrow) SegmentGroup(table.get(), version, version_hash, false, 0, 0); - if (new_segment_group == NULL) { - LOG(WARNING) << "fail to malloc index. [table=" << table->full_name() << "]"; - res = OLAP_ERR_MALLOC_ERROR; - break; - } - - // Create writer, which write nothing to table, to generate empty data file - writer = ColumnDataWriter::create(table, new_segment_group, false); - if (writer == NULL) { - LOG(WARNING) << "fail to create writer. [table=" << table->full_name() << "]"; - res = OLAP_ERR_MALLOC_ERROR; - break; - } - - res = writer->finalize(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to finalize writer. 
[table=" << table->full_name() << "]"; - break; - } - - // Load new index and add to table - res = new_segment_group->load(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load new index. [table=" << table->full_name() << "]"; - break; - } - - WriteLock wrlock(table->get_header_lock_ptr()); - index_vec.push_back(new_segment_group); - res = table->register_data_source(index_vec); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to register index to data sources. [table=%s]", - table->full_name().c_str()); - break; - } - - res = table->save_header(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to save header. [table=" << table->full_name() << "]"; - break; - } - } while (0); - - // Unregister index and delete files(index and data) if failed - if (res != OLAP_SUCCESS && table.get() != NULL) { - std::vector unused_index; - table->obtain_header_wrlock(); - table->unregister_data_source(version, &unused_index); - table->release_header_lock(); - - for (SegmentGroup* index : index_vec) { - index->delete_all_files(); - SAFE_DELETE(index); - } - } - - VLOG(3) << "create init version end. res=" << res; - SAFE_DELETE(writer); - return res; -} - -bool OLAPEngine::try_schema_change_lock(TTabletId tablet_id) { - bool res = false; - VLOG(3) << "try_schema_change_lock begin. table_id=" << tablet_id; - _tablet_map_lock.rdlock(); - - tablet_map_t::iterator it = _tablet_map.find(tablet_id); - if (it == _tablet_map.end()) { - OLAP_LOG_WARNING("tablet does not exists. [table=%ld]", tablet_id); - } else { - res = (it->second.schema_change_lock.trylock() == OLAP_SUCCESS); - } - - _tablet_map_lock.unlock(); - VLOG(3) << "try_schema_change_lock end. table_id=" << tablet_id; - return res; -} - -void OLAPEngine::release_schema_change_lock(TTabletId tablet_id) { - VLOG(3) << "release_schema_change_lock begin. tablet_id=" << tablet_id; - _tablet_map_lock.rdlock(); - - tablet_map_t::iterator it = _tablet_map.find(tablet_id); - if (it == _tablet_map.end()) { - OLAP_LOG_WARNING("tablet does not exists. [table=%ld]", tablet_id); - } else { - it->second.schema_change_lock.unlock(); - } - - _tablet_map_lock.unlock(); - VLOG(3) << "release_schema_change_lock end. 
tablet_id=" << tablet_id; -} - -void OLAPEngine::_build_tablet_info(OLAPTablePtr olap_table, TTabletInfo* tablet_info) { - tablet_info->tablet_id = olap_table->tablet_id(); - tablet_info->schema_hash = olap_table->schema_hash(); - - olap_table->obtain_header_rdlock(); - tablet_info->row_count = olap_table->get_num_rows(); - tablet_info->data_size = olap_table->get_data_size(); - const PDelta* last_file_version = olap_table->lastest_version(); - if (last_file_version == NULL) { - tablet_info->version = -1; - tablet_info->version_hash = 0; - } else { - // report the version before first missing version - vector missing_versions; - olap_table->get_missing_versions_with_header_locked( - last_file_version->end_version(), &missing_versions); - - if (!missing_versions.empty()) { - tablet_info->__set_version_miss(true); - } - - const PDelta* least_complete_version = - olap_table->least_complete_version(missing_versions); - if (least_complete_version == NULL) { - tablet_info->version = -1; - tablet_info->version_hash = 0; - } else { - tablet_info->version = least_complete_version->end_version(); - tablet_info->version_hash = least_complete_version->version_hash(); - } - } - olap_table->release_header_lock(); -} - -OLAPStatus OLAPEngine::report_tablet_info(TTabletInfo* tablet_info) { - DorisMetrics::report_tablet_requests_total.increment(1); - LOG(INFO) << "begin to process report tablet info." - << "tablet_id=" << tablet_info->tablet_id - << ", schema_hash=" << tablet_info->schema_hash; - - OLAPStatus res = OLAP_SUCCESS; - - OLAPTablePtr olap_table = get_table( - tablet_info->tablet_id, tablet_info->schema_hash); - if (olap_table.get() == NULL) { - OLAP_LOG_WARNING("can't find table. [table=%ld schema_hash=%d]", - tablet_info->tablet_id, tablet_info->schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - _build_tablet_info(olap_table, tablet_info); - LOG(INFO) << "success to process report tablet info."; - return res; -} - -OLAPStatus OLAPEngine::report_all_tablets_info(std::map* tablets_info) { - LOG(INFO) << "begin to process report all tablets info."; - DorisMetrics::report_all_tablets_requests_total.increment(1); - - if (tablets_info == NULL) { - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - _tablet_map_lock.rdlock(); - for (const auto& item : _tablet_map) { - if (item.second.table_arr.size() == 0) { - continue; - } - - TTablet tablet; - for (OLAPTablePtr olap_table : item.second.table_arr) { - if (olap_table.get() == NULL) { - continue; - } - - TTabletInfo tablet_info; - _build_tablet_info(olap_table, &tablet_info); - - // report expire transaction - vector transaction_ids; - olap_table->get_expire_pending_data(&transaction_ids); - tablet_info.__set_transaction_ids(transaction_ids); - - if (_available_storage_medium_type_count > 1) { - tablet_info.__set_storage_medium(olap_table->store()->storage_medium()); - } - - tablet_info.__set_version_count(olap_table->file_delta_size()); - tablet_info.__set_path_hash(olap_table->store()->path_hash()); - tablet_info.__set_used(olap_table->is_used()); - - tablet.tablet_infos.push_back(tablet_info); - } - - if (tablet.tablet_infos.size() != 0) { - tablets_info->insert(pair(tablet.tablet_infos[0].tablet_id, tablet)); - } - } - _tablet_map_lock.unlock(); - - LOG(INFO) << "success to process report all tablets info. 
tablet_num=" << tablets_info->size(); - return OLAP_SUCCESS; -} - -void OLAPEngine::get_tablet_stat(TTabletStatResult& result) { - VLOG(3) << "begin to get all tablet stat."; - - // get current time - int64_t current_time = UnixMillis(); - - { - std::lock_guard l(_tablet_stat_mutex); - // update cache if too old - if (current_time - _tablet_stat_cache_update_time_ms > - config::tablet_stat_cache_update_interval_second * 1000) { - VLOG(3) << "update tablet stat."; - _build_tablet_stat(); - } - } - - result.__set_tablets_stats(_tablet_stat_cache); -} - -void OLAPEngine::_build_tablet_stat() { - _tablet_stat_cache.clear(); - - _tablet_map_lock.rdlock(); - for (const auto& item : _tablet_map) { - if (item.second.table_arr.size() == 0) { - continue; - } - - TTabletStat stat; - stat.tablet_id = item.first; - for (OLAPTablePtr olap_table : item.second.table_arr) { - if (olap_table.get() == NULL) { - continue; - } - - // we only get base tablet's stat - stat.__set_data_size(olap_table->get_data_size()); - stat.__set_row_num(olap_table->get_num_rows()); - VLOG(3) << "tablet_id=" << item.first - << ", data_size=" << olap_table->get_data_size() - << ", row_num:" << olap_table->get_num_rows(); - break; - } - - _tablet_stat_cache.emplace(item.first, stat); - } - _tablet_map_lock.unlock(); - - _tablet_stat_cache_update_time_ms = UnixMillis(); -} - -bool OLAPEngine::_can_do_compaction(OLAPTablePtr table) { - // 如果table正在做schema change,则通过选路判断数据是否转换完成 - // 如果选路成功,则转换完成,可以进行BE - // 如果选路失败,则转换未完成,不能进行BE - ReadLock rdlock(table->get_header_lock_ptr()); - const PDelta* lastest_version = table->lastest_version(); - if (lastest_version == NULL) { - return false; - } - - Version test_version = Version(0, lastest_version->end_version()); - std::vector path_versions; - if (OLAP_SUCCESS != table->select_versions_to_span(test_version, &path_versions)) { - LOG(WARNING) << "tablet has missed version. 
tablet=" << table->full_name(); - return false; - } - - if (table->is_schema_changing()) { - Version test_version = Version(0, lastest_version->end_version()); - vector path_versions; - if (OLAP_SUCCESS != table->select_versions_to_span(test_version, &path_versions)) { - return false; - } - } - - return true; -} - -void OLAPEngine::start_clean_fd_cache() { - VLOG(10) << "start clean file descritpor cache"; - FileHandler::get_fd_cache()->prune(); - VLOG(10) << "end clean file descritpor cache"; -} - -void OLAPEngine::perform_cumulative_compaction(OlapStore* store) { - OLAPTablePtr best_table = _find_best_tablet_to_compaction(CompactionType::CUMULATIVE_COMPACTION, store); - if (best_table == nullptr) { return; } - - DorisMetrics::cumulative_compaction_request_total.increment(1); - CumulativeCompaction cumulative_compaction; - OLAPStatus res = cumulative_compaction.init(best_table); - if (res != OLAP_SUCCESS) { - if (res != OLAP_ERR_CUMULATIVE_REPEAT_INIT && res != OLAP_ERR_CE_TRY_CE_LOCK_ERROR) { - best_table->set_last_compaction_failure_time(UnixMillis()); - LOG(WARNING) << "failed to init cumulative compaction" - << ", table=" << best_table->full_name() - << ", res=" << res; - - if (res != OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS) { - DorisMetrics::cumulative_compaction_request_failed.increment(1); - } - } - return; - } - - res = cumulative_compaction.run(); - if (res != OLAP_SUCCESS) { - DorisMetrics::cumulative_compaction_request_failed.increment(1); - best_table->set_last_compaction_failure_time(UnixMillis()); - LOG(WARNING) << "failed to do cumulative compaction" - << ", table=" << best_table->full_name() - << ", res=" << res; - return; - } - best_table->set_last_compaction_failure_time(0); -} - -void OLAPEngine::perform_base_compaction(OlapStore* store) { - OLAPTablePtr best_table = _find_best_tablet_to_compaction(CompactionType::BASE_COMPACTION, store); - if (best_table == nullptr) { return; } - - DorisMetrics::base_compaction_request_total.increment(1); - BaseCompaction base_compaction; - OLAPStatus res = base_compaction.init(best_table); - if (res != OLAP_SUCCESS) { - if (res != OLAP_ERR_BE_TRY_BE_LOCK_ERROR && res != OLAP_ERR_BE_NO_SUITABLE_VERSION) { - DorisMetrics::base_compaction_request_failed.increment(1); - best_table->set_last_compaction_failure_time(UnixMillis()); - LOG(WARNING) << "failed to init base compaction" - << ", table=" << best_table->full_name() - << ", res=" << res; - } - return; - } - - res = base_compaction.run(); - if (res != OLAP_SUCCESS) { - DorisMetrics::base_compaction_request_failed.increment(1); - best_table->set_last_compaction_failure_time(UnixMillis()); - LOG(WARNING) << "failed to init base compaction" - << ", table=" << best_table->full_name() - << ", res=" << res; - return; - } - best_table->set_last_compaction_failure_time(0); -} - -OLAPTablePtr OLAPEngine::_find_best_tablet_to_compaction(CompactionType compaction_type, OlapStore* store) { - ReadLock tablet_map_rdlock(&_tablet_map_lock); - uint32_t highest_score = 0; - OLAPTablePtr best_table; - int64_t now = UnixMillis(); - for (tablet_map_t::value_type& table_ins : _tablet_map){ - for (OLAPTablePtr& table_ptr : table_ins.second.table_arr) { - if (table_ptr->store()->path_hash() != store->path_hash() - || !table_ptr->is_used() || !table_ptr->is_loaded() || !_can_do_compaction(table_ptr)) { - continue; - } - - if (now - table_ptr->last_compaction_failure_time() <= config::min_compaction_failure_interval_sec * 1000) { - LOG(INFO) << "tablet last compaction failure time is: " << 
table_ptr->last_compaction_failure_time() - << ", tablet: " << table_ptr->tablet_id() << ", skip it."; - continue; - } - - if (compaction_type == CompactionType::CUMULATIVE_COMPACTION) { - if (!table_ptr->try_cumulative_lock()) { - continue; - } else { - table_ptr->release_cumulative_lock(); - } - } - - if (compaction_type == CompactionType::BASE_COMPACTION) { - if (!table_ptr->try_base_compaction_lock()) { - continue; - } else { - table_ptr->release_base_compaction_lock(); - } - } - - ReadLock rdlock(table_ptr->get_header_lock_ptr()); - uint32_t table_score = 0; - if (compaction_type == CompactionType::BASE_COMPACTION) { - table_score = table_ptr->get_base_compaction_score(); - } else if (compaction_type == CompactionType::CUMULATIVE_COMPACTION) { - table_score = table_ptr->get_cumulative_compaction_score(); - } - if (table_score > highest_score) { - highest_score = table_score; - best_table = table_ptr; - } - } - } - - if (best_table != nullptr) { - LOG(INFO) << "find best tablet to do compaction. type: " << (compaction_type == CompactionType::CUMULATIVE_COMPACTION ? "cumulative" : "base") - << ", tablet id: " << best_table->tablet_id() << ", score: " << highest_score; - } - return best_table; -} - -void OLAPEngine::get_cache_status(rapidjson::Document* document) const { - return _index_stream_lru_cache->get_cache_status(document); -} - -OLAPStatus OLAPEngine::start_trash_sweep(double* usage) { - OLAPStatus res = OLAP_SUCCESS; - LOG(INFO) << "start trash and snapshot sweep."; - - const uint32_t snapshot_expire = config::snapshot_expire_time_sec; - const uint32_t trash_expire = config::trash_file_expire_time_sec; - const double guard_space = config::disk_capacity_insufficient_percentage / 100.0; - std::vector root_paths_info; - res = get_all_root_path_info(&root_paths_info); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to get root path stat info when sweep trash."); - return res; - } - - time_t now = time(NULL); //获取UTC时间 - tm local_tm_now; - if (localtime_r(&now, &local_tm_now) == NULL) { - OLAP_LOG_WARNING("fail to localtime_r time. [time=%lu]", now); - return OLAP_ERR_OS_ERROR; - } - const time_t local_now = mktime(&local_tm_now); //得到当地日历时间 - - for (RootPathInfo& info : root_paths_info) { - if (!info.is_used) { - continue; - } - - double curr_usage = (info.capacity - info.available) - / (double) info.capacity; - *usage = *usage > curr_usage ? *usage : curr_usage; - - OLAPStatus curr_res = OLAP_SUCCESS; - string snapshot_path = info.path + SNAPSHOT_PREFIX; - curr_res = _do_sweep(snapshot_path, local_now, snapshot_expire); - if (curr_res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to sweep snapshot. [path=%s, err_code=%d]", - snapshot_path.c_str(), curr_res); - res = curr_res; - } - - string trash_path = info.path + TRASH_PREFIX; - curr_res = _do_sweep(trash_path, local_now, - curr_usage > guard_space ? 0 : trash_expire); - if (curr_res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to sweep trash. 
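The compaction-candidate selection removed above is a single pass over all tablets on the store: tablets still inside the failure back-off window or whose compaction lock is busy are skipped, and the highest compaction score wins. A self-contained sketch of that loop (CandidateTablet and pick_best are illustrative names):

#include <cstdint>
#include <vector>

struct CandidateTablet {
    int64_t id = 0;
    uint32_t compaction_score = 0;
    int64_t last_failure_ms = 0;
    bool lock_busy = false;
};

const CandidateTablet* pick_best(const std::vector<CandidateTablet>& tablets,
                                 int64_t now_ms, int64_t failure_backoff_ms) {
    const CandidateTablet* best = nullptr;
    uint32_t highest_score = 0;
    for (const CandidateTablet& t : tablets) {
        if (now_ms - t.last_failure_ms <= failure_backoff_ms) {
            continue;                       // failed recently, leave it alone for now
        }
        if (t.lock_busy) {
            continue;                       // another compaction already holds the lock
        }
        if (t.compaction_score > highest_score) {
            highest_score = t.compaction_score;
            best = &t;
        }
    }
    return best;                            // nullptr when nothing qualifies
}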
[path=%s, err_code=%d]", - trash_path.c_str(), curr_res); - res = curr_res; - } - } - - // clear expire incremental segment_group - std::vector tablets; - _tablet_map_lock.rdlock(); - for (const auto& item : _tablet_map) { - for (OLAPTablePtr olap_table : item.second.table_arr) { - if (olap_table == nullptr) { - continue; - } - if (olap_table->has_expired_incremental_data()) { - tablets.push_back(olap_table); - } - } - } - _tablet_map_lock.unlock(); - - for (auto& tablet : tablets) { - tablet->delete_expired_incremental_data(); - } - - return res; -} - -OLAPStatus OLAPEngine::_do_sweep( - const string& scan_root, const time_t& local_now, const uint32_t expire) { - OLAPStatus res = OLAP_SUCCESS; - if (!check_dir_existed(scan_root)) { - // dir not existed. no need to sweep trash. - return res; - } - - try { - path boost_scan_root(scan_root); - directory_iterator item(boost_scan_root); - directory_iterator item_end; - for (; item != item_end; ++item) { - string path_name = item->path().string(); - string dir_name = item->path().filename().string(); - string str_time = dir_name.substr(0, dir_name.find('.')); - tm local_tm_create; - if (strptime(str_time.c_str(), "%Y%m%d%H%M%S", &local_tm_create) == nullptr) { - LOG(WARNING) << "fail to strptime time. [time=" << str_time << "]"; - res = OLAP_ERR_OS_ERROR; - continue; - } - if (difftime(local_now, mktime(&local_tm_create)) >= expire) { - if (remove_all_dir(path_name) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to remove file or directory. [path=%s]", - path_name.c_str()); - res = OLAP_ERR_OS_ERROR; - continue; - } - } - } - } catch (...) { - OLAP_LOG_WARNING("Exception occur when scan directory. [path=%s]", - scan_root.c_str()); - res = OLAP_ERR_IO_ERROR; - } - - return res; -} - -OLAPStatus OLAPEngine::_create_new_table_header( - const TCreateTabletReq& request, - OlapStore* store, - const bool is_schema_change_table, - const OLAPTablePtr ref_olap_table, - OLAPHeader* header) { - uint64_t shard = 0; - OLAPStatus res = store->get_shard(&shard); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to get root path shard. res=" << res; - return res; - } - stringstream schema_hash_dir_stream; - schema_hash_dir_stream << store->path() - << DATA_PREFIX - << "/" << shard - << "/" << request.tablet_id - << "/" << request.tablet_schema.schema_hash; - string schema_hash_dir = schema_hash_dir_stream.str(); - if (check_dir_existed(schema_hash_dir)) { - LOG(WARNING) << "failed to create the dir that existed. path=" << schema_hash_dir; - return OLAP_ERR_CANNOT_CREATE_DIR; - } - res = create_dirs(schema_hash_dir); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "create dir fail. 
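The sweep logic removed above relies on the snapshot/trash directory names carrying a "%Y%m%d%H%M%S" timestamp before the first dot; a directory older than the expiry is removed, and the expiry is forced to zero for trash when disk usage exceeds the guard threshold. A small illustrative helper capturing just the per-directory check (should_sweep is an assumed name; strptime is the same POSIX call the removed code uses):

#include <ctime>
#include <string>

bool should_sweep(const std::string& dir_name, time_t local_now, uint32_t expire_seconds) {
    const std::string stamp = dir_name.substr(0, dir_name.find('.'));
    struct tm created = {};
    if (strptime(stamp.c_str(), "%Y%m%d%H%M%S", &created) == nullptr) {
        return false;                        // unparsable name: leave it alone
    }
    return difftime(local_now, mktime(&created)) >= expire_seconds;
}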
[res=" << res << " path:" << schema_hash_dir; - return res; - } - - // set basic information - header->set_num_short_key_fields(request.tablet_schema.short_key_column_count); - header->set_compress_kind(COMPRESS_LZ4); - if (request.tablet_schema.keys_type == TKeysType::DUP_KEYS) { - header->set_keys_type(KeysType::DUP_KEYS); - } else if (request.tablet_schema.keys_type == TKeysType::UNIQUE_KEYS) { - header->set_keys_type(KeysType::UNIQUE_KEYS); - } else { - header->set_keys_type(KeysType::AGG_KEYS); - } - DCHECK(request.tablet_schema.storage_type == TStorageType::COLUMN); - header->set_data_file_type(COLUMN_ORIENTED_FILE); - header->set_segment_size(OLAP_MAX_COLUMN_SEGMENT_FILE_SIZE); - header->set_num_rows_per_data_block(config::default_num_rows_per_column_file_block); - - // set column information - uint32_t i = 0; - uint32_t key_count = 0; - bool has_bf_columns = false; - uint32_t next_unique_id = 0; - if (is_schema_change_table) { - next_unique_id = ref_olap_table->next_unique_id(); - } - if (is_schema_change_table && next_unique_id == 0) { - LOG(FATAL) << "old_tablet=" << ref_olap_table->full_name() - << ", new_tablet=" << request.tablet_id - << ", new_schema_hash=" << request.tablet_schema.schema_hash - << ", next_unique_id=" << next_unique_id; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - for (TColumn column : request.tablet_schema.columns) { - if (column.column_type.type == TPrimitiveType::VARCHAR - && i < request.tablet_schema.short_key_column_count - 1) { - LOG(WARNING) << "varchar type column should be the last short key."; - return OLAP_ERR_SCHEMA_SCHEMA_INVALID; - } - header->add_column(); - if (is_schema_change_table) { - /* - * for schema change, compare old_olap_table and new_olap_table - * 1. if column in both new_olap_table and old_olap_table, - * assign unique_id of old_olap_table to the column of new_olap_table - * 2. if column exists only in new_olap_table, assign next_unique_id of old_olap_table - * to the new column - * - */ - size_t field_num = ref_olap_table->tablet_schema().size(); - size_t field_off = 0; - for (field_off = 0; field_off < field_num; ++field_off) { - if (ref_olap_table->tablet_schema()[field_off].name == column.column_name) { - uint32_t unique_id = ref_olap_table->tablet_schema()[field_off].unique_id; - header->mutable_column(i)->set_unique_id(unique_id); - break; - } - } - if (field_off == field_num) { - header->mutable_column(i)->set_unique_id(next_unique_id++); - } - } else { - header->mutable_column(i)->set_unique_id(i); - } - header->mutable_column(i)->set_name(column.column_name); - header->mutable_column(i)->set_is_root_column(true); - string data_type; - EnumToString(TPrimitiveType, column.column_type.type, data_type); - header->mutable_column(i)->set_type(data_type); - if (column.column_type.type == TPrimitiveType::DECIMAL || column.column_type.type == TPrimitiveType::DECIMALV2) { - if (column.column_type.__isset.precision && column.column_type.__isset.scale) { - header->mutable_column(i)->set_precision(column.column_type.precision); - header->mutable_column(i)->set_frac(column.column_type.scale); - } else { - LOG(WARNING) << "decimal type column should set precision and frac."; - return OLAP_ERR_SCHEMA_SCHEMA_INVALID; - } - } - if (column.column_type.type == TPrimitiveType::CHAR - || column.column_type.type == TPrimitiveType::VARCHAR || column.column_type.type == TPrimitiveType::HLL) { - if (!column.column_type.__isset.len) { - LOG(WARNING) << "CHAR or VARCHAR should specify length. 
type=" << column.column_type.type; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - } - uint32_t length = FieldInfo::get_field_length_by_type( - column.column_type.type, column.column_type.len); - header->mutable_column(i)->set_length(length); - header->mutable_column(i)->set_index_length(length); - if (column.column_type.type == TPrimitiveType::VARCHAR || column.column_type.type == TPrimitiveType::HLL) { - if (!column.column_type.__isset.index_len) { - header->mutable_column(i)->set_index_length(10); - } else { - header->mutable_column(i)->set_index_length(column.column_type.index_len); - } - } - if (!column.is_key) { - header->mutable_column(i)->set_is_key(false); - string aggregation_type; - EnumToString(TAggregationType, column.aggregation_type, aggregation_type); - header->mutable_column(i)->set_aggregation(aggregation_type); - } else { - ++key_count; - header->add_selectivity(1); - header->mutable_column(i)->set_is_key(true); - header->mutable_column(i)->set_aggregation("NONE"); - } - if (column.__isset.default_value) { - header->mutable_column(i)->set_default_value(column.default_value); - } - if (column.__isset.is_allow_null) { - header->mutable_column(i)->set_is_allow_null(column.is_allow_null); - } else { - header->mutable_column(i)->set_is_allow_null(false); - } - if (column.__isset.is_bloom_filter_column) { - header->mutable_column(i)->set_is_bf_column(column.is_bloom_filter_column); - has_bf_columns = true; - } - ++i; - } - if (is_schema_change_table){ - /* - * for schema change, next_unique_id of new olap table should be greater than - * next_unique_id of old olap table - * */ - header->set_next_column_unique_id(next_unique_id); - } else { - header->set_next_column_unique_id(i); - } - if (has_bf_columns && request.tablet_schema.__isset.bloom_filter_fpp) { - header->set_bf_fpp(request.tablet_schema.bloom_filter_fpp); - } - if (key_count < request.tablet_schema.short_key_column_count) { - LOG(WARNING) << "short key num should not large than key num. " - << "key_num=" << key_count << " short_key_num=" << request.tablet_schema.short_key_column_count; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - header->set_creation_time(time(NULL)); - header->set_cumulative_layer_point(-1); - header->set_tablet_id(request.tablet_id); - header->set_schema_hash(request.tablet_schema.schema_hash); - header->set_shard(shard); - return OLAP_SUCCESS; -} - -OLAPStatus OLAPEngine::_check_existed_or_else_create_dir(const string& path) { - if (check_dir_existed(path)) { - LOG(WARNING) << "failed to create the dir that existed. [path='" << path << "']"; - return OLAP_ERR_CANNOT_CREATE_DIR; - } - - return create_dirs(path); -} - -void OLAPEngine::_cancel_unfinished_schema_change() { - // Schema Change在引擎退出时schemachange信息还保存在在Header里, - // 引擎重启后,需清除schemachange信息,上层会重做 - uint64_t canceled_num = 0; - LOG(INFO) << "begin to cancel unfinished schema change."; - - SchemaChangeHandler schema_change_handler; - TTabletId tablet_id; - TSchemaHash schema_hash; - vector schema_change_versions; - AlterTabletType type; - - for (const auto& tablet_instance : _tablet_map) { - for (OLAPTablePtr olap_table : tablet_instance.second.table_arr) { - if (olap_table.get() == NULL) { - OLAP_LOG_WARNING("get empty OLAPTablePtr. 
[tablet_id=%ld]", tablet_instance.first); - continue; - } - - bool ret = olap_table->get_schema_change_request( - &tablet_id, &schema_hash, &schema_change_versions, &type); - if (!ret) { - continue; - } - - OLAPTablePtr new_olap_table = get_table(tablet_id, schema_hash, false); - if (new_olap_table.get() == NULL) { - OLAP_LOG_WARNING("the table referenced by schema change cannot be found. " - "schema change cancelled. [tablet='%s']", - olap_table->full_name().c_str()); - continue; - } - - // DORIS-3741. Upon restart, it should not clear schema change request. - new_olap_table->set_schema_change_status( - ALTER_TABLE_FAILED, new_olap_table->schema_hash(), -1); - olap_table->set_schema_change_status( - ALTER_TABLE_FAILED, olap_table->schema_hash(), -1); - VLOG(3) << "cancel unfinished schema change. tablet=" << olap_table->full_name(); - ++canceled_num; - } - } - - LOG(INFO) << "finish to cancel unfinished schema change! canceled_num=" << canceled_num; -} - -void OLAPEngine::start_delete_unused_index() { - _gc_mutex.lock(); - - for (auto it = _gc_files.begin(); it != _gc_files.end();) { - if (it->first->is_in_use()) { - ++it; - } else { - delete it->first; - vector files = it->second; - remove_files(files); - it = _gc_files.erase(it); - } - } - - _gc_mutex.unlock(); -} - -void OLAPEngine::add_unused_index(SegmentGroup* segment_group) { - _gc_mutex.lock(); - - auto it = _gc_files.find(segment_group); - if (it == _gc_files.end()) { - vector files; - int32_t segment_group_id = segment_group->segment_group_id(); - for (size_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { - string index_file = segment_group->construct_index_file_path(segment_group_id, seg_id); - files.push_back(index_file); - - string data_file = segment_group->construct_data_file_path(segment_group_id, seg_id); - files.push_back(data_file); - } - _gc_files[segment_group] = files; - } - - _gc_mutex.unlock(); -} - -void OLAPEngine::revoke_files_from_gc(const std::vector& files_to_check) { - LOG(INFO) << "start to revoke files from gc. files to check size:" << files_to_check.size(); - _gc_mutex.lock(); - int64_t duration_ns = 0; - { - SCOPED_RAW_TIMER(&duration_ns); - for (auto& file : files_to_check) { - bool found = false; - for (auto& rowset_gc_files : _gc_files) { - auto file_iter = - std::find(rowset_gc_files.second.begin(), rowset_gc_files.second.end(), file); - if (file_iter != rowset_gc_files.second.end()) { - LOG(INFO) << "file:" << file << " exist in unused files to gc. revoke it"; - rowset_gc_files.second.erase(file_iter); - found = true; - break; - } - } - if (!found) { - LOG(INFO) << "file:" << file << " does not exist in unused files"; - } - } - } - _gc_mutex.unlock(); - LOG(INFO) << "revoke files from gc. time duration:" << duration_ns << " ns"; -} - - -OLAPStatus OLAPEngine::_create_init_version( - OLAPTablePtr olap_table, const TCreateTabletReq& request) { - OLAPStatus res = OLAP_SUCCESS; - - if (request.version < 1) { - OLAP_LOG_WARNING("init version of tablet should at least 1."); - return OLAP_ERR_CE_CMD_PARAMS_ERROR; - } else { - Version init_base_version(0, request.version); - res = create_init_version( - request.tablet_id, request.tablet_schema.schema_hash, - init_base_version, request.version_hash); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create init base version. 
[res=%d version=%ld]", - res, request.version); - return res; - } - - Version init_delta_version(request.version + 1, request.version + 1); - res = create_init_version( - request.tablet_id, request.tablet_schema.schema_hash, - init_delta_version, 0); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create init delta version. [res=%d version=%ld]", - res, request.version + 1); - return res; - } - } - - olap_table->obtain_header_wrlock(); - olap_table->set_cumulative_layer_point(request.version + 1); - res = olap_table->save_header(); - olap_table->release_header_lock(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to save header. [table=" << olap_table->full_name() << "]"; - } - - return res; -} - -// TODO(zc): refactor this funciton -OLAPStatus OLAPEngine::create_table(const TCreateTabletReq& request) { - OLAPStatus res = OLAP_SUCCESS; - bool is_table_added = false; - - LOG(INFO) << "begin to process create table. tablet=" << request.tablet_id - << ", schema_hash=" << request.tablet_schema.schema_hash; - - DorisMetrics::create_tablet_requests_total.increment(1); - - // 1. Make sure create_table operation is idempotent: - // return success if table with same tablet_id and schema_hash exist, - // false if table with same tablet_id but different schema_hash exist - if (check_tablet_id_exist(request.tablet_id)) { - OLAPTablePtr table = get_table( - request.tablet_id, request.tablet_schema.schema_hash); - if (table.get() != NULL) { - LOG(INFO) << "create table success for table already exist."; - return OLAP_SUCCESS; - } else { - OLAP_LOG_WARNING("table with different schema hash already exists."); - return OLAP_ERR_CE_TABLET_ID_EXIST; - } - } - - // 2. Lock to ensure that all create_table operation execute in serial - static Mutex create_table_lock; - MutexLock auto_lock(&create_table_lock); - - OLAPTablePtr olap_table; - do { - // 3. Create table with only header, no deltas - olap_table = create_table(request, NULL, false, NULL); - if (olap_table == NULL) { - res = OLAP_ERR_CE_CMD_PARAMS_ERROR; - OLAP_LOG_WARNING("fail to create olap table. [res=%d]", res); - break; - } - - // 4. Add table to OlapEngine will make it visiable to user - res = add_table( - request.tablet_id, request.tablet_schema.schema_hash, olap_table); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add table to OLAPEngine. [res=%d]", res); - break; - } - is_table_added = true; - - OLAPTablePtr olap_table_ptr = get_table( - request.tablet_id, request.tablet_schema.schema_hash); - if (olap_table_ptr.get() == NULL) { - res = OLAP_ERR_TABLE_NOT_FOUND; - OLAP_LOG_WARNING("fail to get table. [res=%d]", res); - break; - } - - // 5. Register table into OLAPEngine, so that we can manage table from - // the perspective of root path. - // Example: unregister all tables when a bad disk found. - res = register_table_into_root_path(olap_table_ptr.get()); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to register table into OLAPEngine. [res=%d, root_path=%s]", - res, olap_table_ptr->storage_root_path_name().c_str()); - break; - } - - // 6. Create init version if this is not a restore mode replica and request.version is set - // bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; - // if (!in_restore_mode && request.__isset.version) { - res = _create_init_version(olap_table_ptr, request); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create initial version for table. [res=%d]", res); - } - // } - } while (0); - - // 7. 
clear environment - if (res != OLAP_SUCCESS) { - DorisMetrics::create_tablet_requests_failed.increment(1); - if (is_table_added) { - OLAPStatus status = drop_table( - request.tablet_id, request.tablet_schema.schema_hash); - if (status != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to drop table when create table failed. [res=%d]", res); - } - } else if (NULL != olap_table) { - olap_table->delete_all_files(); - } - } - - LOG(INFO) << "finish to process create table. res=" << res; - return res; -} - -OLAPStatus OLAPEngine::schema_change(const TAlterTabletReq& request) { - LOG(INFO) << "begin to schema change. old_tablet_id=" << request.base_tablet_id - << ", new_tablet_id=" << request.new_tablet_req.tablet_id; - - DorisMetrics::schema_change_requests_total.increment(1); - - OLAPStatus res = OLAP_SUCCESS; - - SchemaChangeHandler handler; - res = handler.process_alter_table(ALTER_TABLET_SCHEMA_CHANGE, request); - - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to do schema change. " - "[base_table=%ld new_table=%ld] [res=%d]", - request.base_tablet_id, request.new_tablet_req.tablet_id, res); - DorisMetrics::schema_change_requests_failed.increment(1); - return res; - } - - LOG(INFO) << "success to submit schema change." - << "old_tablet_id=" << request.base_tablet_id - << ", new_tablet_id=" << request.new_tablet_req.tablet_id; - return res; -} - -OLAPStatus OLAPEngine::create_rollup_table(const TAlterTabletReq& request) { - LOG(INFO) << "begin to create rollup table. " - << "old_tablet_id=" << request.base_tablet_id - << ", new_tablet_id=" << request.new_tablet_req.tablet_id; - - DorisMetrics::create_rollup_requests_total.increment(1); - - OLAPStatus res = OLAP_SUCCESS; - - SchemaChangeHandler handler; - res = handler.process_alter_table(ALTER_TABLET_CREATE_ROLLUP_TABLE, request); - - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to do rollup. " - "[base_table=%ld new_table=%ld] [res=%d]", - request.base_tablet_id, request.new_tablet_req.tablet_id, res); - DorisMetrics::create_rollup_requests_failed.increment(1); - return res; - } - - LOG(INFO) << "success to create rollup table. res=" << res - << ", old_tablet_id=" << request.base_tablet_id - << ", new_tablet_id=" << request.new_tablet_req.tablet_id; - return res; -} - -AlterTableStatus OLAPEngine::show_alter_table_status( - TTabletId tablet_id, - TSchemaHash schema_hash) { - LOG(INFO) << "begin to process show alter table status." - << "tablet_id=" << tablet_id - << ", schema_hash=" << schema_hash; - - AlterTableStatus status = ALTER_TABLE_FINISHED; - - OLAPTablePtr table = OLAPEngine::get_instance()->get_table(tablet_id, schema_hash); - if (table.get() == NULL) { - OLAP_LOG_WARNING("fail to get table. [table=%ld schema_hash=%d]", - tablet_id, schema_hash); - status = ALTER_TABLE_FAILED; - } else { - status = table->schema_change_status().status; - } - - return status; -} - -OLAPStatus OLAPEngine::compute_checksum( - TTabletId tablet_id, - TSchemaHash schema_hash, - TVersion version, - TVersionHash version_hash, - uint32_t* checksum) { - LOG(INFO) << "begin to process compute checksum." - << "tablet_id=" << tablet_id - << ", schema_hash=" << schema_hash - << ", version=" << version; - OLAPStatus res = OLAP_SUCCESS; - - if (checksum == NULL) { - OLAP_LOG_WARNING("invalid output parameter which is null pointer."); - return OLAP_ERR_CE_CMD_PARAMS_ERROR; - } - - OLAPTablePtr tablet = get_table(tablet_id, schema_hash); - if (NULL == tablet.get()) { - OLAP_LOG_WARNING("can't find tablet. 
[tablet_id=%ld schema_hash=%d]", - tablet_id, schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - { - ReadLock rdlock(tablet->get_header_lock_ptr()); - const PDelta* message = tablet->lastest_version(); - if (message == NULL) { - LOG(FATAL) << "fail to get latest version. tablet_id=" << tablet_id; - return OLAP_ERR_VERSION_NOT_EXIST; - } - - if (message->end_version() == version - && message->version_hash() != version_hash) { - OLAP_LOG_WARNING("fail to check latest version hash. " - "[res=%d tablet_id=%ld version_hash=%ld request_version_hash=%ld]", - res, tablet_id, message->version_hash(), version_hash); - return OLAP_ERR_CE_CMD_PARAMS_ERROR; - } - } - - Reader reader; - ReaderParams reader_params; - reader_params.olap_table = tablet; - reader_params.reader_type = READER_CHECKSUM; - reader_params.version = Version(0, version); - - // ignore float and double type considering to precision lose - for (size_t i = 0; i < tablet->tablet_schema().size(); ++i) { - FieldType type = tablet->get_field_type_by_index(i); - if (type == OLAP_FIELD_TYPE_FLOAT || type == OLAP_FIELD_TYPE_DOUBLE) { - continue; - } - - reader_params.return_columns.push_back(i); - } - - res = reader.init(reader_params); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("initiate reader fail. [res=%d]", res); - return res; - } - - RowCursor row; - res = row.init(tablet->tablet_schema(), reader_params.return_columns); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to init row cursor. [res=%d]", res); - return res; - } - row.allocate_memory_for_string_type(tablet->tablet_schema()); - - bool eof = false; - uint32_t row_checksum = 0; - while (true) { - OLAPStatus res = reader.next_row_with_aggregation(&row, &eof); - if (res == OLAP_SUCCESS && eof) { - VLOG(3) << "reader reads to the end."; - break; - } else if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to read in reader. [res=%d]", res); - return res; - } - - row_checksum = row.hash_code(row_checksum); - } - - LOG(INFO) << "success to finish compute checksum. checksum=" << row_checksum; - *checksum = row_checksum; - return OLAP_SUCCESS; -} - -OLAPStatus OLAPEngine::cancel_delete(const TCancelDeleteDataReq& request) { - LOG(INFO) << "begin to process cancel delete." - << "tablet=" << request.tablet_id - << ", version=" << request.version; - - DorisMetrics::cancel_delete_requests_total.increment(1); - - OLAPStatus res = OLAP_SUCCESS; - - // 1. Get all tablets with same tablet_id - list table_list; - res = get_tables_by_id(request.tablet_id, &table_list); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("can't find table. [table=%ld]", request.tablet_id); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - // 2. Remove delete conditions from each tablet. - DeleteConditionHandler cond_handler; - for (OLAPTablePtr temp_table : table_list) { - OLAPStatus lock_status = temp_table->try_migration_rdlock(); - if (lock_status != OLAP_SUCCESS) { - OLAP_LOG_WARNING("cancel delete failed. could not get migration lock [res=%d table=%s]", - res, temp_table->full_name().c_str()); - break; - } - temp_table->obtain_header_wrlock(); - res = cond_handler.delete_cond(temp_table, request.version, false); - if (res != OLAP_SUCCESS) { - temp_table->release_header_lock(); - temp_table->release_migration_lock(); - OLAP_LOG_WARNING("cancel delete failed. 
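The consistency check removed above skips float and double columns (their values are not stable enough for a deterministic hash) and folds every remaining row into a running 32-bit checksum, so the order of rows matters. A toy sketch of the folding step, with Row and the string hash as stand-ins for the engine's RowCursor::hash_code():

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

using Row = std::vector<std::string>;   // already filtered to non-float columns

uint32_t fold_row(const Row& row, uint32_t seed) {
    uint32_t h = seed;
    for (const std::string& cell : row) {
        h = h * 31 + static_cast<uint32_t>(std::hash<std::string>{}(cell));
    }
    return h;
}

uint32_t compute_checksum(const std::vector<Row>& rows) {
    uint32_t checksum = 0;
    for (const Row& row : rows) {
        checksum = fold_row(row, checksum);   // order-sensitive accumulation
    }
    return checksum;
}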
[res=%d table=%s]", - res, temp_table->full_name().c_str()); - break; - } - - res = temp_table->save_header(); - if (res != OLAP_SUCCESS) { - temp_table->release_header_lock(); - temp_table->release_migration_lock(); - OLAP_LOG_WARNING("fail to save header. [res=%d table=%s]", - res, temp_table->full_name().c_str()); - break; - } - temp_table->release_header_lock(); - temp_table->release_migration_lock(); - } - - // Show delete conditions in tablet header. - for (OLAPTablePtr table : table_list) { - cond_handler.log_conds(table); - } - - LOG(INFO) << "finish to process cancel delete. res=" << res; - return res; -} - -OLAPStatus OLAPEngine::delete_data( - const TPushReq& request, - vector* tablet_info_vec) { - LOG(INFO) << "begin to process delete data. request=" << ThriftDebugString(request); - DorisMetrics::delete_requests_total.increment(1); - - OLAPStatus res = OLAP_SUCCESS; - - if (tablet_info_vec == NULL) { - OLAP_LOG_WARNING("invalid output parameter which is null pointer."); - return OLAP_ERR_CE_CMD_PARAMS_ERROR; - } - - // 1. Get all tablets with same tablet_id - OLAPTablePtr table = get_table(request.tablet_id, request.schema_hash); - if (table.get() == NULL) { - OLAP_LOG_WARNING("can't find table. [table=%ld schema_hash=%d]", - request.tablet_id, request.schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - // 2. Process delete data by push interface - PushHandler push_handler; - if (request.__isset.transaction_id) { - res = push_handler.process_realtime_push(table, request, PUSH_FOR_DELETE, tablet_info_vec); - } else { - res = push_handler.process(table, request, PUSH_FOR_DELETE, tablet_info_vec); - } - - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to push empty version for delete data. " - "[res=%d table='%s']", - res, table->full_name().c_str()); - DorisMetrics::delete_requests_failed.increment(1); - return res; - } - - LOG(INFO) << "finish to process delete data. res=" << res; - return res; -} - -OLAPStatus OLAPEngine::recover_tablet_until_specfic_version( - const TRecoverTabletReq& recover_tablet_req) { - OLAPTablePtr table = get_table(recover_tablet_req.tablet_id, - recover_tablet_req.schema_hash); - if (table == nullptr) { return OLAP_ERR_TABLE_NOT_FOUND; } - OLAPStatus lock_status = table->try_migration_rdlock(); - if (lock_status != OLAP_SUCCESS) { - return lock_status; - } - OLAPStatus res = OLAP_SUCCESS; - res = table->recover_tablet_until_specfic_version(recover_tablet_req.version, - recover_tablet_req.version_hash); - table->release_migration_lock(); - return res; -} - -string OLAPEngine::get_info_before_incremental_clone(OLAPTablePtr tablet, - int64_t committed_version, vector* missing_versions) { - - // get missing versions - tablet->obtain_header_rdlock(); - tablet->get_missing_versions_with_header_locked(committed_version, missing_versions); - - // get least complete version - // prevent lastest version not replaced (if need to rewrite) after node restart - const PDelta* least_complete_version = tablet->least_complete_version(*missing_versions); - if (least_complete_version != NULL) { - // TODO: Used in upgraded. If old Doris version, version can be converted. - Version version(least_complete_version->start_version(), least_complete_version->end_version()); - missing_versions->push_back(version); - LOG(INFO) << "least complete version for incremental clone. 
table=" << tablet->full_name() - << ", least_complete_version=" << least_complete_version->end_version(); - } - - tablet->release_header_lock(); - LOG(INFO) << "finish to calculate missing versions when clone. [table=" << tablet->full_name() - << ", committed_version=" << committed_version << " missing_versions_size=" << missing_versions->size() << "]"; - - // get download path - return tablet->tablet_path() + CLONE_PREFIX; -} - -OLAPStatus OLAPEngine::finish_clone(OLAPTablePtr tablet, const string& clone_dir, - int64_t committed_version, bool is_incremental_clone) { - OLAPStatus res = OLAP_SUCCESS; - vector linked_success_files; - - // clone and compaction operation should be performed sequentially - tablet->obtain_base_compaction_lock(); - tablet->obtain_cumulative_lock(); - - tablet->obtain_push_lock(); - tablet->obtain_header_wrlock(); - do { - // check clone dir existed - if (!check_dir_existed(clone_dir)) { - res = OLAP_ERR_DIR_NOT_EXIST; - OLAP_LOG_WARNING("clone dir not existed when clone. [clone_dir=%s]", - clone_dir.c_str()); - break; - } - - // load src header - string clone_header_file = clone_dir + "/" + std::to_string(tablet->tablet_id()) + ".hdr"; - OLAPHeader clone_header(clone_header_file); - if ((res = clone_header.load_and_init()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load src header when clone. [clone_header_file=%s]", - clone_header_file.c_str()); - break; - } - - // check all files in /clone and /tablet - set clone_files; - if ((res = dir_walk(clone_dir, NULL, &clone_files)) != OLAP_SUCCESS) { - LOG(WARNING) << "failed to dir walk when clone. [clone_dir=" << clone_dir << "]"; - break; - } - - set local_files; - string tablet_dir = tablet->tablet_path(); - if ((res = dir_walk(tablet_dir, NULL, &local_files)) != OLAP_SUCCESS) { - LOG(WARNING) << "failed to dir walk when clone. [tablet_dir=" << tablet_dir << "]"; - break; - } - - std::vector files_to_check; - for (auto& clone_file : clone_files) { - if (local_files.find(clone_file) != local_files.end()) { - string clone_path = tablet_dir + "/" + clone_file; - files_to_check.push_back(clone_path); - } - } - revoke_files_from_gc(files_to_check); - // get the local files again - // because the original local files maybe be deleted by gc before check is done - local_files.clear(); - if ((res = dir_walk(tablet_dir, NULL, &local_files)) != OLAP_SUCCESS) { - LOG(WARNING) << "failed to dir walk when clone. [tablet_dir=" << tablet_dir << "]"; - break; - } - - // link files from clone dir, if file exists, skip it - for (const string& clone_file : clone_files) { - if (local_files.find(clone_file) != local_files.end()) { - LOG(INFO) << "find same file when clone, skip it. " - << "tablet=" << tablet->full_name() - << ", clone_file=" << clone_file; - continue; - } - - string from = clone_dir + "/" + clone_file; - string to = tablet_dir + "/" + clone_file; - LOG(INFO) << "src file:" << from << "dest file:" << to; - if (link(from.c_str(), to.c_str()) != 0) { - LOG(WARNING) << "fail to create hard link when clone." 
- << "[from=" << from - << " to=" << to << "]"; - res = OLAP_ERR_OS_ERROR; - break; - } - linked_success_files.emplace_back(std::move(to)); - } - - if (res != OLAP_SUCCESS) { - break; - } - - if (is_incremental_clone) { - res = OLAPEngine::get_instance()->clone_incremental_data( - tablet, clone_header, committed_version); - } else { - res = OLAPEngine::get_instance()->clone_full_data(tablet, clone_header); - } - - // if full clone success, need to update cumulative layer point - if (!is_incremental_clone && res == OLAP_SUCCESS) { - tablet->set_cumulative_layer_point(clone_header.cumulative_layer_point()); - } - - } while (0); - - // clear linked files if errors happen - if (res != OLAP_SUCCESS) { - remove_files(linked_success_files); - } - tablet->release_header_lock(); - tablet->release_push_lock(); - - tablet->release_cumulative_lock(); - tablet->release_base_compaction_lock(); - - // clear clone dir - boost::filesystem::path clone_dir_path(clone_dir); - boost::filesystem::remove_all(clone_dir_path); - LOG(INFO) << "finish to clone data, clear downloaded data. res=" << res - << ", tablet=" << tablet->full_name() - << ", clone_dir=" << clone_dir; - return res; -} - -OLAPStatus OLAPEngine::obtain_shard_path_by_hash( - int64_t path_hash, std::string* shard_path, OlapStore** store) { - LOG(INFO) << "begin to process obtain root path by hash: " << path_hash; - OLAPStatus res = OLAP_SUCCESS; - - auto the_store = OLAPEngine::get_instance()->get_store(path_hash); - if (the_store == nullptr) { - LOG(WARNING) << "failed to get store by path hash: " << path_hash; - return OLAP_REQUEST_FAILED; - } - - uint64_t shard = 0; - res = the_store->get_shard(&shard); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to get root path shard. res: " << res; - return res; - } - - stringstream root_path_stream; - root_path_stream << the_store->path() << DATA_PREFIX << "/" << shard; - *shard_path = root_path_stream.str(); - *store = the_store; - - LOG(INFO) << "success to process obtain root path by hash. path: " - << shard_path; - return res; -} - -OLAPStatus OLAPEngine::obtain_shard_path( - TStorageMedium::type storage_medium, std::string* shard_path, OlapStore** store) { - LOG(INFO) << "begin to process obtain root path. storage_medium=" << storage_medium; - OLAPStatus res = OLAP_SUCCESS; - - if (shard_path == NULL) { - OLAP_LOG_WARNING("invalid output parameter which is null pointer."); - return OLAP_ERR_CE_CMD_PARAMS_ERROR; - } - - auto stores = OLAPEngine::get_instance()->get_stores_for_create_table(storage_medium); - if (stores.empty()) { - OLAP_LOG_WARNING("no available disk can be used to create table."); - return OLAP_ERR_NO_AVAILABLE_ROOT_PATH; - } - - uint64_t shard = 0; - res = stores[0]->get_shard(&shard); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to get root path shard. [res=%d]", res); - return res; - } - - stringstream root_path_stream; - root_path_stream << stores[0]->path() << DATA_PREFIX << "/" << shard; - *shard_path = root_path_stream.str(); - *store = stores[0]; - - LOG(INFO) << "success to process obtain root path. path=" << shard_path; - return res; -} - -OLAPStatus OLAPEngine::load_header( - const string& shard_path, - const TCloneReq& request) { - LOG(INFO) << "begin to process load headers." 
- << "tablet_id=" << request.tablet_id - << ", schema_hash=" << request.schema_hash; - OLAPStatus res = OLAP_SUCCESS; - - OlapStore* store = nullptr; - { - // TODO(zc) - try { - auto store_path = - boost::filesystem::path(shard_path).parent_path().parent_path().string(); - store = OLAPEngine::get_instance()->get_store(store_path); - if (store == nullptr) { - LOG(WARNING) << "invalid shard path, path=" << shard_path; - return OLAP_ERR_INVALID_ROOT_PATH; - } - } catch (...) { - LOG(WARNING) << "invalid shard path, path=" << shard_path; - return OLAP_ERR_INVALID_ROOT_PATH; - } - } - - stringstream schema_hash_path_stream; - schema_hash_path_stream << shard_path - << "/" << request.tablet_id - << "/" << request.schema_hash; - res = OLAPEngine::get_instance()->load_one_tablet( - store, - request.tablet_id, request.schema_hash, - schema_hash_path_stream.str()); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to process load headers. [res=%d]", res); - return res; - } - - LOG(INFO) << "success to process load headers."; - return res; -} - -OLAPStatus OLAPEngine::load_header( - OlapStore* store, - const string& shard_path, - TTabletId tablet_id, - TSchemaHash schema_hash) { - LOG(INFO) << "begin to process load headers. tablet_id=" << tablet_id - << "schema_hash=" << schema_hash; - OLAPStatus res = OLAP_SUCCESS; - - stringstream schema_hash_path_stream; - schema_hash_path_stream << shard_path - << "/" << tablet_id - << "/" << schema_hash; - res = OLAPEngine::get_instance()->load_one_tablet( - store, - tablet_id, schema_hash, - schema_hash_path_stream.str()); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to process load headers. [res=%d]", res); - return res; - } - - LOG(INFO) << "success to process load headers."; - return res; -} - -OLAPStatus OLAPEngine::clear_alter_task(const TTabletId tablet_id, - const TSchemaHash schema_hash) { - LOG(INFO) << "begin to process clear alter task. tablet_id=" << tablet_id - << ", schema_hash=" << schema_hash; - OLAPTablePtr tablet = get_table(tablet_id, schema_hash); - if (tablet.get() == NULL) { - OLAP_LOG_WARNING("can't find tablet when process clear alter task. ", - "[tablet_id=%ld, schema_hash=%d]", tablet_id, schema_hash); - return OLAP_SUCCESS; - } - - // get schema change info - AlterTabletType type; - TTabletId related_tablet_id; - TSchemaHash related_schema_hash; - vector schema_change_versions; - tablet->obtain_header_rdlock(); - bool ret = tablet->get_schema_change_request( - &related_tablet_id, &related_schema_hash, &schema_change_versions, &type); - tablet->release_header_lock(); - if (!ret) { - return OLAP_SUCCESS; - } else if (!schema_change_versions.empty()) { - OLAP_LOG_WARNING("find alter task unfinished when process clear alter task. ", - "[tablet=%s versions_to_change_size=%d]", - tablet->full_name().c_str(), schema_change_versions.size()); - return OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED; - } - - // clear schema change info - tablet->obtain_header_wrlock(); - tablet->clear_schema_change_request(); - OLAPStatus res = tablet->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. [res=" << res << " tablet='" << tablet->full_name() << "']"; - } else { - LOG(INFO) << "clear alter task on tablet. 
[tablet='" << tablet->full_name() << "']"; - } - tablet->release_header_lock(); - - // clear related tablet's schema change info - OLAPTablePtr related_table = get_table(related_tablet_id, related_schema_hash); - if (related_table.get() == NULL) { - OLAP_LOG_WARNING("related table not found when process clear alter task. " - "[tablet_id=%ld schema_hash=%d " - "related_tablet_id=%ld related_schema_hash=%d]", - tablet_id, schema_hash, related_tablet_id, related_schema_hash); - } else { - related_table->obtain_header_wrlock(); - related_table->clear_schema_change_request(); - res = related_table->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. [res=" << res << " tablet='" - << related_table->full_name() << "']"; - } else { - LOG(INFO) << "clear alter task on tablet. [tablet='" << related_table->full_name() << "']"; - } - related_table->release_header_lock(); - } - - LOG(INFO) << "finish to process clear alter task." - << "tablet_id=" << related_tablet_id - << ", schema_hash=" << related_schema_hash; - return OLAP_SUCCESS; -} - -OLAPStatus OLAPEngine::push( - const TPushReq& request, - vector* tablet_info_vec) { - OLAPStatus res = OLAP_SUCCESS; - LOG(INFO) << "begin to process push. tablet_id=" << request.tablet_id - << ", version=" << request.version; - - if (tablet_info_vec == NULL) { - OLAP_LOG_WARNING("invalid output parameter which is null pointer."); - DorisMetrics::push_requests_fail_total.increment(1); - return OLAP_ERR_CE_CMD_PARAMS_ERROR; - } - - OLAPTablePtr olap_table = OLAPEngine::get_instance()->get_table( - request.tablet_id, request.schema_hash); - if (NULL == olap_table.get()) { - OLAP_LOG_WARNING("false to find table. [table=%ld schema_hash=%d]", - request.tablet_id, request.schema_hash); - DorisMetrics::push_requests_fail_total.increment(1); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - PushType type = PUSH_NORMAL; - if (request.push_type == TPushType::LOAD_DELETE) { - type = PUSH_FOR_LOAD_DELETE; - } - - int64_t duration_ns = 0; - PushHandler push_handler; - OLAPStatus lock_status = olap_table->try_migration_rdlock(); - if (lock_status != OLAP_SUCCESS) { - res = lock_status; - } else { - if (request.__isset.transaction_id) { - { - SCOPED_RAW_TIMER(&duration_ns); - res = push_handler.process_realtime_push(olap_table, request, type, tablet_info_vec); - } - } else { - { - SCOPED_RAW_TIMER(&duration_ns); - res = push_handler.process(olap_table, request, type, tablet_info_vec); - } - } - olap_table->release_migration_lock(); - } - - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to push delta, table=" << olap_table->full_name().c_str() - << ",cost=" << PrettyPrinter::print(duration_ns, TUnit::TIME_NS); - DorisMetrics::push_requests_fail_total.increment(1); - } else { - LOG(INFO) << "success to push delta, table=" << olap_table->full_name().c_str() - << ",cost=" << PrettyPrinter::print(duration_ns, TUnit::TIME_NS); - DorisMetrics::push_requests_success_total.increment(1); - DorisMetrics::push_request_duration_us.increment(duration_ns / 1000); - DorisMetrics::push_request_write_bytes.increment(push_handler.write_bytes()); - DorisMetrics::push_request_write_rows.increment(push_handler.write_rows()); - } - return res; -} - -} // namespace doris diff --git a/be/src/olap/olap_engine.h b/be/src/olap/olap_engine.h deleted file mode 100644 index c8cc693f99aa48..00000000000000 --- a/be/src/olap/olap_engine.h +++ /dev/null @@ -1,636 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef DORIS_BE_SRC_OLAP_OLAP_ENGINE_H -#define DORIS_BE_SRC_OLAP_OLAP_ENGINE_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "agent/status.h" -#include "common/status.h" -#include "gen_cpp/AgentService_types.h" -#include "gen_cpp/BackendService_types.h" -#include "gen_cpp/MasterService_types.h" -#include "olap/atomic.h" -#include "olap/lru_cache.h" -#include "olap/olap_common.h" -#include "olap/olap_define.h" -#include "olap/olap_table.h" -#include "olap/olap_meta.h" -#include "olap/options.h" - -namespace doris { - -class OLAPTable; -class OlapStore; - -struct RootPathInfo { - RootPathInfo(): - capacity(1), - available(0), - data_used_capacity(0), - is_used(false) { } - - std::string path; - int64_t path_hash; - int64_t capacity; // 总空间,单位字节 - int64_t available; // 可用空间,单位字节 - int64_t data_used_capacity; - bool is_used; // 是否可用标识 - TStorageMedium::type storage_medium; // 存储介质类型:SSD|HDD -}; - -// OLAPEngine singleton to manage all Table pointers. -// Providing add/drop/get operations. -// OLAPEngine instance doesn't own the Table resources, just hold the pointer, -// allocation/deallocation must be done outside. -class OLAPEngine { -public: - OLAPEngine() { } - OLAPEngine(const EngineOptions& options); - ~OLAPEngine(); - - static Status open(const EngineOptions& options, OLAPEngine** engine_ptr); - - static void set_instance(OLAPEngine* engine) { - _s_instance = engine; - } - - static OLAPEngine *get_instance() { - return _s_instance; - } - - // Get table pointer - // TODO(cmy): I think it is better to return Status instead of OLAPTablePtr, - // so that the caller can decide what to do next based on Status. - // Currently, I just add a new parameter 'err' to save the error msg. - // This should be redesigned later. 
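// A sketch of the Status-style lookup the TODO above hints at (illustrative
// only; the helper name is hypothetical and it is not part of this change):
// fold the optional error string into a single return code so callers do not
// have to test the returned shared_ptr by hand.
static OLAPStatus get_table_checked(OLAPEngine* engine,
                                    TTabletId tablet_id,
                                    SchemaHash schema_hash,
                                    OLAPTablePtr* out) {
    std::string err;
    OLAPTablePtr table = engine->get_table(tablet_id, schema_hash,
                                           true /*load_table*/, &err);
    if (table == nullptr) {
        LOG(WARNING) << "fail to get table. tablet_id=" << tablet_id
                     << ", schema_hash=" << schema_hash << ", err=" << err;
        return OLAP_ERR_TABLE_NOT_FOUND;
    }
    *out = table;
    return OLAP_SUCCESS;
}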
- OLAPTablePtr get_table(TTabletId tablet_id, SchemaHash schema_hash, bool load_table = true, std::string* err = nullptr); - - OLAPStatus get_tables_by_id(TTabletId tablet_id, std::list* table_list); - - bool check_tablet_id_exist(TTabletId tablet_id); - - OLAPStatus create_table(const TCreateTabletReq& request); - - // Create new table for OLAPEngine - // - // Return OLAPTable * succeeded; Otherwise, return NULL if failed - OLAPTablePtr create_table(const TCreateTabletReq& request, - const std::string* ref_root_path, - const bool is_schema_change_table, - const OLAPTablePtr ref_olap_table); - - // Add a table pointer to OLAPEngine - // If force, drop the existing table add this new one - // - // Return OLAP_SUCCESS, if run ok - // OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR, if find duplication - // OLAP_ERR_NOT_INITED, if not inited - OLAPStatus add_table(TTabletId tablet_id, SchemaHash schema_hash, - const OLAPTablePtr& table, bool force = false); - - OLAPStatus add_transaction(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, - const PUniqueId& load_id); - - void delete_transaction(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, - bool delete_from_tablet = true); - - void get_transactions_by_tablet(OLAPTablePtr tablet, int64_t* partition_id, - std::set* transaction_ids); - - bool has_transaction(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash); - - OLAPStatus publish_version(const TPublishVersionRequest& publish_version_req, - std::vector* error_tablet_ids); - - void clear_transaction_task(const TTransactionId transaction_id, - const std::vector partition_ids); - - OLAPStatus clone_incremental_data(OLAPTablePtr tablet, OLAPHeader& clone_header, - int64_t committed_version); - - OLAPStatus clone_full_data(OLAPTablePtr tablet, OLAPHeader& clone_header); - - // Add empty data for OLAPTable - // - // Return OLAP_SUCCESS, if run ok - OLAPStatus create_init_version( - TTabletId tablet_id, SchemaHash schema_hash, - Version version, VersionHash version_hash); - - // Drop a table by description - // If set keep_files == true, files will NOT be deleted when deconstruction. - // Return OLAP_SUCCESS, if run ok - // OLAP_ERR_TABLE_DELETE_NOEXIST_ERROR, if table not exist - // OLAP_ERR_NOT_INITED, if not inited - OLAPStatus drop_table( - TTabletId tablet_id, SchemaHash schema_hash, bool keep_files = false); - - // Drop table directly with check schema change info. - OLAPStatus _drop_table_directly(TTabletId tablet_id, TSchemaHash schema_hash, bool keep_files = false); - OLAPStatus _drop_table_directly_unlocked(TTabletId tablet_id, TSchemaHash schema_hash, bool keep_files = false); - - OLAPStatus drop_tables_on_error_root_path(const std::vector& tablet_info_vec); - - // Prevent schema change executed concurrently. - bool try_schema_change_lock(TTabletId tablet_id); - void release_schema_change_lock(TTabletId tablet_id); - - // 获取所有tables的名字 - // - // Return OLAP_SUCCESS, if run ok - // OLAP_ERR_INPUT_PARAMETER_ERROR, if tables is null - OLAPStatus report_tablet_info(TTabletInfo* tablet_info); - OLAPStatus report_all_tablets_info(std::map* tablets_info); - - void get_tablet_stat(TTabletStatResult& result); - - // Instance should be inited from create_instance - // MUST NOT be called in other circumstances. - OLAPStatus open(); - - // Clear status(tables, ...) 
- OLAPStatus clear(); - - void start_clean_fd_cache(); - void perform_cumulative_compaction(OlapStore* store); - void perform_base_compaction(OlapStore* store); - - // 获取cache的使用情况信息 - void get_cache_status(rapidjson::Document* document) const; - - void check_none_row_oriented_table(const std::vector& stores); - OLAPStatus check_none_row_oriented_table_in_path( - OlapStore* store, TTabletId tablet_id, - SchemaHash schema_hash, const std::string& schema_hash_path); - OLAPStatus _check_none_row_oriented_table_in_store(OlapStore* store); - - // Note: 这里只能reload原先已经存在的root path,即re-load启动时就登记的root path - // 是允许的,但re-load全新的path是不允许的,因为此处没有彻底更新ce调度器信息 - void load_stores(const std::vector& stores); - - OLAPStatus load_one_tablet(OlapStore* store, - TTabletId tablet_id, - SchemaHash schema_hash, - const std::string& schema_hash_path, - bool force = false); - - Cache* index_stream_lru_cache() { - return _index_stream_lru_cache; - } - - // 清理trash和snapshot文件,返回清理后的磁盘使用量 - OLAPStatus start_trash_sweep(double *usage); - - template - std::vector get_stores(); - Status set_cluster_id(int32_t cluster_id); - - // @brief 设置root_path是否可用 - void set_store_used_flag(const std::string& root_path, bool is_used); - - // @brief 获取所有root_path信息 - OLAPStatus get_all_root_path_info(std::vector* root_paths_info); - - void get_all_available_root_path(std::vector* available_paths); - - OLAPStatus register_table_into_root_path(OLAPTable* olap_table); - - // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位, - // 当检测到有unused标识时,从内存中删除对应表信息,磁盘数据不动。 - // 当磁盘状态为不可用,但未检测到unused标识时,需要从root_path上 - // 重新加载数据。 - void start_disk_stat_monitor(); - - // get root path for creating table. The returned vector of root path should be random, - // for avoiding that all the table would be deployed one disk. - std::vector get_stores_for_create_table( - TStorageMedium::type storage_medium); - OlapStore* get_store(const std::string& path); - OlapStore* get_store(int64_t path_hash); - - uint32_t available_storage_medium_type_count() { - return _available_storage_medium_type_count; - } - - int32_t effective_cluster_id() const { - return _effective_cluster_id; - } - - uint32_t get_file_system_count() { - return _store_map.size(); - } - - // @brief 创建snapshot - // @param tablet_id [in] 原表的id - // @param schema_hash [in] 原表的schema,与tablet_id参数合起来唯一确定一张表 - // @param snapshot_path [out] 新生成的snapshot的路径 - OLAPStatus make_snapshot( - const TSnapshotRequest& request, - std::string* snapshot_path); - - // @brief 释放snapshot - // @param snapshot_path [in] 要被释放的snapshot的路径,只包含到ID - OLAPStatus release_snapshot(const std::string& snapshot_path); - - // @brief 迁移数据,从一种存储介质到另一种存储介质 - OLAPStatus storage_medium_migrate( - TTabletId tablet_id, - TSchemaHash schema_hash, - TStorageMedium::type storage_medium); - - void start_delete_unused_index(); - - void add_unused_index(SegmentGroup* olap_index); - - // check whether files are in gc's unused files - // revoke them from the unused files if they exists - void revoke_files_from_gc(const std::vector& files_to_check); - - // ######################### ALTER TABLE BEGIN ######################### - // The following interfaces are all about alter tablet operation, - // the main logical is that generating a new tablet with different - // schema on base tablet. - - // Create rollup tablet on base tablet, after create_rollup_table, - // both base tablet and new tablet is effective. 
- // - // @param [in] request specify base tablet, new tablet and its schema - // @return OLAP_SUCCESS if submit success - OLAPStatus create_rollup_table(const TAlterTabletReq& request); - - // Do schema change on tablet, OLAPEngine support - // add column, drop column, alter column type and order, - // after schema_change, base tablet is abandoned. - // Note that the two tablets has same tablet_id but different schema_hash - // - // @param [in] request specify base tablet, new tablet and its schema - // @return OLAP_SUCCESS if submit success - OLAPStatus schema_change(const TAlterTabletReq& request); - - // Show status of all alter table operation. - // - // @param [in] tablet_id & schema_hash specify a tablet - // @return alter table status - AlterTableStatus show_alter_table_status(TTabletId tablet_id, TSchemaHash schema_hash); - - OLAPStatus compute_checksum( - TTabletId tablet_id, - TSchemaHash schema_hash, - TVersion version, - TVersionHash version_hash, - uint32_t* checksum); - - OLAPStatus cancel_delete(const TCancelDeleteDataReq& request); - - // Delete data of specified tablet according to delete conditions, - // once delete_data command submit success, deleted data is not visible, - // but not actually deleted util delay_delete_time run out. - // - // @param [in] request specify tablet and delete conditions - // @param [out] tablet_info_vec return tablet lastest status, which - // include version info, row count, data size, etc - // @return OLAP_SUCCESS if submit delete_data success - virtual OLAPStatus delete_data( - const TPushReq& request, - std::vector* tablet_info_vec); - - OLAPStatus recover_tablet_until_specfic_version( - const TRecoverTabletReq& recover_tablet_req); - - // before doing incremental clone, - // need to calculate tablet's download dir and tablet's missing versions - virtual std::string get_info_before_incremental_clone(OLAPTablePtr tablet, - int64_t committed_version, std::vector* missing_versions); - - virtual OLAPStatus finish_clone(OLAPTablePtr tablet, const std::string& clone_dir, - int64_t committed_version, bool is_incremental_clone); - - - // Obtain the path by specified path hash - virtual OLAPStatus obtain_shard_path_by_hash( - int64_t path_hash, - std::string* shared_path, - OlapStore** store); - - // Obtain shard path for new tablet. - // - // @param [out] shard_path choose an available root_path to clone new tablet - // @return error code - virtual OLAPStatus obtain_shard_path( - TStorageMedium::type storage_medium, - std::string* shared_path, - OlapStore** store); - - // Load new tablet to make it effective. - // - // @param [in] root_path specify root path of new tablet - // @param [in] request specify new tablet info - // @return OLAP_SUCCESS if load tablet success - virtual OLAPStatus load_header( - const std::string& shard_path, const TCloneReq& request); - virtual OLAPStatus load_header( - OlapStore* store, - const std::string& shard_path, - TTabletId tablet_id, - TSchemaHash schema_hash); - - OLAPStatus clear_alter_task(const TTabletId tablet_id, - const TSchemaHash schema_hash); - OLAPStatus push( - const TPushReq& request, - std::vector* tablet_info_vec); - - // call this if you want to trigger a disk and tablet report - void report_notify(bool is_all) { - is_all ? 
_report_cv.notify_all() : _report_cv.notify_one(); - } - - // call this to wait a report notification until timeout - void wait_for_report_notify(int64_t timeout_sec, bool is_tablet_report) { - std::unique_lock lk(_report_mtx); - auto cv_status = _report_cv.wait_for(lk, std::chrono::seconds(timeout_sec)); - if (cv_status == std::cv_status::no_timeout) { - is_tablet_report ? _is_report_olap_table_already = true : - _is_report_disk_state_already = true; - } - } - -private: - OLAPStatus check_all_root_path_cluster_id(); - - bool _used_disk_not_enough(uint32_t unused_num, uint32_t total_num); - - OLAPStatus _get_path_available_capacity( - const std::string& root_path, - int64_t* disk_available); - - OLAPStatus _config_root_path_unused_flag_file( - const std::string& root_path, - std::string* unused_flag_file); - - void _delete_tables_on_unused_root_path(); - - void _update_storage_medium_type_count(); - - OLAPStatus _judge_and_update_effective_cluster_id(int32_t cluster_id); - - OLAPStatus _calc_snapshot_id_path( - const OLAPTablePtr& olap_table, - std::string* out_path); - - std::string _get_schema_hash_full_path( - const OLAPTablePtr& ref_olap_table, - const std::string& location) const; - - std::string _get_header_full_path( - const OLAPTablePtr& ref_olap_table, - const std::string& schema_hash_path) const; - - void _update_header_file_info( - const std::vector& shortest_version_entity, - OLAPHeader* header); - - OLAPStatus _link_index_and_data_files( - const std::string& header_path, - const OLAPTablePtr& ref_olap_table, - const std::vector& version_entity_vec); - - OLAPStatus _copy_index_and_data_files( - const std::string& header_path, - const OLAPTablePtr& ref_olap_table, - std::vector& version_entity_vec); - - OLAPStatus _create_snapshot_files( - const OLAPTablePtr& ref_olap_table, - const TSnapshotRequest& request, - std::string* snapshot_path); - - OLAPStatus _create_incremental_snapshot_files( - const OLAPTablePtr& ref_olap_table, - const TSnapshotRequest& request, - std::string* snapshot_path); - - OLAPStatus _prepare_snapshot_dir(const OLAPTablePtr& ref_olap_table, - std::string* snapshot_id_path); - - OLAPStatus _append_single_delta( - const TSnapshotRequest& request, - OlapStore* store); - - std::string _construct_index_file_path( - const std::string& tablet_path_prefix, - const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const; - - std::string _construct_data_file_path( - const std::string& tablet_path_prefix, - const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const; - - OLAPStatus _generate_new_header( - OlapStore* store, - const uint64_t new_shard, - const OLAPTablePtr& tablet, - const std::vector& version_entity_vec, OLAPHeader* new_olap_header); - - OLAPStatus _create_hard_link(const std::string& from_path, const std::string& to_path); - - OLAPStatus _start_bg_worker(); - - OLAPStatus _create_init_version(OLAPTablePtr olap_table, const TCreateTabletReq& request); - -private: - struct TableInstances { - Mutex schema_change_lock; - std::list table_arr; - }; - - enum CompactionType { - BASE_COMPACTION = 1, - CUMULATIVE_COMPACTION = 2 - }; - - struct CompactionCandidate { - CompactionCandidate(uint32_t nicumulative_compaction_, int64_t tablet_id_, uint32_t index_) : - nice(nicumulative_compaction_), tablet_id(tablet_id_), disk_index(index_) {} - uint32_t nice; // 优先度 - int64_t tablet_id; - uint32_t disk_index = -1; - }; - - struct CompactionCandidateComparator { - bool 
operator()(const CompactionCandidate& a, const CompactionCandidate& b) { - return a.nice > b.nice; - } - }; - - struct CompactionDiskStat { - CompactionDiskStat(std::string path, uint32_t index, bool used) : - storage_path(path), - disk_index(index), - task_running(0), - task_remaining(0), - is_used(used){} - const std::string storage_path; - const uint32_t disk_index; - uint32_t task_running; - uint32_t task_remaining; - bool is_used; - }; - - typedef std::map tablet_map_t; - typedef std::map file_system_task_count_t; - - OLAPTablePtr _get_table_with_no_lock(TTabletId tablet_id, SchemaHash schema_hash); - - // 遍历root所指定目录, 通过dirs返回此目录下所有有文件夹的名字, files返回所有文件的名字 - OLAPStatus _dir_walk(const std::string& root, - std::set* dirs, - std::set* files); - - // 扫描目录, 加载表 - OLAPStatus _load_store(OlapStore* store); - - OLAPStatus _create_new_table_header(const TCreateTabletReq& request, - OlapStore* store, - const bool is_schema_change_table, - const OLAPTablePtr ref_olap_table, - OLAPHeader* header); - - OLAPStatus _check_existed_or_else_create_dir(const std::string& path); - - OLAPTablePtr _find_best_tablet_to_compaction(CompactionType compaction_type, OlapStore* store); - bool _can_do_compaction(OLAPTablePtr table); - - void _cancel_unfinished_schema_change(); - - OLAPStatus _do_sweep( - const std::string& scan_root, const time_t& local_tm_now, const uint32_t expire); - - void _build_tablet_info(OLAPTablePtr olap_table, TTabletInfo* tablet_info); - void _build_tablet_stat(); - - EngineOptions _options; - std::mutex _store_lock; - std::map _store_map; - uint32_t _available_storage_medium_type_count; - - int32_t _effective_cluster_id; - bool _is_all_cluster_id_exist; - bool _is_drop_tables; - - // 错误磁盘所在百分比,超过设定的值,则engine需要退出运行 - uint32_t _min_percentage_of_error_disk; - - RWMutex _tablet_map_lock; - tablet_map_t _tablet_map; - RWMutex _transaction_tablet_map_lock; - using TxnKey = std::pair; // partition_id, transaction_id; - std::map>> _transaction_tablet_map; - size_t _global_table_id; - Cache* _file_descriptor_lru_cache; - Cache* _index_stream_lru_cache; - uint32_t _max_base_compaction_task_per_disk; - uint32_t _max_cumulative_compaction_task_per_disk; - - Mutex _fs_task_mutex; - file_system_task_count_t _fs_base_compaction_task_num_map; - std::vector _cumulative_compaction_candidate; - - // cache to save tablets' statistics, such as data size and row - // TODO(cmy): for now, this is a naive implementation - std::map _tablet_stat_cache; - std::mutex _tablet_stat_mutex; - // last update time of tablet stat cache - int64_t _tablet_stat_cache_update_time_ms; - - static OLAPEngine* _s_instance; - - // snapshot - Mutex _snapshot_mutex; - uint64_t _snapshot_base_id; - - std::unordered_map> _gc_files; - Mutex _gc_mutex; - - // Thread functions - - // base compaction thread process function - void* _base_compaction_thread_callback(void* arg, OlapStore* store); - - // garbage sweep thread process function. 
clear snapshot and trash folder - void* _garbage_sweeper_thread_callback(void* arg); - - // delete table with io error process function - void* _disk_stat_monitor_thread_callback(void* arg); - - // unused index process function - void* _unused_index_thread_callback(void* arg); - - // cumulative process function - void* _cumulative_compaction_thread_callback(void* arg, OlapStore* store); - - // clean file descriptors cache - void* _fd_cache_clean_callback(void* arg); - - // thread to monitor snapshot expiry - std::thread _garbage_sweeper_thread; - - // thread to monitor disk stat - std::thread _disk_stat_monitor_thread; - - // thread to monitor unused index - std::thread _unused_index_thread; - - // thread to run base compaction - std::vector _base_compaction_threads; - - // thread to check cumulative - std::vector _cumulative_compaction_threads; - - std::thread _fd_cache_clean_thread; - - static atomic_t _s_request_number; - - // for tablet and disk report - std::mutex _report_mtx; - std::condition_variable _report_cv; - std::atomic_bool _is_report_disk_state_already; - std::atomic_bool _is_report_olap_table_already; -}; - -} // namespace doris - -#endif // DORIS_BE_SRC_OLAP_OLAP_ENGINE_H diff --git a/be/src/olap/olap_header.cpp b/be/src/olap/olap_header.cpp deleted file mode 100644 index 08f31f33c5a552..00000000000000 --- a/be/src/olap/olap_header.cpp +++ /dev/null @@ -1,1128 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/olap_header.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "olap/field.h" -#include "olap/wrapper_field.h" -#include "olap/file_helper.h" -#include "olap/utils.h" - -using google::protobuf::RepeatedPtrField; -using std::ifstream; -using std::ios; -using std::list; -using std::make_pair; -using std::ofstream; -using std::queue; -using std::sort; -using std::string; -using std::stringstream; -using std::unordered_map; -using std::vector; - -namespace doris { -// related static functions of version graph - -// Construct version graph(using adjacency list) from header's information. -static OLAPStatus construct_version_graph( - const RepeatedPtrField& versions_in_header, - vector* version_graph, - unordered_map* vertex_helper_map); - -// Clear version graph and vertex_helper_map, release memory hold by version_graph. 
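// Simplified sketch of the adjacency-list construction described above
// (the helper name, Version-pair input and std:: containers are illustrative
// only; the real graph stores nodes with a value and an edge list plus a
// value-to-index helper map). Every delta [start, end] contributes the
// boundary values start and end + 1 as vertices and one edge between them,
// stored in both directions so the search may also walk "reverse" deltas when
// the schema permits it. With deltas [0,5], [6,6], [7,10] the graph is
// 0 <-> 6 <-> 7 <-> 11, and a query for [0,10] is answered by the path
// 0 -> 6 -> 7 -> 11, which maps back to exactly those three deltas
// (see select_versions_to_span below).
static void build_version_graph_sketch(const std::vector<Version>& deltas,
                                       std::map<int, std::set<int>>* adjacency) {
    for (const Version& delta : deltas) {
        int from = delta.first;
        int to = delta.second + 1;
        (*adjacency)[from].insert(to);
        (*adjacency)[to].insert(from);
    }
}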
-static OLAPStatus clear_version_graph(vector* version_graph, - unordered_map* vertex_helper_map); - -// Add version to graph, it is called near the end of add_version -static OLAPStatus add_version_to_graph(const Version& version, - vector* version_graph, - unordered_map* vertex_helper_map); - -// Delete version from graph, it is called near the end of delete_version -static OLAPStatus delete_version_from_graph( - const RepeatedPtrField& versions_in_header, - const Version& version, - vector* version_graph, - unordered_map* vertex_helper_map); - -// Add vertex to graph, if vertex already exists, still return SUCCESS. -static OLAPStatus add_vertex_to_graph(int vertex_value, - vector* version_graph, - unordered_map* vertex_helper_map); - -OLAPHeader::~OLAPHeader() { - // Release memory of version graph. - clear_version_graph(&_version_graph, &_vertex_helper_map); - Clear(); -} - -void OLAPHeader::change_file_version_to_delta() { - // convert FileVersionMessage to PDelta and PSegmentGroup in initialization. - // FileVersionMessage is used in previous code, and PDelta and PSegmentGroup - // is used in streaming load branch. - for (int i = 0; i < file_version_size(); ++i) { - PDelta* delta = add_delta(); - _convert_file_version_to_delta(file_version(i), delta); - } - - clear_file_version(); -} - -OLAPStatus OLAPHeader::init() { - clear_version_graph(&_version_graph, &_vertex_helper_map); - if (construct_version_graph(delta(), - &_version_graph, - &_vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to construct version graph."); - return OLAP_ERR_OTHER_ERROR; - } - if (_file_name == "") { - stringstream file_stream; - file_stream << tablet_id() << ".hdr"; - _file_name = file_stream.str(); - } - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::load_and_init() { - // check the tablet_path is not empty - if (_file_name == "") { - LOG(WARNING) << "file_path is empty for header"; - return OLAP_ERR_DIR_NOT_EXIST; - } - - FileHeader file_header; - FileHandler file_handler; - - if (file_handler.open(_file_name.c_str(), O_RDONLY) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to open index file. [file='" << _file_name << "']"; - return OLAP_ERR_IO_ERROR; - } - - // In file_header.unserialize(), it validates file length, signature, checksum of protobuf. - if (file_header.unserialize(&file_handler) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to unserialize header. [path='" << _file_name << "']"; - return OLAP_ERR_PARSE_PROTOBUF_ERROR; - } - - try { - CopyFrom(file_header.message()); - } catch (...) { - LOG(WARNING) << "fail to copy protocol buffer object. [path='" << _file_name << "']"; - return OLAP_ERR_PARSE_PROTOBUF_ERROR; - } - - if (file_version_size() != 0) { - // convert FileVersionMessage to PDelta and PSegmentGroup in initialization. - for (int i = 0; i < file_version_size(); ++i) { - PDelta* delta = add_delta(); - _convert_file_version_to_delta(file_version(i), delta); - } - - clear_file_version(); - OLAPStatus res = save(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "failed to remove file version in initialization"; - } - } - return init(); -} - -OLAPStatus OLAPHeader::load_for_check() { - FileHeader file_header; - FileHandler file_handler; - - if (file_handler.open(_file_name.c_str(), O_RDONLY) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to open index file. [file='%s']", _file_name.c_str()); - return OLAP_ERR_IO_ERROR; - } - - // In file_header.unserialize(), it validates file length, signature, checksum of protobuf. 
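// Illustrative call sequence for the load/save pair used throughout this file
// (the header path and tablet id are hypothetical, error handling elided):
//
//   OLAPHeader header("/data_dir/.../10005.hdr");
//   header.load_and_init();   // unserialize the file and rebuild the version graph
//   header.add_version(...);  // or any other mutation of the protobuf message
//   header.save();            // rewrite the FileHeader-wrapped protobuf in place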
- if (file_header.unserialize(&file_handler) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to unserialize header. [path='%s']", _file_name.c_str()); - return OLAP_ERR_PARSE_PROTOBUF_ERROR; - } - - try { - CopyFrom(file_header.message()); - } catch (...) { - OLAP_LOG_WARNING("fail to copy protocol buffer object. [path='%s']", _file_name.c_str()); - return OLAP_ERR_PARSE_PROTOBUF_ERROR; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::save() { - return save(_file_name); -} - -OLAPStatus OLAPHeader::save(const string& file_path) { - // check the tablet_path is not empty - if (file_path == "") { - LOG(WARNING) << "file_path is empty for header"; - return OLAP_ERR_DIR_NOT_EXIST; - } - - FileHeader file_header; - FileHandler file_handler; - - if (file_handler.open_with_mode(file_path.c_str(), - O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to open header file. [file='" << file_path << "']"; - return OLAP_ERR_IO_ERROR; - } - - try { - file_header.mutable_message()->CopyFrom(*this); - } catch (...) { - LOG(WARNING) << "fail to copy protocol buffer object. [path='" << file_path << "']"; - return OLAP_ERR_OTHER_ERROR; - } - - if (file_header.prepare(&file_handler) != OLAP_SUCCESS - || file_header.serialize(&file_handler) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to serialize to file header. [path='" << file_path << "']"; - return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::add_version(Version version, VersionHash version_hash, - int32_t segment_group_id, int32_t num_segments, - int64_t index_size, int64_t data_size, int64_t num_rows, - bool empty, const std::vector* column_statistics) { - // Check whether version is valid. - if (version.first > version.second) { - LOG(WARNING) << "the version is not valid." - << "version=" << version.first << "-" << version.second; - return OLAP_ERR_HEADER_ADD_VERSION; - } - - int delta_id = -1; - for (int i = 0; i < delta_size(); ++i) { - if (delta(i).start_version() == version.first - && delta(i).end_version() == version.second) { - for (const PSegmentGroup& segment_group : delta(i).segment_group()) { - if (segment_group.segment_group_id() == segment_group_id) { - LOG(WARNING) << "the version is existed." - << "version=" << version.first << "-" - << version.second; - return OLAP_ERR_HEADER_ADD_VERSION; - } - } - delta_id = i; - break; - } - } - - // if segment_group_id is greater or equal than zero, it is used - // to streaming load - - // Try to add version to protobuf. 
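// Illustrative call sequence (all argument values hypothetical): the first
// call for a new version creates the PDelta, a later call with the same
// version but a new segment_group_id only appends a PSegmentGroup, and a
// duplicate (version, segment_group_id) pair is rejected.
//
//   header.add_version(Version(10, 10), hash, 0, 1, index_size, data_size, rows, false, nullptr);
//   header.add_version(Version(10, 10), hash, 1, 1, index_size, data_size, rows, false, nullptr);
//   header.add_version(Version(10, 10), hash, 1, 1, index_size, data_size, rows, false, nullptr);
//       // -> OLAP_ERR_HEADER_ADD_VERSION
//   header.add_version(Version(11, 11), hash, -1, 1, index_size, data_size, rows, false, nullptr);
//       // segment_group_id == -1 (snapshot path) always starts a new PDelta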
- PDelta* new_delta = nullptr; - try { - if (segment_group_id == -1 || delta_id == -1) { - // snapshot will use segment_group_id which equals minus one - new_delta = add_delta(); - new_delta->set_start_version(version.first); - new_delta->set_end_version(version.second); - new_delta->set_version_hash(version_hash); - new_delta->set_creation_time(time(NULL)); - } else { - new_delta = const_cast(&delta(delta_id)); - } - PSegmentGroup* new_segment_group = new_delta->add_segment_group(); - new_segment_group->set_segment_group_id(segment_group_id); - new_segment_group->set_num_segments(num_segments); - new_segment_group->set_index_size(index_size); - new_segment_group->set_data_size(data_size); - new_segment_group->set_num_rows(num_rows); - new_segment_group->set_empty(empty); - if (NULL != column_statistics) { - for (size_t i = 0; i < column_statistics->size(); ++i) { - ColumnPruning *column_pruning = - new_segment_group->add_column_pruning(); - column_pruning->set_min(column_statistics->at(i).first->to_string()); - column_pruning->set_max(column_statistics->at(i).second->to_string()); - column_pruning->set_null_flag(column_statistics->at(i).first->is_null()); - } - } - } catch (...) { - OLAP_LOG_WARNING("add file version to protobf error"); - return OLAP_ERR_HEADER_ADD_VERSION; - } - - if (add_version_to_graph(version, &_version_graph, &_vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add version to graph. [version='%d-%d']", - version.first, - version.second); - return OLAP_ERR_HEADER_ADD_VERSION; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::add_pending_version( - int64_t partition_id, int64_t transaction_id, - const std::vector* delete_conditions) { - for (int i = 0; i < pending_delta_size(); ++i) { - if (pending_delta(i).transaction_id() == transaction_id) { - LOG(WARNING) << "pending delta already exists in header." - << "transaction_id: " << transaction_id; - return OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST; - } - } - - try { - PPendingDelta* new_pending_delta = add_pending_delta(); - new_pending_delta->set_partition_id(partition_id); - new_pending_delta->set_transaction_id(transaction_id); - new_pending_delta->set_creation_time(time(NULL)); - - if (delete_conditions != nullptr) { - DeleteConditionMessage* del_cond = new_pending_delta->mutable_delete_condition(); - del_cond->set_version(0); - for (const string& condition : *delete_conditions) { - del_cond->add_sub_conditions(condition); - LOG(INFO) << "store one sub-delete condition. condition=" << condition - << ", transaction_id=" << transaction_id; - } - } - - } catch (...) { - LOG(WARNING) << "fail to add pending segment_group to header protobf"; - return OLAP_ERR_HEADER_ADD_PENDING_DELTA; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::add_pending_segment_group( - int64_t transaction_id, int32_t num_segments, - int32_t pending_segment_group_id, const PUniqueId& load_id, - bool empty, const std::vector* column_statistics) { - - int32_t delta_id = 0; - for (int32_t i = 0; i < pending_delta_size(); ++i) { - const PPendingDelta& delta = pending_delta(i); - if (delta.transaction_id() == transaction_id) { - delta_id = i; - for (int j = 0; j < delta.pending_segment_group_size(); ++j) { - const PPendingSegmentGroup& pending_segment_group = delta.pending_segment_group(j); - if (pending_segment_group.pending_segment_group_id() == pending_segment_group_id) { - LOG(WARNING) << "pending segment_group already exists in header." 
- << "transaction_id:" << transaction_id - << ", pending_segment_group_id: " << pending_segment_group_id; - return OLAP_SUCCESS; - } - } - } - } - - try { - PPendingSegmentGroup* new_pending_segment_group - = const_cast(pending_delta(delta_id)).add_pending_segment_group(); - new_pending_segment_group->set_pending_segment_group_id(pending_segment_group_id); - new_pending_segment_group->set_num_segments(num_segments); - new_pending_segment_group->mutable_load_id()->set_hi(load_id.hi()); - new_pending_segment_group->mutable_load_id()->set_lo(load_id.lo()); - new_pending_segment_group->set_empty(empty); - if (NULL != column_statistics) { - for (size_t i = 0; i < column_statistics->size(); ++i) { - ColumnPruning *column_pruning = - new_pending_segment_group->add_column_pruning(); - column_pruning->set_min(column_statistics->at(i).first->to_string()); - column_pruning->set_max(column_statistics->at(i).second->to_string()); - column_pruning->set_null_flag(column_statistics->at(i).first->is_null()); - } - } - } catch (...) { - OLAP_LOG_WARNING("fail to add pending segment_group to protobf"); - return OLAP_ERR_HEADER_ADD_PENDING_DELTA; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::add_incremental_version(Version version, VersionHash version_hash, - int32_t segment_group_id, int32_t num_segments, - int64_t index_size, int64_t data_size, int64_t num_rows, - bool empty, const std::vector* column_statistics) { - // Check whether version is valid. - if (version.first != version.second) { - OLAP_LOG_WARNING("the incremental version is not valid. [version=%d]", version.first); - return OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION; - } - - // Check whether the version is existed. - int32_t delta_id = 0; - for (int i = 0; i < incremental_delta_size(); ++i) { - const PDelta& incre_delta = incremental_delta(i); - if (incre_delta.start_version() == version.first) { - delta_id = i; - for (int j = 0; j < incre_delta.segment_group_size(); ++j) { - const PSegmentGroup& incremental_segment_group = incre_delta.segment_group(j); - if (incremental_segment_group.segment_group_id() == segment_group_id) { - LOG(WARNING) << "segment_group already exists in header." - << "version: " << version.first << "-" << version.second << "," - << "segment_group_id: " << segment_group_id; - return OLAP_ERR_HEADER_ADD_PENDING_DELTA; - } - } - } - } - - // Try to add version to protobuf. 
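// Same calling pattern as add_version() above, but only singleton versions are
// accepted here (argument values hypothetical):
//
//   header.add_incremental_version(Version(12, 12), hash, 0, 1, idx, data, rows, false, nullptr);  // new incremental PDelta
//   header.add_incremental_version(Version(12, 12), hash, 1, 1, idx, data, rows, false, nullptr);  // appends segment group 1
//   header.add_incremental_version(Version(9, 12), hash, 0, 1, idx, data, rows, false, nullptr);   // rejected: start != end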
- try { - PDelta* new_incremental_delta = nullptr; - if (segment_group_id == 0) { - new_incremental_delta = add_incremental_delta(); - new_incremental_delta->set_start_version(version.first); - new_incremental_delta->set_end_version(version.second); - new_incremental_delta->set_version_hash(version_hash); - new_incremental_delta->set_creation_time(time(NULL)); - } else { - new_incremental_delta = const_cast(&incremental_delta(delta_id)); - } - PSegmentGroup* new_incremental_segment_group = new_incremental_delta->add_segment_group(); - new_incremental_segment_group->set_segment_group_id(segment_group_id); - new_incremental_segment_group->set_num_segments(num_segments); - new_incremental_segment_group->set_index_size(index_size); - new_incremental_segment_group->set_data_size(data_size); - new_incremental_segment_group->set_num_rows(num_rows); - new_incremental_segment_group->set_empty(empty); - if (NULL != column_statistics) { - for (size_t i = 0; i < column_statistics->size(); ++i) { - ColumnPruning *column_pruning = - new_incremental_segment_group->add_column_pruning(); - column_pruning->set_min(column_statistics->at(i).first->to_string()); - column_pruning->set_max(column_statistics->at(i).second->to_string()); - column_pruning->set_null_flag(column_statistics->at(i).first->is_null()); - } - } - } catch (...) { - OLAP_LOG_WARNING("add incremental version to protobf error"); - return OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION; - } - - return OLAP_SUCCESS; -} - -void OLAPHeader::add_delete_condition(const DeleteConditionMessage& delete_condition, - int64_t version) { - // check whether condition exist - DeleteConditionMessage* del_cond = NULL; - int i = 0; - for (; i < delete_data_conditions_size(); i++) { - DeleteConditionMessage temp = delete_data_conditions().Get(i); - if (temp.version() == version) { - break; - } - } - - // clear existed condition - if (i < delete_data_conditions_size()) { - del_cond = mutable_delete_data_conditions(i); - del_cond->clear_sub_conditions(); - } else { - del_cond = add_delete_data_conditions(); - del_cond->set_version(version); - } - - for (const string& condition : delete_condition.sub_conditions()) { - del_cond->add_sub_conditions(condition); - } - LOG(INFO) << "add delete condition. version=" << version; -} - -void OLAPHeader::delete_cond_by_version(const Version& version) { - DCHECK(version.first == version.second); - google::protobuf::RepeatedPtrField* delete_conditions - = mutable_delete_data_conditions(); - int index = 0; - for (; index < delete_conditions->size(); ++index) { - const DeleteConditionMessage& temp = delete_conditions->Get(index); - if (temp.version() == version.first) { - // log delete condtion - string del_cond_str; - const RepeatedPtrField& sub_conditions = temp.sub_conditions(); - - for (int i = 0; i != sub_conditions.size(); ++i) { - del_cond_str += sub_conditions.Get(i) + ";"; - } - - LOG(INFO) << "delete one condition. 
version=" << temp.version() - << ", condition=" << del_cond_str; - - // remove delete condition from PB - delete_conditions->SwapElements(index, delete_conditions->size() - 1); - delete_conditions->RemoveLast(); - } - } -} - -bool OLAPHeader::is_delete_data_version(Version version) { - if (version.first != version.second) { - return false; - } - - google::protobuf::RepeatedPtrField::const_iterator it; - it = delete_data_conditions().begin(); - for (; it != delete_data_conditions().end(); ++it) { - if (it->version() == version.first) { - return true; - } - } - - return false; -} - -const PPendingDelta* OLAPHeader::get_pending_delta(int64_t transaction_id) const { - for (int i = 0; i < pending_delta_size(); i++) { - if (pending_delta(i).transaction_id() == transaction_id) { - return &pending_delta(i); - } - } - return nullptr; -} - -const PPendingSegmentGroup* OLAPHeader::get_pending_segment_group(int64_t transaction_id, - int32_t pending_segment_group_id) const { - for (int i = 0; i < pending_delta_size(); i++) { - if (pending_delta(i).transaction_id() == transaction_id) { - const PPendingDelta& delta = pending_delta(i); - for (int j = 0; j < delta.pending_segment_group_size(); ++j) { - const PPendingSegmentGroup& pending_segment_group = delta.pending_segment_group(j); - if (pending_segment_group.pending_segment_group_id() == pending_segment_group_id) { - return &pending_segment_group; - } - } - } - } - return nullptr; -} - -const PDelta* OLAPHeader::get_incremental_version(Version version) const { - for (int i = 0; i < incremental_delta_size(); i++) { - if (incremental_delta(i).start_version() == version.first - && incremental_delta(i).end_version() == version.second) { - return &incremental_delta(i); - } - } - return nullptr; -} - -OLAPStatus OLAPHeader::delete_version(Version version) { - // Find the version that need to be deleted. - int index = -1; - for (int i = 0; i < delta_size(); ++i) { - if (delta(i).start_version() == version.first - && delta(i).end_version() == version.second) { - index = i; - break; - } - } - - // Delete version from protobuf. - if (index != -1) { - RepeatedPtrField* version_ptr = mutable_delta(); - for (int i = index; i < delta_size() - 1; ++i) { - version_ptr->SwapElements(i, i + 1); - } - - version_ptr->RemoveLast(); - } - - // Atomic delete is not supported now. - if (delete_version_from_graph(delta(), version, - &_version_graph, - &_vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to delete version from graph. 
[version='%d-%d']", - version.first, - version.second); - return OLAP_ERR_HEADER_DELETE_VERSION; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPHeader::delete_all_versions() { - clear_file_version(); - clear_delta(); - clear_pending_delta(); - clear_incremental_delta(); - clear_version_graph(&_version_graph, &_vertex_helper_map); - - if (construct_version_graph(delta(), - &_version_graph, - &_vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to construct version graph."); - return OLAP_ERR_OTHER_ERROR; - } - return OLAP_SUCCESS; -} - -void OLAPHeader::delete_pending_delta(int64_t transaction_id) { - int index = -1; - for (int i = 0; i < pending_delta_size(); ++i) { - if (pending_delta(i).transaction_id() == transaction_id) { - index = i; - break; - } - } - - if (index != -1) { - RepeatedPtrField* pending_delta_ptr = mutable_pending_delta(); - for (int i = index; i < pending_delta_size() - 1; ++i) { - pending_delta_ptr->SwapElements(i, i + 1); - } - - pending_delta_ptr->RemoveLast(); - } -} - -void OLAPHeader::delete_incremental_delta(Version version) { - int index = -1; - for (int i = 0; i < incremental_delta_size(); ++i) { - if (incremental_delta(i).start_version() == version.first - && incremental_delta(i).end_version() == version.second) { - index = i; - break; - } - } - - if (index != -1) { - RepeatedPtrField* version_ptr = mutable_incremental_delta(); - for (int i = index; i < incremental_delta_size() - 1; ++i) { - version_ptr->SwapElements(i, i + 1); - } - - version_ptr->RemoveLast(); - } -} - -// This function is called when base-compaction, cumulative-compaction, quering. -// we use BFS algorithm to get the shortest version path. -OLAPStatus OLAPHeader::select_versions_to_span(const Version& target_version, - vector* span_versions) { - if (target_version.first > target_version.second) { - OLAP_LOG_WARNING("invalid param target_version. [start_version_id=%d end_version_id=%d]", - target_version.first, - target_version.second); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - if (span_versions == NULL) { - OLAP_LOG_WARNING("param span_versions is NULL."); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - // bfs_queue's element is vertex_index. - queue bfs_queue; - // predecessor[i] means the predecessor of vertex_index 'i'. - vector predecessor(_version_graph.size()); - // visited[int]==true means it had entered bfs_queue. - vector visited(_version_graph.size()); - // [start_vertex_value, end_vertex_value) - int start_vertex_value = target_version.first; - int end_vertex_value = target_version.second + 1; - // -1 is invalid vertex index. - int start_vertex_index = -1; - // -1 is valid vertex index. - int end_vertex_index = -1; - // Sometimes, the version path can not have reverse version even you set - // _support_reverse_version to be true. - bool can_support_reverse = _support_reverse_version; - - // Check schema to see if we can support reverse version in the version - // path. If the aggregation type of any value column is SUM, then we can - // not support reverse version. 
- for (int i = 0; can_support_reverse && i < column_size(); ++i) { - if (column(i).is_key() == false && column(i).aggregation().compare("SUM") != 0) { - can_support_reverse = false; - } - } - - for (size_t i = 0; i < _version_graph.size(); ++i) { - if (_version_graph[i].value == start_vertex_value) { - start_vertex_index = i; - } - if (_version_graph[i].value == end_vertex_value) { - end_vertex_index = i; - } - } - - if (start_vertex_index < 0 || end_vertex_index < 0) { - OLAP_LOG_WARNING("fail to find version in version list. " - "[start_version_id=%d end_version_id=%d tmp_start=%d tmp_end=%d]", - target_version.first, - target_version.second, - start_vertex_index, - end_vertex_index); - return OLAP_ERR_VERSION_NOT_EXIST; - } - - for (int i = 0; i < static_cast(_version_graph.size()); ++i) { - visited[i] = false; - } - - bfs_queue.push(start_vertex_index); - visited[start_vertex_index] = true; - // The predecessor of root is itself. - predecessor[start_vertex_index] = start_vertex_index; - - while (bfs_queue.empty() == false && visited[end_vertex_index] == false) { - int top_vertex_index = bfs_queue.front(); - bfs_queue.pop(); - - for (list::const_iterator it = _version_graph[top_vertex_index].edges->begin(); - it != _version_graph[top_vertex_index].edges->end(); ++it) { - if (visited[*it] == false) { - // If we don't support reverse version in the path, and start vertex - // value is larger than the end vertex value, we skip this edge. - if (can_support_reverse == false - && _version_graph[top_vertex_index].value > _version_graph[*it].value) { - continue; - } - - visited[*it] = true; - predecessor[*it] = top_vertex_index; - bfs_queue.push(*it); - } - } - } - - if (visited[end_vertex_index] == false) { - OLAP_LOG_WARNING("fail to find path to end_version in version list. " - "[start_version_id=%d end_version_id=%d]", - target_version.first, - target_version.second); - return OLAP_ERR_VERSION_NOT_EXIST; - } - - vector reversed_path; - int tmp_vertex_index = end_vertex_index; - reversed_path.push_back(tmp_vertex_index); - - // For start_vertex_index, its predecessor must be itself. - while (predecessor[tmp_vertex_index] != tmp_vertex_index) { - tmp_vertex_index = predecessor[tmp_vertex_index]; - reversed_path.push_back(tmp_vertex_index); - } - - // Make span_versions from reversed_path. - stringstream shortest_path_for_debug; - for (int path_id = reversed_path.size() - 1; path_id > 0; --path_id) { - int tmp_start_vertex_value = _version_graph[reversed_path[path_id]].value; - int tmp_end_vertex_value = _version_graph[reversed_path[path_id - 1]].value; - - // tmp_start_vertex_value mustn't be equal to tmp_end_vertex_value - if (tmp_start_vertex_value <= tmp_end_vertex_value) { - span_versions->push_back(make_pair(tmp_start_vertex_value, tmp_end_vertex_value - 1)); - } else { - span_versions->push_back(make_pair(tmp_end_vertex_value, tmp_start_vertex_value - 1)); - } - - shortest_path_for_debug << (*span_versions)[span_versions->size() - 1].first << '-' - << (*span_versions)[span_versions->size() - 1].second << ' '; - } - - VLOG(10) << "calculated shortest path. 
" - << "version=" << target_version.first << "-" << target_version.second - << " path=" << shortest_path_for_debug.str(); - - return OLAP_SUCCESS; -} - -const PDelta* OLAPHeader::get_lastest_delta_version() const { - if (delta_size() == 0) { - return nullptr; - } - - const PDelta* max_delta = nullptr; - for (int i = delta_size() - 1; i >= 0; --i) { - if (delta(i).start_version() == delta(i).end_version()) { - if (max_delta == nullptr) { - max_delta = &delta(i); - } else if (delta(i).start_version() > max_delta->start_version()) { - max_delta = &delta(i); - } - } - } - if (max_delta != nullptr) { - LOG(INFO) << "max_delta:" << max_delta->start_version() << "," - << max_delta->end_version(); - } - return max_delta; -} - -const PDelta* OLAPHeader::get_lastest_version() const { - if (delta_size() == 0) { - return nullptr; - } - - const PDelta* max_delta = nullptr; - for (int i = delta_size() - 1; i >= 0; --i) { - if (max_delta == nullptr) { - max_delta = &delta(i); - } else if (delta(i).end_version() > max_delta->end_version()) { - max_delta = &delta(i); - } else if (delta(i).end_version() == max_delta->end_version() - && delta(i).start_version() == delta(i).end_version()) { - max_delta = &delta(i); - } - } - return max_delta; -} - -Version OLAPHeader::get_latest_version() const { - auto delta = get_lastest_version(); - return {delta->start_version(), delta->end_version()}; -} - -const PDelta* OLAPHeader::get_delta(int index) const { - if (delta_size() == 0) { - return nullptr; - } - - return &delta(index); -} - -void OLAPHeader::_convert_file_version_to_delta(const FileVersionMessage& version, - PDelta* delta) { - delta->set_start_version(version.start_version()); - delta->set_end_version(version.end_version()); - delta->set_version_hash(version.version_hash()); - delta->set_creation_time(version.creation_time()); - - PSegmentGroup* segment_group = delta->add_segment_group(); - segment_group->set_segment_group_id(-1); - segment_group->set_num_segments(version.num_segments()); - segment_group->set_index_size(version.index_size()); - segment_group->set_data_size(version.data_size()); - segment_group->set_num_rows(version.num_rows()); - if (version.has_delta_pruning()) { - for (int i = 0; i < version.delta_pruning().column_pruning_size(); ++i) { - ColumnPruning* column_pruning = segment_group->add_column_pruning(); - *column_pruning = version.delta_pruning().column_pruning(i); - } - } -} - -const uint32_t OLAPHeader::get_cumulative_compaction_score() const{ - uint32_t score = 0; - bool base_version_exists = false; - const int32_t point = cumulative_layer_point(); - for (int i = delta_size() - 1; i >= 0; --i) { - if (delta(i).start_version() >= point) { - score++; - } - if (delta(i).start_version() == 0) { - base_version_exists = true; - } - } - score = score < config::cumulative_compaction_num_singleton_deltas ? 0 : score; - - // base不存在可能是tablet正在做alter table,先不选它,设score=0 - return base_version_exists ? score : 0; -} - -const uint32_t OLAPHeader::get_base_compaction_score() const{ - uint32_t score = 0; - const int32_t point = cumulative_layer_point(); - bool base_version_exists = false; - for (int i = delta_size() - 1; i >= 0; --i) { - if (delta(i).end_version() < point) { - score++; - } - if (delta(i).start_version() == 0) { - base_version_exists = true; - } - } - score = score < config::base_compaction_num_cumulative_deltas ? 0 : score; - - // base不存在可能是tablet正在做alter table,先不选它,设score=0 - return base_version_exists ? 
score : 0; -} - -const OLAPStatus OLAPHeader::version_creation_time(const Version& version, - int64_t* creation_time) const { - if (delta_size() == 0) { - return OLAP_ERR_VERSION_NOT_EXIST; - } - - for (int i = delta_size() - 1; i >= 0; --i) { - const PDelta& temp = delta(i); - if (temp.start_version() == version.first && temp.end_version() == version.second) { - *creation_time = temp.creation_time(); - return OLAP_SUCCESS; - } - } - - return OLAP_ERR_VERSION_NOT_EXIST; -} - -// Related static functions about version graph. - -#define CHECK_GRAPH_PARAMS(param1, param2) \ - if (param1 == NULL || param2 == NULL) { \ - OLAP_LOG_WARNING("invalid graph parameters."); \ - return OLAP_ERR_INPUT_PARAMETER_ERROR; \ - } - -// Construct version graph(using adjacency list) from header's information. -static OLAPStatus construct_version_graph( - const RepeatedPtrField& versions_in_header, - vector* version_graph, - unordered_map* vertex_helper_map) { - if (versions_in_header.size() == 0) { - VLOG(3) << "there is no version in the header."; - return OLAP_SUCCESS; - } - - CHECK_GRAPH_PARAMS(version_graph, vertex_helper_map); - // Distill vertex values from versions in OLAPHeader. - vector vertex_values; - vertex_values.reserve(2 * versions_in_header.size()); - - for (int i = 0; i < versions_in_header.size(); ++i) { - vertex_values.push_back(versions_in_header.Get(i).start_version()); - vertex_values.push_back(versions_in_header.Get(i).end_version() + 1); - VLOG(3) << "added two vertex_values. " - << "version=" << versions_in_header.Get(i).start_version() - << "-" << versions_in_header.Get(i).end_version() + 1; - } - - sort(vertex_values.begin(), vertex_values.end()); - - // Clear vertex_helper_map and version graph. - version_graph->clear(); - - // Items in vertex_values are sorted, but not unique. - // we choose unique items in vertex_values to create vertexes. - int last_vertex_value = -1; - for (size_t i = 0; i < vertex_values.size(); ++i) { - if (i != 0 && vertex_values[i] == last_vertex_value) { - continue; - } - - // Add vertex to graph. - if (add_vertex_to_graph(vertex_values[i], - version_graph, - vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add vertex to version graph. [vertex_value=%d]", - vertex_values[i]); - return OLAP_ERR_OTHER_ERROR; - } - - last_vertex_value = vertex_values[i]; - } - - // Create edges for version graph according to OLAPHeader's versions. - for (int i = 0; i < versions_in_header.size(); ++i) { - // Versions in header are unique. - // We ensure vertex_helper_map has its start_version. - int start_vertex_index = (*vertex_helper_map)[versions_in_header.Get(i).start_version()]; - int end_vertex_index = (*vertex_helper_map)[versions_in_header.Get(i).end_version() + 1]; - // Add one edge from start_version to end_version. - list* edges = (*version_graph)[start_vertex_index].edges; - edges->insert(edges->begin(), end_vertex_index); - // Add reverse edge from end_version to start_version. - list* r_edges = (*version_graph)[end_vertex_index].edges; - r_edges->insert(r_edges->begin(), start_vertex_index); - } - - return OLAP_SUCCESS; -} - -// Clear version graph and vertex_helper_map, release memory hold by version_graph. -static OLAPStatus clear_version_graph(vector* version_graph, - unordered_map* vertex_helper_map) { - CHECK_GRAPH_PARAMS(version_graph, vertex_helper_map); - - // Release memory of version graph. 
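construct_version_graph() above turns every delta [start, end] into two boundary vertices, start and end + 1, joined by a forward and a reverse edge. A minimal sketch of that mapping, reusing the adjacency-map shape from the BFS sketch earlier (names are illustrative, not the Doris API):

```cpp
// Sketch only: how deltas become boundary vertices and bidirectional edges.
// The real code stores Vertex structs plus a vertex_helper_map from
// boundary value to vertex index; this uses a plain map for clarity.
#include <unordered_map>
#include <utility>
#include <vector>

using VersionRange = std::pair<int, int>;  // inclusive [start, end]

std::unordered_map<int, std::vector<int>> build_boundary_graph(
        const std::vector<VersionRange>& deltas) {
    std::unordered_map<int, std::vector<int>> adjacency;
    for (const VersionRange& d : deltas) {
        const int from = d.first;       // start boundary
        const int to = d.second + 1;    // end boundary (half-open)
        adjacency[from].push_back(to);  // forward edge
        adjacency[to].push_back(from);  // reverse edge, added unconditionally
    }
    return adjacency;
}
```

With deltas {0,4}, {5,5} and {6,9} this yields vertices 0, 5, 6 and 10, and the path 0 -> 5 -> 6 -> 10 maps back to the spans 0-4, 5-5 and 6-9.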
- vertex_helper_map->clear(); - for (vector::iterator it = version_graph->begin(); - it != version_graph->end(); ++it) { - SAFE_DELETE(it->edges); - } - version_graph->clear(); - - return OLAP_SUCCESS; -} - -// Add version to graph, it is called near the end of add_version -static OLAPStatus add_version_to_graph(const Version& version, - vector* version_graph, - unordered_map* vertex_helper_map) { - CHECK_GRAPH_PARAMS(version_graph, vertex_helper_map); - // Add version.first as new vertex of version graph if not exist. - int start_vertex_value = version.first; - int end_vertex_value = version.second + 1; - - // Add vertex to graph. - if (add_vertex_to_graph(start_vertex_value, version_graph, vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add vertex to version graph. [vertex=%d]", start_vertex_value); - return OLAP_ERR_OTHER_ERROR; - } - - if (add_vertex_to_graph(end_vertex_value, version_graph, vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add vertex to version graph. [vertex=%d]", end_vertex_value); - return OLAP_ERR_OTHER_ERROR; - } - - int start_vertex_index = (*vertex_helper_map)[start_vertex_value]; - int end_vertex_index = (*vertex_helper_map)[end_vertex_value]; - - // We assume this version is new version, so we just add two edges - // into version graph. add one edge from start_version to end_version - list* edges = (*version_graph)[start_vertex_index].edges; - edges->insert(edges->begin(), end_vertex_index); - - // We add reverse edge(from end_version to start_version) to graph in spite - // that _support_reverse_version is false. - list* r_edges = (*version_graph)[end_vertex_index].edges; - r_edges->insert(r_edges->begin(), start_vertex_index); - - return OLAP_SUCCESS; -} - -// Delete version from graph, it is called near the end of delete_version -static OLAPStatus delete_version_from_graph( - const RepeatedPtrField& versions_in_header, - const Version& version, - vector* version_graph, - unordered_map* vertex_helper_map) { - CHECK_GRAPH_PARAMS(version_graph, vertex_helper_map); - int start_vertex_value = version.first; - int end_vertex_value = version.second + 1; - - if (vertex_helper_map->find(start_vertex_value) == vertex_helper_map->end()) { - OLAP_LOG_WARNING("vertex for version.first does not exists. [version='%d-%d']", - version.first, - version.second); - return OLAP_ERR_VERSION_NOT_EXIST; - } - - if (vertex_helper_map->find(end_vertex_value) == vertex_helper_map->end()) { - OLAP_LOG_WARNING("vertex for version.second+1 does not exists. [version='%d-%d']", - version.first, - version.second); - return OLAP_ERR_VERSION_NOT_EXIST; - } - - int start_vertex_index = (*vertex_helper_map)[start_vertex_value]; - int end_vertex_index = (*vertex_helper_map)[end_vertex_value]; - // Remove edge and its reverse edge. - (*version_graph)[start_vertex_index].edges->remove(end_vertex_index); - (*version_graph)[end_vertex_index].edges->remove(start_vertex_index); - - // We should reconstruct version graph if the ratio of isolated vertexes - // reaches RATIO_OF_ISOLATED_VERTEX = 30%. The last version may be treated - // as isolated vertex(if no reverse edge), but it doesn't matter. - int num_isolated_vertex = 0; - for (vector::const_iterator it = version_graph->begin(); - it != version_graph->end(); ++it) { - if (it->edges->size() == 0) { - ++num_isolated_vertex; - } - } - - // If the number of isolated vertex reaches this ratio, reconstruct the - // version graph. 
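The check that follows counts the vertices left with no edges after a deletion and rebuilds the whole graph once they exceed roughly 30% of all vertices. A sketch of that threshold test in isolation (function name and input shape assumed for illustration):

```cpp
// Sketch only: the rebuild-on-fragmentation heuristic applied after an edge
// pair is removed. Once too many vertices are isolated, reconstructing the
// graph from the surviving deltas is simpler than patching it in place.
#include <cstddef>
#include <vector>

bool should_rebuild_graph(const std::vector<std::size_t>& degree_per_vertex,
                          double isolated_ratio = 0.3) {
    std::size_t isolated = 0;
    for (std::size_t degree : degree_per_vertex) {
        if (degree == 0) {
            ++isolated;
        }
    }
    // The "+ 1" mirrors the tolerance below: a single leftover isolated
    // boundary never forces a rebuild on its own.
    return isolated > 1 + static_cast<std::size_t>(
                              isolated_ratio * degree_per_vertex.size());
}
```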
- // ratio of isolated vertex in version graph - const static double RATIO_OF_ISOLATED_VERTEX = 0.3; - - if (num_isolated_vertex > 1 + static_cast(RATIO_OF_ISOLATED_VERTEX - * version_graph->size())) { - VLOG(3) << "the number of isolated vertexes reaches specified ratio," - << "reconstruct version graph. num_isolated_vertex=" << num_isolated_vertex - << ", num_vertex=" << version_graph->size(); - - // Release memory of version graph. - for (vector::iterator it = version_graph->begin(); - it != version_graph->end(); ++it) { - delete it->edges; - it->edges = NULL; - } - - // We do not use swap pointer technique to avoid version_graph - // construction failue. - clear_version_graph(version_graph, vertex_helper_map); - - // Reconstruct version graph. - if (construct_version_graph(versions_in_header, - version_graph, - vertex_helper_map) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("reconstruct version graph fail."); - return OLAP_ERR_OTHER_ERROR; - } - } - - return OLAP_SUCCESS; -} - -// Add vertex to graph, if vertex already exists, still return SUCCESS. -static OLAPStatus add_vertex_to_graph(int vertex_value, - vector* version_graph, - unordered_map* vertex_helper_map) { - CHECK_GRAPH_PARAMS(version_graph, vertex_helper_map); - - // Vertex with vertex_value already exists. - if (vertex_helper_map->find(vertex_value) != vertex_helper_map->end()) { - VLOG(3) << "vertex with vertex value already exists. value=" << vertex_value; - return OLAP_SUCCESS; - } - - list* edges = new(std::nothrow) list(); - if (edges == NULL) { - OLAP_LOG_WARNING("fail to malloc edge list."); - return OLAP_ERR_MALLOC_ERROR; - } - - Vertex vertex = {vertex_value, edges}; - version_graph->push_back(vertex); - (*vertex_helper_map)[vertex_value] = version_graph->size() - 1; - return OLAP_SUCCESS; -} - -const PDelta* OLAPHeader::get_base_version() const { - if (delta_size() == 0) { - return nullptr; - } - - for (int i = 0; i < delta_size(); ++i) { - if (delta(i).start_version() == 0) { - return &delta(i); - } - } - - return nullptr; -} - -} // namespace doris diff --git a/be/src/olap/olap_header_manager.cpp b/be/src/olap/olap_header_manager.cpp deleted file mode 100755 index 9bd04dca654773..00000000000000 --- a/be/src/olap/olap_header_manager.cpp +++ /dev/null @@ -1,177 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
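The file deleted below persisted each tablet's OLAPHeader in the store's RocksDB meta column family under a key of the form hdr_&lt;tablet_id&gt;_&lt;schema_hash&gt;. A minimal sketch of that key round-trip, with hypothetical helper names and no RocksDB dependency:

```cpp
// Sketch only: the "hdr_<tablet_id>_<schema_hash>" key scheme used by the
// removed OlapHeaderManager. Helper names are illustrative.
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

static const std::string kHeaderPrefix = "hdr_";

std::string make_header_key(int64_t tablet_id, int32_t schema_hash) {
    std::stringstream key;
    key << kHeaderPrefix << tablet_id << "_" << schema_hash;
    return key.str();
}

// Returns false when the key does not have the expected three '_'-separated
// parts, mirroring the guard in traverse_headers() below. (A production
// version would also validate that the id parts are numeric.)
bool parse_header_key(const std::string& key,
                      int64_t* tablet_id, int32_t* schema_hash) {
    std::vector<std::string> parts;
    std::stringstream ss(key);
    std::string part;
    while (std::getline(ss, part, '_')) {
        parts.push_back(part);
    }
    if (parts.size() != 3 || parts[0] + "_" != kHeaderPrefix) {
        return false;
    }
    *tablet_id = std::stoll(parts[1]);
    *schema_hash = static_cast<int32_t>(std::stol(parts[2]));
    return true;
}
```

The value stored under such a key is the header serialized with SerializeToString(), which is what get_header()/save() below read back and write out.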
- -#include "olap/olap_header_manager.h" - -#include -#include -#include -#include -#include - -#include "olap/olap_define.h" -#include "olap/olap_engine.h" -#include "olap/olap_meta.h" -#include "common/logging.h" -#include "json2pb/json_to_pb.h" -#include "json2pb/pb_to_json.h" - -using rocksdb::DB; -using rocksdb::DBOptions; -using rocksdb::ColumnFamilyDescriptor; -using rocksdb::ColumnFamilyHandle; -using rocksdb::ColumnFamilyOptions; -using rocksdb::ReadOptions; -using rocksdb::WriteOptions; -using rocksdb::Slice; -using rocksdb::Iterator; -using rocksdb::Status; -using rocksdb::kDefaultColumnFamilyName; - -namespace doris { - -const std::string HEADER_PREFIX = "hdr_"; - -OLAPStatus OlapHeaderManager::get_header(OlapStore* store, - TTabletId tablet_id, TSchemaHash schema_hash, OLAPHeader* header) { - OlapMeta* meta = store->get_meta(); - std::stringstream key_stream; - key_stream << HEADER_PREFIX << tablet_id << "_" << schema_hash; - std::string key = key_stream.str(); - std::string value; - OLAPStatus s = meta->get(META_COLUMN_FAMILY_INDEX, key, value); - if (s == OLAP_ERR_META_KEY_NOT_FOUND) { - LOG(WARNING) << "tablet_id:" << tablet_id << ", schema_hash:" << schema_hash << " not found."; - return OLAP_ERR_META_KEY_NOT_FOUND; - } else if (s != OLAP_SUCCESS) { - LOG(WARNING) << "load tablet_id:" << tablet_id << ", schema_hash:" << schema_hash << " failed."; - return s; - } - header->ParseFromString(value); - return header->init(); -} - -OLAPStatus OlapHeaderManager::get_json_header(OlapStore* store, - TTabletId tablet_id, TSchemaHash schema_hash, std::string* json_header) { - OLAPHeader header; - OLAPStatus s = get_header(store, tablet_id, schema_hash, &header); - if (s != OLAP_SUCCESS) { - return s; - } - json2pb::Pb2JsonOptions json_options; - json_options.pretty_json = true; - json2pb::ProtoMessageToJson(header, json_header, json_options); - return OLAP_SUCCESS; -} - - -OLAPStatus OlapHeaderManager::save(OlapStore* store, - TTabletId tablet_id, TSchemaHash schema_hash, const OLAPHeader* header) { - std::stringstream key_stream; - key_stream << HEADER_PREFIX << tablet_id << "_" << schema_hash; - std::string key = key_stream.str(); - std::string value; - header->SerializeToString(&value); - OlapMeta* meta = store->get_meta(); - OLAPStatus s = meta->put(META_COLUMN_FAMILY_INDEX, key, value); - return s; -} - -OLAPStatus OlapHeaderManager::remove(OlapStore* store, TTabletId tablet_id, TSchemaHash schema_hash) { - std::stringstream key_stream; - key_stream << HEADER_PREFIX << tablet_id << "_" << schema_hash; - std::string key = key_stream.str(); - OlapMeta* meta = store->get_meta(); - LOG(INFO) << "start to remove header, key:" << key; - OLAPStatus res = meta->remove(META_COLUMN_FAMILY_INDEX, key); - LOG(INFO) << "remove header, key:" << key << ", res:" << res; - return res; -} - -OLAPStatus OlapHeaderManager::get_header_converted(OlapStore* store, bool& flag) { - // get is_header_converted flag - std::string value; - std::string key = IS_HEADER_CONVERTED; - OlapMeta* meta = store->get_meta(); - OLAPStatus s = meta->get(DEFAULT_COLUMN_FAMILY_INDEX, key, value); - if (s == OLAP_ERR_META_KEY_NOT_FOUND || value == "false") { - flag = false; - } else if (value == "true") { - flag = true; - } else { - LOG(WARNING) << "invalid _is_header_converted. 
_is_header_converted=" << value; - return OLAP_ERR_HEADER_INVALID_FLAG; - } - return OLAP_SUCCESS; -} - -OLAPStatus OlapHeaderManager::set_converted_flag(OlapStore* store) { - OlapMeta* meta = store->get_meta(); - OLAPStatus s = meta->put(DEFAULT_COLUMN_FAMILY_INDEX, IS_HEADER_CONVERTED, CONVERTED_FLAG); - return s; -} - -OLAPStatus OlapHeaderManager::traverse_headers(OlapMeta* meta, - std::function const& func) { - auto traverse_header_func = [&func](const std::string& key, const std::string& value) -> bool { - std::vector parts; - // key format: "hdr_" + tablet_id + "_" + schema_hash - split_string(key, '_', &parts); - if (parts.size() != 3) { - LOG(WARNING) << "invalid header key:" << key << ", splitted size:" << parts.size(); - return true; - } - TTabletId tablet_id = std::stol(parts[1].c_str(), NULL, 10); - TSchemaHash schema_hash = std::stol(parts[2].c_str(), NULL, 10); - return func(tablet_id, schema_hash, value); - }; - OLAPStatus status = meta->iterate(META_COLUMN_FAMILY_INDEX, HEADER_PREFIX, traverse_header_func); - return status; -} - -OLAPStatus OlapHeaderManager::load_json_header(OlapStore* store, const std::string& header_path) { - std::ifstream infile(header_path); - char buffer[1024]; - std::string json_header; - while (!infile.eof()) { - infile.getline(buffer, 1024); - json_header = json_header + buffer; - } - boost::algorithm::trim(json_header); - OLAPHeader header; - bool ret = json2pb::JsonToProtoMessage(json_header, &header); - if (!ret) { - return OLAP_ERR_HEADER_LOAD_JSON_HEADER; - } - TTabletId tablet_id = header.tablet_id(); - TSchemaHash schema_hash = header.schema_hash(); - OLAPStatus s = save(store, tablet_id, schema_hash, &header); - return s; -} - -OLAPStatus OlapHeaderManager::dump_header(OlapStore* store, TTabletId tablet_id, - TSchemaHash schema_hash, const std::string& dump_path) { - OLAPHeader header; - OLAPStatus res = OlapHeaderManager::get_header(store, tablet_id, schema_hash, &header); - if (res != OLAP_SUCCESS) { - return res; - } - res = header.save(dump_path); - return res; -} - -} diff --git a/be/src/olap/olap_header_manager.h b/be/src/olap/olap_header_manager.h deleted file mode 100644 index c74be38c3fe485..00000000000000 --- a/be/src/olap/olap_header_manager.h +++ /dev/null @@ -1,56 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef DORIS_BE_SRC_OLAP_OLAP_HEADER_MANAGER_H -#define DORIS_BE_SRC_OLAP_OLAP_HEADER_MANAGER_H - -#include - -#include "olap/olap_header.h" -#include "olap/olap_define.h" -#include "olap/store.h" - -namespace doris { - -// Helper Class for managing olap table headers of one root path. 
-class OlapHeaderManager { -public: - static OLAPStatus get_header(OlapStore* store, TTabletId tablet_id, TSchemaHash schema_hash, OLAPHeader* header); - - static OLAPStatus get_json_header(OlapStore* store, TTabletId tablet_id, - TSchemaHash schema_hash, std::string* json_header); - - static OLAPStatus save(OlapStore* store, TTabletId tablet_id, TSchemaHash schema_hash, const OLAPHeader* header); - - static OLAPStatus remove(OlapStore* store, TTabletId tablet_id, TSchemaHash schema_hash); - - static OLAPStatus traverse_headers(OlapMeta* meta, - std::function const& func); - - static OLAPStatus get_header_converted(OlapStore* store, bool& flag); - - static OLAPStatus set_converted_flag(OlapStore* store); - - static OLAPStatus load_json_header(OlapStore* store, const std::string& header_path); - - static OLAPStatus dump_header(OlapStore* store, TTabletId tablet_id, - TSchemaHash schema_hash, const std::string& path); -}; - -} - -#endif // DORIS_BE_SRC_OLAP_OLAP_HEADER_MANAGER_H diff --git a/be/src/olap/olap_index.cpp b/be/src/olap/olap_index.cpp index 5fbc81cf7315b2..86c232ed303bc6 100644 --- a/be/src/olap/olap_index.cpp +++ b/be/src/olap/olap_index.cpp @@ -22,8 +22,7 @@ #include #include -#include "olap/column_data.h" -#include "olap/olap_table.h" +#include "olap/rowset/column_data.h" #include "olap/row_block.h" #include "olap/row_cursor.h" #include "olap/utils.h" @@ -58,26 +57,23 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per OLAPStatus res = OLAP_SUCCESS; SegmentMetaInfo meta; - OLAPIndexHeaderMessage pb; uint32_t adler_checksum = 0; uint32_t num_entries = 0; if (file == NULL) { res = OLAP_ERR_INPUT_PARAMETER_ERROR; - OLAP_LOG_WARNING("load segment for loading index error. [file=%s; res=%d]", file, res); + LOG(WARNING) << "load index error. file=" << file << ", res=" << res; return res; } FileHandler file_handler; if ((res = file_handler.open_with_cache(file, O_RDONLY)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to open index file. [file='%s']", file); - OLAP_LOG_WARNING("load segment for loading index error. [file=%s; res=%d]", file, res); + LOG(WARNING) << "load index error. file=" << file << ", res=" << res; return res; } if ((res = meta.file_header.unserialize(&file_handler)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to read index file header. [file='%s']", file); - OLAP_LOG_WARNING("load segment for loading index error. [file=%s; res=%d]", file, res); + LOG(WARNING) << "load index error. file=" << file << ", res=" << res; file_handler.close(); return res; } @@ -94,25 +90,24 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per } else { null_supported = meta.file_header.message().null_supported(); } - size_t num_short_key_fields = short_key_num(); + size_t num_short_key_columns = short_key_num(); bool is_align = false; if (!null_supported) { - is_align = (0 == storage_length % (entry_length() - num_short_key_fields)); + is_align = (0 == storage_length % (entry_length() - num_short_key_columns)); } else { is_align = (0 == storage_length % entry_length()); } if (!is_align) { res = OLAP_ERR_INDEX_LOAD_ERROR; - OLAP_LOG_WARNING("fail to load_segment, buffer length is not correct."); - OLAP_LOG_WARNING("load segment for loading index error. [file=%s; res=%d]", file, res); + LOG(WARNING) << "load index error. 
file=" << file << ", res=" << res; file_handler.close(); return res; } // calculate the total size of all segments if (!null_supported) { - _index_size += meta.file_header.file_length() + num_entries * num_short_key_fields; - num_entries = storage_length / (entry_length() - num_short_key_fields); + _index_size += meta.file_header.file_length() + num_entries * num_short_key_columns; + num_entries = storage_length / (entry_length() - num_short_key_columns); } else { _index_size += meta.file_header.file_length(); num_entries = storage_length / entry_length(); @@ -175,7 +170,7 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per */ size_t storage_row_bytes = entry_length(); - storage_row_bytes -= (null_supported ? 0 : num_short_key_fields); + storage_row_bytes -= (null_supported ? 0 : num_short_key_columns); char* storage_ptr = storage_data; size_t storage_field_offset = 0; @@ -186,11 +181,12 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per size_t mem_field_offset = 0; size_t null_byte = null_supported ? 1 : 0; - for (size_t i = 0; i < num_short_key_fields; ++i) { + for (size_t i = 0; i < num_short_key_columns; ++i) { + const TabletColumn& column = (*_short_key_columns)[i]; storage_ptr = storage_data + storage_field_offset; - storage_field_offset += (*_fields)[i].index_length + null_byte; + storage_field_offset += column.index_length() + null_byte; mem_ptr = mem_buf + mem_field_offset; - if ((*_fields)[i].type == OLAP_FIELD_TYPE_VARCHAR) { + if (column.type() == OLAP_FIELD_TYPE_VARCHAR) { mem_field_offset += sizeof(Slice) + 1; for (size_t j = 0; j < num_entries; ++j) { /* @@ -216,9 +212,9 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per mem_ptr += mem_row_bytes; storage_ptr += storage_row_bytes; } - } else if ((*_fields)[i].type == OLAP_FIELD_TYPE_CHAR) { + } else if (column.type() == OLAP_FIELD_TYPE_CHAR) { mem_field_offset += sizeof(Slice) + 1; - size_t storage_field_bytes = (*_fields)[i].index_length; + size_t storage_field_bytes = column.index_length(); for (size_t j = 0; j < num_entries; ++j) { /* * Char is in nullbyte|content with fixed length in OlapIndex @@ -242,7 +238,7 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per storage_ptr += storage_row_bytes; } } else { - size_t storage_field_bytes = (*_fields)[i].index_length; + size_t storage_field_bytes = column.index_length(); mem_field_offset += storage_field_bytes + 1; for (size_t j = 0; j < num_entries; ++j) { memory_copy(mem_ptr + 1 - null_byte, storage_ptr, storage_field_bytes + null_byte); @@ -271,16 +267,16 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t *current_num_rows_per } OLAPStatus MemIndex::init(size_t short_key_len, size_t new_short_key_len, - size_t short_key_num, RowFields* fields) { - if (fields == NULL) { - OLAP_LOG_WARNING("fail to init MemIndex, NULL short key fields."); + size_t short_key_num, std::vector* short_key_columns) { + if (short_key_columns == nullptr) { + LOG(WARNING) << "fail to init MemIndex, NULL short key columns."; return OLAP_ERR_INDEX_LOAD_ERROR; } _key_length = short_key_len; _new_key_length = new_short_key_len; _key_num = short_key_num; - _fields = fields; + _short_key_columns = short_key_columns; return OLAP_SUCCESS; } diff --git a/be/src/olap/olap_index.h b/be/src/olap/olap_index.h index 4140819c4ee509..de191001b0733e 100644 --- a/be/src/olap/olap_index.h +++ b/be/src/olap/olap_index.h @@ -33,21 +33,18 @@ #include "olap/file_helper.h" #include 
"olap/olap_common.h" #include "olap/olap_define.h" -#include "olap/olap_table.h" #include "olap/row_cursor.h" #include "olap/utils.h" namespace doris { class IndexComparator; class SegmentGroup; -class OLAPTable; class RowBlock; class RowCursor; class SegmentComparator; class WrapperField; typedef uint32_t data_file_offset_t; -typedef std::vector RowFields; struct OLAPIndexFixedHeader { OLAPIndexFixedHeader() : data_length(0), num_rows(0) {} @@ -176,7 +173,7 @@ class MemIndex { // 初始化MemIndex, 传入short_key的总长度和对应的Field数组 OLAPStatus init(size_t short_key_len, size_t new_short_key_len, - size_t short_key_num, RowFields* fields); + size_t short_key_num, std::vector* short_key_columns); // 加载一个segment到内存 OLAPStatus load_segment(const char* file, size_t *current_num_rows_per_row_block); @@ -276,8 +273,8 @@ class MemIndex { } // Return short key FieldInfo array - const RowFields& short_key_fields() const { - return *_fields; + const std::vector& short_key_columns() const { + return *_short_key_columns; } // Return the number of indices in MemIndex @@ -331,7 +328,7 @@ class MemIndex { size_t _index_size; size_t _data_size; size_t _num_rows; - RowFields* _fields; + std::vector* _short_key_columns; std::unique_ptr _tracker; std::unique_ptr _mem_pool; diff --git a/be/src/olap/olap_meta.cpp b/be/src/olap/olap_meta.cpp index 033fb84e756aba..28ad378de8b053 100755 --- a/be/src/olap/olap_meta.cpp +++ b/be/src/olap/olap_meta.cpp @@ -55,10 +55,10 @@ OlapMeta::~OlapMeta() { for (auto handle : _handles) { delete handle; } - if (_db != NULL) { + if (_db != nullptr) { _db->Close(); delete _db; - _db= NULL; + _db= nullptr; } } @@ -79,21 +79,21 @@ OLAPStatus OlapMeta::init() { meta_column_family.prefix_extractor.reset(NewFixedPrefixTransform(PREFIX_LENGTH)); column_families.emplace_back(META_COLUMN_FAMILY, meta_column_family); Status s = DB::Open(options, db_path, column_families, &_handles, &_db); - if (!s.ok() || _db == NULL) { + if (!s.ok() || _db == nullptr) { LOG(WARNING) << "rocks db open failed, reason:" << s.ToString(); return OLAP_ERR_META_OPEN_DB; } return OLAP_SUCCESS; } -OLAPStatus OlapMeta::get(const int column_family_index, const std::string& key, std::string& value) { +OLAPStatus OlapMeta::get(const int column_family_index, const std::string& key, std::string* value) { DorisMetrics::meta_read_request_total.increment(1); rocksdb::ColumnFamilyHandle* handle = _handles[column_family_index]; int64_t duration_ns = 0; Status s = Status::OK(); { SCOPED_RAW_TIMER(&duration_ns); - s = _db->Get(ReadOptions(), handle, Slice(key), &value); + s = _db->Get(ReadOptions(), handle, Slice(key), value); } DorisMetrics::meta_read_request_duration_us.increment(duration_ns / 1000); if (s.IsNotFound()) { @@ -182,4 +182,25 @@ std::string OlapMeta::get_root_path() { return _root_path; } +OLAPStatus OlapMeta::get_tablet_convert_finished(bool& flag) { + // get is_header_converted flag + std::string value; + std::string key = TABLET_CONVERT_FINISHED; + OLAPStatus s = get(DEFAULT_COLUMN_FAMILY_INDEX, key, &value); + if (s == OLAP_ERR_META_KEY_NOT_FOUND || value == "false") { + flag = false; + } else if (value == "true") { + flag = true; + } else { + LOG(WARNING) << "invalid _is_header_converted. 
_is_header_converted=" << value; + return OLAP_ERR_HEADER_INVALID_FLAG; + } + return OLAP_SUCCESS; +} + +OLAPStatus OlapMeta::set_tablet_convert_finished() { + OLAPStatus s = put(DEFAULT_COLUMN_FAMILY_INDEX, TABLET_CONVERT_FINISHED, CONVERTED_FLAG); + return s; +} + } diff --git a/be/src/olap/olap_meta.h b/be/src/olap/olap_meta.h index c7f61cf95c7ad8..3574695a6babfc 100755 --- a/be/src/olap/olap_meta.h +++ b/be/src/olap/olap_meta.h @@ -22,7 +22,6 @@ #include #include -#include "olap/olap_header.h" #include "olap/olap_define.h" #include "rocksdb/db.h" @@ -36,7 +35,7 @@ class OlapMeta { OLAPStatus init(); - OLAPStatus get(const int column_family_index, const std::string& key, std::string& value); + OLAPStatus get(const int column_family_index, const std::string& key, std::string* value); OLAPStatus put(const int column_family_index, const std::string& key, const std::string& value); @@ -47,6 +46,10 @@ class OlapMeta { std::string get_root_path(); + OLAPStatus get_tablet_convert_finished(bool& flag); + + OLAPStatus set_tablet_convert_finished(); + private: std::string _root_path; rocksdb::DB* _db; diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 4cd45e1fa88652..b637e1d077d430 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include #include @@ -29,7 +29,7 @@ #include "olap/cumulative_compaction.h" #include "olap/olap_common.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "agent/cgroups_mgr.h" using std::string; @@ -39,59 +39,89 @@ namespace doris { // number of running SCHEMA-CHANGE threads volatile uint32_t g_schema_change_active_threads = 0; -OLAPStatus OLAPEngine::_start_bg_worker() { +OLAPStatus StorageEngine::_start_bg_worker() { + _unused_rowset_monitor_thread = std::thread( + [this] { + _unused_rowset_monitor_thread_callback(nullptr); + }); + _unused_rowset_monitor_thread.detach(); + // start thread for monitoring the snapshot and trash folder _garbage_sweeper_thread = std::thread( [this] { _garbage_sweeper_thread_callback(nullptr); }); - - // start thread for monitoring the table with io error + _garbage_sweeper_thread.detach(); + // start thread for monitoring the tablet with io error _disk_stat_monitor_thread = std::thread( [this] { _disk_stat_monitor_thread_callback(nullptr); }); + _disk_stat_monitor_thread.detach(); - // start thread for monitoring the unused index - _unused_index_thread = std::thread( - [this] { - _unused_index_thread_callback(nullptr); - }); // convert store map to vector - std::vector store_vec; + std::vector data_dirs; for (auto& tmp_store : _store_map) { - store_vec.push_back(tmp_store.second); + data_dirs.push_back(tmp_store.second); } - int32_t store_num = store_vec.size(); + int32_t data_dir_num = data_dirs.size(); // start be and ce threads for merge data - int32_t base_compaction_num_threads = config::base_compaction_num_threads_per_disk * store_num; + int32_t base_compaction_num_threads = config::base_compaction_num_threads_per_disk * data_dir_num; _base_compaction_threads.reserve(base_compaction_num_threads); for (uint32_t i = 0; i < base_compaction_num_threads; ++i) { _base_compaction_threads.emplace_back( - [this, store_num, store_vec, i] { - _base_compaction_thread_callback(nullptr, store_vec[i % store_num]); + [this, data_dir_num, data_dirs, i] { + 
_base_compaction_thread_callback(nullptr, data_dirs[i % data_dir_num]); }); } + for (auto& thread : _base_compaction_threads) { + thread.detach(); + } - int32_t cumulative_compaction_num_threads = config::cumulative_compaction_num_threads_per_disk * store_num; + int32_t cumulative_compaction_num_threads = config::cumulative_compaction_num_threads_per_disk * data_dir_num; _cumulative_compaction_threads.reserve(cumulative_compaction_num_threads); for (uint32_t i = 0; i < cumulative_compaction_num_threads; ++i) { _cumulative_compaction_threads.emplace_back( - [this, store_num, store_vec, i] { - _cumulative_compaction_thread_callback(nullptr, store_vec[i % store_num]); + [this, data_dir_num, data_dirs, i] { + _cumulative_compaction_thread_callback(nullptr, data_dirs[i % data_dir_num]); }); } + for (auto& thread : _cumulative_compaction_threads) { + thread.detach(); + } _fd_cache_clean_thread = std::thread( [this] { _fd_cache_clean_callback(nullptr); }); + _fd_cache_clean_thread.detach(); + + // path scan and gc thread + if (config::path_gc_check) { + for (auto data_dir : get_stores()) { + _path_scan_threads.emplace_back( + [this, data_dir] { + _path_scan_thread_callback((void*)data_dir); + }); + + _path_gc_threads.emplace_back( + [this, data_dir] { + _path_gc_thread_callback((void*)data_dir); + }); + } + for (auto& thread : _path_scan_threads) { + thread.detach(); + } + for (auto& thread : _path_gc_threads) { + thread.detach(); + } + } VLOG(10) << "init finished."; return OLAP_SUCCESS; } -void* OLAPEngine::_fd_cache_clean_callback(void* arg) { +void* StorageEngine::_fd_cache_clean_callback(void* arg) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); #endif @@ -106,10 +136,10 @@ void* OLAPEngine::_fd_cache_clean_callback(void* arg) { start_clean_fd_cache(); } - return NULL; + return nullptr; } -void* OLAPEngine::_base_compaction_thread_callback(void* arg, OlapStore* store) { +void* StorageEngine::_base_compaction_thread_callback(void* arg, DataDir* data_dir) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); #endif @@ -127,15 +157,15 @@ void* OLAPEngine::_base_compaction_thread_callback(void* arg, OlapStore* store) // cgroup is not initialized at this time // add tid to cgroup CgroupsMgr::apply_system_cgroup(); - perform_base_compaction(store); + perform_base_compaction(data_dir); usleep(interval * 1000000); } - return NULL; + return nullptr; } -void* OLAPEngine::_garbage_sweeper_thread_callback(void* arg) { +void* StorageEngine::_garbage_sweeper_thread_callback(void* arg) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); #endif @@ -178,10 +208,10 @@ void* OLAPEngine::_garbage_sweeper_thread_callback(void* arg) { } } - return NULL; + return nullptr; } -void* OLAPEngine::_disk_stat_monitor_thread_callback(void* arg) { +void* StorageEngine::_disk_stat_monitor_thread_callback(void* arg) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); #endif @@ -189,8 +219,8 @@ void* OLAPEngine::_disk_stat_monitor_thread_callback(void* arg) { uint32_t interval = config::disk_stat_monitor_interval; if (interval <= 0) { - OLAP_LOG_WARNING("disk_stat_monitor_interval config is illegal: [%d], " - "force set to 1", interval); + LOG(WARNING) << "disk_stat_monitor_interval config is illegal: " << interval + << ", force set to 1"; interval = 1; } @@ -199,52 +229,52 @@ void* OLAPEngine::_disk_stat_monitor_thread_callback(void* arg) { sleep(interval); } - return NULL; + return nullptr; } -void* OLAPEngine::_unused_index_thread_callback(void* arg) { +void* StorageEngine::_cumulative_compaction_thread_callback(void* arg, 
DataDir* data_dir) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); #endif - - uint32_t interval = config::unused_index_monitor_interval; - + LOG(INFO) << "try to start cumulative compaction process!"; + uint32_t interval = config::cumulative_compaction_check_interval_seconds; if (interval <= 0) { - OLAP_LOG_WARNING("unused_index_monitor_interval config is illegal: [%d], " - "force set to 1", interval); + LOG(WARNING) << "cumulative compaction check interval config is illegal:" << interval + << "will be forced set to one"; interval = 1; } while (true) { - start_delete_unused_index(); - sleep(interval); + // must be here, because this thread is start on start and + // cgroup is not initialized at this time + // add tid to cgroup + CgroupsMgr::apply_system_cgroup(); + perform_cumulative_compaction(data_dir); + usleep(interval * 1000000); } - return NULL; + return nullptr; } -void* OLAPEngine::_cumulative_compaction_thread_callback(void* arg, OlapStore* store) { +void* StorageEngine::_unused_rowset_monitor_thread_callback(void* arg) { #ifdef GOOGLE_PROFILER ProfilerRegisterThread(); #endif - LOG(INFO) << "try to start cumulative compaction process!"; - uint32_t interval = config::cumulative_compaction_check_interval_seconds; + + uint32_t interval = config::unused_rowset_monitor_interval; + if (interval <= 0) { - LOG(WARNING) << "cumulative compaction check interval config is illegal:" << interval - << "will be forced set to one"; + LOG(WARNING) << "unused_rowset_monitor_interval config is illegal: " << interval + << ", force set to 1"; interval = 1; } while (true) { - // must be here, because this thread is start on start and - // cgroup is not initialized at this time - // add tid to cgroup - CgroupsMgr::apply_system_cgroup(); - perform_cumulative_compaction(store); - usleep(interval * 1000000); + start_delete_unused_rowset(); + sleep(interval); } - return NULL; + return nullptr; } } // namespace doris diff --git a/be/src/olap/olap_snapshot.cpp b/be/src/olap/olap_snapshot.cpp deleted file mode 100644 index 9821a82ad04c56..00000000000000 --- a/be/src/olap/olap_snapshot.cpp +++ /dev/null @@ -1,827 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
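The snapshot code deleted below builds a snapshot by hard-linking the tablet's immutable index and data files into a per-snapshot directory instead of copying them. A minimal sketch of that single-file step, with illustrative names (the real code goes through _create_hard_link and the construct_*_file_path helpers):

```cpp
// Sketch only: hard-linking one immutable file into a snapshot directory.
// link() is O(1) and shares the underlying blocks with the source file.
#include <cerrno>
#include <cstdio>
#include <string>
#include <unistd.h>  // link()

bool snapshot_one_file(const std::string& src_path,
                       const std::string& snapshot_dir) {
    const std::string::size_type pos = src_path.find_last_of('/');
    const std::string base =
            (pos == std::string::npos) ? src_path : src_path.substr(pos + 1);
    const std::string dst_path = snapshot_dir + "/" + base;
    if (::link(src_path.c_str(), dst_path.c_str()) != 0) {
        std::fprintf(stderr, "fail to link %s -> %s, errno=%d\n",
                     src_path.c_str(), dst_path.c_str(), errno);
        return false;
    }
    return true;
}
```

Hard links only work within a single filesystem, which is one reason the snapshot directory is created under the same storage root as the tablet (see _calc_snapshot_id_path below) and why release_snapshot only accepts paths under a store's snapshot prefix.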
- -#include "olap/olap_engine.h" - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include "common/status.h" -#include "olap/field.h" -#include "olap/olap_common.h" -#include "olap/column_data.h" -#include "olap/olap_define.h" -#include "olap/olap_table.h" -#include "olap/olap_header_manager.h" -#include "olap/push_handler.h" -#include "olap/store.h" -#include "util/file_utils.h" -#include "util/doris_metrics.h" - -using boost::filesystem::canonical; -using boost::filesystem::copy_file; -using boost::filesystem::copy_option; -using boost::filesystem::path; -using std::map; -using std::nothrow; -using std::set; -using std::string; -using std::stringstream; -using std::vector; -using std::list; - -namespace doris { - -OLAPStatus OLAPEngine::make_snapshot( - const TSnapshotRequest& request, - string* snapshot_path) { - OLAPStatus res = OLAP_SUCCESS; - if (snapshot_path == nullptr) { - OLAP_LOG_WARNING("output parameter cannot be NULL"); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - OLAPTablePtr ref_olap_table = get_table(request.tablet_id, request.schema_hash); - if (ref_olap_table.get() == NULL) { - OLAP_LOG_WARNING("failed to get olap table. [table=%ld schema_hash=%d]", - request.tablet_id, request.schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - if (request.__isset.missing_version) { - res = _create_incremental_snapshot_files(ref_olap_table, request, snapshot_path); - // if all nodes has been upgraded, it can be removed - (const_cast(request)).__set_allow_incremental_clone(true); - } else { - res = _create_snapshot_files(ref_olap_table, request, snapshot_path); - } - - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to make snapshot. [res=%d table=%ld schema_hash=%d]", - res, request.tablet_id, request.schema_hash); - return res; - } - - VLOG(3) << "success to make snapshot. [path='" << snapshot_path << "']"; - return res; -} - -OLAPStatus OLAPEngine::release_snapshot(const string& snapshot_path) { - // 如果请求的snapshot_path位于root/snapshot文件夹下,则认为是合法的,可以删除 - // 否则认为是非法请求,返回错误结果 - auto stores = get_stores(); - for (auto store : stores) { - path boost_root_path(store->path()); - string abs_path = canonical(boost_root_path).string(); - - if (snapshot_path.compare(0, abs_path.size(), abs_path) == 0 - && snapshot_path.compare(abs_path.size(), - SNAPSHOT_PREFIX.size(), SNAPSHOT_PREFIX) == 0) { - remove_all_dir(snapshot_path); - VLOG(3) << "success to release snapshot path. [path='" << snapshot_path << "']"; - - return OLAP_SUCCESS; - } - } - - LOG(WARNING) << "released snapshot path illegal. [path='" << snapshot_path << "']"; - return OLAP_ERR_CE_CMD_PARAMS_ERROR; -} - -OLAPStatus OLAPEngine::_calc_snapshot_id_path( - const OLAPTablePtr& olap_table, - string* out_path) { - OLAPStatus res = OLAP_SUCCESS; - if (out_path == nullptr) { - OLAP_LOG_WARNING("output parameter cannot be NULL"); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - // get current timestamp string - string time_str; - if ((res = gen_timestamp_string(&time_str)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to generate time_string when move file to trash." - "[err code=%d]", res); - return res; - } - - stringstream snapshot_id_path_stream; - MutexLock auto_lock(&_snapshot_mutex); // will automatically unlock when function return. - snapshot_id_path_stream << olap_table->storage_root_path_name() << SNAPSHOT_PREFIX - << "/" << time_str << "." 
<< _snapshot_base_id++; - *out_path = snapshot_id_path_stream.str(); - return res; -} - -string OLAPEngine::_get_schema_hash_full_path( - const OLAPTablePtr& ref_olap_table, - const string& location) const { - stringstream schema_full_path_stream; - schema_full_path_stream << location - << "/" << ref_olap_table->tablet_id() - << "/" << ref_olap_table->schema_hash(); - string schema_full_path = schema_full_path_stream.str(); - - return schema_full_path; -} - -string OLAPEngine::_get_header_full_path( - const OLAPTablePtr& ref_olap_table, - const std::string& schema_hash_path) const { - stringstream header_name_stream; - header_name_stream << schema_hash_path << "/" << ref_olap_table->tablet_id() << ".hdr"; - return header_name_stream.str(); -} - -void OLAPEngine::_update_header_file_info( - const vector& shortest_versions, - OLAPHeader* header) { - // clear schema_change_status - header->clear_schema_change_status(); - // remove all old version and add new version - header->delete_all_versions(); - - for (const VersionEntity& entity : shortest_versions) { - Version version = entity.version; - VersionHash v_hash = entity.version_hash; - for (SegmentGroupEntity segment_group_entity : entity.segment_group_vec) { - int32_t segment_group_id = segment_group_entity.segment_group_id; - const std::vector* column_statistics = nullptr; - if (!segment_group_entity.key_ranges.empty()) { - column_statistics = &(segment_group_entity.key_ranges); - } - header->add_version(version, v_hash, segment_group_id, segment_group_entity.num_segments, - segment_group_entity.index_size, segment_group_entity.data_size, - segment_group_entity.num_rows, segment_group_entity.empty, column_statistics); - } - } -} - -OLAPStatus OLAPEngine::_link_index_and_data_files( - const string& schema_hash_path, - const OLAPTablePtr& ref_olap_table, - const vector& version_entity_vec) { - OLAPStatus res = OLAP_SUCCESS; - - std::stringstream prefix_stream; - prefix_stream << schema_hash_path << "/" << ref_olap_table->tablet_id(); - std::string tablet_path_prefix = prefix_stream.str(); - for (const VersionEntity& entity : version_entity_vec) { - Version version = entity.version; - VersionHash v_hash = entity.version_hash; - for (SegmentGroupEntity segment_group_entity : entity.segment_group_vec) { - int32_t segment_group_id = segment_group_entity.segment_group_id; - for (int seg_id = 0; seg_id < segment_group_entity.num_segments; ++seg_id) { - std::string index_path = - _construct_index_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); - std::string ref_table_index_path = - ref_olap_table->construct_index_file_path(version, v_hash, segment_group_id, seg_id); - res = _create_hard_link(ref_table_index_path, index_path); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to create hard link. " - << " schema_hash_path=" << schema_hash_path - << " from_path=" << ref_table_index_path - << " to_path=" << index_path; - return res; - } - - std:: string data_path = - _construct_data_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); - std::string ref_table_data_path = - ref_olap_table->construct_data_file_path(version, v_hash, segment_group_id, seg_id); - res = _create_hard_link(ref_table_data_path, data_path); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to create hard link." 
- << "tablet_path_prefix=" << tablet_path_prefix - << ", from_path=" << ref_table_data_path << ", to_path=" << data_path; - return res; - } - } - } - } - - return res; -} - -OLAPStatus OLAPEngine::_copy_index_and_data_files( - const string& schema_hash_path, - const OLAPTablePtr& ref_olap_table, - vector& version_entity_vec) { - std::stringstream prefix_stream; - prefix_stream << schema_hash_path << "/" << ref_olap_table->tablet_id(); - std::string tablet_path_prefix = prefix_stream.str(); - for (VersionEntity& entity : version_entity_vec) { - Version version = entity.version; - VersionHash v_hash = entity.version_hash; - for (SegmentGroupEntity segment_group_entity : entity.segment_group_vec) { - int32_t segment_group_id = segment_group_entity.segment_group_id; - for (int seg_id = 0; seg_id < segment_group_entity.num_segments; ++seg_id) { - string index_path = - _construct_index_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); - string ref_table_index_path = ref_olap_table->construct_index_file_path( - version, v_hash, segment_group_id, seg_id); - Status res = FileUtils::copy_file(ref_table_index_path, index_path); - if (!res.ok()) { - LOG(WARNING) << "fail to copy index file." - << "dest=" << index_path - << ", src=" << ref_table_index_path; - return OLAP_ERR_COPY_FILE_ERROR; - } - - string data_path = - _construct_data_file_path(tablet_path_prefix, version, v_hash, segment_group_id, seg_id); - string ref_table_data_path = ref_olap_table->construct_data_file_path( - version, v_hash, segment_group_id, seg_id); - res = FileUtils::copy_file(ref_table_data_path, data_path); - if (!res.ok()) { - LOG(WARNING) << "fail to copy data file." - << "dest=" << index_path - << ", src=" << ref_table_index_path; - return OLAP_ERR_COPY_FILE_ERROR; - } - } - } - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPEngine::_create_snapshot_files( - const OLAPTablePtr& ref_olap_table, - const TSnapshotRequest& request, - string* snapshot_path) { - OLAPStatus res = OLAP_SUCCESS; - if (snapshot_path == nullptr) { - OLAP_LOG_WARNING("output parameter cannot be NULL"); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - string snapshot_id_path; - res = _calc_snapshot_id_path(ref_olap_table, &snapshot_id_path); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to calc snapshot_id_path, [ref table=%s]", - ref_olap_table->storage_root_path_name().c_str()); - return res; - } - - string schema_full_path = _get_schema_hash_full_path( - ref_olap_table, snapshot_id_path); - string header_path = _get_header_full_path(ref_olap_table, schema_full_path); - if (check_dir_existed(schema_full_path)) { - VLOG(10) << "remove the old schema_full_path."; - remove_all_dir(schema_full_path); - } - create_dirs(schema_full_path); - - path boost_path(snapshot_id_path); - string snapshot_id = canonical(boost_path).string(); - - bool header_locked = false; - ref_olap_table->obtain_header_rdlock(); - header_locked = true; - - vector olap_data_sources; - OLAPHeader* new_olap_header = nullptr; - do { - // get latest version - const PDelta* lastest_version = NULL; - lastest_version = ref_olap_table->lastest_version(); - if (lastest_version == NULL) { - OLAP_LOG_WARNING("table has not any version. 
[path='%s']", - ref_olap_table->full_name().c_str()); - res = OLAP_ERR_VERSION_NOT_EXIST; - break; - } - - // get snapshot version, use request.version if specified - int32_t version = lastest_version->end_version(); - if (request.__isset.version) { - if (lastest_version->end_version() < request.version - || (lastest_version->start_version() == lastest_version->end_version() - && lastest_version->end_version() == request.version - && lastest_version->version_hash() != request.version_hash)) { - OLAP_LOG_WARNING("invalid make snapshot request. " - "[version=%d version_hash=%ld req_version=%d req_version_hash=%ld]", - lastest_version->end_version(), lastest_version->version_hash(), - request.version, request.version_hash); - res = OLAP_ERR_INPUT_PARAMETER_ERROR; - break; - } - - version = request.version; - } - - // get shortest version path - vector shortest_path; - vector shortest_versions; - res = ref_olap_table->select_versions_to_span(Version(0, version), &shortest_path); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to select versions to span. [res=%d]", res); - break; - } - - for (const Version& version : shortest_path) { - shortest_versions.push_back(ref_olap_table->get_version_entity_by_version(version)); - } - - // get data source and add reference count for prevent to delete data files - ref_olap_table->acquire_data_sources_by_versions(shortest_path, &olap_data_sources); - if (olap_data_sources.size() == 0) { - OLAP_LOG_WARNING("failed to acquire data sources. [table='%s', version=%d]", - ref_olap_table->full_name().c_str(), version); - res = OLAP_ERR_OTHER_ERROR; - break; - } - - // load table header, in order to remove versions that not in shortest version path - OlapStore* store = ref_olap_table->store(); - new_olap_header = new(nothrow) OLAPHeader(); - if (new_olap_header == NULL) { - OLAP_LOG_WARNING("fail to malloc OLAPHeader."); - res = OLAP_ERR_MALLOC_ERROR; - break; - } - - res = OlapHeaderManager::get_header(store, ref_olap_table->tablet_id(), ref_olap_table->schema_hash(), new_olap_header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load header. res=" << res - << "tablet_id=" << ref_olap_table->tablet_id() << ", schema_hash=" << ref_olap_table->schema_hash(); - break; - } - - ref_olap_table->release_header_lock(); - header_locked = false; - _update_header_file_info(shortest_versions, new_olap_header); - - // save new header to snapshot header path - res = new_olap_header->save(header_path); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save header. [res=%d tablet_id=%ld, schema_hash=%d, headerpath=%s]", - res, ref_olap_table->tablet_id(), ref_olap_table->schema_hash(), header_path.c_str()); - break; - } - - res = _link_index_and_data_files(schema_full_path, ref_olap_table, shortest_versions); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to create hard link. [path=" << snapshot_id_path << "]"; - break; - } - - // append a single delta if request.version is end_version of cumulative delta - if (request.__isset.version) { - for (const VersionEntity& entity : shortest_versions) { - if (entity.version.second == request.version) { - if (entity.version.first != request.version) { - res = _append_single_delta(request, store); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to append single delta. 
[res=%d]", res); - } - } - break; - } - } - } - } while (0); - - SAFE_DELETE(new_olap_header); - - if (header_locked) { - VLOG(10) << "release header lock."; - ref_olap_table->release_header_lock(); - } - - if (ref_olap_table.get() != NULL) { - VLOG(10) << "release data sources."; - ref_olap_table->release_data_sources(&olap_data_sources); - } - - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to make snapshot, try to delete the snapshot path. [path=%s]", - snapshot_id_path.c_str()); - - if (check_dir_existed(snapshot_id_path)) { - VLOG(3) << "remove snapshot path. [path=" << snapshot_id_path << "]"; - remove_all_dir(snapshot_id_path); - } - } else { - *snapshot_path = snapshot_id; - } - - return res; -} - -OLAPStatus OLAPEngine::_create_incremental_snapshot_files( - const OLAPTablePtr& ref_olap_table, - const TSnapshotRequest& request, - string* snapshot_path) { - LOG(INFO) << "begin to create incremental snapshot files." - << "tablet=" << request.tablet_id - << ", schema_hash=" << request.schema_hash; - OLAPStatus res = OLAP_SUCCESS; - - if (snapshot_path == nullptr) { - OLAP_LOG_WARNING("output parameter cannot be NULL"); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - string snapshot_id_path; - res = _calc_snapshot_id_path(ref_olap_table, &snapshot_id_path); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to calc snapshot_id_path, [ref table=%s]", - ref_olap_table->storage_root_path_name().c_str()); - return res; - } - - string schema_full_path = _get_schema_hash_full_path(ref_olap_table, snapshot_id_path); - if (check_dir_existed(schema_full_path)) { - VLOG(10) << "remove the old schema_full_path."; - remove_all_dir(schema_full_path); - } - create_dirs(schema_full_path); - - path boost_path(snapshot_id_path); - string snapshot_id = canonical(boost_path).string(); - - ref_olap_table->obtain_header_rdlock(); - - do { - // save header to snapshot path - OLAPHeader olap_header; - res = OlapHeaderManager::get_header(ref_olap_table->store(), - ref_olap_table->tablet_id(), ref_olap_table->schema_hash(), &olap_header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load header. res=" << res << "tablet_id=" - << ref_olap_table->tablet_id() << ", schema_hash=" << ref_olap_table->schema_hash(); - break; - } - string header_path = _get_header_full_path(ref_olap_table, schema_full_path); - res = olap_header.save(header_path); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to save header to path:" << header_path; - remove_dir(header_path); - break; - } - - for (int64_t missing_version : request.missing_version) { - - // find missing version - const PDelta* incremental_delta = - ref_olap_table->get_incremental_delta(Version(missing_version, missing_version)); - if (incremental_delta != nullptr) { - VLOG(3) << "success to find missing version when snapshot, " - << "begin to link files. 
tablet_id=" << request.tablet_id - << ", schema_hash=" << request.schema_hash - << ", version=" << missing_version; - // link files - for (uint32_t i = 0; i < incremental_delta->segment_group(0).num_segments(); i++) { - int32_t segment_group_id = incremental_delta->segment_group(0).segment_group_id(); - string from = ref_olap_table->construct_incremental_index_file_path( - Version(missing_version, missing_version), - incremental_delta->version_hash(), segment_group_id, i); - string to = schema_full_path + '/' + basename(from.c_str()); - if ((res = _create_hard_link(from, to)) != OLAP_SUCCESS) { - break; - } - - from = ref_olap_table->construct_incremental_data_file_path( - Version(missing_version, missing_version), - incremental_delta->version_hash(), segment_group_id, i); - to = schema_full_path + '/' + basename(from.c_str()); - if ((res = _create_hard_link(from, to)) != OLAP_SUCCESS) { - break; - } - } - - if (res != OLAP_SUCCESS) { - break; - } - - } else { - OLAP_LOG_WARNING("failed to find missing version when snapshot. " - "[table=%ld schema_hash=%d version=%ld]", - request.tablet_id, request.schema_hash, missing_version); - res = OLAP_ERR_VERSION_NOT_EXIST; - break; - } - } - - } while (0); - - ref_olap_table->release_header_lock(); - - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to make incremental snapshot, try to delete the snapshot path. " - "[path=%s]", snapshot_id_path.c_str()); - - if (check_dir_existed(snapshot_id_path)) { - VLOG(3) << "remove snapshot path. [path=" << snapshot_id_path << "]"; - remove_all_dir(snapshot_id_path); - } - } else { - *snapshot_path = snapshot_id; - } - - return res; -} - -OLAPStatus OLAPEngine::_append_single_delta( - const TSnapshotRequest& request, OlapStore* store) { - OLAPStatus res = OLAP_SUCCESS; - string root_path = store->path(); - OLAPHeader* new_olap_header = new(nothrow) OLAPHeader(); - if (new_olap_header == NULL) { - OLAP_LOG_WARNING("fail to malloc OLAPHeader."); - return OLAP_ERR_MALLOC_ERROR; - } - - res = OlapHeaderManager::get_header(store, request.tablet_id, request.schema_hash, new_olap_header); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create tablet from header file. [tablet_id=%ld, schema_hash=%d]", - request.tablet_id, request.schema_hash); - return res; - } - auto tablet = OLAPTable::create_from_header(new_olap_header, store); - if (tablet == NULL) { - OLAP_LOG_WARNING("fail to load tablet. [res=%d tablet_id='%ld, schema_hash=%d']", - res, request.tablet_id, request.schema_hash); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - res = tablet->load(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load tablet. [res=" << res << " header_path=" << store->path(); - return res; - } - - const PDelta* lastest_version = tablet->lastest_version(); - if (lastest_version->start_version() != request.version) { - TPushReq empty_push; - empty_push.tablet_id = request.tablet_id; - empty_push.schema_hash = request.schema_hash; - empty_push.version = request.version + 1; - empty_push.version_hash = 0; - - PushHandler handler; - res = handler.process(tablet, empty_push, PUSH_NORMAL, NULL); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to push empty version. 
[res=%d version=%d]", - res, empty_push.version); - return res; - } - } - - return res; -} - -string OLAPEngine::_construct_index_file_path( - const string& tablet_path_prefix, - const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const { - return OLAPTable::construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "idx"); -} - -string OLAPEngine::_construct_data_file_path( - const string& tablet_path_prefix, - const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const { - return OLAPTable::construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "dat"); -} - -OLAPStatus OLAPEngine::_create_hard_link(const string& from_path, const string& to_path) { - if (link(from_path.c_str(), to_path.c_str()) == 0) { - VLOG(10) << "success to create hard link from_path=" << from_path - << ", to_path=" << to_path; - return OLAP_SUCCESS; - } else { - OLAP_LOG_WARNING("failed to create hard link from path=%s to path=%s errno=%d", - from_path.c_str(), to_path.c_str(), errno); - return OLAP_ERR_OTHER_ERROR; - } -} - -OLAPStatus OLAPEngine::storage_medium_migrate( - TTabletId tablet_id, TSchemaHash schema_hash, - TStorageMedium::type storage_medium) { - LOG(INFO) << "begin to process storage media migrate. " - << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash - << ", dest_storage_medium=" << storage_medium; - DorisMetrics::storage_migrate_requests_total.increment(1); - - OLAPStatus res = OLAP_SUCCESS; - OLAPTablePtr tablet = get_table(tablet_id, schema_hash); - if (tablet.get() == NULL) { - OLAP_LOG_WARNING("can't find olap table. [tablet_id=%ld schema_hash=%d]", - tablet_id, schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - // judge case when no need to migrate - uint32_t count = available_storage_medium_type_count(); - if (count <= 1) { - LOG(INFO) << "available storage medium type count is less than 1, " - << "no need to migrate. count=" << count; - return OLAP_SUCCESS; - } - - TStorageMedium::type src_storage_medium = tablet->store()->storage_medium(); - if (src_storage_medium == storage_medium) { - LOG(INFO) << "tablet is already on specified storage medium. " - << "storage_medium=" << storage_medium; - return OLAP_SUCCESS; - } - - vector olap_data_sources; - OLAPStatus lock_status = tablet->try_migration_wrlock(); - if (lock_status != OLAP_SUCCESS) { - return lock_status; - } - tablet->obtain_push_lock(); - - do { - // get all versions to be migrate - tablet->obtain_header_rdlock(); - if (tablet->has_pending_data()) { - tablet->release_header_lock(); - res = OLAP_ERR_HEADER_HAS_PENDING_DATA; - OLAP_LOG_WARNING("could not migration because has pending data [tablet='%s' ]", - tablet->full_name().c_str()); - break; - } - const PDelta* lastest_version = tablet->lastest_version(); - if (lastest_version == NULL) { - tablet->release_header_lock(); - res = OLAP_ERR_VERSION_NOT_EXIST; - OLAP_LOG_WARNING("tablet has not any version."); - break; - } - - int32_t end_version = lastest_version->end_version(); - tablet->acquire_data_sources(Version(0, end_version), &olap_data_sources); - if (olap_data_sources.size() == 0) { - tablet->release_header_lock(); - res = OLAP_ERR_VERSION_NOT_EXIST; - OLAP_LOG_WARNING("fail to acquire data souces. 
[tablet='%s' version=%d]", - tablet->full_name().c_str(), end_version); - break; - } - - vector version_entity_vec; - tablet->list_version_entities(&version_entity_vec); - tablet->release_header_lock(); - - // generate schema hash path where files will be migrated - auto stores = get_stores_for_create_table(storage_medium); - if (stores.empty()) { - res = OLAP_ERR_INVALID_ROOT_PATH; - OLAP_LOG_WARNING("fail to get root path for create tablet."); - break; - } - - uint64_t shard = 0; - res = stores[0]->get_shard(&shard); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to get root path shard. [res=%d]", res); - break; - } - - stringstream root_path_stream; - root_path_stream << stores[0]->path() << DATA_PREFIX << "/" << shard; - string schema_hash_path = _get_schema_hash_full_path(tablet, root_path_stream.str()); - if (check_dir_existed(schema_hash_path)) { - VLOG(3) << "schema hash path already exist, remove it. " - << "schema_hash_path=" << schema_hash_path; - remove_all_dir(schema_hash_path); - } - - OLAPHeader* new_olap_header = new(std::nothrow) OLAPHeader(); - res = OlapHeaderManager::get_header(stores[0], tablet->tablet_id(), tablet->schema_hash(), new_olap_header); - if (res != OLAP_ERR_META_KEY_NOT_FOUND) { - LOG(WARNING) << "olap_header already exists. " - << "data_dir:" << stores[0]->path() - << "tablet:" << tablet->full_name(); - delete new_olap_header; - return OLAP_ERR_META_ALREADY_EXIST; - } - - create_dirs(schema_hash_path); - - // migrate all index and data files but header file - res = _copy_index_and_data_files(schema_hash_path, tablet, version_entity_vec); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to copy index and data files when migrate. [res=%d]", res); - delete new_olap_header; - break; - } - - res = _generate_new_header(stores[0], shard, tablet, version_entity_vec, new_olap_header); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to generate new header file from the old. [res=%d]", res); - delete new_olap_header; - break; - } - - // load the new tablet into OLAPEngine - auto olap_table = OLAPTable::create_from_header(new_olap_header, stores[0]); - if (olap_table == NULL) { - OLAP_LOG_WARNING("failed to create from header"); - res = OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR; - break; - } - res = add_table(tablet_id, schema_hash, olap_table); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add tablet to OLAPEngine. [res=%d]", res); - break; - } - - // if old table finished schema change, then the schema change status of the new table is DONE - // else the schema change status of the new table is FAILED - OLAPTablePtr new_tablet = get_table(tablet_id, schema_hash); - if (new_tablet.get() == NULL) { - OLAP_LOG_WARNING("get null olap table. 
[tablet_id=%ld schema_hash=%d]", - tablet_id, schema_hash); - res = OLAP_ERR_TABLE_NOT_FOUND; - break; - } - SchemaChangeStatus tablet_status = tablet->schema_change_status(); - if (tablet->schema_change_status().status == AlterTableStatus::ALTER_TABLE_FINISHED) { - new_tablet->set_schema_change_status(tablet_status.status, - tablet_status.schema_hash, - tablet_status.version); - } else { - new_tablet->set_schema_change_status(AlterTableStatus::ALTER_TABLE_FAILED, - tablet_status.schema_hash, - tablet_status.version); - } - } while (0); - - tablet->release_push_lock(); - tablet->release_data_sources(&olap_data_sources); - tablet->release_migration_lock(); - return res; -} - -OLAPStatus OLAPEngine::_generate_new_header( - OlapStore* store, - const uint64_t new_shard, - const OLAPTablePtr& tablet, - const vector& version_entity_vec, OLAPHeader* new_olap_header) { - if (store == nullptr) { - LOG(WARNING) << "fail to generate new header for store is null"; - return OLAP_ERR_HEADER_INIT_FAILED; - } - OLAPStatus res = OLAP_SUCCESS; - - OlapStore* ref_store = - OLAPEngine::get_instance()->get_store(tablet->storage_root_path_name()); - res = OlapHeaderManager::get_header(ref_store, tablet->tablet_id(), tablet->schema_hash(), new_olap_header); - if (res == OLAP_ERR_META_KEY_NOT_FOUND) { - LOG(WARNING) << "olap_header has already been dropped. " - << "data_dir:" << ref_store->path() - << "tablet:" << tablet->full_name(); - return res; - } - _update_header_file_info(version_entity_vec, new_olap_header); - new_olap_header->set_shard(new_shard); - - res = OlapHeaderManager::save(store, tablet->tablet_id(), tablet->schema_hash(), new_olap_header); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save olap header to new db. [res=%d]", res); - return res; - } - - // delete old header - // TODO: make sure atomic update - OlapHeaderManager::remove(ref_store, tablet->tablet_id(), tablet->schema_hash()); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to delete olap header to old db. res=" << res; - } - return res; -} - -} // namespace doris diff --git a/be/src/olap/olap_snapshot_converter.cpp b/be/src/olap/olap_snapshot_converter.cpp new file mode 100755 index 00000000000000..eb68c0bd89d32e --- /dev/null +++ b/be/src/olap/olap_snapshot_converter.cpp @@ -0,0 +1,532 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/olap_snapshot_converter.h" + +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/rowset_id_generator.h" + +namespace doris { + +OLAPStatus OlapSnapshotConverter::to_olap_header(const TabletMetaPB& tablet_meta_pb, OLAPHeaderMessage* olap_header) { + if (!tablet_meta_pb.schema().has_num_rows_per_row_block()) { + LOG(FATAL) << "tablet schema does not have num_rows_per_row_block." 
+ << " tablet id = " << tablet_meta_pb.tablet_id(); + } + olap_header->set_num_rows_per_data_block(tablet_meta_pb.schema().num_rows_per_row_block()); + if (!tablet_meta_pb.has_cumulative_layer_point()) { + LOG(FATAL) << "tablet schema does not have cumulative_layer_point." + << " tablet id = " << tablet_meta_pb.tablet_id(); + } + olap_header->set_cumulative_layer_point(tablet_meta_pb.cumulative_layer_point()); + if (!tablet_meta_pb.schema().has_num_short_key_columns()) { + LOG(FATAL) << "tablet schema does not have num_short_key_columns." + << " tablet id = " << tablet_meta_pb.tablet_id(); + } + olap_header->set_num_short_key_fields(tablet_meta_pb.schema().num_short_key_columns()); + + for (auto& column : tablet_meta_pb.schema().column()) { + ColumnMessage* column_msg = olap_header->add_column(); + to_column_msg(column, column_msg); + } + + if (!tablet_meta_pb.has_creation_time()) { + LOG(FATAL) << "tablet schema does not have creation_time." + << " tablet id = " << tablet_meta_pb.tablet_id(); + } + olap_header->set_creation_time(tablet_meta_pb.creation_time()); + olap_header->set_data_file_type(DataFileType::COLUMN_ORIENTED_FILE); + if (tablet_meta_pb.schema().has_next_column_unique_id()) { + olap_header->set_next_column_unique_id(tablet_meta_pb.schema().next_column_unique_id()); + } + if (tablet_meta_pb.schema().has_compress_kind()) { + olap_header->set_compress_kind(tablet_meta_pb.schema().compress_kind()); + } + if (tablet_meta_pb.schema().has_bf_fpp()) { + olap_header->set_bf_fpp(tablet_meta_pb.schema().bf_fpp()); + } + if (tablet_meta_pb.schema().has_keys_type()) { + olap_header->set_keys_type(tablet_meta_pb.schema().keys_type()); + } + + for (auto& rs_meta : tablet_meta_pb.rs_metas()) { + PDelta* pdelta = olap_header->add_delta(); + convert_to_pdelta(rs_meta, pdelta); + } + // not add pending delta, it is usedless in clone or backup restore + for (auto& inc_rs_meta : tablet_meta_pb.inc_rs_metas()) { + PDelta* pdelta = olap_header->add_incremental_delta(); + convert_to_pdelta(inc_rs_meta, pdelta); + } + if (tablet_meta_pb.has_in_restore_mode()) { + olap_header->set_in_restore_mode(tablet_meta_pb.in_restore_mode()); + } + if (tablet_meta_pb.has_tablet_id()) { + olap_header->set_tablet_id(tablet_meta_pb.tablet_id()); + } + if (tablet_meta_pb.has_schema_hash()) { + olap_header->set_schema_hash(tablet_meta_pb.schema_hash()); + } + if (tablet_meta_pb.has_shard_id()) { + olap_header->set_shard(tablet_meta_pb.shard_id()); + } + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::to_tablet_meta_pb(const OLAPHeaderMessage& olap_header, + TabletMetaPB* tablet_meta_pb, vector* pending_rowsets) { + if (olap_header.has_tablet_id()) { + tablet_meta_pb->set_tablet_id(olap_header.tablet_id()); + } + if (olap_header.has_schema_hash()) { + tablet_meta_pb->set_schema_hash(olap_header.schema_hash()); + } + if (olap_header.has_shard()) { + tablet_meta_pb->set_shard_id(olap_header.shard()); + } + tablet_meta_pb->set_creation_time(olap_header.creation_time()); + tablet_meta_pb->set_cumulative_layer_point(olap_header.cumulative_layer_point()); + + TabletSchemaPB* schema = tablet_meta_pb->mutable_schema(); + for (auto& column_msg : olap_header.column()) { + ColumnPB* column_pb = schema->add_column(); + to_column_pb(column_msg, column_pb); + } + if (olap_header.has_keys_type()) { + schema->set_keys_type(olap_header.keys_type()); + } + schema->set_num_short_key_columns(olap_header.num_short_key_fields()); + schema->set_num_rows_per_row_block(olap_header.num_rows_per_data_block()); + 
schema->set_compress_kind(olap_header.compress_kind()); + if (olap_header.has_bf_fpp()) { + schema->set_bf_fpp(olap_header.bf_fpp()); + } + if (olap_header.has_next_column_unique_id()) { + schema->set_next_column_unique_id(olap_header.next_column_unique_id()); + } + + RowsetId next_id = 10000; + std::unordered_map _rs_version_map; + for (auto& delta : olap_header.delta()) { + RowsetMetaPB* rowset_meta = tablet_meta_pb->add_rs_metas(); + convert_to_rowset_meta(delta, ++next_id, olap_header.tablet_id(), olap_header.schema_hash(), rowset_meta); + Version rowset_version = { delta.start_version(), delta.end_version() }; + _rs_version_map[rowset_version] = rowset_meta; + } + + for (auto& inc_delta : olap_header.incremental_delta()) { + // check if inc delta already exist in delta + Version rowset_version = { inc_delta.start_version(), inc_delta.end_version() }; + auto exist_rs = _rs_version_map.find(rowset_version); + if (exist_rs != _rs_version_map.end()) { + RowsetMetaPB* rowset_meta = tablet_meta_pb->add_inc_rs_metas(); + *rowset_meta = *(exist_rs->second); + continue; + } + RowsetMetaPB* rowset_meta = tablet_meta_pb->add_inc_rs_metas(); + convert_to_rowset_meta(inc_delta, ++next_id, olap_header.tablet_id(), olap_header.schema_hash(), rowset_meta); + } + + for (auto& pending_delta : olap_header.pending_delta()) { + RowsetMetaPB rowset_meta; + convert_to_rowset_meta(pending_delta, ++next_id, olap_header.tablet_id(), olap_header.schema_hash(), &rowset_meta); + pending_rowsets->emplace_back(std::move(rowset_meta)); + } + if (olap_header.has_schema_change_status()) { + AlterTabletPB* alter_tablet_pb = tablet_meta_pb->mutable_alter_task(); + to_alter_tablet_pb(olap_header.schema_change_status(), alter_tablet_pb); + } + if (olap_header.has_in_restore_mode()) { + tablet_meta_pb->set_in_restore_mode(olap_header.in_restore_mode()); + } + tablet_meta_pb->set_tablet_state(TabletStatePB::PB_RUNNING); + *(tablet_meta_pb->mutable_tablet_uid()) = TabletUid().to_proto(); + tablet_meta_pb->set_end_rowset_id(++next_id); + VLOG(3) << "convert tablet meta tablet id = " << olap_header.tablet_id() + << " schema hash = " << olap_header.schema_hash() << " successfully."; + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::convert_to_pdelta(const RowsetMetaPB& rowset_meta_pb, PDelta* delta) { + if (!rowset_meta_pb.has_start_version()) { + LOG(FATAL) << "rowset does not have start_version." + << " rowset id = " << rowset_meta_pb.rowset_id(); + } + delta->set_start_version(rowset_meta_pb.start_version()); + if (!rowset_meta_pb.has_end_version()) { + LOG(FATAL) << "rowset does not have end_version." + << " rowset id = " << rowset_meta_pb.rowset_id(); + } + delta->set_end_version(rowset_meta_pb.end_version()); + if (!rowset_meta_pb.has_version_hash()) { + LOG(FATAL) << "rowset does not have version_hash." + << " rowset id = " << rowset_meta_pb.rowset_id(); + } + delta->set_version_hash(rowset_meta_pb.version_hash()); + if (!rowset_meta_pb.has_creation_time()) { + LOG(FATAL) << "rowset does not have creation_time." 
+ << " rowset id = " << rowset_meta_pb.rowset_id(); + } + delta->set_creation_time(rowset_meta_pb.creation_time()); + AlphaRowsetExtraMetaPB extra_meta_pb = rowset_meta_pb.alpha_rowset_extra_meta_pb(); + + for (auto& segment_group : extra_meta_pb.segment_groups()) { + SegmentGroupPB* new_segment_group = delta->add_segment_group(); + *new_segment_group = segment_group; + } + if (rowset_meta_pb.has_delete_predicate()) { + DeletePredicatePB* delete_condition = delta->mutable_delete_condition(); + *delete_condition = rowset_meta_pb.delete_predicate(); + } + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::convert_to_rowset_meta(const PDelta& delta, + int64_t rowset_id, int64_t tablet_id, int32_t schema_hash, RowsetMetaPB* rowset_meta_pb) { + rowset_meta_pb->set_rowset_id(rowset_id); + rowset_meta_pb->set_tablet_id(tablet_id); + rowset_meta_pb->set_tablet_schema_hash(schema_hash); + rowset_meta_pb->set_rowset_type(RowsetTypePB::ALPHA_ROWSET); + rowset_meta_pb->set_rowset_state(RowsetStatePB::VISIBLE); + rowset_meta_pb->set_start_version(delta.start_version()); + rowset_meta_pb->set_end_version(delta.end_version()); + rowset_meta_pb->set_version_hash(delta.version_hash()); + + bool empty = true; + int64_t num_rows = 0; + int64_t index_size = 0; + int64_t data_size = 0; + AlphaRowsetExtraMetaPB* extra_meta_pb = rowset_meta_pb->mutable_alpha_rowset_extra_meta_pb(); + for (auto& segment_group : delta.segment_group()) { + SegmentGroupPB* new_segment_group = extra_meta_pb->add_segment_groups(); + *new_segment_group = segment_group; + // if segment group does not has empty property, then it is not empty + // if segment group's empty == false, then it is not empty + if (!segment_group.has_empty() || !segment_group.empty()) { + empty = false; + } + num_rows += segment_group.num_rows(); + index_size += segment_group.index_size(); + data_size += segment_group.data_size(); + } + + rowset_meta_pb->set_empty(empty); + rowset_meta_pb->set_num_rows(num_rows); + rowset_meta_pb->set_data_disk_size(data_size); + rowset_meta_pb->set_index_disk_size(index_size); + rowset_meta_pb->set_total_disk_size(data_size + index_size); + if (delta.has_delete_condition()) { + DeletePredicatePB* delete_condition = rowset_meta_pb->mutable_delete_predicate(); + *delete_condition = delta.delete_condition(); + } + rowset_meta_pb->set_creation_time(delta.creation_time()); + LOG(INFO) << "convert visible delta start_version = " << delta.start_version() + << " end_version = " << delta.end_version() + << " version_hash = " << delta.version_hash() + << " to rowset id = " << rowset_id + << " tablet_id = " << tablet_id; + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::convert_to_rowset_meta(const PPendingDelta& pending_delta, + int64_t rowset_id, int64_t tablet_id, int32_t schema_hash, RowsetMetaPB* rowset_meta_pb) { + rowset_meta_pb->set_rowset_id(rowset_id); + rowset_meta_pb->set_tablet_id(tablet_id); + rowset_meta_pb->set_tablet_schema_hash(schema_hash); + rowset_meta_pb->set_rowset_type(RowsetTypePB::ALPHA_ROWSET); + rowset_meta_pb->set_rowset_state(RowsetStatePB::COMMITTED); + rowset_meta_pb->set_partition_id(pending_delta.partition_id()); + rowset_meta_pb->set_txn_id(pending_delta.transaction_id()); + rowset_meta_pb->set_creation_time(pending_delta.creation_time()); + + bool empty = true; + int64_t num_rows = 0; + int64_t index_size = 0; + int64_t data_size = 0; + AlphaRowsetExtraMetaPB* extra_meta_pb = rowset_meta_pb->mutable_alpha_rowset_extra_meta_pb(); + for (auto& pending_segment_group : 
pending_delta.pending_segment_group()) { + SegmentGroupPB* new_segment_group = extra_meta_pb->add_segment_groups(); + new_segment_group->set_segment_group_id(pending_segment_group.pending_segment_group_id()); + new_segment_group->set_num_segments(pending_segment_group.num_segments()); + new_segment_group->set_index_size(0); + new_segment_group->set_data_size(0); + new_segment_group->set_num_rows(0); + for (auto& pending_zone_map : pending_segment_group.zone_maps()) { + ZoneMap* zone_map = new_segment_group->add_zone_maps(); + *zone_map = pending_zone_map; + } + new_segment_group->set_empty(pending_segment_group.empty()); + PUniqueId* load_id = new_segment_group->mutable_load_id(); + *load_id = pending_segment_group.load_id(); + + if (!pending_segment_group.empty()) { + empty = false; + } + } + + rowset_meta_pb->set_empty(empty); + rowset_meta_pb->set_num_rows(num_rows); + rowset_meta_pb->set_data_disk_size(data_size); + rowset_meta_pb->set_index_disk_size(index_size); + rowset_meta_pb->set_total_disk_size(data_size + index_size); + if (pending_delta.has_delete_condition()) { + DeletePredicatePB* delete_condition = rowset_meta_pb->mutable_delete_predicate(); + *delete_condition = pending_delta.delete_condition(); + } + rowset_meta_pb->set_creation_time(pending_delta.creation_time()); + LOG(INFO) << "convert pending delta txn id = " << pending_delta.transaction_id() + << " tablet_id = " << tablet_id + << " schema_hash = " << schema_hash + << " to rowset id = " << rowset_id; + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::to_column_pb(const ColumnMessage& column_msg, ColumnPB* column_pb) { + if (column_msg.has_unique_id()) { + column_pb->set_unique_id(column_msg.unique_id()); + } + column_pb->set_name(column_msg.name()); + column_pb->set_type(column_msg.type()); + column_pb->set_is_key(column_msg.is_key()); + column_pb->set_aggregation(column_msg.aggregation()); + if (column_msg.has_is_allow_null()) { + column_pb->set_is_nullable(column_msg.is_allow_null()); + } + if (column_msg.has_default_value()) { + column_pb->set_default_value(column_msg.default_value()); + } + if (column_msg.has_precision()) { + column_pb->set_precision(column_msg.precision()); + } + if (column_msg.has_frac()) { + column_pb->set_frac(column_msg.frac()); + } + column_pb->set_length(column_msg.length()); + if (column_msg.has_index_length()) { + column_pb->set_index_length(column_msg.index_length()); + } + if (column_msg.has_is_bf_column()) { + column_pb->set_is_bf_column(column_msg.is_bf_column()); + } + // TODO(ygl) calculate column id from column list + // column_pb->set_referenced_column_id(column_msg.()); + + if (column_msg.has_referenced_column()) { + column_pb->set_referenced_column(column_msg.referenced_column()); + } + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::to_column_msg(const ColumnPB& column_pb, ColumnMessage* column_msg) { + if (!column_pb.has_name()) { + LOG(FATAL) << "column pb does not have name" + << " column id " << column_pb.unique_id(); + } + column_msg->set_name(column_pb.name()); + column_msg->set_type(column_pb.type()); + if (!column_pb.has_aggregation()) { + LOG(FATAL) << "column pb does not have aggregation" + << " column id " << column_pb.unique_id(); + } + column_msg->set_aggregation(column_pb.aggregation()); + if (!column_pb.has_length()) { + LOG(FATAL) << "column pb does not have length" + << " column id " << column_pb.unique_id(); + } + column_msg->set_length(column_pb.length()); + if (!column_pb.has_is_key()) { + LOG(FATAL) << "column pb does not have 
is_key" + << " column id " << column_pb.unique_id(); + } + column_msg->set_is_key(column_pb.is_key()); + if (column_pb.has_default_value()) { + column_msg->set_default_value(column_pb.default_value()); + } + if (column_pb.has_referenced_column()) { + column_msg->set_referenced_column(column_pb.referenced_column()); + } + if (column_pb.has_index_length()) { + column_msg->set_index_length(column_pb.index_length()); + } + if (column_pb.has_precision()) { + column_msg->set_precision(column_pb.precision()); + } + if (column_pb.has_frac()) { + column_msg->set_frac(column_pb.frac()); + } + if (column_pb.has_is_nullable()) { + column_msg->set_is_allow_null(column_pb.is_nullable()); + } + column_msg->set_unique_id(column_pb.unique_id()); + if (column_pb.has_is_bf_column()) { + column_msg->set_is_bf_column(column_pb.is_bf_column()); + } + column_msg->set_is_root_column(true); + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::to_alter_tablet_pb(const SchemaChangeStatusMessage& schema_change_msg, + AlterTabletPB* alter_tablet_pb) { + alter_tablet_pb->set_related_tablet_id(schema_change_msg.related_tablet_id()); + alter_tablet_pb->set_related_schema_hash(schema_change_msg.related_schema_hash()); + alter_tablet_pb->set_alter_type(static_cast(schema_change_msg.schema_change_type())); + if (schema_change_msg.versions_to_changed().size() == 0) { + alter_tablet_pb->set_alter_state(AlterTabletState::ALTER_FINISHED); + } else { + alter_tablet_pb->set_alter_state(AlterTabletState::ALTER_FAILED); + } + return OLAP_SUCCESS; +} + +// from olap header to tablet meta +OLAPStatus OlapSnapshotConverter::to_new_snapshot(const OLAPHeaderMessage& olap_header, const string& old_data_path_prefix, + const string& new_data_path_prefix, DataDir& data_dir, TabletMetaPB* tablet_meta_pb, + vector* pending_rowsets, bool is_startup) { + RETURN_NOT_OK(to_tablet_meta_pb(olap_header, tablet_meta_pb, pending_rowsets)); + + TabletSchema tablet_schema; + RETURN_NOT_OK(tablet_schema.init_from_pb(tablet_meta_pb->schema())); + + // convert visible pdelta file to rowsets + for (auto& visible_rowset : tablet_meta_pb->rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(visible_rowset); + alpha_rowset_meta->set_tablet_uid(tablet_meta_pb->tablet_uid()); + AlphaRowset rowset(&tablet_schema, new_data_path_prefix, &data_dir, alpha_rowset_meta); + RETURN_NOT_OK(rowset.init()); + std::vector success_files; + RETURN_NOT_OK(rowset.convert_from_old_files(old_data_path_prefix, &success_files)); + _modify_old_segment_group_id(const_cast(visible_rowset)); + } + + // convert inc delta file to rowsets + for (auto& inc_rowset : tablet_meta_pb->inc_rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(inc_rowset); + alpha_rowset_meta->set_tablet_uid(tablet_meta_pb->tablet_uid()); + AlphaRowset rowset(&tablet_schema, new_data_path_prefix, &data_dir, alpha_rowset_meta); + RETURN_NOT_OK(rowset.init()); + std::vector success_files; + std::string inc_data_path = old_data_path_prefix; + // in clone case: there is no incremental perfix + // in start up case: there is incremental prefix + if (is_startup) { + inc_data_path = inc_data_path + "/" + INCREMENTAL_DELTA_PREFIX; + } + RETURN_NOT_OK(rowset.convert_from_old_files(inc_data_path, &success_files)); + _modify_old_segment_group_id(const_cast(inc_rowset)); + } + + for (auto it = pending_rowsets->begin(); it != pending_rowsets->end(); ++it) { + RowsetMetaSharedPtr alpha_rowset_meta(new 
AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(*it); + alpha_rowset_meta->set_tablet_uid(tablet_meta_pb->tablet_uid()); + AlphaRowset rowset(&tablet_schema, new_data_path_prefix, &data_dir, alpha_rowset_meta); + RETURN_NOT_OK(rowset.init()); + std::vector success_files; + // std::string pending_delta_path = old_data_path_prefix + PENDING_DELTA_PREFIX; + // if this is a pending segment group, rowset will add pending_delta_prefix when + // construct old file path + RETURN_NOT_OK(rowset.convert_from_old_files(old_data_path_prefix, &success_files)); + // pending delta does not have row num, index size, data size info + // should load the pending delta, get these info and reset rowset meta's row num + // data size, index size + RETURN_NOT_OK(rowset.reset_sizeinfo()); + // pending rowset not have segment group id == -1 problem, not need to modify sg id in meta + rowset.to_rowset_pb(&(*it)); + } + return OLAP_SUCCESS; +} + +// from tablet meta to olap header +OLAPStatus OlapSnapshotConverter::to_old_snapshot(const TabletMetaPB& tablet_meta_pb, string& new_data_path_prefix, + string& old_data_path_prefix, OLAPHeaderMessage* olap_header) { + RETURN_NOT_OK(to_olap_header(tablet_meta_pb, olap_header)); + + TabletSchema tablet_schema; + RETURN_NOT_OK(tablet_schema.init_from_pb(tablet_meta_pb.schema())); + + // convert visible pdelta file to rowsets + for (auto& visible_rowset : tablet_meta_pb.rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(visible_rowset); + AlphaRowset rowset(&tablet_schema, new_data_path_prefix, nullptr, alpha_rowset_meta); + RETURN_NOT_OK(rowset.init()); + RETURN_NOT_OK(rowset.load()); + std::vector success_files; + RETURN_NOT_OK(rowset.convert_to_old_files(old_data_path_prefix, &success_files)); + } + + // convert inc delta file to rowsets + for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(inc_rowset); + AlphaRowset rowset(&tablet_schema, new_data_path_prefix, nullptr, alpha_rowset_meta); + RETURN_NOT_OK(rowset.init()); + RETURN_NOT_OK(rowset.load()); + std::vector success_files; + RETURN_NOT_OK(rowset.convert_to_old_files(old_data_path_prefix, &success_files)); + } + return OLAP_SUCCESS; +} + +OLAPStatus OlapSnapshotConverter::save(const string& file_path, const OLAPHeaderMessage& olap_header) { + DCHECK(!file_path.empty()); + + FileHeader file_header; + FileHandler file_handler; + + if (file_handler.open_with_mode(file_path.c_str(), + O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to open header file. file='" << file_path; + return OLAP_ERR_IO_ERROR; + } + + try { + file_header.mutable_message()->CopyFrom(olap_header); + } catch (...) { + LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path; + return OLAP_ERR_OTHER_ERROR; + } + + if (file_header.prepare(&file_handler) != OLAP_SUCCESS + || file_header.serialize(&file_handler) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to serialize to file header. 
file='" << file_path; + return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; + } + + return OLAP_SUCCESS; +} + +void OlapSnapshotConverter::_modify_old_segment_group_id(RowsetMetaPB& rowset_meta) { + if (!rowset_meta.has_alpha_rowset_extra_meta_pb()) { + return; + } + AlphaRowsetExtraMetaPB* alpha_rowset_extra_meta_pb = rowset_meta.mutable_alpha_rowset_extra_meta_pb(); + for (auto& segment_group_pb : alpha_rowset_extra_meta_pb->segment_groups()) { + if (segment_group_pb.segment_group_id() == -1) { + // check if segment groups size == 1 + if (alpha_rowset_extra_meta_pb->segment_groups().size() != 1) { + LOG(FATAL) << "the rowset has a segment group's id == -1 but it contains more than one segment group" + << " it should not happen"; + } + (const_cast(segment_group_pb)).set_segment_group_id(0); + } + } +} + +} diff --git a/be/src/olap/olap_snapshot_converter.h b/be/src/olap/olap_snapshot_converter.h new file mode 100644 index 00000000000000..772956af4d7396 --- /dev/null +++ b/be/src/olap/olap_snapshot_converter.h @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H +#define DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H + +#include +#include +#include + +#include "gen_cpp/olap_file.pb.h" +#include "olap/data_dir.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/tablet_schema.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_meta.h" +#include "olap/delete_handler.h" + +using std::ifstream; +using std::string; +using std::vector; + +namespace doris { + + +class OlapSnapshotConverter { +public: + // convert tablet meta pb to olap header + // only consider alpha rowset not other rowsets + OLAPStatus to_olap_header(const TabletMetaPB& tablet_meta_pb, OLAPHeaderMessage* olap_header); + + // convert olap header to tablet meta pb, convert delta to rowsetmetapb + // pending delta is not in tablet meta any more, so that convert pending delta to rowset and add it to pending rowsets + // as a return value + OLAPStatus to_tablet_meta_pb(const OLAPHeaderMessage& olap_header, TabletMetaPB* tablet_meta_pb, + vector* pending_rowsets); + + OLAPStatus convert_to_pdelta(const RowsetMetaPB& rowset_meta_pb, PDelta* delta); + + OLAPStatus convert_to_rowset_meta(const PDelta& delta, int64_t rowset_id, int64_t tablet_id, + int32_t schema_hash, RowsetMetaPB* rowset_meta_pb); + + OLAPStatus convert_to_rowset_meta(const PPendingDelta& pending_delta, int64_t rowset_id, + int64_t tablet_id, int32_t schema_hash, RowsetMetaPB* rowset_meta_pb); + + OLAPStatus to_column_pb(const ColumnMessage& column_msg, ColumnPB* column_pb); + + OLAPStatus to_column_msg(const ColumnPB& column_pb, ColumnMessage* column_msg); + + // only convert schema change msg to alter tablet pb, not the other side because snapshot does not need + // schema change status while restart and upgrade need schema change status + OLAPStatus to_alter_tablet_pb(const SchemaChangeStatusMessage& schema_change_msg, AlterTabletPB* alter_tablet_pb); + + // from olap header to tablet meta + OLAPStatus to_new_snapshot(const OLAPHeaderMessage& olap_header, const string& old_data_path_prefix, + const string& new_data_path_prefix, DataDir& data_dir, TabletMetaPB* tablet_meta_pb, + vector* pending_rowsets, bool is_startup); + + // from tablet meta to olap header + OLAPStatus to_old_snapshot(const TabletMetaPB& tablet_meta_pb, string& new_data_path_prefix, + string& old_data_path_prefix, OLAPHeaderMessage* olap_header); + + OLAPStatus save(const string& file_path, const OLAPHeaderMessage& olap_header); + +private: + void _modify_old_segment_group_id(RowsetMetaPB& rowset_meta); +}; + +} + +#endif // DORIS_BE_SRC_OLAP_OLAP_SNAPSHOT_CONVERTER_H diff --git a/be/src/olap/olap_table.cpp b/be/src/olap/olap_table.cpp deleted file mode 100644 index 5e8ff545158240..00000000000000 --- a/be/src/olap/olap_table.cpp +++ /dev/null @@ -1,2333 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/olap_table.h" - -#include -#include -#include - -#include -#include -#include - -#include - -#include "olap/field.h" -#include "olap/column_data.h" -#include "olap/olap_common.h" -#include "olap/olap_define.h" -#include "olap/olap_engine.h" -#include "olap/olap_index.h" -#include "olap/reader.h" -#include "olap/store.h" -#include "olap/row_cursor.h" -#include "util/defer_op.h" -#include "olap/olap_header_manager.h" -#include "olap/olap_engine.h" -#include "olap/utils.h" -#include "olap/data_writer.h" - -using std::pair; -using std::map; -using std::nothrow; -using std::set; -using std::sort; -using std::string; -using std::stringstream; -using std::vector; -using boost::filesystem::path; - -namespace doris { - -OLAPTablePtr OLAPTable::create_from_header_file( - TTabletId tablet_id, TSchemaHash schema_hash, - const string& header_file, OlapStore* store) { - OLAPHeader* olap_header = NULL; - olap_header = new(nothrow) OLAPHeader(header_file); - if (olap_header == NULL) { - LOG(WARNING) << "fail to malloc OLAPHeader."; - return NULL; - } - - if (olap_header->load_and_init() != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load header. header_file=" << header_file; - delete olap_header; - return NULL; - } - - // add new fields - olap_header->set_tablet_id(tablet_id); - olap_header->set_schema_hash(schema_hash); - path header_file_path(header_file); - std::string shard_path = header_file_path.parent_path().parent_path().parent_path().string(); - std::string shard_str = shard_path.substr(shard_path.find_last_of('/') + 1); - uint64_t shard = stol(shard_str); - olap_header->set_shard(shard); - - // save header info to kv db - // header key format: tablet_id + "_" + schema_hash - OLAPStatus s = OlapHeaderManager::save(store, tablet_id, schema_hash, olap_header); - if (s != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save header to db. [header_file=%s]", header_file.c_str()); - delete olap_header; - return NULL; - } - return create_from_header(olap_header, store); -} - -OLAPTablePtr OLAPTable::create_from_header_file_for_check( - TTabletId tablet_id, TSchemaHash schema_hash, const string& header_file) { - OLAPHeader* olap_header = NULL; - - olap_header = new(nothrow) OLAPHeader(header_file); - if (olap_header == NULL) { - OLAP_LOG_WARNING("fail to malloc OLAPHeader."); - return NULL; - } - - if (olap_header->load_for_check() != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load header. [header_file=%s]", header_file.c_str()); - delete olap_header; - return NULL; - } - - OLAPTablePtr olap_table = std::make_shared(olap_header); - if (olap_table == NULL) { - OLAP_LOG_WARNING("fail to validate table. [header_file=%s]", header_file.c_str()); - delete olap_header; - return NULL; - } - olap_table->_tablet_id = tablet_id; - olap_table->_schema_hash = schema_hash; - olap_table->_full_name = std::to_string(tablet_id) + "." + std::to_string(schema_hash); - return olap_table; -} - -OLAPTable::OLAPTable(OLAPHeader* header) - : _header(header) { - if (header->has_tablet_id()) { - _tablet_id = header->tablet_id(); - _schema_hash = header->schema_hash(); - _full_name = std::to_string(header->tablet_id()) + "." 
+ std::to_string(header->schema_hash()); - } - _table_for_check = true; -} - -OLAPTablePtr OLAPTable::create_from_header( - OLAPHeader* header, - OlapStore* store) { - auto olap_table = std::make_shared(header, store); - if (olap_table == NULL) { - LOG(WARNING) << "fail to malloc a table."; - return nullptr; - } - - return olap_table; -} - -OLAPTable::OLAPTable(OLAPHeader* header, OlapStore* store) : - _header(header), - _is_dropped(false), - _num_fields(0), - _num_null_fields(0), - _num_key_fields(0), - _id(0), - _store(store), - _is_loaded(false), - _is_bad(false), - _last_compaction_failure_time(0) { - if (header == NULL) { - return; // for convenience of mock test. - } - - for (int i = 0; i < header->column_size(); i++) { - FieldInfo field_info; - field_info.name = header->column(i).name(); - field_info.type = FieldInfo::get_field_type_by_string(header->column(i).type()); - field_info.aggregation = FieldInfo::get_aggregation_type_by_string( - header->column(i).aggregation()); - field_info.length = header->column(i).length(); - field_info.is_key = header->column(i).is_key(); - - if (header->column(i).has_default_value()) { - field_info.has_default_value = true; - field_info.set_default_value(header->column(i).default_value().c_str()); - } else { - field_info.has_default_value = false; - } - - if (header->column(i).has_referenced_column()) { - field_info.has_referenced_column = true; - field_info.referenced_column = header->column(i).referenced_column(); - } else { - field_info.has_referenced_column = false; - } - - if (header->column(i).has_index_length() || header->column(i).index_length() != 0) { - field_info.index_length = header->column(i).index_length(); - } else { - field_info.index_length = field_info.length; - } - - if (header->column(i).has_precision()) { - field_info.precision = header->column(i).precision(); - } - - if (header->column(i).has_frac()) { - field_info.frac = header->column(i).frac(); - } - - if (header->column(i).has_unique_id()) { - field_info.unique_id = header->column(i).unique_id(); - } else { - // 该表不支持unique id, 分配一个unique id - field_info.unique_id = static_cast(i); - } - - for (int j = 0; i < header->column(i).sub_column_size(); j++) { - field_info.sub_columns.push_back(header->column(i).sub_column(j)); - } - - field_info.is_root_column = header->column(i).is_root_column(); - if (header->column(i).has_is_allow_null()) { - field_info.is_allow_null = header->column(i).is_allow_null(); - } else { - field_info.is_allow_null = false; - } - - field_info.is_bf_column = header->column(i).is_bf_column(); - - _tablet_schema.push_back(field_info); - // field name --> field position in full row. - _field_index_map[field_info.name] = i; - _field_sizes.push_back(field_info.length); - _num_fields++; - if (true == field_info.is_allow_null) { - _num_null_fields++; - } - - if (field_info.is_key) { - _num_key_fields++; - } - } - - _num_rows_per_row_block = header->num_rows_per_data_block(); - _compress_kind = header->compress_kind(); - std::stringstream tablet_path_stream; - _tablet_id = header->tablet_id(); - _schema_hash = header->schema_hash(); - tablet_path_stream << store->path() << DATA_PREFIX << "/" << header->shard(); - tablet_path_stream << "/" << _tablet_id << "/" << _schema_hash; - _tablet_path = tablet_path_stream.str(); - _storage_root_path = store->path(); - _full_name = std::to_string(header->tablet_id()) + "." 
+ std::to_string(header->schema_hash()); - _table_for_check = false; -} - -OLAPTable::~OLAPTable() { - if (_table_for_check) { - return; - } - - if (_header == NULL) { - return; // for convenience of mock test. - } - - // ensure that there is nobody using OLAPTable, like acquiring OLAPData(SegmentGroup) - obtain_header_wrlock(); - for (auto& it : _data_sources) { - for (SegmentGroup* segment_group : it.second) { - SAFE_DELETE(segment_group); - } - } - _data_sources.clear(); - - // clear the transactions in memory - for (auto& it : _pending_data_sources) { - // false means can't remove the transaction from header, also prevent the loading of tablet - for (SegmentGroup* segment_group : it.second) { - OLAPEngine::get_instance()->delete_transaction( - segment_group->partition_id(), segment_group->transaction_id(), - _tablet_id, _schema_hash, false); - SAFE_DELETE(segment_group); - } - } - _pending_data_sources.clear(); - release_header_lock(); - - SAFE_DELETE(_header); - - // 移动数据目录 - if (_is_dropped) { - LOG(INFO) << "drop table:" << full_name() << ", tablet path:" << _tablet_path; - path table_path(_tablet_path); - std::string header_path = _tablet_path + "/" + std::to_string(_tablet_id) + ".hdr"; - OLAPStatus s = OlapHeaderManager::dump_header(_store, _tablet_id, _schema_hash, header_path); - LOG(INFO) << "dump header to path:" << header_path << ", status:" << s; - LOG(INFO) << "start to remove tablet header:" << full_name(); - s = OlapHeaderManager::remove(_store, _tablet_id, _schema_hash); - LOG(INFO) << "finish remove tablet header:" << full_name() << ", res:" << s; - if (move_to_trash(table_path, table_path) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to delete table. [table_path=" << _tablet_path << "]"; - } - LOG(INFO) << "finish drop table:" << full_name(); - } -} - -OLAPStatus OLAPTable::load() { - OLAPStatus res = OLAP_SUCCESS; - MutexLock l(&_load_lock); - - string one_schema_root = _tablet_path; - set files; - set index_files; - set data_files; - - if (_is_loaded) { - goto EXIT; - } - - res = dir_walk(one_schema_root, NULL, &files); - // Disk Failure will triggered delete file in disk. - // IOError will drop object. File only deleted upon restart. - // TODO. Tablet should has a state to report to FE, delete tablet - // request will get from FE. - if (res == OLAP_ERR_DISK_FAILURE) { - LOG(WARNING) << "fail to walk schema root dir." - << "res=" << res << ", root=" << one_schema_root; - goto EXIT; - } else if (res != OLAP_SUCCESS) { - // OLAPEngine::get_instance()->drop_table(tablet_id(), schema_hash(), true); - goto EXIT; - } - res = load_indices(); - - if (res != OLAP_SUCCESS) { - if (config::auto_recover_index_loading_failure) { - LOG(WARNING) << "fail to load indices. [res=" << res << " table='" << _full_name << "']"; - } else { - // fatal log will let BE process exit - LOG(FATAL) << "fail to load indices. [res=" << res << " table='" << _full_name << "']"; - } - goto EXIT; - } - - // delete unused files - obtain_header_rdlock(); - list_index_files(&index_files); - list_data_files(&data_files); - if (remove_unused_files(one_schema_root, - files, - "", - index_files, - data_files) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to remove unused files. [root='" << one_schema_root << "']"; - } - release_header_lock(); - -EXIT: - // always set _is_loaded to true, so that this tablet will be not loaded again - _is_loaded = true; - - if (res != OLAP_SUCCESS) { - _is_bad = true; - // Do not drop table directly here, FE will get the report and handle it. 
- // OLAPEngine::get_instance()->drop_table(tablet_id(), schema_hash()); - } - - return res; -} - -OLAPStatus OLAPTable::load_indices() { - OLAPStatus res = OLAP_SUCCESS; - ReadLock rdlock(&_header_lock); - OLAPHeader* header = _header; - VLOG(3) << "begin to load indices. table=" << full_name() << ", " - << "version_size=" << header->file_delta_size(); - - for (int delta_id = 0; delta_id < header->delta_size(); ++delta_id) { - const PDelta& delta = header->delta(delta_id); - Version version; - version.first = delta.start_version(); - version.second = delta.end_version(); - for (int j = 0; j < delta.segment_group_size(); ++j) { - const PSegmentGroup& psegment_group = delta.segment_group(j); - SegmentGroup* segment_group = new SegmentGroup(this, version, delta.version_hash(), - false, psegment_group.segment_group_id(), psegment_group.num_segments()); - if (segment_group == nullptr) { - LOG(WARNING) << "fail to create olap segment_group. [version='" << version.first - << "-" << version.second << "' table='" << full_name() << "']"; - return OLAP_ERR_MALLOC_ERROR; - } - - if (psegment_group.has_empty()) { - segment_group->set_empty(psegment_group.empty()); - } - // 在校验和加载索引前把segment_group放到data-source,以防止加载索引失败造成内存泄露 - _data_sources[version].push_back(segment_group); - - // 判断segment_group是否正常, 在所有版本的都检查完成之后才加载所有版本的segment_group - if (segment_group->validate() != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to validate segment_group. [version='%d-%d' version_hash=%ld]", - version.first, - version.second, - header->delta(delta_id).version_hash()); - // 现在只要一个segment_group没有被正确加载,整个table加载失败 - return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; - } - - if (psegment_group.column_pruning_size() != 0) { - size_t column_pruning_size = psegment_group.column_pruning_size(); - if (_num_key_fields != column_pruning_size) { - LOG(ERROR) << "column pruning size is error." - << "column_pruning_size=" << column_pruning_size << ", " - << "num_key_fields=" << _num_key_fields; - return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; - } - std::vector > \ - column_statistic_strings(_num_key_fields); - std::vector null_vec(_num_key_fields); - for (size_t j = 0; j < _num_key_fields; ++j) { - ColumnPruning column_pruning = psegment_group.column_pruning(j); - column_statistic_strings[j].first = column_pruning.min(); - column_statistic_strings[j].second = column_pruning.max(); - if (column_pruning.has_null_flag()) { - null_vec[j] = column_pruning.null_flag(); - } else { - null_vec[j] = false; - } - } - RETURN_NOT_OK(segment_group->add_column_statistics(column_statistic_strings, null_vec)); - } - } - } - - for (version_olap_index_map_t::const_iterator it = _data_sources.begin(); - it != _data_sources.end(); ++it) { - Version version = it->first; - for (SegmentGroup* segment_group : it->second) { - if ((res = segment_group->load()) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load segment_group. version=" << version.first << "-" << version.second << ", " - << "version_hash=" << segment_group->version_hash(); - // 现在只要一个segment_group没有被正确加载,整个table加载失败 - return res; - } - - VLOG(3) << "load SegmentGroup success. 
table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << segment_group->version_hash() << ", " - << "num_segments=" << segment_group->num_segments(); - } - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::save_header() { - OLAPStatus res = OlapHeaderManager::save(_store, _tablet_id, _schema_hash, _header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to save header. [res=" << res << " root=" << _storage_root_path << "]"; - } - - return res; -} - -OLAPStatus OLAPTable::select_versions_to_span( const Version& version, - vector* span_versions) const { - OLAPStatus res = _header->select_versions_to_span(version, span_versions); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to generate shortest version path. [version='" << version.first - << "-" << version.second << "' table='" << full_name() << "']"; - } - return res; -} - -void OLAPTable::acquire_data_sources(const Version& version, vector* sources) const { - vector span_versions; - - if (_header->select_versions_to_span(version, &span_versions) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to generate shortest version path. [version='" << version.first - << "-" << version.second << "' table='" << full_name() << "']"; - return; - } - - acquire_data_sources_by_versions(span_versions, sources); - return; -} - -void OLAPTable::acquire_data_sources_by_versions(const vector& version_list, - vector* sources) const { - if (sources == NULL) { - LOG(WARNING) << "output parameter for data sources is null. table=" << full_name(); - return; - } - - // first clear the output vector, please do not put any OLAPData - // into this vector, it may be cause memory leak. - sources->clear(); - - for (vector::const_iterator it1 = version_list.begin(); - it1 != version_list.end(); ++it1) { - version_olap_index_map_t::const_iterator it2 = _data_sources.find(*it1); - if (it2 == _data_sources.end()) { - LOG(WARNING) << "fail to find SegmentGroup for version. [version='" << it1->first - << "-" << it1->second << "' table='" << full_name() << "']"; - release_data_sources(sources); - return; - } - - for (SegmentGroup* segment_group : it2->second) { - ColumnData* olap_data = ColumnData::create(segment_group); - if (olap_data == NULL) { - LOG(WARNING) << "fail to malloc Data. [version='" << it1->first - << "-" << it1->second << "' table='" << full_name() << "']"; - release_data_sources(sources); - return; - } - - sources->push_back(olap_data); - - if (olap_data->init() != OLAP_SUCCESS) { - LOG(WARNING) << "fail to initial olap data. [version='" << it1->first - << "-" << it1->second << "' table='" << full_name() << "']"; - release_data_sources(sources); - return; - } - } - } -} - -OLAPStatus OLAPTable::release_data_sources(vector* data_sources) const { - if (data_sources == NULL) { - LOG(WARNING) << "parameter data_sources is null. [table='" << full_name() << "']"; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - for (auto data : *data_sources) { - delete data; - } - - // clear data_sources vector - data_sources->clear(); - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::register_data_source(const std::vector& index_vec) { - OLAPStatus res = OLAP_SUCCESS; - - if (index_vec.empty()) { - LOG(WARNING) << "parameter segment_group is null." 
- << "table=" << full_name(); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - for (SegmentGroup* segment_group : index_vec) { - Version version = segment_group->version(); - const std::vector* column_statistics = nullptr; - if (segment_group->has_column_statistics()) { - column_statistics = &segment_group->get_column_statistics(); - } - res = _header->add_version(version, segment_group->version_hash(), segment_group->segment_group_id(), - segment_group->num_segments(), segment_group->index_size(), segment_group->data_size(), - segment_group->num_rows(), segment_group->empty(), column_statistics); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add version to olap header. table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second; - return res; - } - - // put the new segment_group into _data_sources. - // 由于对header的操作可能失败,因此对_data_sources要放在这里 - _data_sources[version].push_back(segment_group); - VLOG(3) << "succeed to register data source. table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << segment_group->version_hash() << ", " - << "segment_group_id=" << segment_group->segment_group_id() << ", " - << "num_segments=" << segment_group->num_segments(); - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::unregister_data_source(const Version& version, std::vector* segment_group_vec) { - OLAPStatus res = OLAP_SUCCESS; - version_olap_index_map_t::iterator it = _data_sources.find(version); - if (it == _data_sources.end()) { - LOG(WARNING) << "olap segment_group for version does not exists. [version='" << version.first - << "-" << version.second << "' table='" << full_name() << "']"; - return OLAP_ERR_VERSION_NOT_EXIST; - } - - // delete a reference to the data source in the header file - if ((res = _header->delete_version(version)) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to delete version from olap header. [version='" << version.first - << "-" << version.second << "' table='" << full_name() << "']"; - return res; - } - - *segment_group_vec = it->second; - _data_sources.erase(it); - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::add_pending_version(int64_t partition_id, int64_t transaction_id, - const std::vector* delete_conditions) { - WriteLock wrlock(&_header_lock); - OLAPStatus res = _header->add_pending_version(partition_id, transaction_id, delete_conditions); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add pending delta to header." - << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id; - return res; - } - res = save_header(); - if (res != OLAP_SUCCESS) { - _header->delete_pending_delta(transaction_id); - LOG(FATAL) << "fail to save header when add pending segment_group. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - return res; - } - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::add_pending_segment_group(SegmentGroup* segment_group) { - if (segment_group == nullptr) { - LOG(WARNING) << "parameter segment_group is null. 
[table=" << full_name() << "]"; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - int64_t transaction_id = segment_group->transaction_id(); - obtain_header_wrlock(); - OLAPStatus res = OLAP_SUCCESS; - - // add to header - const std::vector* column_statistics = nullptr; - if (segment_group->has_column_statistics()) { - column_statistics = &(segment_group->get_column_statistics()); - } - res = _header->add_pending_segment_group(transaction_id, segment_group->num_segments(), - segment_group->segment_group_id(), segment_group->load_id(), - segment_group->empty(), column_statistics); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add pending segment_group to header. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - release_header_lock(); - return res; - } - - // save header - res = save_header(); - if (res != OLAP_SUCCESS) { - _header->delete_pending_delta(transaction_id); - LOG(FATAL) << "fail to save header when add pending segment_group. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - release_header_lock(); - return res; - } - - // add to data sources - _pending_data_sources[transaction_id].push_back(segment_group); - release_header_lock(); - VLOG(3) << "add pending data to tablet successfully." - << "table=" << full_name() << ", transaction_id=" << transaction_id; - - return res; -} - -int32_t OLAPTable::current_pending_segment_group_id(int64_t transaction_id) { - ReadLock rdlock(&_header_lock); - int32_t segment_group_id = -1; - if (_pending_data_sources.find(transaction_id) != _pending_data_sources.end()) { - for (SegmentGroup* segment_group : _pending_data_sources[transaction_id]) { - if (segment_group->segment_group_id() > segment_group_id) { - segment_group_id = segment_group->segment_group_id(); - } - } - } - return segment_group_id; -} - -OLAPStatus OLAPTable::add_pending_data(SegmentGroup* segment_group, const std::vector* delete_conditions) { - if (segment_group == nullptr) { - LOG(WARNING) << "parameter segment_group is null. table=" << full_name(); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - obtain_header_wrlock(); - int64_t transaction_id = segment_group->transaction_id(); - if (_pending_data_sources.find(transaction_id) != _pending_data_sources.end()) { - LOG(WARNING) << "find pending data existed when add to tablet. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - release_header_lock(); - return OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST; - } - OLAPStatus res = OLAP_SUCCESS; - - // if push for delete, construct sub conditions - vector condition_strs; - if (delete_conditions != nullptr) { - DeleteConditionHandler del_cond_handler; - for (const TCondition& condition : *delete_conditions) { - condition_strs.push_back(del_cond_handler.construct_sub_conditions(condition)); - } - } - - if (!condition_strs.empty()) { - res = _header->add_pending_version(segment_group->partition_id(), transaction_id, &condition_strs); - } else { - res = _header->add_pending_version(segment_group->partition_id(), transaction_id, nullptr); - } - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add pending delta to header." 
- << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id; - release_header_lock(); - return res; - } - - // add to header - const std::vector* column_statistics = nullptr; - if (segment_group->has_column_statistics()) { - column_statistics = &(segment_group->get_column_statistics()); - } - res = _header->add_pending_segment_group(transaction_id, segment_group->num_segments(), - segment_group->segment_group_id(), segment_group->load_id(), - segment_group->empty(), column_statistics); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add pending segment_group to header. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - release_header_lock(); - return res; - } - - // save header - res = save_header(); - if (res != OLAP_SUCCESS) { - _header->delete_pending_delta(transaction_id); - LOG(FATAL) << "fail to save header when add pending segment_group. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - release_header_lock(); - return res; - } - - // add to data sources - _pending_data_sources[transaction_id].push_back(segment_group); - release_header_lock(); - VLOG(3) << "add pending data to tablet successfully." - << "table=" << full_name() << ", transaction_id=" << transaction_id; - return res; - -} - -bool OLAPTable::has_pending_data(int64_t transaction_id) { - ReadLock rdlock(&_header_lock); - return _pending_data_sources.find(transaction_id) != _pending_data_sources.end(); -} - -bool OLAPTable::has_pending_data() { - ReadLock rdlock(&_header_lock); - return !_pending_data_sources.empty(); -} - -void OLAPTable::delete_pending_data(int64_t transaction_id) { - obtain_header_wrlock(); - - auto it = _pending_data_sources.find(transaction_id); - if (it == _pending_data_sources.end()) { - release_header_lock(); - return; - } - - // delete from data sources - for (SegmentGroup* segment_group : it->second) { - segment_group->release(); - OLAPEngine::get_instance()->add_unused_index(segment_group); - } - _pending_data_sources.erase(it); - - // delete from header - _header->delete_pending_delta(transaction_id); - - // save header - if (save_header() != OLAP_SUCCESS) { - LOG(FATAL) << "failed to save header when delete pending data. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - } - - release_header_lock(); - LOG(INFO) << "delete pending data from tablet. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - -} - -void OLAPTable::get_expire_pending_data(vector* transaction_ids) { - time_t now = time(NULL); - ReadLock rdlock(&_header_lock); - - for (auto& it : _header->pending_delta()) { - double diff = difftime(now, it.creation_time()); - if (diff >= config::pending_data_expire_time_sec) { - transaction_ids->push_back(it.transaction_id()); - VLOG(3) << "find expire pending data. table=" << full_name() << ", " - << "transaction_id=" << it.transaction_id() << " exist_sec=" << diff; - } - } -} - -void OLAPTable::load_pending_data() { - LOG(INFO) << "begin to load pending_data. 
table=" << full_name() << ", " - << "pending_delta size=" << _header->pending_delta_size(); - MutexLock load_lock(&_load_lock); - - // if a olap segment_group loads failed, delete it from header - std::set error_pending_data; - - for (const PPendingDelta& pending_delta : _header->pending_delta()) { - PUniqueId load_id; - if (pending_delta.pending_segment_group_size() > 0) { - load_id = pending_delta.pending_segment_group()[0].load_id(); - } else { - load_id.set_hi(0); - load_id.set_lo(0); - } - OLAPStatus add_status = OLAPEngine::get_instance()->add_transaction( - pending_delta.partition_id(), pending_delta.transaction_id(), - _tablet_id, _schema_hash, load_id); - if (add_status != OLAP_SUCCESS) { - LOG(ERROR) << "find transaction exists in engine when load pending data. tablet=" << full_name() - << ", transaction_id=" << pending_delta.transaction_id(); - error_pending_data.insert(pending_delta.transaction_id()); - continue; - } - - for (const PPendingSegmentGroup& pending_segment_group : pending_delta.pending_segment_group()) { - SegmentGroup* segment_group = new SegmentGroup(this, false, - pending_segment_group.pending_segment_group_id(), - pending_segment_group.num_segments(), true, - pending_delta.partition_id(), pending_delta.transaction_id()); - DCHECK(segment_group != nullptr); - segment_group->set_load_id(pending_segment_group.load_id()); - if (pending_segment_group.has_empty()) { - segment_group->set_empty(pending_segment_group.empty()); - } - _pending_data_sources[segment_group->transaction_id()].push_back(segment_group); - - if (segment_group->validate() != OLAP_SUCCESS) { - LOG(WARNING) << "fail to validate segment_group when load pending data." - << "table=" << full_name() << ", " - << "transaction_id=" << segment_group->transaction_id(); - error_pending_data.insert(segment_group->transaction_id()); - break; - } - - if (pending_segment_group.column_pruning_size() != 0) { - if (_num_key_fields != pending_segment_group.column_pruning_size()) { - LOG(WARNING) << "column pruning size is error when load pending data." - << "column_pruning_size=" << pending_segment_group.column_pruning_size() << ", " - << "num_key_fields=" << _num_key_fields; - error_pending_data.insert(segment_group->transaction_id()); - break; - } - std::vector> column_statistics_string(_num_key_fields); - std::vector null_vec(_num_key_fields); - for (size_t j = 0; j < _num_key_fields; ++j) { - ColumnPruning column_pruning = pending_segment_group.column_pruning(j); - column_statistics_string[j].first = column_pruning.min(); - column_statistics_string[j].second = column_pruning.max(); - if (column_pruning.has_null_flag()) { - null_vec[j] = column_pruning.null_flag(); - } else { - null_vec[j] = false; - } - } - - if (segment_group->add_column_statistics(column_statistics_string, null_vec) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to set column statistics when load pending data"; - error_pending_data.insert(pending_delta.transaction_id()); - break; - } - } - - if (segment_group->load() != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load segment_group when load pending data." - << "table=" << full_name() << ", transaction_id=" << pending_delta.transaction_id(); - error_pending_data.insert(pending_delta.transaction_id()); - break; - } - } - - if (error_pending_data.find(pending_delta.transaction_id()) != error_pending_data.end()) { - continue; - } - - VLOG(3) << "load pending data successfully. 
table=" << full_name() << ", " - << "partition_id=" << pending_delta.partition_id() << ", " - << "transaction_id=" << pending_delta.transaction_id(); - } - - LOG(INFO) << "finish to load pending data. table=" << full_name() << ", " - << "error_data_size=" << error_pending_data.size(); - - for (int64_t error_data : error_pending_data) { - delete_pending_data(error_data); - } -} - -// 1. need to replace local data if same version existed -// 2. move pending data to version data -// 3. move pending data to incremental data, it won't be merged, so we can do incremental clone -OLAPStatus OLAPTable::publish_version(int64_t transaction_id, Version version, - VersionHash version_hash) { - OLAPStatus lock_status = _migration_lock.tryrdlock(); - if (lock_status != OLAP_SUCCESS) { - return lock_status; - } else { - OLAPStatus publish_status = _publish_version(transaction_id, version, version_hash); - _migration_lock.unlock(); - return publish_status; - } -} - -OLAPStatus OLAPTable::_publish_version(int64_t transaction_id, Version version, - VersionHash version_hash) { - WriteLock wrlock(&_header_lock); - if (_pending_data_sources.find(transaction_id) == _pending_data_sources.end()) { - LOG(WARNING) << "pending data not exists in tablet, not finished or deleted." - << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id; - return OLAP_ERR_TRANSACTION_NOT_EXIST; - } - RETURN_NOT_OK(_handle_existed_version(transaction_id, version, version_hash)); - std::vector index_vec; - vector linked_files; - OLAPStatus res = OLAP_SUCCESS; - for (SegmentGroup* segment_group : _pending_data_sources[transaction_id]) { - int32_t segment_group_id = segment_group->segment_group_id(); - for (int32_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { - std::string pending_index_path = segment_group->construct_index_file_path(segment_group_id, seg_id); - std::string index_path = construct_index_file_path(version, version_hash, segment_group_id, seg_id); - res = _create_hard_link(pending_index_path, index_path, &linked_files); - if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - - std::string pending_data_path = segment_group->construct_data_file_path(segment_group_id, seg_id); - std::string data_path = construct_data_file_path(version, version_hash, segment_group_id, seg_id); - res = _create_hard_link(pending_data_path, data_path, &linked_files); - if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - } - - segment_group->publish_version(version, version_hash); - index_vec.push_back(segment_group); - } - - res = register_data_source(index_vec); - if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - - const PPendingDelta* pending_delta = _header->get_pending_delta(transaction_id); - if (pending_delta->has_delete_condition()) { - const DeleteConditionMessage& delete_condition = pending_delta->delete_condition(); - _header->add_delete_condition(delete_condition, version.first); - } - - // add incremental version, if failed, ignore it - res = _add_incremental_data(index_vec, transaction_id, version, version_hash); - VLOG(3) << "finish to add incremental version. res=" << res << ", " - << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id << ", " - << "version=" << version.first << "-" << version.second; - - // save header - res = save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header when publish version. 
res=" << res << ", " - << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id; - return res; - } - - _header->delete_pending_delta(transaction_id); - res = save_header(); - if (res != OLAP_SUCCESS) { - remove_files(linked_files); - LOG(FATAL) << "fail to save header when publish version. res=" << res << ", " - << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id; - return res; - } - for (SegmentGroup* segment_group : _pending_data_sources[transaction_id]) { - segment_group->delete_all_files(); - segment_group->set_pending_finished(); - } - _pending_data_sources.erase(transaction_id); - - return res; -} - -// 1. if version is same and version_hash different, delete local data, save header -// 2. if version_hash is same or version is merged, publish success, delete transaction, save header -OLAPStatus OLAPTable::_handle_existed_version(int64_t transaction_id, const Version& version, - const VersionHash& version_hash) { - const PDelta* existed_delta = nullptr; - for (int i = 0; i < file_delta_size(); ++i) { - const PDelta* delta = _header->get_delta(i); - if (version.first >= delta->start_version() - && version.second <= delta->end_version()) { - existed_delta = delta; - } - - } - - if (existed_delta == nullptr) { - return OLAP_SUCCESS; - } - - OLAPStatus res = OLAP_SUCCESS; - // if version is same and version_hash different, delete local data - if (existed_delta->start_version() == version.first - && existed_delta->end_version() == version.second - && existed_delta->version_hash() != version_hash) { - LOG(INFO) << "version_hash is different when publish version, delete local data. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - // remove delete condition if current type is PUSH_FOR_DELETE, - // this occurs when user cancel delete_data soon after submit it - bool push_for_delete = false; - res = is_push_for_delete(transaction_id, &push_for_delete); - if (res != OLAP_SUCCESS) { - return res; - } else if (!push_for_delete) { - DeleteConditionHandler del_cond_handler; - OLAPTablePtr olap_table_ptr = - OLAPEngine::get_instance()->get_table(_tablet_id, _schema_hash); - if (olap_table_ptr.get() != nullptr) { - del_cond_handler.delete_cond(olap_table_ptr, version.first, false); - } - } - // delete local data - //SegmentGroup *existed_index = NULL; - std::vector existed_index_vec; - std::vector files_to_remove; - _delete_incremental_data(version, version_hash, &files_to_remove); - res = unregister_data_source(version, &existed_index_vec); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to unregister data when publish version. [table=" << full_name() - << " version=" << version.first << "-" << version.second << " res=" << res << "]"; - return res; - } - // save header - res = save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header when unregister data. [tablet=" << full_name() - << " transaction_id=" << transaction_id << "]"; - } - remove_files(files_to_remove); - // use OLAPEngine to delete this segment_group - if (!existed_index_vec.empty()) { - OLAPEngine *unused_index = OLAPEngine::get_instance(); - for (SegmentGroup* segment_group : existed_index_vec) { - unused_index->add_unused_index(segment_group); - } - } - // if version_hash is same or version is merged, publish success - } else { - LOG(INFO) << "version_hash is same when publish version, publish success. 
[table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - res = OLAP_ERR_PUSH_VERSION_ALREADY_EXIST; - } - return res; -} - -OLAPStatus OLAPTable::_add_incremental_data(std::vector& index_vec, int64_t transaction_id, - const Version& version, const VersionHash& version_hash) { - if (index_vec.empty()) { - LOG(WARNING) << "no parameter when add incremental data. table=" << full_name(); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - // create incremental segment_group's dir - std::string dir_path = construct_incremental_delta_dir_path(); - OLAPStatus res = OLAP_SUCCESS; - if (!check_dir_existed(dir_path)) { - res = create_dirs(dir_path); - if (res != OLAP_SUCCESS && !check_dir_existed(dir_path)) { - LOG(WARNING) << "fail to create segment_group dir. table=" << full_name() << ", " - << " transaction_id=" << transaction_id; - return res; - } - } - std::vector linked_files; - for (SegmentGroup* segment_group : index_vec) { - for (int32_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { - int32_t segment_group_id = segment_group->segment_group_id(); - std::string index_path = segment_group->construct_index_file_path(segment_group_id, seg_id); - std::string incremental_index_path = - construct_incremental_index_file_path(version, version_hash, segment_group_id, seg_id); - res = _create_hard_link(index_path, incremental_index_path, &linked_files); - if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - - std::string data_path = segment_group->construct_data_file_path(segment_group_id, seg_id); - std::string incremental_data_path = - construct_incremental_data_file_path(version, version_hash, segment_group_id, seg_id); - res = _create_hard_link(data_path, incremental_data_path, &linked_files); - if (res != OLAP_SUCCESS) { remove_files(linked_files); return res; } - } - - const std::vector* column_statistics = nullptr; - if (segment_group->has_column_statistics()) { - column_statistics = &(segment_group->get_column_statistics()); - } - res = _header->add_incremental_version( - segment_group->version(), segment_group->version_hash(), - segment_group->segment_group_id(), segment_group->num_segments(), - segment_group->index_size(), segment_group->data_size(), - segment_group->num_rows(), segment_group->empty(), column_statistics); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add incremental data. res=" << res << ", " - << "table=" << full_name() << ", " - << "transaction_id=" << transaction_id << ", " - << "version=" << version.first << "-" << version.second; - remove_files(linked_files); - return res; - } - } - - return res; -} - -bool OLAPTable::has_expired_incremental_data() { - bool exist = false; - time_t now = time(NULL); - ReadLock rdlock(&_header_lock); - for (auto& it : _header->incremental_delta()) { - double diff = difftime(now, it.creation_time()); - if (diff >= config::incremental_delta_expire_time_sec) { - exist = true; - break; - } - } - return exist; -} - -void OLAPTable::delete_expired_incremental_data() { - time_t now = time(NULL); - std::vector> expired_versions; - std::vector files_to_remove; - WriteLock wrlock(&_header_lock); - for (auto& it : _header->incremental_delta()) { - double diff = difftime(now, it.creation_time()); - if (diff >= config::incremental_delta_expire_time_sec) { - Version version(it.start_version(), it.end_version()); - expired_versions.push_back(std::make_pair(version, it.version_hash())); - VLOG(3) << "find expire incremental segment_group. 
tablet=" << full_name() << ", " - << "version=" << it.start_version() << "-" << it.end_version() << ", " - << "exist_sec=" << diff; - } - } - - if (expired_versions.empty()) { return; } - - for (auto& it : expired_versions) { - _delete_incremental_data(it.first, it.second, &files_to_remove); - } - - if (save_header() != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header when delete expire incremental data." - << "tablet=" << full_name(); - } - remove_files(files_to_remove); -} - -void OLAPTable::_delete_incremental_data(const Version& version, - const VersionHash& version_hash, - vector* files_to_remove) { - const PDelta* incremental_delta = get_incremental_delta(version); - if (incremental_delta == nullptr) { return; } - - vector files_to_delete; - for (const PSegmentGroup& psegment_group : incremental_delta->segment_group()) { - int32_t segment_group_id = psegment_group.segment_group_id(); - for (int seg_id = 0; seg_id < psegment_group.num_segments(); seg_id++) { - std::string incremental_index_path = - construct_incremental_index_file_path(version, version_hash, segment_group_id, seg_id); - files_to_remove->emplace_back(incremental_index_path); - - std::string incremental_data_path = - construct_incremental_data_file_path(version, version_hash, segment_group_id, seg_id); - files_to_remove->emplace_back(incremental_data_path); - } - } - - _header->delete_incremental_delta(version); - VLOG(3) << "delete incremental data. table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second; -} - -void OLAPTable::get_missing_versions_with_header_locked( - int64_t until_version, std::vector* missing_versions) const { - DCHECK(until_version > 0) << "invalid until_version: " << until_version; - std::list existing_versions; - for (int i = 0; i < _header->file_delta_size(); ++i) { - const PDelta* delta = _header->get_delta(i); - existing_versions.emplace_back(delta->start_version(), delta->end_version()); - } - - // sort the existing versions in ascending order - existing_versions.sort([](const Version& a, const Version& b) { - // simple because 2 versions are certainly not overlapping - return a.first < b.first; - }); - - // find the missing version until until_version - int64_t last_version = -1; - for (const Version& version : existing_versions) { - if (version.first > last_version + 1) { - for (int64_t i = last_version + 1; i < version.first; ++i) { - missing_versions->emplace_back(i, i); - } - } - last_version = version.second; - if (until_version <= last_version) { - break; - } - } - for (int64_t i = last_version + 1; i <= until_version; ++i) { - missing_versions->emplace_back(i, i); - } -} - -const PDelta* OLAPTable::least_complete_version( - const vector& missing_versions) const { - - const PDelta* least_delta = nullptr; - if (!missing_versions.empty()) { - Version version = missing_versions.front(); - for (int i = 0; i < _header->file_delta_size(); ++i) { - const PDelta* delta = _header->get_delta(i); - if (delta->end_version() == version.first - 1) { - LOG(INFO) << "find least complete version. 
table=" << full_name() << ", " - << "version=" << delta->start_version() << "-" << delta->end_version() << ", " - << "version_hash=" << delta->version_hash() << ", " - << "first_missing_version=" << version.first << "-" << version.second; - least_delta = delta; - break; - } - } - } else { - least_delta = lastest_version(); - } - - return least_delta; -} - -OLAPStatus OLAPTable::is_push_for_delete( - int64_t transaction_id, bool* is_push_for_delete) const { - - const PPendingDelta* pending_delta = _header->get_pending_delta(transaction_id); - if (pending_delta == nullptr) { - LOG(WARNING) << "pending segment_group not found when check push for delete. [table=" << full_name() - << " transaction_id=" << transaction_id << "]"; - return OLAP_ERR_TRANSACTION_NOT_EXIST; - } - *is_push_for_delete = pending_delta->has_delete_condition(); - return OLAP_SUCCESS; -} - -SegmentGroup* OLAPTable::_construct_segment_group_from_version(const PDelta* delta, int32_t segment_group_id) { - VLOG(3) << "begin to construct segment_group from version." - << "table=" << full_name() << ", " - << "version=" << delta->start_version() << "-" << delta->end_version() << ", " - << "version_hash=" << delta->version_hash(); - Version version(delta->start_version(), delta->end_version()); - const PSegmentGroup* psegment_group = nullptr; - if (segment_group_id == -1) { - // Previous FileVersionMessage will be convert to PDelta and PSegmentGroup. - // In PSegmentGroup, this is segment_group_id is set to minus one. - // When to get it, should used segment_group + 1 as index. - psegment_group = &(delta->segment_group().Get(segment_group_id + 1)); - } else { - psegment_group = &(delta->segment_group().Get(segment_group_id)); - } - SegmentGroup* segment_group = new SegmentGroup(this, version, delta->version_hash(), - false, segment_group_id, psegment_group->num_segments()); - if (psegment_group->has_empty()) { - segment_group->set_empty(psegment_group->empty()); - } - DCHECK(segment_group != nullptr) << "malloc error when construct segment_group." - << "table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << delta->version_hash(); - OLAPStatus res = segment_group->validate(); - if (res != OLAP_SUCCESS) { - SAFE_DELETE(segment_group); - return nullptr; - } - - if (psegment_group->column_pruning_size() != 0) { - if (_num_key_fields != psegment_group->column_pruning_size()) { - LOG(WARNING) << "column pruning size error, " << "table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << delta->version_hash() << ", " - << "column_pruning_size=" << psegment_group->column_pruning_size() << ", " - << "num_key_fields=" << _num_key_fields; - SAFE_DELETE(segment_group); - return nullptr; - } - vector> column_statistic_strings(_num_key_fields); - std::vector null_vec(_num_key_fields); - for (size_t j = 0; j < _num_key_fields; ++j) { - ColumnPruning column_pruning = psegment_group->column_pruning(j); - column_statistic_strings[j].first = column_pruning.min(); - column_statistic_strings[j].second = column_pruning.max(); - if (column_pruning.has_null_flag()) { - null_vec[j] = column_pruning.null_flag(); - } else { - null_vec[j] = false; - } - } - - res = segment_group->add_column_statistics(column_statistic_strings, null_vec); - if (res != OLAP_SUCCESS) { - SAFE_DELETE(segment_group); - return nullptr; - } - } - - res = segment_group->load(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load segment_group. 
res=" << res << ", " - << "table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << delta->version_hash(); - SAFE_DELETE(segment_group); - return nullptr; - } - - VLOG(3) << "finish to construct segment_group from version." - << "table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second; - return segment_group; -} - -OLAPStatus OLAPTable::_create_hard_link(const string& from, const string& to, - vector* linked_success_files) { - if (link(from.c_str(), to.c_str()) != 0) { - LOG(WARNING) << "fail to create hard link. from=" << from << ", " - << "to=" << to << ", " << "errno=" << Errno::no(); - return OLAP_ERR_OS_ERROR; - } - linked_success_files->push_back(to); - VLOG(3) << "success to create hard link. [from=" << from << " to=" << to << "]"; - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::clone_data(const OLAPHeader& clone_header, - const vector& clone_deltas, - const vector& versions_to_delete) { - LOG(INFO) << "begin to clone data to tablet. table=" << full_name() << ", " - << "clone_versions_size=" << clone_deltas.size() << ", " - << "versions_to_delete_size=" << versions_to_delete.size(); - OLAPStatus res = OLAP_SUCCESS; - version_olap_index_map_t tmp_data_sources; - - do { - // load new local header to operate on - OLAPHeader new_local_header; - OlapHeaderManager::get_header(_store, _tablet_id, _schema_hash, &new_local_header); - - // delete versions from new local header - for (const Version& version : versions_to_delete) { - res = new_local_header.delete_version(version); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to delete version from new local header. [table=" << full_name() - << " version=" << version.first << "-" << version.second << "]"; - break; - } - if (new_local_header.is_delete_data_version(version)) { - new_local_header.delete_cond_by_version(version); - } - LOG(INFO) << "delete version from new local header when clone. [table='" << full_name() - << "', version=" << version.first << "-" << version.second << "]"; - } - - if (res != OLAP_SUCCESS) { - break; - } - - for (const PDelta* clone_delta : clone_deltas) { - Version version(clone_delta->start_version(), - clone_delta->end_version()); - - // construct new segment_group - for (const PSegmentGroup& psegment_group : clone_delta->segment_group()) { - SegmentGroup* tmp_segment_group = _construct_segment_group_from_version(clone_delta, psegment_group.segment_group_id()); - if (tmp_segment_group == NULL) { - LOG(WARNING) << "fail to construct segment_group when clone data. table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << clone_delta->version_hash(); - res = OLAP_ERR_INDEX_LOAD_ERROR; - break; - } - - tmp_data_sources[version].push_back(tmp_segment_group); - - // add version to new local header - const std::vector* column_statistics = nullptr; - if (tmp_segment_group->has_column_statistics()) { - column_statistics = &(tmp_segment_group->get_column_statistics()); - } - res = new_local_header.add_version(version, tmp_segment_group->version_hash(), - tmp_segment_group->segment_group_id(), - tmp_segment_group->num_segments(), - tmp_segment_group->index_size(), - tmp_segment_group->data_size(), - tmp_segment_group->num_rows(), - tmp_segment_group->empty(), - column_statistics); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add version to new local header when clone." 
- << "res=" << res << ", " - << "table=" << full_name() << ", " - << "version=" << version.first << "-" << version.second << ", " - << "version_hash=" << clone_delta->version_hash(); - break; - } - } - - if (res != OLAP_SUCCESS) { break; } - - // add delete conditions to new local header, if it exists in clone_header - if (version.first == version.second) { - for (google::protobuf::RepeatedPtrField::const_iterator it - = clone_header.delete_data_conditions().begin(); - it != clone_header.delete_data_conditions().end(); ++it) { - if (it->version() == version.first) { - // add it - new_local_header.add_delete_condition(*it, version.first); - LOG(INFO) << "add delete condition when clone. [table=" << full_name() - << " version=" << it->version() << "]"; - break; - } - } - } - } - - if (res != OLAP_SUCCESS) { - break; - } - VLOG(3) << "load indices successfully when clone. table=" << full_name() << ", " - << "add_versions_size=" << clone_deltas.size() << ", " - << "new_indices_size=" << tmp_data_sources.size(); - // save and reload header - res = OlapHeaderManager::save(_store, _tablet_id, _schema_hash, &new_local_header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to save new local header when clone. res:" << res; - break; - } - res = OlapHeaderManager::get_header(_store, _tablet_id, _schema_hash, _header); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to reload original header when clone. [table=" << full_name() - << " res=" << res << "]"; - break; - } - - } while (0); - - // if success, update local data sources - if (res == OLAP_SUCCESS) { - - // delete local data source - for (const Version& version_to_delete : versions_to_delete) { - version_olap_index_map_t::iterator it = _data_sources.find(version_to_delete); - if (it != _data_sources.end()) { - std::vector index_to_delete_vec = it->second; - _data_sources.erase(it); - OLAPEngine* unused_index = OLAPEngine::get_instance(); - for (SegmentGroup* segment_group : index_to_delete_vec) { - unused_index->add_unused_index(segment_group); - } - } - } - - // add new data source - for (auto& it : tmp_data_sources) { - for (SegmentGroup* segment_group : it.second) { - _data_sources[segment_group->version()].push_back(segment_group); - } - } - - // clear tmp indices if failed - } else { - for (auto& it : tmp_data_sources) { - for (SegmentGroup* segment_group : it.second) { - SAFE_DELETE(segment_group); - } - } - } - - LOG(INFO) << "finish to clone data to tablet. res=" << res << ", " - << "table=" << full_name() << ", " - << "clone_versions_size=" << clone_deltas.size(); - return res; -} - -OLAPStatus OLAPTable::replace_data_sources(const vector* old_versions, - const vector* new_data_sources, - vector* old_data_sources) { - OLAPStatus res = OLAP_SUCCESS; - - if (old_versions == NULL || new_data_sources == NULL) { - LOG(WARNING) << "parameter old_versions or new_data_sources is null. table=" << full_name(); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - old_data_sources->clear(); - - // check old version existed - for (vector::const_iterator it = old_versions->begin(); - it != old_versions->end(); ++it) { - version_olap_index_map_t::iterator data_source_it = _data_sources.find(*it); - if (data_source_it == _data_sources.end()) { - LOG(WARNING) << "olap segment_group for version does not exists. 
[version='" << it->first - << "-" << it->second << "' table='" << full_name() << "']"; - return OLAP_ERR_VERSION_NOT_EXIST; - } - } - - // check new versions not existed - for (vector::const_iterator it = new_data_sources->begin(); - it != new_data_sources->end(); ++it) { - if (_data_sources.find((*it)->version()) != _data_sources.end()) { - bool to_be_deleted = false; - - - for (vector::const_iterator old_it = old_versions->begin(); - old_it != old_versions->end(); ++old_it) { - if (*old_it == (*it)->version()) { - to_be_deleted = true; - break; - } - } - - if (!to_be_deleted) { - LOG(WARNING) << "olap segment_group for version exists. [version='" << (*it)->version().first - << "-" << (*it)->version().second << "' table='" << full_name() << "']"; - return OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR; - } - } - } - - // update versions - for (vector::const_iterator it = old_versions->begin(); - it != old_versions->end(); ++it) { - version_olap_index_map_t::iterator data_source_it = _data_sources.find(*it); - if (data_source_it != _data_sources.end()) { - for (SegmentGroup* segment_group : data_source_it->second) { - old_data_sources->push_back(segment_group); - } - _data_sources.erase(data_source_it); - } - - // 删除失败会导致脏数据 - if ((res = _header->delete_version(*it)) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to delete version from olap header.[version='" << it->first - << "-" << it->second << "' table='" << full_name() << "']"; - return res; - } - - VLOG(3) << "delete version from olap header. table=" << full_name() << ", " - << "version=" << it->first << "-" << it->second; - } - - for (vector::const_iterator it = new_data_sources->begin(); - it != new_data_sources->end(); ++it) { - _data_sources[(*it)->version()].push_back(*it); - - // 新增失败会导致脏数据 - const std::vector* column_statistics = nullptr; - if ((*it)->has_column_statistics()) { - column_statistics = &((*it)->get_column_statistics()); - } - res = _header->add_version((*it)->version(), (*it)->version_hash(), - (*it)->segment_group_id(), (*it)->num_segments(), - (*it)->index_size(), (*it)->data_size(), - (*it)->num_rows(), (*it)->empty(), column_statistics); - - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to add version to olap header.[version='" << (*it)->version().first - << "-" << (*it)->version().second << "' table='" << full_name() << "']"; - return res; - } - - VLOG(3) << "add version to olap header. table=" << full_name() << ", " - << "version=" << (*it)->version().first << "-" << (*it)->version().second; - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::compute_all_versions_hash(const vector& versions, - VersionHash* version_hash) const { - if (version_hash == NULL) { - OLAP_LOG_WARNING("invalid parameter: 'new_version_hash' is null."); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - *version_hash = 0L; - - for (vector::const_iterator version_index = versions.begin(); - version_index != versions.end(); ++version_index) { - version_olap_index_map_t::const_iterator temp = _data_sources.find(*version_index); - if (temp == _data_sources.end()) { - OLAP_LOG_WARNING("fail to find SegmentGroup." 
- "[start_version=%d; end_version=%d]", - version_index->first, - version_index->second); - return OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR; - } - - *version_hash ^= temp->second[0]->version_hash(); - } - - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::merge_header(const OLAPHeader& hdr, int to_version) { - obtain_header_wrlock(); - DeferOp release_lock(std::bind(&OLAPTable::release_header_lock, this)); - - const PDelta* base_version = _header->get_base_version(); - if (base_version->end_version() != to_version) { - return OLAP_ERR_VERSION_NOT_EXIST; - } - - // delete old base version - Version base = { base_version->start_version(), base_version->end_version() }; - OLAPStatus st = _header->delete_version(base); - if (st != OLAP_SUCCESS) { - LOG(WARNING) << "failed to delete version from header" << ", " - << "version=" << base_version->start_version() << ", " - << base_version->end_version(); - return st; - } - VLOG(3) << "finished to delete version from header" - << "version=" << base_version->start_version() << "-" - << base_version->end_version(); - - - // add new versions - for (int i = 0; i < hdr.file_delta_size(); ++i) { - const PDelta* delta = hdr.get_delta(i); - if (delta->end_version() > to_version) { - break; - } - Version version = { delta->start_version(), delta->end_version() }; - VersionHash v_hash = delta->version_hash(); - for (int j = 0; j < delta->segment_group_size(); ++j) { - const PSegmentGroup& psegment_group = delta->segment_group(j); - st = _header->add_version(version, v_hash, psegment_group.segment_group_id(), - psegment_group.num_segments(), psegment_group.index_size(), psegment_group.data_size(), - psegment_group.num_rows(), psegment_group.empty(), nullptr); - if (st != OLAP_SUCCESS) { - LOG(WARNING) << "failed to add version to header" << ", " - << "version=" << version.first << "-" << version.second; - return st; - } - } - } - st = _header->save(); - if (st != OLAP_SUCCESS) { - LOG(FATAL) << "failed to save header when merging. tablet:" << _tablet_id; - return st; - } - - VLOG(3) << "finished to merge header to version:" << to_version << "-" << to_version; - return OLAP_SUCCESS; -} - -SegmentGroup* OLAPTable::_get_largest_index() { - SegmentGroup* largest_index = NULL; - size_t largest_index_sizes = 0; - - for (auto& it : _data_sources) { - // use segment_group of base file as target segment_group when base is not empty, - // or try to find the biggest segment_group. 
- for (SegmentGroup* segment_group : it.second) {
- if (segment_group->empty() || segment_group->zero_num_rows()) {
- continue;
- }
- if (segment_group->index_size() > largest_index_sizes) {
- largest_index = segment_group;
- largest_index_sizes = segment_group->index_size();
- }
- }
- }
-
- return largest_index;
-}
-
-OLAPStatus OLAPTable::split_range(
- const OlapTuple& start_key_strings,
- const OlapTuple& end_key_strings,
- uint64_t request_block_row_count,
- std::vector* ranges) {
- if (ranges == NULL) {
- OLAP_LOG_WARNING("parameter end_row is null.");
- return OLAP_ERR_INPUT_PARAMETER_ERROR;
- }
-
- EntrySlice entry;
- RowCursor start_key;
- RowCursor end_key;
- RowCursor helper_cursor;
- RowBlockPosition start_pos;
- RowBlockPosition end_pos;
- RowBlockPosition step_pos;
-
- // this helper cursor only assists the lookups below; its contents are unpredictable and must not be used directly
- if (helper_cursor.init(_tablet_schema, num_short_key_fields()) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail to parse strings to key with RowCursor type.");
- return OLAP_ERR_INVALID_SCHEMA;
- }
-
- // if a start key is given, initialize with it; otherwise initialize with the min key
- if (start_key_strings.size() > 0) {
- if (start_key.init_scan_key(_tablet_schema, start_key_strings.values()) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail to initial key strings with RowCursor type.");
- return OLAP_ERR_INIT_FAILED;
- }
-
- if (start_key.from_tuple(start_key_strings) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("init end key failed");
- return OLAP_ERR_INVALID_SCHEMA;
- }
- } else {
- if (start_key.init(_tablet_schema, num_short_key_fields()) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail to initial key strings with RowCursor type.");
- return OLAP_ERR_INIT_FAILED;
- }
-
- start_key.allocate_memory_for_string_type(_tablet_schema);
- start_key.build_min_key();
- }
-
- // handle the end key the same way; if absent, initialize with the max key
- if (end_key_strings.size() > 0) {
- if (OLAP_SUCCESS != end_key.init_scan_key(_tablet_schema, end_key_strings.values())) {
- OLAP_LOG_WARNING("fail to parse strings to key with RowCursor type.");
- return OLAP_ERR_INVALID_SCHEMA;
- }
-
- if (end_key.from_tuple(end_key_strings) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("init end key failed");
- return OLAP_ERR_INVALID_SCHEMA;
- }
- } else {
- if (end_key.init(_tablet_schema, num_short_key_fields()) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail to initial key strings with RowCursor type.");
- return OLAP_ERR_INIT_FAILED;
- }
-
- end_key.allocate_memory_for_string_type(_tablet_schema);
- end_key.build_max_key();
- }
-
- ReadLock rdlock(get_header_lock_ptr());
- SegmentGroup* base_index = _get_largest_index();
-
- // if no suitable segment_group is found, just return start_key and end_key
- if (base_index == NULL) {
- VLOG(3) << "there is no base file now, may be tablet is empty.";
- // it may be right if the table is empty, so we return success.
- ranges->emplace_back(start_key.to_tuple());
- ranges->emplace_back(end_key.to_tuple());
- return OLAP_SUCCESS;
- }
-
- uint64_t expected_rows = request_block_row_count
- / base_index->current_num_rows_per_row_block();
- if (expected_rows == 0) {
- OLAP_LOG_WARNING("expected_rows less than 1. 
[request_block_row_count = '%d']",
- request_block_row_count);
- return OLAP_ERR_TABLE_NOT_FOUND;
- }
-
- // locate the starting position that corresponds to start_key
- if (base_index->find_short_key(start_key, &helper_cursor, false, &start_pos) != OLAP_SUCCESS) {
- if (base_index->find_first_row_block(&start_pos) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail to get first block pos");
- return OLAP_ERR_TABLE_INDEX_FIND_ERROR;
- }
- }
-
- step_pos = start_pos;
- VLOG(3) << "start_pos=" << start_pos.segment << ", " << start_pos.index_offset;
-
- //find last row_block if end_key is given, or using last_row_block
- if (base_index->find_short_key(end_key, &helper_cursor, false, &end_pos) != OLAP_SUCCESS) {
- if (base_index->find_last_row_block(&end_pos) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail find last row block.");
- return OLAP_ERR_TABLE_INDEX_FIND_ERROR;
- }
- }
-
- VLOG(3) << "end_pos=" << end_pos.segment << ", " << end_pos.index_offset;
-
- //get rows between first and last
- OLAPStatus res = OLAP_SUCCESS;
- RowCursor cur_start_key;
- RowCursor last_start_key;
-
- if (cur_start_key.init(_tablet_schema, num_short_key_fields()) != OLAP_SUCCESS
- || last_start_key.init(_tablet_schema, num_short_key_fields()) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("fail to init cursor");
- return OLAP_ERR_INIT_FAILED;
- }
-
- if (base_index->get_row_block_entry(start_pos, &entry) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("get block entry failed.");
- return OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION;
- }
-
- cur_start_key.attach(entry.data);
- last_start_key.allocate_memory_for_string_type(_tablet_schema);
- last_start_key.copy_without_pool(cur_start_key);
- // start_key is the last start_key, but what is returned is actually the key given by the query layer
- ranges->emplace_back(start_key.to_tuple());
-
- while (end_pos > step_pos) {
- res = base_index->advance_row_block(expected_rows, &step_pos);
- if (res == OLAP_ERR_INDEX_EOF || !(end_pos > step_pos)) {
- break;
- } else if (res != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("advance_row_block failed.");
- return OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION;
- }
-
- if (base_index->get_row_block_entry(step_pos, &entry) != OLAP_SUCCESS) {
- OLAP_LOG_WARNING("get block entry failed.");
- return OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION;
- }
- cur_start_key.attach(entry.data);
-
- if (cur_start_key.cmp(last_start_key) != 0) {
- ranges->emplace_back(cur_start_key.to_tuple()); // end of last section
- ranges->emplace_back(cur_start_key.to_tuple()); // start a new section
- last_start_key.copy_without_pool(cur_start_key);
- }
- }
-
- ranges->emplace_back(end_key.to_tuple());
-
- return OLAP_SUCCESS;
-}
-
-void OLAPTable::list_data_files(set* file_names) const {
- _list_files_with_suffix("dat", file_names);
-}
-
-void OLAPTable::list_index_files(set* file_names) const {
- _list_files_with_suffix("idx", file_names);
-}
-
-void OLAPTable::_list_files_with_suffix(const string& file_suffix, set* file_names) const {
- if (file_names == NULL) {
- LOG(WARNING) << "parameter filenames is null. [table='" << full_name() << "']";
- return;
- }
-
- file_names->clear();
-
- stringstream prefix_stream;
- prefix_stream << _tablet_path << "/" << _tablet_id;
- string tablet_path_prefix = prefix_stream.str();
- for (auto& it : _data_sources) {
- // every data segment has its file name. 
- for (SegmentGroup* segment_group : it.second) { - for (int32_t seg_id = 0; seg_id < segment_group->num_segments(); ++seg_id) { - file_names->insert(basename(construct_file_path(tablet_path_prefix, - segment_group->version(), - segment_group->version_hash(), - segment_group->segment_group_id(), - seg_id, - file_suffix).c_str())); - } - } - } -} - -bool OLAPTable::has_segment_group(const Version& version, const SegmentGroup* new_segment_group) const { - auto it = _data_sources.find(version); - if (it == _data_sources.end()) { - return false; - } - bool exist = false; - for (auto segment_group : it->second) { - if (segment_group->segment_group_id() == new_segment_group->segment_group_id()) { - exist = true; - break; - } - } - return exist; -} - -void OLAPTable::list_versions(vector* versions) const { - if (versions == NULL) { - OLAP_LOG_WARNING("parameter versions is null."); - return; - } - - versions->clear(); - - // versions vector is not sorted. - version_olap_index_map_t::const_iterator it; - for (it = _data_sources.begin(); it != _data_sources.end(); ++it) { - versions->push_back(it->first); - } -} - -void OLAPTable::list_version_entities(vector* version_entities) const { - if (version_entities == NULL) { - OLAP_LOG_WARNING("parameter versions is null."); - return; - } - - version_entities->clear(); - - // version_entities vector is not sorted. - version_olap_index_map_t::const_iterator it; - for (it = _data_sources.begin(); it != _data_sources.end(); ++it) { - const std::vector& index_vec = it->second; - VersionEntity version_entity(it->first, index_vec[0]->version_hash()); - for (SegmentGroup* segment_group : index_vec) { - const std::vector* column_statistics = nullptr; - if (segment_group->has_column_statistics()) { - column_statistics = &(segment_group->get_column_statistics()); - } - SegmentGroupEntity segment_group_entity(segment_group->segment_group_id(), segment_group->num_segments(), - segment_group->num_rows(), segment_group->data_size(), - segment_group->index_size(), segment_group->empty(), column_statistics); - version_entity.add_segment_group_entity(segment_group_entity); - } - version_entities->push_back(version_entity); - } -} - -void OLAPTable::delete_all_files() { - // Release resources like memory and disk space. - // we have to call list_versions first, or else error occurs when - // removing hash_map item and iterating hash_map concurrently. - vector versions; - list_versions(&versions); - - // remove indices and data files, release related resources. - for (vector::const_iterator it = versions.begin(); it != versions.end(); ++it) { - std::vector index_vec; - if (unregister_data_source(*it, &index_vec) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to unregister version." - << "version=" << it->first << "-" << it->second; - return; - } - - for (SegmentGroup* segment_group : index_vec) { - segment_group->delete_all_files(); - delete segment_group; - } - } - - // remove olap header file, _header object will be delete in OLAPTable.destructor - if (remove_parent_dir(_tablet_path) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to delete header file and directory. 
header_path=" << _tablet_path; - } -} - -string OLAPTable::construct_index_file_path(const Version& version, - VersionHash version_hash, - int32_t segment_group_id, - int32_t segment) const { - stringstream prefix_stream; - prefix_stream << _tablet_path << "/" << _tablet_id; - string tablet_path_prefix = prefix_stream.str(); - return construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "idx"); -} -string OLAPTable::construct_data_file_path(const Version& version, - VersionHash version_hash, - int32_t segment_group_id, - int32_t segment) const { - stringstream prefix_stream; - prefix_stream << _tablet_path << "/" << _tablet_id; - string tablet_path_prefix = prefix_stream.str(); - return construct_file_path(tablet_path_prefix, version, version_hash, segment_group_id, segment, "dat"); -} -string OLAPTable::construct_file_path(const string& tablet_path_prefix, - const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment, - const string& suffix) { - char file_path[OLAP_MAX_PATH_LEN]; - if (segment_group_id == -1) { - snprintf(file_path, - sizeof(file_path), - "%s_%ld_%ld_%ld_%d.%s", - tablet_path_prefix.c_str(), - version.first, - version.second, - version_hash, - segment, - suffix.c_str()); - } else { - snprintf(file_path, - sizeof(file_path), - "%s_%ld_%ld_%ld_%d_%d.%s", - tablet_path_prefix.c_str(), - version.first, - version.second, - version_hash, - segment_group_id, segment, - suffix.c_str()); - } - - return file_path; -} - -string OLAPTable::construct_incremental_delta_dir_path() const { - stringstream segment_group_dir_path; - segment_group_dir_path << _tablet_path << INCREMENTAL_DELTA_PREFIX; - - return segment_group_dir_path.str(); -} -string OLAPTable::construct_incremental_index_file_path(Version version, VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const { - string segment_group_dir_path = construct_incremental_delta_dir_path(); - stringstream segment_group_file_path; - segment_group_file_path << segment_group_dir_path << "/" - << construct_file_name(version, version_hash, segment_group_id, segment, "idx"); - return segment_group_file_path.str(); -} -string OLAPTable::construct_incremental_data_file_path(Version version, VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const { - string segment_group_dir_path = construct_incremental_delta_dir_path(); - stringstream segment_group_file_path; - segment_group_file_path << segment_group_dir_path << "/" - << construct_file_name(version, version_hash, segment_group_id, segment, "dat"); - return segment_group_file_path.str(); -} -string OLAPTable::construct_pending_data_dir_path() const { - return _tablet_path + PENDING_DELTA_PREFIX; -} -string OLAPTable::construct_pending_index_file_path(TTransactionId transaction_id, - int32_t segment_group_id, int32_t segment) const { - string dir_path = construct_pending_data_dir_path(); - stringstream file_path; - file_path << dir_path << "/" - << transaction_id << "_" - << segment_group_id << "_" << segment << ".idx"; - - return file_path.str(); -} -string OLAPTable::construct_pending_data_file_path(TTransactionId transaction_id, - int32_t segment_group_id, int32_t segment) const { - string dir_path = construct_pending_data_dir_path(); - stringstream file_path; - file_path << dir_path << "/" - << transaction_id << "_" - << segment_group_id << "_" << segment << ".dat"; - - return file_path.str(); -} - -string OLAPTable::construct_file_name(const Version& version, - VersionHash 
version_hash, - int32_t segment_group_id, int32_t segment, - const string& suffix) const { - char file_name[OLAP_MAX_PATH_LEN]; - snprintf(file_name, sizeof(file_name), - "%ld_%ld_%ld_%ld_%d_%d.%s", - _tablet_id, - version.first, - version.second, - version_hash, - segment_group_id, - segment, - suffix.c_str()); - - return file_name; -} - -int32_t OLAPTable::get_field_index(const string& field_name) const { - field_index_map_t::const_iterator res_iterator = _field_index_map.find(field_name); - if (res_iterator == _field_index_map.end()) { - LOG(WARNING) << "invalid field name. [name='" << field_name << "']"; - return -1; - } - - return res_iterator->second; -} - -size_t OLAPTable::get_field_size(const string& field_name) const { - field_index_map_t::const_iterator res_iterator = _field_index_map.find(field_name); - if (res_iterator == _field_index_map.end()) { - LOG(WARNING) << "invalid field name. [name='" << field_name << "']"; - return 0; - } - - if (static_cast(res_iterator->second) >= _field_sizes.size()) { - LOG(WARNING) << "invalid field segment_group. [name='" << field_name << "']"; - return 0; - } - - return _field_sizes[res_iterator->second]; -} - -size_t OLAPTable::get_return_column_size(const string& field_name) const { - field_index_map_t::const_iterator res_iterator = _field_index_map.find(field_name); - if (res_iterator == _field_index_map.end()) { - LOG(WARNING) << "invalid field name. [name='" << field_name << "']"; - return 0; - } - - if (static_cast(res_iterator->second) >= _field_sizes.size()) { - LOG(WARNING) << "invalid field segment_group. [name='" << field_name << "']"; - return 0; - } - - if (_tablet_schema[res_iterator->second].type == OLAP_FIELD_TYPE_VARCHAR || - _tablet_schema[res_iterator->second].type == OLAP_FIELD_TYPE_HLL) { - return 0; - } - - return _field_sizes[res_iterator->second]; -} - - -size_t OLAPTable::get_row_size() const { - size_t size = 0u; - vector::const_iterator it; - for (it = _field_sizes.begin(); it != _field_sizes.end(); ++it) { - size += *it; - } - size += (_num_fields + 7) / 8; - - return size; -} - -int64_t OLAPTable::get_data_size() const { - int64_t total_size = 0; - for (const PDelta& delta : _header->delta()) { - for (const PSegmentGroup& psegment_group : delta.segment_group()) { - total_size += psegment_group.data_size(); - } - } - - return total_size; -} - -int64_t OLAPTable::get_num_rows() const { - int64_t num_rows = 0; - for (const PDelta& delta : _header->delta()) { - for (const PSegmentGroup& psegment_group : delta.segment_group()) { - num_rows += psegment_group.num_rows(); - } - } - - return num_rows; -} - -bool OLAPTable::is_load_delete_version(Version version) { - version_olap_index_map_t::iterator it = _data_sources.find(version); - return it->second[0]->delete_flag(); -} - -bool OLAPTable::is_schema_changing() { - bool is_schema_changing = false; - - obtain_header_rdlock(); - if (_header->has_schema_change_status()) { - is_schema_changing = true; - } - release_header_lock(); - - return is_schema_changing; -} - -bool OLAPTable::get_schema_change_request(TTabletId* tablet_id, - SchemaHash* schema_hash, - vector* versions_to_changed, - AlterTabletType* alter_table_type) const { - if (!_header->has_schema_change_status()) { - return false; - } - - const SchemaChangeStatusMessage& schema_change_status = _header->schema_change_status(); - - (tablet_id == NULL || (*tablet_id = schema_change_status.related_tablet_id())); - (schema_hash == NULL || (*schema_hash = schema_change_status.related_schema_hash())); - (alter_table_type 
== NULL || (*alter_table_type = - static_cast(schema_change_status.schema_change_type()))); - - if (versions_to_changed != NULL) { - versions_to_changed->clear(); - for (int i = 0, len = schema_change_status.versions_to_changed_size(); i < len; ++i) { - const PDelta& version = schema_change_status.versions_to_changed(i); - versions_to_changed->push_back( - Version(version.start_version(), version.end_version())); - } - } - - return true; -} - -void OLAPTable::set_schema_change_request(TTabletId tablet_id, - TSchemaHash schema_hash, - const vector& versions_to_changed, - const AlterTabletType alter_table_type) { - clear_schema_change_request(); - - SchemaChangeStatusMessage* schema_change_status = _header->mutable_schema_change_status(); - schema_change_status->set_related_tablet_id(tablet_id); - schema_change_status->set_related_schema_hash(schema_hash); - - vector::const_iterator it; - for (it = versions_to_changed.begin(); it != versions_to_changed.end(); ++it) { - PDelta* version = schema_change_status->add_versions_to_changed(); - version->set_start_version(it->first); - version->set_end_version(it->second); - version->set_version_hash(0); - version->set_creation_time(0); - //version->set_index_size(0); - //version->set_data_size(0); - //version->set_num_segments(0); - } - - schema_change_status->set_schema_change_type(alter_table_type); -} - -bool OLAPTable::remove_last_schema_change_version(OLAPTablePtr new_olap_table) { - if (!_header->has_schema_change_status()) { - return false; - } - - if (_header->has_schema_change_status()) { - SchemaChangeStatusMessage* schema_change_status = _header->mutable_schema_change_status(); - ::google::protobuf::RepeatedPtrField* versions_to_changed - = schema_change_status->mutable_versions_to_changed(); - - if (versions_to_changed->size() > 0) { - versions_to_changed->RemoveLast(); - } - } - - return true; -} - -void OLAPTable::clear_schema_change_request() { - LOG(INFO) << "clear schema change status. 
[tablet='" << _full_name << "']"; - _header->clear_schema_change_status(); -} - -void OLAPTable::set_io_error() { - OLAP_LOG_WARNING("io error occur.[tablet_full_name='%s', root_path_name='%s']", - _full_name.c_str(), - _storage_root_path.c_str()); - OLAPEngine::get_instance()->set_store_used_flag(_storage_root_path, false); -} - -bool OLAPTable::is_used() { - return !_is_bad && _store->is_used(); -} - -VersionEntity OLAPTable::get_version_entity_by_version(const Version& version) { - std::vector& index_vec = _data_sources[version]; - VersionEntity version_entity(version, index_vec[0]->version_hash()); - for (SegmentGroup* segment_group : index_vec) { - const std::vector* column_statistics = nullptr; - if (segment_group->has_column_statistics()) { - column_statistics = &(segment_group->get_column_statistics()); - } - SegmentGroupEntity segment_group_entity(segment_group->segment_group_id(), segment_group->num_segments(), - segment_group->num_rows(), segment_group->data_size(), - segment_group->index_size(), segment_group->empty(), column_statistics); - version_entity.add_segment_group_entity(segment_group_entity); - } - return version_entity; -} - -size_t OLAPTable::get_version_index_size(const Version& version) { - std::vector& index_vec = _data_sources[version]; - size_t index_size = 0; - for (SegmentGroup* segment_group : index_vec) { - index_size += segment_group->index_size(); - } - return index_size; -} - -size_t OLAPTable::get_version_data_size(const Version& version) { - std::vector& index_vec = _data_sources[version]; - size_t data_size = 0; - for (SegmentGroup* segment_group : index_vec) { - data_size += segment_group->data_size(); - } - return data_size; -} - -OLAPStatus OLAPTable::recover_tablet_until_specfic_version( - const int64_t& until_version, const int64_t& version_hash) { - std::vector missing_versions; - { - ReadLock rdlock(&_header_lock); - get_missing_versions_with_header_locked(until_version, &missing_versions); - } - - std::vector segment_group_vec; - OLAPStatus res = OLAP_SUCCESS; - for (Version& missing_version : missing_versions) { - SegmentGroup* segment_group = new SegmentGroup(this, missing_version, version_hash, false, 0, 0); - segment_group->set_empty(true); - ColumnDataWriter* writer = ColumnDataWriter::create(std::shared_ptr(this), segment_group, true); - if (res != OLAP_SUCCESS) { break; } - - res = writer->finalize(); - if (res != OLAP_SUCCESS) { break; } - segment_group_vec.push_back(segment_group); - } - - if (res != OLAP_SUCCESS) { - for (SegmentGroup* segment_group : segment_group_vec) { - segment_group->delete_all_files(); - SAFE_DELETE(segment_group); - } - } else { - for (SegmentGroup* segment_group : segment_group_vec) { - segment_group->load(); - } - } - - { - WriteLock wrlock(&_header_lock); - RETURN_NOT_OK(register_data_source(segment_group_vec)); - RETURN_NOT_OK(save_header()); - } - return OLAP_SUCCESS; -} - -OLAPStatus OLAPTable::test_version(const Version& version) { - vector span_versions; - obtain_header_rdlock(); - OLAPStatus res = _header->select_versions_to_span(version, &span_versions); - release_header_lock(); - - return res; -} - -} // namespace doris diff --git a/be/src/olap/olap_table.h b/be/src/olap/olap_table.h deleted file mode 100644 index 7befcc9b12734d..00000000000000 --- a/be/src/olap/olap_table.h +++ /dev/null @@ -1,770 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef DORIS_BE_SRC_OLAP_OLAP_TABLE_H -#define DORIS_BE_SRC_OLAP_OLAP_TABLE_H - -#include -#include -#include -#include -#include -#include - -#include "gen_cpp/AgentService_types.h" -#include "gen_cpp/olap_file.pb.h" -#include "olap/field.h" -#include "olap/olap_define.h" -#include "olap/olap_header.h" -#include "olap/tuple.h" -#include "olap/row_cursor.h" -#include "olap/utils.h" - -namespace doris { -class FieldInfo; -class ColumnData; -class OLAPHeader; -class SegmentGroup; -class OLAPTable; -class RowBlockPosition; -class OlapStore; - -// Define OLAPTable's shared_ptr. It is used for -typedef std::shared_ptr OLAPTablePtr; - -enum BaseCompactionStage { - BASE_COMPACTION_WAITING = 0, - BASE_COMPACTION_RUNNING = 1, -}; - -struct BaseCompactionStatus { - BaseCompactionStatus() : status(BASE_COMPACTION_WAITING), version(-1) {} - - BaseCompactionStage status; - int32_t version; -}; - -enum PushStage { - PUSH_WAITING = 0, - PUSH_RUNNING = 1, -}; - -struct PushStatus { - PushStatus() : status(PUSH_WAITING), version(-1) {} - - PushStage status; - int32_t version; -}; - -enum SyncStage { - SYNC_WAITING = 0, - SYNC_RUNNING = 1, - SYNC_DONE = 2, - SYNC_FAILED = 3, -}; - -struct SyncStatus { - SyncStatus() : status(SYNC_WAITING), version(-1) {} - - SyncStage status; - int32_t version; -}; - -struct SchemaChangeStatus { - SchemaChangeStatus() : status(ALTER_TABLE_WAITING), schema_hash(0), version(-1) {} - - AlterTableStatus status; - SchemaHash schema_hash; - int32_t version; -}; - -class OLAPTable : public std::enable_shared_from_this { -public: - static OLAPTablePtr create_from_header_file( - TTabletId tablet_id, - TSchemaHash schema_hash, - const std::string& header_file, - OlapStore* store = nullptr); - static OLAPTablePtr create_from_header_file_for_check( - TTabletId tablet_id, - TSchemaHash schema_hash, - const std::string& header_file); - - static OLAPTablePtr create_from_header( - OLAPHeader* header, - OlapStore* store = nullptr); - - explicit OLAPTable(OLAPHeader* header, OlapStore* store); - explicit OLAPTable(OLAPHeader* header); - - virtual ~OLAPTable(); - - // Initializes table and loads indices for all versions. - // Returns OLAP_SUCCESS on success. - OLAPStatus load(); - - bool is_loaded() { - return _is_loaded; - } - - OLAPStatus load_indices(); - - OLAPStatus save_header(); - - OLAPHeader* get_header() { - return _header; - } - - OLAPStatus select_versions_to_span(const Version& version, - std::vector* span_versions) const; - - // Acquire data sources needed for querying the given version. - // The data source must later be released with release_data_source() - // to avoid a memory leak. Returns a vector of acquired sources. If - // the vector is empty, we were unable to obtain the sources. - // - // Elements in the output vector are order-sensitive. 
- // For example, to fetch version 109, OLAPData vector is returned. - // OLAPData:0-100 + - // OLAPData:101-110 + - // OLAPData:110-110 - - void acquire_data_sources(const Version& version, std::vector* sources) const; - - // Acquire data sources whose versions are specified by version_list. - // If you want specified OLAPDatas instead of calling - // OLAPHeader->select_versions_to_span(), call this function. In the - // scenarios like Cumulative Delta and Base generating, different - // strategies can be applied. - // @param [in] version_list - // @param [out] sources - void acquire_data_sources_by_versions(const std::vector& version_list, - std::vector* sources) const; - - // Releases the acquired data sources. Returns true on success. - OLAPStatus release_data_sources(std::vector* data_sources) const; - - // Registers a newly created data source, making it available for - // querying. Adds a reference to the data source in the header file. - OLAPStatus register_data_source(const std::vector& segment_group_vec); - - // Unregisters the data source for given version, frees up resources. - // resources include memory, files. - // After unregister, segment_group will point to the associated SegmentGroup. - OLAPStatus unregister_data_source(const Version& version, std::vector* segment_group_vec); - - // if pending data is push_for_delete, delete conditions is not null - OLAPStatus add_pending_version(int64_t partition_id, int64_t transaction_id, - const std::vector* delete_conditions); - OLAPStatus add_pending_segment_group(SegmentGroup* segment_group); - int32_t current_pending_segment_group_id(int64_t transaction_id); - - OLAPStatus add_pending_data(SegmentGroup* segment_group, const std::vector* delete_conditions); - - bool has_pending_data(int64_t transaction_id); - - bool has_pending_data(); - - void delete_pending_data(int64_t transaction_id); - - // check the pending data that still not publish version - void get_expire_pending_data(std::vector* transaction_ids); - - bool has_expired_incremental_data(); - void delete_expired_incremental_data(); - - // don't need header lock, because it occurs before loading tablet - void load_pending_data(); - - OLAPStatus publish_version(int64_t transaction_id, Version version, VersionHash version_hash); - - const PDelta* get_incremental_delta(Version version) const { - return _header->get_incremental_version(version); - } - - // calculate holes of version - // need header rdlock outside - void get_missing_versions_with_header_locked( - int64_t until_version, std::vector* missing_versions) const; - - // check if pending data is push_for_delete - // need to obtain header rdlock outside - OLAPStatus is_push_for_delete(int64_t transaction_id, bool* is_push_for_delete) const; - - // need to obtain header wrlock outside - OLAPStatus clone_data(const OLAPHeader& clone_header, - const std::vector& clone_deltas, - const std::vector& versions_to_delete); - - // Atomically replaces one set of data sources with another. Returns - // true on success. - OLAPStatus replace_data_sources(const std::vector* old_versions, - const std::vector* new_data_sources, - std::vector* old_data_sources); - - // Computes the cumulative hash for given versions. - // Only use Base file and Delta files to compute for simplicity and - // accuracy. XOR operation of version_hash satisfies associative laws and - // commutative laws. 
For example, - // version(0,99) = version(0,90) + version(91,100) - version(100,100) - // version_hash(0,99) = version_hash(0,90) - // ^ version_hash(91,100) - // ^ version_hash(100,100) - OLAPStatus compute_all_versions_hash(const std::vector& versions, - VersionHash* version_hash) const; - - // used for restore, merge the (0, to_version) in 'hdr' - OLAPStatus merge_header(const OLAPHeader& hdr, int to_version); - - // Used by monitoring OLAPTable - void list_data_files(std::set* filenames) const; - - void list_index_files(std::set* filenames) const; - - bool has_segment_group(const Version& version, const SegmentGroup* new_segment_group) const; - - void list_versions(std::vector* versions) const; - - // Return version list and their corresponding version hashes - void list_version_entities(std::vector* version_entities) const; - - // mark this table to be dropped, all files will be deleted when - // ~OLAPTable() - void mark_dropped() { - _is_dropped = true; - } - - // Delete all files for this table (.hdr, *.dat, *.idx). This should only - // be called if no one is accessing the table. - void delete_all_files(); - - // Methods to obtain and release locks. - void obtain_header_rdlock() { - _header_lock.rdlock(); - } - void obtain_header_wrlock() { - _header_lock.wrlock(); - } - void release_header_lock() { - _header_lock.unlock(); - } - - RWMutex* get_header_lock_ptr() { - return &_header_lock; - } - - OLAPStatus try_migration_rdlock() { - return _migration_lock.tryrdlock(); - } - - OLAPStatus try_migration_wrlock() { - return _migration_lock.trywrlock(); - } - - void release_migration_lock() { - _migration_lock.unlock(); - } - - // Prevent push operations execute concurrently. - void obtain_push_lock() { - _push_lock.lock(); - } - void release_push_lock() { - _push_lock.unlock(); - } - - Mutex* get_push_lock() { - return &_push_lock; - } - - // Prevent base compaction operations execute concurrently. - bool try_base_compaction_lock() { - return _base_compaction_lock.trylock() == OLAP_SUCCESS; - } - void obtain_base_compaction_lock() { - _base_compaction_lock.lock(); - } - void release_base_compaction_lock() { - _base_compaction_lock.unlock(); - } - - // Prevent cumulative compaction operations execute concurrently. - bool try_cumulative_lock() { - return (OLAP_SUCCESS == _cumulative_lock.trylock()); - } - - void obtain_cumulative_lock() { - _cumulative_lock.lock(); - } - - void release_cumulative_lock() { - _cumulative_lock.unlock(); - } - - // Construct index file path according version, version_hash and segment - // We construct file path through header file name. 
header file name likes: - // tables_root_path/db/table/index/table_index_schemaversion.hdr - // Index file path is: - // tables_root_path/db/table/index - // /table_index_schemaversion_start_end_versionhash_segment.idx - // The typical index file path is: - // /home/work/olap/storage/data/db2/DailyWinfoIdeaStats/PRIMARY/ - // DailyWinfoIdeaStats_PRIMARY_20120428_0_200_735382373247_1.idx - std::string construct_index_file_path(const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const; - - // Same as construct_index_file_path except that file suffix is .dat - // The typical index file path is: - // /home/work/olap/storage/data/db2/DailyWinfoIdeaStats/PRIMARY/ - // DailyWinfoIdeaStats_PRIMARY_20120428_0_200_735382373247_1.dat - std::string construct_data_file_path(const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment) const; - - // For index file, suffix is "idx", for data file, suffix is "dat". - static std::string construct_file_path(const std::string& tablet_path, - const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment, - const std::string& suffix); - - std::string construct_pending_data_dir_path() const; - std::string construct_pending_index_file_path( - TTransactionId transaction_id, int32_t segment_group_id, int32_t segment) const; - std::string construct_pending_data_file_path( - TTransactionId transaction_id, int32_t segment_group_id, int32_t segment) const; - std::string construct_incremental_delta_dir_path() const; - std::string construct_incremental_index_file_path( - Version version, VersionHash version_hash, int32_t segment_group_id, int32_t segment) const; - std::string construct_incremental_data_file_path( - Version version, VersionHash version_hash, int32_t segment_group_id, int32_t segment) const; - - std::string construct_file_name(const Version& version, - VersionHash version_hash, - int32_t segment_group_id, int32_t segment, - const std::string& suffix) const; - - // Return -1 if field name is invalid, else return field index in schema. - int32_t get_field_index(const std::string& field_name) const; - - // Return 0 if file_name is invalid, else return field size in schema. - size_t get_field_size(const std::string& field_name) const; - - size_t get_return_column_size(const std::string& field_name) const; - - // One row in a specified OLAPTable comprises of fixed number of columns - // with fixed length. - size_t get_row_size() const; - - // Get olap table statistics for SHOW STATUS - size_t get_index_size() const; - - int64_t get_data_size() const; - - int64_t get_num_rows() const; - - // Returns fully qualified name for this OLAP table. - // eg. 
db4.DailyUnitStats.PRIMARY - const std::string& full_name() const { - return _full_name; - } - - void set_full_name(std::string full_name) { - _full_name = full_name; - } - - std::vector& tablet_schema() { - return _tablet_schema; - } - - size_t num_fields() const { - return _num_fields; - } - - size_t num_null_fields() const { - return _num_null_fields; - } - - size_t num_key_fields() const { - return _num_key_fields; - } - - size_t id() const { - return _id; - } - - void set_id(size_t id) { - _id = id; - } - - // Expose some header attributes - const std::string header_file_name() const { - return _header->file_name(); - } - - TTabletId tablet_id() const { - return _tablet_id; - } - - void set_tablet_id(TTabletId tablet_id) { - _tablet_id = tablet_id; - } - - size_t num_short_key_fields() const { - return _header->num_short_key_fields(); - } - - uint32_t next_unique_id() const { - return _header->next_column_unique_id(); - } - - TSchemaHash schema_hash() const { - return _schema_hash; - } - - void set_schema_hash(TSchemaHash schema_hash) { - _schema_hash = schema_hash; - } - - OlapStore* store() const { - return _store; - } - - int file_delta_size() const { - return _header->file_delta_size(); - } - - const PDelta& delta(int index) const { - return _header->delta(index); - } - - const PDelta* get_delta(int index) const { - return _header->get_delta(index); - } - - const PDelta* lastest_delta() const { - return _header->get_lastest_delta_version(); - } - - const PDelta* lastest_version() const { - return _header->get_lastest_version(); - } - - // need to obtain header rdlock outside - const PDelta* least_complete_version( - const std::vector& missing_versions) const; - - const PDelta* base_version() const { - return _header->get_base_version(); - } - - // 在使用之前对header加锁 - const uint32_t get_cumulative_compaction_score() const { - return _header->get_cumulative_compaction_score(); - } - - const uint32_t get_base_compaction_score() const { - return _header->get_base_compaction_score(); - } - - const OLAPStatus delete_version(const Version& version) { - return _header->delete_version(version); - } - - const OLAPStatus version_creation_time(const Version& version, int64_t* creation_time) { - return _header->version_creation_time(version, creation_time); - } - - DataFileType data_file_type() const { - return _header->data_file_type(); - } - - // num rows per rowBlock, typically it is 256 or 512. 
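// Illustrative sketch of the XOR folding that compute_all_versions_hash
// (declared above) relies on: because XOR is associative and commutative,
// the cumulative hash of a span can be rebuilt from per-delta hashes in any
// order. VersionHash is assumed here to be a 64-bit integer; the hash values
// passed in would come from the individual deltas.
#include <cstdint>
#include <vector>

using VersionHash = int64_t;

VersionHash fold_version_hashes(const std::vector<VersionHash>& delta_hashes) {
    VersionHash result = 0;
    for (VersionHash h : delta_hashes) {
        result ^= h;  // order of the deltas does not affect the result
    }
    return result;
}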
- size_t num_rows_per_row_block() const { - return _num_rows_per_row_block; - } - - CompressKind compress_kind() const { - return _compress_kind; - } - - int delete_data_conditions_size() const { - return _header->delete_data_conditions_size(); - } - - DeleteConditionMessage* add_delete_data_conditions() { - return _header->add_delete_data_conditions(); - } - - const google::protobuf::RepeatedPtrField& - delete_data_conditions() { - return _header->delete_data_conditions(); - } - - google::protobuf::RepeatedPtrField* - mutable_delete_data_conditions() { - return _header->mutable_delete_data_conditions(); - } - - DeleteConditionMessage* mutable_delete_data_conditions(int index) { - return _header->mutable_delete_data_conditions(index); - } - - double bloom_filter_fpp() const { - if (_header->has_bf_fpp()) { - return _header->bf_fpp(); - } - - return BLOOM_FILTER_DEFAULT_FPP; - } - - KeysType keys_type() const { - if (_header->has_keys_type()) { - return _header->keys_type(); - } - - return KeysType::AGG_KEYS; - } - - bool is_delete_data_version(Version version) { - return _header->is_delete_data_version(version); - } - - bool is_load_delete_version(Version version); - - const int64_t creation_time() const { - return _header->creation_time(); - } - - void set_creation_time(int64_t time_seconds) { - _header->set_creation_time(time_seconds); - } - - // versions in [0, m_cumulative_layer_point) is base and cumulative versions; - // versions in [m_cumulative_layer_point, newest_delta_version] is delta versons; - // 在使用之前对header加锁 - const int32_t cumulative_layer_point() const { - return _header->cumulative_layer_point(); - } - - // 在使用之前对header加锁 - void set_cumulative_layer_point(const int32_t new_point) { - LOG(INFO) << "cumulative_layer_point: " << new_point; - _header->set_cumulative_layer_point(new_point); - } - - // Judge whether olap table in schema change state - bool is_schema_changing(); - - bool get_schema_change_request(TTabletId* tablet_id, - SchemaHash* schema_hash, - std::vector* versions_to_changed, - AlterTabletType* alter_table_type) const; - - void set_schema_change_request(TTabletId tablet_id, - TSchemaHash schema_hash, - const std::vector& versions_to_changed, - const AlterTabletType alter_table_type); - - bool remove_last_schema_change_version(OLAPTablePtr new_olap_table); - void clear_schema_change_request(); - - SchemaChangeStatus schema_change_status() { - return _schema_change_status; - } - - void set_schema_change_status(AlterTableStatus status, - SchemaHash schema_hash, - int32_t version) { - _schema_change_status.status = status; - _schema_change_status.schema_hash = schema_hash; - _schema_change_status.version = version; - VLOG(3) << "set schema change status. 
tablet_id=" << _tablet_id - << ", schema_hash=" << _schema_change_status.schema_hash - << ", status=" << _schema_change_status.status; - } - - void clear_schema_change_status() { - set_schema_change_status(ALTER_TABLE_WAITING, 0, -1); - } - - bool equal(TTabletId tablet_id, TSchemaHash schema_hash) { - if (this->tablet_id() != tablet_id || this->schema_hash() != schema_hash) { - return false; - } - - return true; - } - - OLAPStatus split_range( - const OlapTuple& start_key_strings, - const OlapTuple& end_key_strings, - uint64_t request_block_row_count, - std::vector* ranges); - - uint32_t segment_size() const { - return _header->segment_size(); - } - - void set_io_error(); - - bool is_used(); - - void set_bad(bool is_bad) { _is_bad = is_bad; } - - int64_t last_compaction_failure_time() { return _last_compaction_failure_time; } - - void set_last_compaction_failure_time(int64_t time) { - _last_compaction_failure_time = time; - } - - // 得到当前table的root path路径,路径末尾不带斜杠(/) - std::string storage_root_path_name() { - return _storage_root_path; - } - - std::string tablet_path() const { - return _tablet_path; - } - - std::string get_field_name_by_index(uint32_t index) { - if (index < _tablet_schema.size()) { - return _tablet_schema[index].name; - } - - return ""; - } - - FieldType get_field_type_by_index(uint32_t index) { - if (index < _tablet_schema.size()) { - return _tablet_schema[index].type; - } - - return OLAP_FIELD_TYPE_NONE; - } - - FieldAggregationMethod get_aggregation_by_index(uint32_t index) { - if (index < _tablet_schema.size()) { - return _tablet_schema[index].aggregation; - } - - return OLAP_FIELD_AGGREGATION_UNKNOWN; - } - - OLAPStatus test_version(const Version& version); - - VersionEntity get_version_entity_by_version(const Version& version); - size_t get_version_index_size(const Version& version); - size_t get_version_data_size(const Version& version); - - bool is_dropped() { - return _is_dropped; - } - - OLAPStatus recover_tablet_until_specfic_version(const int64_t& until_version, - const int64_t& version_hash); -private: - // used for hash-struct of hash_map. - struct HashOfVersion { - uint64_t operator()(const Version& version) const { - uint64_t hash_value = version.first; - hash_value = (hash_value << 32) + version.second; - return hash_value; - } - }; - - struct HashOfString { - size_t operator()(const std::string& str) const { - return std::hash()(str); - } - }; - - // List files with suffix "idx" or "dat". 
- void _list_files_with_suffix(const std::string& file_suffix, - std::set* file_names) const; - - OLAPStatus _publish_version(int64_t transaction_id, Version version, VersionHash version_hash); - - // 获取最大的index(只看大小) - SegmentGroup* _get_largest_index(); - - SegmentGroup* _construct_segment_group_from_version(const PDelta* delta, int32_t segment_group_id); - - // check if version is same, may delete local data - OLAPStatus _handle_existed_version(int64_t transaction_id, const Version& version, - const VersionHash& version_hash); - - // like "9-9" "10-10", for incremental cloning - OLAPStatus _add_incremental_data(std::vector& index_vec, int64_t transaction_id, - const Version& version, const VersionHash& version_hash); - - void _delete_incremental_data(const Version& version, const VersionHash& version_hash, - std::vector* files_to_remove); - - OLAPStatus _create_hard_link(const std::string& from, const std::string& to, - std::vector* linked_success_files); - - TTabletId _tablet_id; - TSchemaHash _schema_hash; - OLAPHeader* _header; - size_t _num_rows_per_row_block; - CompressKind _compress_kind; - // Set it true when table is dropped, table files and data structures - // can be used and not deleted until table is destructed. - bool _is_dropped; - std::string _full_name; - std::vector _tablet_schema; // field info vector is table schema. - - // Version mapping to SegmentGroup. - // data source can be base delta, cumulative delta, singleton delta. - using version_olap_index_map_t = std::unordered_map, HashOfVersion>; - version_olap_index_map_t _data_sources; - using transaction_olap_index_map_t = std::unordered_map>; - transaction_olap_index_map_t _pending_data_sources; - - size_t _num_fields; - size_t _num_null_fields; - size_t _num_key_fields; - // filed name -> field position in row - using field_index_map_t = std::unordered_map; - field_index_map_t _field_index_map; - std::vector _field_sizes; - // A series of status - SchemaChangeStatus _schema_change_status; - // related locks to ensure that commands are executed correctly. - RWMutex _header_lock; - RWMutex _migration_lock; - Mutex _push_lock; - Mutex _cumulative_lock; - Mutex _base_compaction_lock; - size_t _id; // uniq id, used in cache - std::string _storage_root_path; - OlapStore* _store; - std::atomic _is_loaded; - Mutex _load_lock; - std::string _tablet_path; - - bool _table_for_check; - std::atomic _is_bad; // if this tablet is broken, set to true. default is false - std::atomic _last_compaction_failure_time; // timestamp of last compaction failure - - DISALLOW_COPY_AND_ASSIGN(OLAPTable); -}; - -} // namespace doris - -#endif // DORIS_BE_SRC_OLAP_OLAP_TABLE_H diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index e45a75bfe8a15b..6edea96eb02805 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -23,9 +23,12 @@ #include -#include "olap/olap_engine.h" -#include "olap/olap_table.h" +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/rowset_id_generator.h" +#include "olap/rowset/rowset_meta_manager.h" #include "olap/schema_change.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" using std::list; using std::map; @@ -35,524 +38,259 @@ using std::vector; namespace doris { // Process push command, the main logical is as follows: -// a. related tables not exist: -// current table isn't in schemachange state, only push for current table -// b. related tables exist -// I. current table is old table: +// a. 
related tablets not exist: +// current table isn't in schemachange state, only push for current +// tablet +// b. related tablets exist +// I. current tablet is old table (cur.creation_time < +// related.creation_time): // push for current table and than convert data for related tables // II. current table is new table: // this usually means schema change is over, -// clear schema change info in both current table and related tables, -// finally we will only push for current tables -OLAPStatus PushHandler::process( - OLAPTablePtr olap_table, - const TPushReq& request, - PushType push_type, - vector* tablet_info_vec) { - LOG(INFO) << "begin to push data. tablet=" << olap_table->full_name() - << ", version=" << request.version; - - OLAPStatus res = OLAP_SUCCESS; - _request = request; - _olap_table_arr.clear(); - _olap_table_arr.push_back(olap_table); - vector table_infoes(1); - table_infoes[0].olap_table = olap_table; - - bool is_push_locked = false; - bool is_new_tablet = false; - bool is_new_tablet_effective = false; - - // 1. Get related tablets first if tablet in alter table status, - TTabletId tablet_id; - TSchemaHash schema_hash; - AlterTabletType alter_table_type; - OLAPTablePtr related_olap_table; - _obtain_header_rdlock(); - bool is_schema_changing = olap_table->get_schema_change_request( - &tablet_id, &schema_hash, NULL, &alter_table_type); - _release_header_lock(); - - if (is_schema_changing) { - related_olap_table = OLAPEngine::get_instance()->get_table(tablet_id, schema_hash); - if (NULL == related_olap_table.get()) { - OLAP_LOG_WARNING("can't find olap table, clear invalid schema change info." - "[table=%ld schema_hash=%d]", tablet_id, schema_hash); - _obtain_header_wrlock(); - olap_table->clear_schema_change_request(); - _release_header_lock(); - is_schema_changing = false; - } else { - // _olap_table_arr is used to obtain header lock, - // to avoid deadlock, we must lock tablet header in time order. - if (related_olap_table->creation_time() < olap_table->creation_time()) { - _olap_table_arr.push_front(related_olap_table); - } else { - _olap_table_arr.push_back(related_olap_table); - } - } - } - - // Obtain push lock to avoid simultaneously PUSH and - // conflict with alter table operations. - for (OLAPTablePtr table : _olap_table_arr) { - table->obtain_push_lock(); - } - is_push_locked = true; - - if (is_schema_changing) { - _obtain_header_rdlock(); - is_schema_changing = olap_table->get_schema_change_request( - &tablet_id, &schema_hash, NULL, &alter_table_type); - _release_header_lock(); - - if (!is_schema_changing) { - LOG(INFO) << "schema change info is cleared after base table get related tablet, " - << "maybe new tablet reach at the same time and load firstly. " - << ", old_tablet=" << olap_table->full_name() - << ", new_tablet=" << related_olap_table->full_name() - << ", version=" << _request.version; - } else if (related_olap_table->creation_time() > olap_table->creation_time()) { - // If current table is old table, append it to table_infoes - table_infoes.push_back(TableVars()); - TableVars& new_item = table_infoes.back(); - new_item.olap_table = related_olap_table; - } else { - // if current table is new table, clear schema change info - res = _clear_alter_table_info(olap_table, related_olap_table); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to clear schema change info. [res=%d]", res); - goto EXIT; - } - - LOG(INFO) << "data of new table is generated, stop convert from base table. 
" - << "old_tablet=" << olap_table->full_name() - << ", new_tablet=" << related_olap_table->full_name() - << ", version=" << _request.version; - is_new_tablet_effective = true; - } - } - - // To keep logic of alter_table/rollup_table consistent - if (table_infoes.size() == 1) { - table_infoes.resize(2); - } - - // 2. validate request: version and version_hash chek - _obtain_header_rdlock(); - res = _validate_request(table_infoes[0].olap_table, - table_infoes[1].olap_table, - is_new_tablet_effective, - push_type); - _release_header_lock(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to validate request. [res=%d table='%s' version=%ld]", - res, olap_table->full_name().c_str(), _request.version); - goto EXIT; - } - - // 3. Remove reverted version including delta and cumulative, - // which will be deleted by background thread - _obtain_header_wrlock(); - for (TableVars& table_var : table_infoes) { - if (NULL == table_var.olap_table.get()) { - continue; - } - - res = _get_versions_reverted(table_var.olap_table, - is_new_tablet, - push_type, - &(table_var.unused_versions)); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to get reverted versions. " - "[res=%d table='%s' version=%ld]", - res, table_var.olap_table->full_name().c_str(), _request.version); - goto EXIT; - } - - if (table_var.unused_versions.size() != 0) { - res = _update_header(table_var.olap_table, - &(table_var.unused_versions), - &(table_var.added_indices), - &(table_var.unused_indices)); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to update header for revert. " - "[res=%d table='%s' version=%ld]", - res, table_var.olap_table->full_name().c_str(), _request.version); - goto EXIT; - } - - _delete_old_indices(&(table_var.unused_indices)); - } - - // If there are more than one table, others is doing alter table - is_new_tablet = true; - } - _release_header_lock(); - - // 4. Save delete condition when push for delete - if (push_type == PUSH_FOR_DELETE) { - _obtain_header_wrlock(); - DeleteConditionHandler del_cond_handler; - - for (TableVars& table_var : table_infoes) { - if (table_var.olap_table.get() == NULL) { - continue; - } - - res = del_cond_handler.store_cond( - table_var.olap_table, request.version, request.delete_conditions); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to store delete condition. [res=%d table='%s']", - res, table_var.olap_table->full_name().c_str()); - goto EXIT; - } - - res = table_var.olap_table->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. res=" << res - << ", table=" << table_var.olap_table->full_name(); - goto EXIT; - } - } - - _release_header_lock(); - } - - // 5. Convert local data file into delta_file and build index, - // which may take a long time - res = _convert(table_infoes[0].olap_table, - table_infoes[1].olap_table, - &(table_infoes[0].added_indices), - &(table_infoes[1].added_indices), - alter_table_type); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to convert data. [res=%d]", res); - goto EXIT; - } - - // Update table header: add new version and remove reverted version - _obtain_header_wrlock(); - for (TableVars& table_var : table_infoes) { - if (NULL == table_var.olap_table.get()) { - continue; - } - - res = _update_header(table_var.olap_table, - &(table_var.unused_versions), - &(table_var.added_indices), - &(table_var.unused_indices)); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to update header of new delta." 
- "[res=%d table='%s' version=%ld]", - res, table_var.olap_table->full_name().c_str(), _request.version); - goto EXIT; - } - } - _release_header_lock(); - - // 6. Delete unused versions which include delta and commulative, - // which, in fact, is added to list and deleted by background thread - for (TableVars& table_var : table_infoes) { - if (NULL == table_var.olap_table.get()) { - continue; - } - - _delete_old_indices(&(table_var.unused_indices)); - } - -EXIT: - _release_header_lock(); - - // Get tablet infos for output - if (res == OLAP_SUCCESS || res == OLAP_ERR_PUSH_VERSION_ALREADY_EXIST) { - if (tablet_info_vec != NULL) { - _get_tablet_infos(table_infoes, tablet_info_vec); - } - res = OLAP_SUCCESS; - } - - // Clear added_indices when error happens - for (TableVars& table_var : table_infoes) { - if (table_var.olap_table.get() == NULL) { - continue; - } - - for (SegmentGroup* segment_group : table_var.added_indices) { - segment_group->delete_all_files(); - SAFE_DELETE(segment_group); - } - } - - // Release push lock - if (is_push_locked) { - for (OLAPTablePtr table : _olap_table_arr) { - table->release_push_lock(); - } - } - _olap_table_arr.clear(); - - LOG(INFO) << "finish to process push. res=" << res; - - return res; -} - -OLAPStatus PushHandler::process_realtime_push( - OLAPTablePtr olap_table, - const TPushReq& request, - PushType push_type, - vector* tablet_info_vec) { - LOG(INFO) << "begin to realtime push. tablet=" << olap_table->full_name() +// clear schema change info in both current tablet and related +// tablets, finally we will only push for current tablets. this is +// very useful in rollup action. +OLAPStatus PushHandler::process_streaming_ingestion( + TabletSharedPtr tablet, const TPushReq& request, PushType push_type, + vector* tablet_info_vec) { + LOG(INFO) << "begin to realtime push. tablet=" << tablet->full_name() + << ", transaction_id=" << request.transaction_id; + + OLAPStatus res = OLAP_SUCCESS; + _request = request; + vector tablet_vars(1); + tablet_vars[0].tablet = tablet; + res = _do_streaming_ingestion(tablet, request, push_type, &tablet_vars, + tablet_info_vec); + + if (res == OLAP_SUCCESS) { + if (tablet_info_vec != NULL) { + _get_tablet_infos(tablet_vars, tablet_info_vec); + } + LOG(INFO) << "process realtime push successfully. " + << "tablet=" << tablet->full_name() + << ", partition_id=" << request.partition_id << ", transaction_id=" << request.transaction_id; + } - OLAPStatus res = OLAP_SUCCESS; - _request = request; - vector table_infoes(1); - table_infoes[0].olap_table = olap_table; - AlterTabletType alter_table_type; - - // add transaction in engine, then check sc status - // lock, prevent sc handler checking transaction concurrently - olap_table->obtain_push_lock(); - PUniqueId load_id; - load_id.set_hi(0); - load_id.set_lo(0); - res = OLAPEngine::get_instance()->add_transaction( - request.partition_id, request.transaction_id, - olap_table->tablet_id(), olap_table->schema_hash(), load_id); - - // if transaction exists, exit - if (res == OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { - - // if push finished, report success to fe - if (olap_table->has_pending_data(request.transaction_id)) { - OLAP_LOG_WARNING("pending data exists in tablet, which means push finished," - "return success. 
[table=%s transaction_id=%ld]", - olap_table->full_name().c_str(), request.transaction_id); - res = OLAP_SUCCESS; - } - olap_table->release_push_lock(); - goto EXIT; - } + return res; +} - // only when fe sends schema_change true, should consider to push related table - if (_request.is_schema_changing) { - VLOG(3) << "push req specify schema changing is true. " - << "tablet=" << olap_table->full_name() +OLAPStatus PushHandler::_do_streaming_ingestion( + TabletSharedPtr tablet, const TPushReq& request, PushType push_type, + vector* tablet_vars, + std::vector* tablet_info_vec) { + // add transaction in engine, then check sc status + // lock, prevent sc handler checking transaction concurrently + if (tablet == nullptr) { + return OLAP_ERR_TABLE_NOT_FOUND; + } + ReadLock base_migration_rlock(tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!base_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + tablet->obtain_push_lock(); + PUniqueId load_id; + load_id.set_hi(0); + load_id.set_lo(0); + OLAPStatus res = StorageEngine::instance()->txn_manager()->prepare_txn( + request.partition_id, request.transaction_id, tablet->tablet_id(), + tablet->schema_hash(), tablet->tablet_uid(), load_id); + + // prepare txn will be always successful + // if current tablet is under schema change, origin tablet is successful and + // new tablet is not sucessful, it maybe a fatal error because new tablet has + // not load successfully + + // only when fe sends schema_change true, should consider to push related + // tablet + if (_request.is_schema_changing) { + VLOG(3) << "push req specify schema changing is true. " + << "tablet=" << tablet->full_name() + << ", transaction_id=" << request.transaction_id; + AlterTabletTaskSharedPtr alter_task = tablet->alter_task(); + if (alter_task != nullptr && alter_task->alter_state() != ALTER_FAILED) { + TTabletId related_tablet_id = alter_task->related_tablet_id(); + TSchemaHash related_schema_hash = alter_task->related_schema_hash(); + LOG(INFO) << "find schema_change status when realtime push. " + << "tablet=" << tablet->full_name() + << ", related_tablet_id=" << related_tablet_id + << ", related_schema_hash=" << related_schema_hash << ", transaction_id=" << request.transaction_id; - TTabletId related_tablet_id; - TSchemaHash related_schema_hash; - - olap_table->obtain_header_rdlock(); - bool is_schema_changing = olap_table->get_schema_change_request( - &related_tablet_id, &related_schema_hash, NULL, &alter_table_type); - olap_table->release_header_lock(); - - if (is_schema_changing) { - LOG(INFO) << "find schema_change status when realtime push. " - << "tablet=" << olap_table->full_name() - << ", related_tablet_id=" << related_tablet_id - << ", related_schema_hash=" << related_schema_hash - << ", transaction_id=" << request.transaction_id; - OLAPTablePtr related_olap_table = OLAPEngine::get_instance()->get_table( - related_tablet_id, related_schema_hash); - - // if related tablet not exists, only push current tablet - if (NULL == related_olap_table.get()) { - OLAP_LOG_WARNING("can't find related table, only push current tablet. " - "[table=%s related_tablet_id=%ld related_schema_hash=%d]", - olap_table->full_name().c_str(), - related_tablet_id, related_schema_hash); - - // if current tablet is new table, only push current tablet - } else if (olap_table->creation_time() > related_olap_table->creation_time()) { - OLAP_LOG_WARNING("current table is new, only push current tablet. 
" - "[table=%s related_olap_table=%s]", - olap_table->full_name().c_str(), - related_olap_table->full_name().c_str()); - - // add related transaction in engine - } else { - PUniqueId load_id; - load_id.set_hi(0); - load_id.set_lo(0); - res = OLAPEngine::get_instance()->add_transaction( - request.partition_id, request.transaction_id, - related_olap_table->tablet_id(), related_olap_table->schema_hash(), load_id); - - // if related tablet's transaction exists, only push current tablet - if (res == OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { - OLAP_LOG_WARNING("related tablet's transaction exists in engine, " - "only push current tablet. " - "[related_table=%s transaction_id=%ld]", - related_olap_table->full_name().c_str(), - request.transaction_id); - } else { - table_infoes.push_back(TableVars()); - TableVars& new_item = table_infoes.back(); - new_item.olap_table = related_olap_table; - } - } - } - } - olap_table->release_push_lock(); - - if (table_infoes.size() == 1) { - table_infoes.resize(2); - } - - // check delete condition if push for delete - if (push_type == PUSH_FOR_DELETE) { - - for (TableVars& table_var : table_infoes) { - if (table_var.olap_table.get() == NULL) { - continue; - } - - if (request.delete_conditions.size() == 0) { - OLAP_LOG_WARNING("invalid parameters for store_cond. [condition_size=0]"); - res = OLAP_ERR_DELETE_INVALID_PARAMETERS; - goto EXIT; - } - - DeleteConditionHandler del_cond_handler; - table_var.olap_table->obtain_header_rdlock(); - for (const TCondition& cond : request.delete_conditions) { - res = del_cond_handler.check_condition_valid(table_var.olap_table, cond); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to check delete condition. [table=%s res=%d]", - table_var.olap_table->full_name().c_str(), res); - table_var.olap_table->release_header_lock(); - goto EXIT; - } - } - table_var.olap_table->release_header_lock(); - LOG(INFO) << "success to check delete condition when realtime push. " - << "tablet=" << table_var.olap_table->full_name() - << ", transaction_id=" << request.transaction_id; - } - } - - // write - res = _convert(table_infoes[0].olap_table, table_infoes[1].olap_table, - &(table_infoes[0].added_indices), &(table_infoes[1].added_indices), - alter_table_type); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to convert tmp file when realtime push. [res=%d]", res); - goto EXIT; - } - - // add pending data to tablet - for (TableVars& table_var : table_infoes) { - if (table_var.olap_table.get() == NULL) { - continue; - } - - for (SegmentGroup* segment_group : table_var.added_indices) { - - res = table_var.olap_table->add_pending_data( - segment_group, push_type == PUSH_FOR_DELETE ? &request.delete_conditions : NULL); - - // if pending data exists in tablet, which means push finished - if (res == OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { - SAFE_DELETE(segment_group); - res = OLAP_SUCCESS; - - } else if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to add pending data to tablet. [table=%s transaction_id=%ld]", - table_var.olap_table->full_name().c_str(), request.transaction_id); - goto EXIT; - } - } - } - -EXIT: - // if transaction existed in engine but push not finished, not report to fe - if (res == OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { - OLAP_LOG_WARNING("find transaction existed when realtime push, not report. 
", - "[table=%s partition_id=%ld transaction_id=%ld]", - olap_table->full_name().c_str(), - request.partition_id, request.transaction_id); + TabletSharedPtr related_tablet = + StorageEngine::instance()->tablet_manager()->get_tablet( + related_tablet_id, related_schema_hash); + + // if related tablet not exists, only push current tablet + if (related_tablet == nullptr) { + LOG(WARNING) << "find alter task but not find related tablet, " + << "related_tablet_id=" << related_tablet_id + << ", related_schema_hash=" << related_schema_hash; + tablet->release_push_lock(); + return OLAP_ERR_TABLE_NOT_FOUND; + // if current tablet is new tablet, only push current tablet + } else if (tablet->creation_time() > related_tablet->creation_time()) { + LOG(INFO) << "current tablet is new, only push current tablet. " + << "tablet=" << tablet->full_name() + << " related_tablet=" << related_tablet->full_name(); + } else { + ReadLock new_migration_rlock(related_tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!new_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + PUniqueId load_id; + load_id.set_hi(0); + load_id.set_lo(0); + res = StorageEngine::instance()->txn_manager()->prepare_txn( + request.partition_id, request.transaction_id, + related_tablet->tablet_id(), related_tablet->schema_hash(), + related_tablet->tablet_uid(), load_id); + // prepare txn will always be successful + tablet_vars->push_back(TabletVars()); + TabletVars& new_item = tablet_vars->back(); + new_item.tablet = related_tablet; + } + } + } + tablet->release_push_lock(); + + if (tablet_vars->size() == 1) { + tablet_vars->resize(2); + } + + // not call validate request here, because realtime load does not + // contain version info + + // check delete condition if push for delete + std::queue del_preds; + if (push_type == PUSH_FOR_DELETE) { + for (TabletVars& tablet_var : *tablet_vars) { + if (tablet_var.tablet == nullptr) { + continue; + } + + DeletePredicatePB del_pred; + DeleteConditionHandler del_cond_handler; + tablet_var.tablet->obtain_header_rdlock(); + res = del_cond_handler.generate_delete_predicate( + tablet_var.tablet->tablet_schema(), request.delete_conditions, + &del_pred); + del_preds.push(del_pred); + tablet_var.tablet->release_header_lock(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to generate delete condition. res=" << res + << ", tablet=" << tablet_var.tablet->full_name(); return res; + } + } + } + + // write + res = _convert(tablet_vars->at(0).tablet, tablet_vars->at(1).tablet, + &(tablet_vars->at(0).rowset_to_add), + &(tablet_vars->at(1).rowset_to_add)); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to convert tmp file when realtime push. res=" << res + << ", failed to process realtime push." 
+ << ", table=" << tablet->full_name() + << ", transaction_id=" << request.transaction_id; + for (TabletVars& tablet_var : *tablet_vars) { + if (tablet_var.tablet == nullptr) { + continue; + } + + OLAPStatus rollback_status = + StorageEngine::instance()->txn_manager()->rollback_txn( + request.partition_id, request.transaction_id, + tablet_var.tablet->tablet_id(), tablet_var.tablet->schema_hash(), + tablet_var.tablet->tablet_uid()); + // has to check rollback status to ensure not delete a committed rowset + if (rollback_status == OLAP_SUCCESS) { + // actually, olap_index may has been deleted in delete_transaction() + StorageEngine::instance()->add_unused_rowset(tablet_var.rowset_to_add); + } } + return res; + } - if (res == OLAP_SUCCESS) { - if (tablet_info_vec != NULL) { - _get_tablet_infos(table_infoes, tablet_info_vec); - } - LOG(INFO) << "process realtime push successfully. " - << "tablet=" << olap_table->full_name() - << ", partition_id=" << request.partition_id - << ", transaction_id=" << request.transaction_id; - } else { - - // error happens, clear - OLAP_LOG_WARNING("failed to process realtime push. [table=%s transaction_id=%ld]", - olap_table->full_name().c_str(), request.transaction_id); - for (TableVars& table_var : table_infoes) { - if (table_var.olap_table.get() == NULL) { - continue; - } - - OLAPEngine::get_instance()->delete_transaction( - request.partition_id, request.transaction_id, - table_var.olap_table->tablet_id(), table_var.olap_table->schema_hash()); - - // actually, olap_index may has been deleted in delete_transaction() - for (SegmentGroup* segment_group : table_var.added_indices) { - segment_group->release(); - OLAPEngine::get_instance()->add_unused_index(segment_group); - } - } + // add pending data to tablet + for (TabletVars& tablet_var : *tablet_vars) { + if (tablet_var.tablet == nullptr) { + continue; } - return res; + if (push_type == PUSH_FOR_DELETE) { + tablet_var.rowset_to_add->rowset_meta()->set_delete_predicate( + del_preds.front()); + del_preds.pop(); + } + OLAPStatus commit_status = + StorageEngine::instance()->txn_manager()->commit_txn( + tablet_var.tablet->data_dir()->get_meta(), request.partition_id, + request.transaction_id, tablet_var.tablet->tablet_id(), + tablet_var.tablet->schema_hash(), tablet_var.tablet->tablet_uid(), + load_id, tablet_var.rowset_to_add, + false); + if (commit_status != OLAP_SUCCESS && + commit_status != OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { + res = commit_status; + } + } + return res; } -void PushHandler::_get_tablet_infos( - const vector& table_infoes, - vector* tablet_info_vec) { - for (const TableVars& table_var : table_infoes) { - if (table_var.olap_table.get() == NULL) { - continue; - } - - TTabletInfo tablet_info; - tablet_info.tablet_id = table_var.olap_table->tablet_id(); - tablet_info.schema_hash = table_var.olap_table->schema_hash(); - OLAPEngine::get_instance()->report_tablet_info(&tablet_info); - tablet_info_vec->push_back(tablet_info); +void PushHandler::_get_tablet_infos(const vector& tablet_vars, + vector* tablet_info_vec) { + for (const TabletVars& tablet_var : tablet_vars) { + if (tablet_var.tablet.get() == NULL) { + continue; } + + TTabletInfo tablet_info; + tablet_info.tablet_id = tablet_var.tablet->tablet_id(); + tablet_info.schema_hash = tablet_var.tablet->schema_hash(); + StorageEngine::instance()->tablet_manager()->report_tablet_info( + &tablet_info); + tablet_info_vec->push_back(tablet_info); + } } -OLAPStatus PushHandler::_convert( - OLAPTablePtr curr_olap_table, - OLAPTablePtr new_olap_table, 
- Indices* curr_olap_indices, - Indices* new_olap_indices, - AlterTabletType alter_table_type) { +OLAPStatus PushHandler::_convert(TabletSharedPtr cur_tablet, + TabletSharedPtr new_tablet, + RowsetSharedPtr* cur_rowset, + RowsetSharedPtr* new_rowset) { OLAPStatus res = OLAP_SUCCESS; RowCursor row; BinaryFile raw_file; IBinaryReader* reader = NULL; - ColumnDataWriter* writer = NULL; - SegmentGroup* delta_segment_group = NULL; - uint32_t num_rows = 0; + RowsetWriterSharedPtr rowset_writer(new AlphaRowsetWriter()); + if (rowset_writer == nullptr) { + LOG(WARNING) << "new rowset writer failed."; + return OLAP_ERR_MALLOC_ERROR; + } + RowsetWriterContext context; + uint32_t num_rows = 0; + RowsetId rowset_id = 0; + res = cur_tablet->next_rowset_id(&rowset_id); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "generate rowset id failed, res:" << res; + return OLAP_ERR_ROWSET_GENERATE_ID_FAILED; + } + PUniqueId load_id; + load_id.set_hi(0); + load_id.set_lo(0); do { VLOG(3) << "start to convert delta file."; - std::vector tablet_schema = curr_olap_table->tablet_schema(); - - //curr_olap_table->set_tablet_schema(); - tablet_schema = curr_olap_table->tablet_schema(); // 1. Init BinaryReader to read raw file if exist, // in case of empty push and delete data, this will be skipped. if (_request.__isset.http_file_path) { // open raw file - if (OLAP_SUCCESS != (res = raw_file.init(_request.http_file_path.c_str()))) { - OLAP_LOG_WARNING("failed to read raw file. [res=%d file='%s']", - res, _request.http_file_path.c_str()); + if (OLAP_SUCCESS != + (res = raw_file.init(_request.http_file_path.c_str()))) { + LOG(WARNING) << "failed to read raw file. res=" << res + << ", file=" << _request.http_file_path; res = OLAP_ERR_INPUT_PARAMETER_ERROR; break; } @@ -571,387 +309,133 @@ OLAPStatus PushHandler::_convert( } #endif - if (NULL == (reader = IBinaryReader::create(need_decompress))) { - OLAP_LOG_WARNING("fail to create reader. [table='%s' file='%s']", - curr_olap_table->full_name().c_str(), - _request.http_file_path.c_str()); + reader = IBinaryReader::create(need_decompress); + if (reader == nullptr) { + LOG(WARNING) << "fail to create reader. tablet=" << cur_tablet->full_name() + << ", file=" << _request.http_file_path; res = OLAP_ERR_MALLOC_ERROR; break; } // init BinaryReader - if (OLAP_SUCCESS != (res = reader->init(curr_olap_table, &raw_file))) { - OLAP_LOG_WARNING("fail to init reader. [res=%d table='%s' file='%s']", - res, - curr_olap_table->full_name().c_str(), - _request.http_file_path.c_str()); + if (OLAP_SUCCESS != (res = reader->init(cur_tablet, &raw_file))) { + LOG(WARNING) << "fail to init reader. res=" << res + << ", tablet=" << cur_tablet->full_name() + << ", file=" << _request.http_file_path; res = OLAP_ERR_PUSH_INIT_ERROR; break; } } - // 2. New SegmentGroup of curr_olap_table for current push - VLOG(3) << "init SegmentGroup."; - - if (_request.__isset.transaction_id) { - // create pending data dir - string dir_path = curr_olap_table->construct_pending_data_dir_path(); - if (!check_dir_existed(dir_path) && (res = create_dirs(dir_path)) != OLAP_SUCCESS) { - if (!check_dir_existed(dir_path)) { - OLAP_LOG_WARNING("fail to create pending dir. 
[res=%d table=%s]", - res, curr_olap_table->full_name().c_str()); - break; - } - } - - delta_segment_group = new(std::nothrow) SegmentGroup( - curr_olap_table.get(), (_request.push_type == TPushType::LOAD_DELETE), - 0, 0, true, _request.partition_id, _request.transaction_id); - } else { - delta_segment_group = new(std::nothrow) SegmentGroup( - curr_olap_table.get(), - Version(_request.version, _request.version), - _request.version_hash, - (_request.push_type == TPushType::LOAD_DELETE), - 0, 0); - } - - if (NULL == delta_segment_group) { - OLAP_LOG_WARNING("fail to malloc SegmentGroup. [table='%s' size=%ld]", - curr_olap_table->full_name().c_str(), sizeof(SegmentGroup)); - res = OLAP_ERR_MALLOC_ERROR; - break; - } - curr_olap_indices->push_back(delta_segment_group); - - // 3. New Writer to write data into SegmentGroup - VLOG(3) << "init writer. tablet=" << curr_olap_table->full_name() - << ", block_row_size=" << curr_olap_table->num_rows_per_row_block(); - - if (NULL == (writer = ColumnDataWriter::create(curr_olap_table, delta_segment_group, true))) { - OLAP_LOG_WARNING("fail to create writer. [table='%s']", - curr_olap_table->full_name().c_str()); - res = OLAP_ERR_MALLOC_ERROR; - break; - } + // 2. init RowsetBuilder of cur_tablet for current push + VLOG(3) << "init RowsetBuilder."; + RowsetWriterContext context; + context.rowset_id = rowset_id; + context.tablet_uid = cur_tablet->tablet_uid(); + context.tablet_id = cur_tablet->tablet_id(); + context.partition_id = _request.partition_id; + context.tablet_schema_hash = cur_tablet->schema_hash(); + context.rowset_type = ALPHA_ROWSET; + context.rowset_path_prefix = cur_tablet->tablet_path(); + context.tablet_schema = &(cur_tablet->tablet_schema()); + context.rowset_state = PREPARED; + context.data_dir = cur_tablet->data_dir(); + context.txn_id = _request.transaction_id; + context.load_id = load_id; + rowset_writer->init(context); + + // 3. New RowsetBuilder to write data into rowset + VLOG(3) << "init rowset builder. tablet=" << cur_tablet->full_name() + << ", block_row_size=" << cur_tablet->num_rows_per_row_block(); // 4. Init RowCursor - if (OLAP_SUCCESS != (res = row.init(curr_olap_table->tablet_schema()))) { - OLAP_LOG_WARNING("fail to init rowcursor. [res=%d]", res); + if (OLAP_SUCCESS != (res = row.init(cur_tablet->tablet_schema()))) { + LOG(WARNING) << "fail to init rowcursor. res=" << res; break; } - // 5. Read data from raw file and write into SegmentGroup of curr_olap_table + // 5. Read data from raw file and write into SegmentGroup of cur_tablet if (_request.__isset.http_file_path) { // Convert from raw to delta VLOG(3) << "start to convert row file to delta."; while (!reader->eof()) { - if (OLAP_SUCCESS != (res = writer->attached_by(&row))) { - OLAP_LOG_WARNING( - "fail to attach row to writer. [res=%d table='%s' read_rows=%u]", - res, curr_olap_table->full_name().c_str(), num_rows); - break; - } - - res = reader->next(&row, writer->mem_pool()); + res = reader->next(&row, rowset_writer->mem_pool()); if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("read next row failed. [res=%d read_rows=%u]", - res, num_rows); + LOG(WARNING) << "read next row failed." + << " res=" << res << " read_rows=" << num_rows; break; } else { - writer->next(row); + if (OLAP_SUCCESS != (res = rowset_writer->add_row(&row))) { + LOG(WARNING) << "fail to attach row to rowset_writer. 
" + << " res=" << res + << ", tablet=" << cur_tablet->full_name() + << " read_rows=" << num_rows; + break; + } num_rows++; } } reader->finalize(); - if (false == reader->validate_checksum()) { - OLAP_LOG_WARNING("pushed delta file has wrong checksum."); + if (!reader->validate_checksum()) { + LOG(WARNING) << "pushed delta file has wrong checksum."; res = OLAP_ERR_PUSH_BUILD_DELTA_ERROR; break; } } - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "ingest data failed. res" << res; + if (rowset_writer->flush() != OLAP_SUCCESS) { + LOG(WARNING) << "failed to finalize writer."; break; } + *cur_rowset = rowset_writer->build(); - if (OLAP_SUCCESS != (res = writer->finalize())) { - OLAP_LOG_WARNING("fail to finalize writer. [res=%d]", res); + if (*cur_rowset == nullptr) { + LOG(WARNING) << "fail to build rowset"; + res = OLAP_ERR_MALLOC_ERROR; break; } - VLOG(3) << "load the index."; - if (OLAP_SUCCESS != (res = delta_segment_group->load())) { - OLAP_LOG_WARNING("fail to load index. [res=%d table='%s' version=%ld]", - res, curr_olap_table->full_name().c_str(), _request.version); - break; - } - _write_bytes += delta_segment_group->data_size(); - _write_rows += delta_segment_group->num_rows(); + _write_bytes += (*cur_rowset)->data_disk_size(); + _write_rows += (*cur_rowset)->num_rows(); // 7. Convert data for schema change tables VLOG(10) << "load to related tables of schema_change if possible."; - if (NULL != new_olap_table.get()) { - // create related tablet's pending data dir - string dir_path = new_olap_table->construct_pending_data_dir_path(); - if (!check_dir_existed(dir_path) && (res = create_dirs(dir_path)) != OLAP_SUCCESS) { - if (!check_dir_existed(dir_path)) { - OLAP_LOG_WARNING("fail to create pending dir. [res=%d table=%s]", - res, new_olap_table->full_name().c_str()); - break; - } - } - + if (new_tablet != nullptr) { SchemaChangeHandler schema_change; - res = schema_change.schema_version_convert( - curr_olap_table, - new_olap_table, - curr_olap_indices, - new_olap_indices); + res = schema_change.schema_version_convert(cur_tablet, new_tablet, + cur_rowset, new_rowset); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to change schema version for delta." - "[res=%d new_table='%s']", - res, new_olap_table->full_name().c_str()); + LOG(WARNING) << "failed to change schema version for delta." + << "[res=" << res << " new_tablet='" + << new_tablet->full_name() << "']"; } - } } while (0); SAFE_DELETE(reader); - SAFE_DELETE(writer); - OLAP_LOG_NOTICE_PUSH("processed_rows", "%d", num_rows); VLOG(10) << "convert delta file end. res=" << res - << ", tablet=" << curr_olap_table->full_name(); - return res; -} - -OLAPStatus PushHandler::_validate_request( - OLAPTablePtr olap_table_for_raw, - OLAPTablePtr olap_table_for_schema_change, - bool is_new_tablet_effective, - PushType push_type) { - const PDelta* latest_delta = olap_table_for_raw->lastest_delta(); - - if (NULL == latest_delta) { - const PDelta* lastest_version = olap_table_for_raw->lastest_version(); - - // PUSH the first version when the version is 0, or - // tablet is in alter table status. - if (NULL == lastest_version - && (0 == _request.version || NULL != olap_table_for_schema_change.get())) { - return OLAP_SUCCESS; - } else if (NULL != lastest_version - && (lastest_version->end_version() + 1 == _request.version)) { - return OLAP_SUCCESS; - } - - OLAP_LOG_WARNING("no last pushed delta, the comming version should be 0. 
[table='%s']", - olap_table_for_raw->full_name().c_str()); - return OLAP_ERR_PUSH_VERSION_INCORRECT; - } - - if (is_new_tablet_effective) { - LOG(INFO) << "maybe a alter tablet has already created from base tablet. " - << "tablet=" << olap_table_for_raw->full_name() - << ", version=" << _request.version; - if (push_type == PUSH_FOR_DELETE - && _request.version == latest_delta->start_version() - && _request.version_hash == latest_delta->version_hash()) { - LOG(INFO) << "base tablet has already convert delete version for new tablet. " - << "version=" << _request.version << ", version_hash=" << _request.version_hash; - return OLAP_ERR_PUSH_VERSION_ALREADY_EXIST; - } - } else { - // Never allow two push has same version and version hash, - // but same verson and different version hash is allowed. - if (_request.version < latest_delta->start_version() - || _request.version > latest_delta->start_version() + 1) { - OLAP_LOG_WARNING( - "try to push a delta with incorrect version. " - "[new_version=%ld lastest_version=%u " - "new_version_hash=%ld lastest_version_hash=%lu]", - _request.version, latest_delta->start_version(), - _request.version_hash, latest_delta->version_hash()); - return OLAP_ERR_PUSH_VERSION_INCORRECT; - } else if (_request.version == latest_delta->start_version() - && _request.version_hash == latest_delta->version_hash()) { - OLAP_LOG_WARNING( - "try to push a already exist delta. " - "[new_version=%ld lastest_version=%u " - "new_version_hash=%ld lastest_version_hash=%lu]", - _request.version, latest_delta->start_version(), - _request.version_hash, latest_delta->version_hash()); - return OLAP_ERR_PUSH_VERSION_ALREADY_EXIST; - } - } - - return OLAP_SUCCESS; -} - - -// The latest version can be reverted for following scene: -// user submit a push job and cancel it soon, but some -// tablets already push success. -OLAPStatus PushHandler::_get_versions_reverted( - OLAPTablePtr olap_table, - bool is_new_tablet, - PushType push_type, - Versions* unused_versions) { - const PDelta* latest_delta = olap_table->lastest_delta(); - - if (NULL == latest_delta) { - const PDelta* lastest_version = olap_table->lastest_version(); - - // PUSH the first version, and the version is 0 - if ((NULL == lastest_version - && (0 == _request.version || is_new_tablet))) { - return OLAP_SUCCESS; - } else if (NULL != lastest_version - && lastest_version->end_version() + 1 == _request.version) { - return OLAP_SUCCESS; - } - - OLAP_LOG_WARNING("no last pushed delta, the comming version should be 0. [table='%s']", - olap_table->full_name().c_str()); - return OLAP_ERR_PUSH_VERSION_INCORRECT; - } - - VLOG(3) << "latest deltas was founded. tablet=" << olap_table->full_name() - << ", version=" << latest_delta->start_version() << "-" << latest_delta->end_version(); - // Remove the cumulative delta that end_version == request.version() - if (_request.version == latest_delta->start_version()) { - Versions all_versions; - olap_table->list_versions(&all_versions); - - for (Versions::const_iterator v = all_versions.begin(); v != all_versions.end(); ++v) { - if (v->second == _request.version) { - unused_versions->push_back(*v); - VLOG(3) << "Add unused version. tablet=" << olap_table->full_name() - << "version=" << v->first << "-" << v->second; - } - } - - // Remove delete condition if current type is PUSH_FOR_DELETE, - // this occurs when user cancel delete_data soon after submit it. 
- if (push_type != PUSH_FOR_DELETE) { - DeleteConditionHandler del_cond_handler; - del_cond_handler.delete_cond(olap_table, _request.version, false); - } - } - - return OLAP_SUCCESS; -} - -OLAPStatus PushHandler::_update_header( - OLAPTablePtr olap_table, - Versions* unused_versions, - Indices* new_indices, - Indices* unused_indices) { - OLAPStatus res = OLAP_SUCCESS; - - res = olap_table->replace_data_sources( - unused_versions, - new_indices, - unused_indices); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to replace data sources. res=" << res - << ", tablet=" << olap_table->full_name(); - return res; - } - - // Avoid double update - new_indices->clear(); - unused_versions->clear(); - - // Save header fail will not impact service for memory state - // has already changed, but some data may lost when OLAPEngine restart; - // Note we don't return fail here. - res = olap_table->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. res=" << res - << ", tablet=" << olap_table->full_name(); - } - - return res; -} - -void PushHandler::_delete_old_indices(Indices* unused_indices) { - if (!unused_indices->empty()) { - OLAPEngine* unused_index = OLAPEngine::get_instance(); - - for (Indices::iterator it = unused_indices->begin(); - it != unused_indices->end(); ++it) { - unused_index->add_unused_index(*it); - } - } -} - -OLAPStatus PushHandler::_clear_alter_table_info( - OLAPTablePtr tablet, - OLAPTablePtr related_tablet) { - OLAPStatus res = OLAP_SUCCESS; - _obtain_header_wrlock(); - - do { - res = SchemaChangeHandler::clear_schema_change_single_info( - tablet, NULL, false, false); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to clear schema change info of new table. res=" << res - << ", tablet=" << tablet->full_name(); - break; - } - - res = tablet->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. res=" << res - << ", table=" << tablet->full_name(); - break; - } - - TTabletId tablet_id; - TSchemaHash schema_hash; - bool is_sc = related_tablet->get_schema_change_request( - &tablet_id, &schema_hash, NULL, NULL); - if (is_sc && tablet_id == tablet->tablet_id() && schema_hash == tablet->schema_hash()) { - res = SchemaChangeHandler::clear_schema_change_single_info( - related_tablet, NULL, false, false); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to clear schema change info of old table. res=" << res - << ", tablet=" << related_tablet->full_name(); - break; - } - - res = related_tablet->save_header(); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header. res=" << res - << "table=" << related_tablet->full_name(); - break; - } - } - } while (0); - - _release_header_lock(); + << ", tablet=" << cur_tablet->full_name() + << ", processed_rows" << num_rows; return res; } OLAPStatus BinaryFile::init(const char* path) { // open file if (OLAP_SUCCESS != open(path, "rb")) { - OLAP_LOG_WARNING("fail to open file. [file='%s']", path); + LOG(WARNING) << "fail to open file. file=" << path; return OLAP_ERR_IO_ERROR; } // load header if (OLAP_SUCCESS != _header.unserialize(this)) { - OLAP_LOG_WARNING("fail to read file header. [file='%s']", path); + LOG(WARNING) << "fail to read file header. 
file=" << path; close(); return OLAP_ERR_PUSH_INIT_ERROR; } - return OLAP_SUCCESS; + return OLAP_SUCCESS; } IBinaryReader* IBinaryReader::create(bool need_decompress) { @@ -967,127 +451,125 @@ IBinaryReader* IBinaryReader::create(bool need_decompress) { } BinaryReader::BinaryReader() - : IBinaryReader(), - _row_buf(NULL), - _row_buf_size(0) { -} + : IBinaryReader(), _row_buf(NULL), _row_buf_size(0) {} -OLAPStatus BinaryReader::init( - OLAPTablePtr table, - BinaryFile* file) { - OLAPStatus res = OLAP_SUCCESS; +OLAPStatus BinaryReader::init(TabletSharedPtr tablet, BinaryFile* file) { + OLAPStatus res = OLAP_SUCCESS; - do { - _file = file; - _content_len = _file->file_length() - _file->header_size(); - _row_buf_size = table->get_row_size(); + do { + _file = file; + _content_len = _file->file_length() - _file->header_size(); + _row_buf_size = tablet->row_size(); - if (NULL == (_row_buf = new(std::nothrow) char[_row_buf_size])) { - OLAP_LOG_WARNING("fail to malloc one row buf. [size=%zu]", _row_buf_size); - res = OLAP_ERR_MALLOC_ERROR; - break; - } + _row_buf = new (std::nothrow) char[_row_buf_size]; + if (_row_buf == nullptr) { + LOG(WARNING) << "fail to malloc one row buf. size=" << _row_buf_size; + res = OLAP_ERR_MALLOC_ERROR; + break; + } - if (-1 == _file->seek(_file->header_size(), SEEK_SET)) { - OLAP_LOG_WARNING("skip header, seek fail."); - res = OLAP_ERR_IO_ERROR; - break; - } + if (-1 == _file->seek(_file->header_size(), SEEK_SET)) { + LOG(WARNING) << "skip header, seek fail."; + res = OLAP_ERR_IO_ERROR; + break; + } - _table = table; - _ready = true; - } while (0); + _tablet = tablet; + _ready = true; + } while (0); - if (res != OLAP_SUCCESS) { - SAFE_DELETE_ARRAY(_row_buf); - } - return res; + if (res != OLAP_SUCCESS) { + SAFE_DELETE_ARRAY(_row_buf); + } + return res; } OLAPStatus BinaryReader::finalize() { - _ready = false; - SAFE_DELETE_ARRAY(_row_buf); - return OLAP_SUCCESS; + _ready = false; + SAFE_DELETE_ARRAY(_row_buf); + return OLAP_SUCCESS; } OLAPStatus BinaryReader::next(RowCursor* row, MemPool* mem_pool) { - OLAPStatus res = OLAP_SUCCESS; + OLAPStatus res = OLAP_SUCCESS; - if (!_ready || NULL == row) { - // Here i assume _ready means all states were set up correctly - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } + if (!_ready || NULL == row) { + // Here i assume _ready means all states were set up correctly + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } - const vector& schema = _table->tablet_schema(); - size_t offset = 0; - size_t field_size = 0; - size_t num_null_bytes = (_table->num_null_fields() + 7) / 8; + const TabletSchema& schema = _tablet->tablet_schema(); + size_t offset = 0; + size_t field_size = 0; + size_t num_null_bytes = (_tablet->num_null_columns() + 7) / 8; - if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, num_null_bytes))) { - OLAP_LOG_WARNING("read file for one row fail. [res=%d]", res); + if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, num_null_bytes))) { + LOG(WARNING) << "read file for one row fail. 
res=" << res; + return res; + } + + size_t p = 0; + for (size_t i = 0; i < schema.num_columns(); ++i) { + row->set_not_null(i); + if (schema.column(i).is_nullable()) { + bool is_null = false; + is_null = (_row_buf[p / 8] >> ((num_null_bytes * 8 - p - 1) % 8)) & 1; + if (is_null) { + row->set_null(i); + } + p++; + } + } + offset += num_null_bytes; + + for (uint32_t i = 0; i < schema.num_columns(); i++) { + const TabletColumn& column = schema.column(i); + if (row->is_null(i)) { + continue; + } + if (column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { + // Read varchar length buffer first + if (OLAP_SUCCESS != + (res = _file->read(_row_buf + offset, sizeof(StringLengthType)))) { + LOG(WARNING) << "read file for one row fail. res=" << res; return res; + } + + // Get varchar field size + field_size = *reinterpret_cast(_row_buf + offset); + offset += sizeof(StringLengthType); + if (field_size > column.length() - sizeof(StringLengthType)) { + LOG(WARNING) << "invalid data length for VARCHAR! " + << "max_len=" << column.length() - sizeof(StringLengthType) + << ", real_len=" << field_size; + return OLAP_ERR_PUSH_INPUT_DATA_ERROR; + } + } else { + field_size = column.length(); } - size_t p = 0; - for (size_t i = 0; i < schema.size(); ++i) { - row->set_not_null(i); - if (schema[i].is_allow_null) { - bool is_null = false; - is_null = (_row_buf[p/8] >> ((num_null_bytes * 8 - p - 1) % 8)) & 1; - if (is_null) { - row->set_null(i); - } - p++; - } + // Read field content according to field size + if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, field_size))) { + LOG(WARNING) << "read file for one row fail. res=" << res; + return res; } - offset += num_null_bytes; - - for (uint32_t i = 0; i < schema.size(); i++) { - if (row->is_null(i)) { - continue; - } - if (schema[i].type == OLAP_FIELD_TYPE_VARCHAR || schema[i].type == OLAP_FIELD_TYPE_HLL) { - // Read varchar length buffer first - if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, - sizeof(StringLengthType)))) { - OLAP_LOG_WARNING("read file for one row fail. [res=%d]", res); - return res; - } - - // Get varchar field size - field_size = *reinterpret_cast(_row_buf + offset); - offset += sizeof(StringLengthType); - if (field_size > schema[i].length - sizeof(StringLengthType)) { - OLAP_LOG_WARNING("invalid data length for VARCHAR! [max_len=%d real_len=%d]", - schema[i].length - sizeof(StringLengthType), - field_size); - return OLAP_ERR_PUSH_INPUT_DATA_ERROR; - } - } else { - field_size = schema[i].length; - } - - // Read field content according to field size - if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, field_size))) { - OLAP_LOG_WARNING("read file for one row fail. [res=%d]", res); - return res; - } - if (schema[i].type == OLAP_FIELD_TYPE_CHAR - || schema[i].type == OLAP_FIELD_TYPE_VARCHAR - || schema[i].type == OLAP_FIELD_TYPE_HLL) { - Slice slice(_row_buf + offset, field_size); - row->set_field_content(i, reinterpret_cast(&slice), mem_pool); - } else { - row->set_field_content(i, _row_buf + offset, mem_pool); - } - offset += field_size; + if (column.type() == OLAP_FIELD_TYPE_CHAR || + column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { + Slice slice(_row_buf + offset, field_size); + row->set_field_content(i, reinterpret_cast(&slice), mem_pool); + } else { + row->set_field_content(i, _row_buf + offset, mem_pool); } - _curr += offset; + offset += field_size; + } + _curr += offset; - // Calculate checksum for validate when push finished. 
- _adler_checksum = olap_adler32(_adler_checksum, _row_buf, offset); - return res; + // Calculate checksum for validate when push finished. + _adler_checksum = olap_adler32(_adler_checksum, _row_buf, offset); + return res; } LzoBinaryReader::LzoBinaryReader() @@ -1099,197 +581,204 @@ LzoBinaryReader::LzoBinaryReader() _max_row_buf_size(0), _max_compressed_buf_size(0), _row_num(0), - _next_row_start(0) { -} + _next_row_start(0) {} -OLAPStatus LzoBinaryReader::init( - OLAPTablePtr table, - BinaryFile* file) { - OLAPStatus res = OLAP_SUCCESS; +OLAPStatus LzoBinaryReader::init(TabletSharedPtr tablet, BinaryFile* file) { + OLAPStatus res = OLAP_SUCCESS; - do { - _file = file; - _content_len = _file->file_length() - _file->header_size(); + do { + _file = file; + _content_len = _file->file_length() - _file->header_size(); - size_t row_info_buf_size = sizeof(RowNumType) + sizeof(CompressedSizeType); - if (NULL == (_row_info_buf = new(std::nothrow) char[row_info_buf_size])) { - OLAP_LOG_WARNING("fail to malloc rows info buf. [size=%zu]", row_info_buf_size); - res = OLAP_ERR_MALLOC_ERROR; - break; - } + size_t row_info_buf_size = sizeof(RowNumType) + sizeof(CompressedSizeType); + _row_info_buf = new (std::nothrow) char[row_info_buf_size]; + if (_row_info_buf == nullptr) { + LOG(WARNING) << "fail to malloc rows info buf. size=" << row_info_buf_size; + res = OLAP_ERR_MALLOC_ERROR; + break; + } - if (-1 == _file->seek(_file->header_size(), SEEK_SET)) { - OLAP_LOG_WARNING("skip header, seek fail."); - res = OLAP_ERR_IO_ERROR; - break; - } + if (-1 == _file->seek(_file->header_size(), SEEK_SET)) { + LOG(WARNING) << "skip header, seek fail."; + res = OLAP_ERR_IO_ERROR; + break; + } - _table = table; - _ready = true; - } while (0); + _tablet = tablet; + _ready = true; + } while (0); - if (res != OLAP_SUCCESS) { - SAFE_DELETE_ARRAY(_row_info_buf); - } - return res; + if (res != OLAP_SUCCESS) { + SAFE_DELETE_ARRAY(_row_info_buf); + } + return res; } OLAPStatus LzoBinaryReader::finalize() { - _ready = false; - SAFE_DELETE_ARRAY(_row_buf); - SAFE_DELETE_ARRAY(_row_compressed_buf); - SAFE_DELETE_ARRAY(_row_info_buf); - return OLAP_SUCCESS; + _ready = false; + SAFE_DELETE_ARRAY(_row_buf); + SAFE_DELETE_ARRAY(_row_compressed_buf); + SAFE_DELETE_ARRAY(_row_info_buf); + return OLAP_SUCCESS; } OLAPStatus LzoBinaryReader::next(RowCursor* row, MemPool* mem_pool) { - OLAPStatus res = OLAP_SUCCESS; - - if (!_ready || NULL == row) { - // Here i assume _ready means all states were set up correctly - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - if (_row_num == 0) { - // read next block - if (OLAP_SUCCESS != (res = _next_block())) { - return res; - } + OLAPStatus res = OLAP_SUCCESS; + + if (!_ready || NULL == row) { + // Here i assume _ready means all states were set up correctly + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + if (_row_num == 0) { + // read next block + if (OLAP_SUCCESS != (res = _next_block())) { + return res; + } + } + + const TabletSchema& schema = _tablet->tablet_schema(); + size_t offset = 0; + size_t field_size = 0; + size_t num_null_bytes = (_tablet->num_null_columns() + 7) / 8; + + size_t p = 0; + for (size_t i = 0; i < schema.num_columns(); ++i) { + row->set_not_null(i); + if (schema.column(i).is_nullable()) { + bool is_null = false; + is_null = (_row_buf[_next_row_start + p / 8] >> + ((num_null_bytes * 8 - p - 1) % 8)) & + 1; + if (is_null) { + row->set_null(i); + } + p++; + } + } + offset += num_null_bytes; + + for (uint32_t i = 0; i < schema.num_columns(); i++) { + if (row->is_null(i)) { + 
continue; + } + + const TabletColumn& column = schema.column(i); + if (column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { + // Get varchar field size + field_size = *reinterpret_cast( + _row_buf + _next_row_start + offset); + offset += sizeof(StringLengthType); + + if (field_size > column.length() - sizeof(StringLengthType)) { + LOG(WARNING) << "invalid data length for VARCHAR! " + << "max_len=" << column.length() - sizeof(StringLengthType) + << ", real_len=" << field_size; + return OLAP_ERR_PUSH_INPUT_DATA_ERROR; + } + } else { + field_size = column.length(); } - const vector& schema = _table->tablet_schema(); - size_t offset = 0; - size_t field_size = 0; - size_t num_null_bytes = (_table->num_null_fields() + 7) / 8; - - size_t p = 0; - for (size_t i = 0; i < schema.size(); ++i) { - row->set_not_null(i); - if (schema[i].is_allow_null) { - bool is_null = false; - is_null = (_row_buf[_next_row_start + p/8] >> ((num_null_bytes * 8 - p - 1) % 8)) & 1; - if (is_null) { - row->set_null(i); - } - p++; - } + if (column.type() == OLAP_FIELD_TYPE_CHAR || + column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { + Slice slice(_row_buf + _next_row_start + offset, field_size); + row->set_field_content(i, reinterpret_cast(&slice), mem_pool); + } else { + row->set_field_content(i, _row_buf + _next_row_start + offset, mem_pool); } - offset += num_null_bytes; - for (uint32_t i = 0; i < schema.size(); i++) { - if (row->is_null(i)) { - continue; - } + offset += field_size; + } - if (schema[i].type == OLAP_FIELD_TYPE_VARCHAR || schema[i].type == OLAP_FIELD_TYPE_HLL) { - // Get varchar field size - field_size = *reinterpret_cast(_row_buf + _next_row_start + offset); - offset += sizeof(StringLengthType); + // Calculate checksum for validate when push finished. + _adler_checksum = + olap_adler32(_adler_checksum, _row_buf + _next_row_start, offset); - if (field_size > schema[i].length - sizeof(StringLengthType)) { - OLAP_LOG_WARNING("invalid data length for VARCHAR! [max_len=%d real_len=%d]", - schema[i].length - sizeof(StringLengthType), - field_size); - return OLAP_ERR_PUSH_INPUT_DATA_ERROR; - } - } else { - field_size = schema[i].length; - } - - if (schema[i].type == OLAP_FIELD_TYPE_CHAR - || schema[i].type == OLAP_FIELD_TYPE_VARCHAR - || schema[i].type == OLAP_FIELD_TYPE_HLL) { - Slice slice(_row_buf + _next_row_start + offset, field_size); - row->set_field_content(i, reinterpret_cast(&slice), mem_pool); - } else { - row->set_field_content(i, _row_buf + _next_row_start + offset, mem_pool); - } - - offset += field_size; - } - - // Calculate checksum for validate when push finished. - _adler_checksum = olap_adler32(_adler_checksum, _row_buf + _next_row_start, offset); - - _next_row_start += offset; - --_row_num; - return res; + _next_row_start += offset; + --_row_num; + return res; } OLAPStatus LzoBinaryReader::_next_block() { - OLAPStatus res = OLAP_SUCCESS; + OLAPStatus res = OLAP_SUCCESS; - // Get row num and compressed data size - size_t row_info_buf_size = sizeof(RowNumType) + sizeof(CompressedSizeType); - if (OLAP_SUCCESS != (res = _file->read(_row_info_buf, row_info_buf_size))) { - OLAP_LOG_WARNING("read rows info fail. [res=%d]", res); - return res; - } + // Get row num and compressed data size + size_t row_info_buf_size = sizeof(RowNumType) + sizeof(CompressedSizeType); + if (OLAP_SUCCESS != (res = _file->read(_row_info_buf, row_info_buf_size))) { + LOG(WARNING) << "read rows info fail. 
res=" << res; + return res; + } - RowNumType* rows_num_ptr = reinterpret_cast(_row_info_buf); - _row_num = *rows_num_ptr; - CompressedSizeType* compressed_size_ptr = reinterpret_cast( - _row_info_buf + sizeof(RowNumType)); - CompressedSizeType compressed_size = *compressed_size_ptr; + RowNumType* rows_num_ptr = reinterpret_cast(_row_info_buf); + _row_num = *rows_num_ptr; + CompressedSizeType* compressed_size_ptr = + reinterpret_cast(_row_info_buf + sizeof(RowNumType)); + CompressedSizeType compressed_size = *compressed_size_ptr; - if (_row_num > _max_row_num) { - // renew rows buf - SAFE_DELETE_ARRAY(_row_buf); + if (_row_num > _max_row_num) { + // renew rows buf + SAFE_DELETE_ARRAY(_row_buf); - _max_row_num = _row_num; - _max_row_buf_size = _max_row_num * _table->get_row_size(); - if (NULL == (_row_buf = new(std::nothrow) char[_max_row_buf_size])) { - OLAP_LOG_WARNING("fail to malloc rows buf. [size=%zu]", _max_row_buf_size); - res = OLAP_ERR_MALLOC_ERROR; - return res; - } + _max_row_num = _row_num; + _max_row_buf_size = _max_row_num * _tablet->row_size(); + _row_buf = new (std::nothrow) char[_max_row_buf_size]; + if (_row_buf == nullptr) { + LOG(WARNING) << "fail to malloc rows buf. size=" << _max_row_buf_size; + res = OLAP_ERR_MALLOC_ERROR; + return res; } + } - if (compressed_size > _max_compressed_buf_size) { - // renew rows compressed buf - SAFE_DELETE_ARRAY(_row_compressed_buf); - - _max_compressed_buf_size = compressed_size; - if (NULL == (_row_compressed_buf = new(std::nothrow) char[_max_compressed_buf_size])) { - OLAP_LOG_WARNING("fail to malloc rows compressed buf. [size=%zu]", _max_compressed_buf_size); - res = OLAP_ERR_MALLOC_ERROR; - return res; - } - } + if (compressed_size > _max_compressed_buf_size) { + // renew rows compressed buf + SAFE_DELETE_ARRAY(_row_compressed_buf); - if (OLAP_SUCCESS != (res = _file->read(_row_compressed_buf, compressed_size))) { - OLAP_LOG_WARNING("read compressed rows fail. [res=%d]", res); - return res; + _max_compressed_buf_size = compressed_size; + _row_compressed_buf = new (std::nothrow) char[_max_compressed_buf_size]; + if (_row_compressed_buf == nullptr) { + LOG(WARNING) << "fail to malloc rows compressed buf. size=" << _max_compressed_buf_size; + res = OLAP_ERR_MALLOC_ERROR; + return res; } + } - // python lzo use lzo1x to compress - // and add 5 bytes header (\xf0 + 4 bytes(uncompress data size)) - size_t written_len = 0; - size_t block_header_size = 5; - if (OLAP_SUCCESS != (res = olap_decompress(_row_compressed_buf + block_header_size, - compressed_size - block_header_size, - _row_buf, - _max_row_buf_size, - &written_len, - OLAP_COMP_TRANSPORT))) { - OLAP_LOG_WARNING("olap decompress fail. [res=%d]", res); - return res; - } - - _curr += row_info_buf_size + compressed_size; - _next_row_start = 0; + if (OLAP_SUCCESS != + (res = _file->read(_row_compressed_buf, compressed_size))) { + LOG(WARNING) << "read compressed rows fail. res=" << res; + return res; + } + + // python lzo use lzo1x to compress + // and add 5 bytes header (\xf0 + 4 bytes(uncompress data size)) + size_t written_len = 0; + size_t block_header_size = 5; + if (OLAP_SUCCESS != + (res = olap_decompress(_row_compressed_buf + block_header_size, + compressed_size - block_header_size, _row_buf, + _max_row_buf_size, &written_len, + OLAP_COMP_TRANSPORT))) { + LOG(WARNING) << "olap decompress fail. 
res=" << res; return res; + } + + _curr += row_info_buf_size + compressed_size; + _next_row_start = 0; + return res; } string PushHandler::_debug_version_list(const Versions& versions) const { - std::ostringstream txt; - txt << "Versions: "; + std::ostringstream txt; + txt << "Versions: "; - for (Versions::const_iterator it = versions.begin(); it != versions.end(); ++it) { - txt << "[" << it->first << "~" << it->second << "],"; - } + for (Versions::const_iterator it = versions.begin(); it != versions.end(); + ++it) { + txt << "[" << it->first << "~" << it->second << "],"; + } - return txt.str(); + return txt.str(); } } // namespace doris diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index c84def76eae64c..9a315dbdee349d 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -27,43 +27,32 @@ #include "olap/file_helper.h" #include "olap/merger.h" #include "olap/olap_common.h" -#include "olap/segment_group.h" +#include "olap/rowset/rowset.h" #include "olap/row_cursor.h" -#include "olap/data_writer.h" namespace doris { -typedef std::vector DataSources; -typedef std::vector Indices; class BinaryFile; class BinaryReader; class ColumnMapping; class RowCursor; -struct TableVars { - OLAPTablePtr olap_table; - Versions unused_versions; - Indices unused_indices; - Indices added_indices; +struct TabletVars { + TabletSharedPtr tablet; + RowsetSharedPtr rowset_to_add; }; class PushHandler { public: typedef std::vector SchemaMapping; - PushHandler() : _header_locked(false) {} + PushHandler() {} ~PushHandler() {} // Load local data file into specified tablet. - OLAPStatus process( - OLAPTablePtr olap_table, - const TPushReq& request, - PushType push_type, - std::vector* tablet_info_vec); - - OLAPStatus process_realtime_push( - OLAPTablePtr olap_table, + OLAPStatus process_streaming_ingestion( + TabletSharedPtr tablet, const TPushReq& request, PushType push_type, std::vector* tablet_info_vec); @@ -71,97 +60,32 @@ class PushHandler { int64_t write_bytes() const { return _write_bytes; } int64_t write_rows() const { return _write_rows; } private: - // Validate request, mainly data version check. - OLAPStatus _validate_request( - OLAPTablePtr olap_table_for_raw, - OLAPTablePtr olap_table_for_schema_change, - bool is_rollup_new_table, - PushType push_type); - - // The latest version can be reverted for following scene: - // user submit a push job and cancel it soon, but some - // tablets already push success. - OLAPStatus _get_versions_reverted( - OLAPTablePtr olap_table, - bool is_schema_change_tablet, - PushType push_type, - Versions* unused_versions); - // Convert local data file to internal formatted delta, // return new delta's SegmentGroup OLAPStatus _convert( - OLAPTablePtr curr_olap_table, - OLAPTablePtr new_olap_table_vec, - Indices* curr_olap_indices, - Indices* new_olap_indices, - AlterTabletType alter_table_type); - - // Update header info when new version add or dirty version removed. - OLAPStatus _update_header( - OLAPTablePtr olap_table, - Versions* unused_versions, - Indices* new_indices, - Indices* unused_indices); - - // remove all old file of cumulatives versions - void _delete_old_indices(Indices* indices); - - // Clear schema change information. 
- OLAPStatus _clear_alter_table_info( - OLAPTablePtr olap_table, - OLAPTablePtr related_olap_table); + TabletSharedPtr cur_tablet, + TabletSharedPtr new_tablet_vec, + RowsetSharedPtr* cur_rowset, + RowsetSharedPtr* new_rowset); // Only for debug std::string _debug_version_list(const Versions& versions) const; - // Lock tablet header before read header info. - void _obtain_header_rdlock() { - for (std::list::iterator it = _olap_table_arr.begin(); - it != _olap_table_arr.end(); ++it) { - VLOG(3) << "obtain all header locks rd. tablet=" << (*it)->full_name(); - (*it)->obtain_header_rdlock(); - } - - _header_locked = true; - } - - // Locak tablet header before write header info. - void _obtain_header_wrlock() { - for (std::list::iterator it = _olap_table_arr.begin(); - it != _olap_table_arr.end(); ++it) { - VLOG(3) << "obtain all header locks wr. tablet=" << (*it)->full_name(); - (*it)->obtain_header_wrlock(); - } - - _header_locked = true; - } - - // Release tablet header lock. - void _release_header_lock() { - if (_header_locked) { - for (std::list::reverse_iterator it = _olap_table_arr.rbegin(); - it != _olap_table_arr.rend(); ++it) { - VLOG(3) << "release all header locks. tablet=" << (*it)->full_name(); - (*it)->release_header_lock(); - } - - _header_locked = false; - } - } - void _get_tablet_infos( - const std::vector& table_infoes, + const std::vector& tablet_infos, std::vector* tablet_info_vec); - // mainly tablet_id, version and delta file path - TPushReq _request; + OLAPStatus _do_streaming_ingestion( + TabletSharedPtr tablet, + const TPushReq& request, + PushType push_type, + vector* tablet_vars, + std::vector* tablet_info_vec); - // maily contains specified tablet object - // contains related tables also if in schema change, tablet split or rollup - std::list _olap_table_arr; +private: - // lock tablet header before modify tabelt header - bool _header_locked; + // mainly tablet_id, version and delta file path + TPushReq _request; int64_t _write_bytes = 0; int64_t _write_rows = 0; @@ -202,7 +126,7 @@ class IBinaryReader { static IBinaryReader* create(bool need_decompress); virtual ~IBinaryReader() {} - virtual OLAPStatus init(OLAPTablePtr table, BinaryFile* file) = 0; + virtual OLAPStatus init(TabletSharedPtr tablet, BinaryFile* file) = 0; virtual OLAPStatus finalize() = 0; virtual OLAPStatus next(RowCursor* row, MemPool* mem_pool) = 0; @@ -224,7 +148,7 @@ class IBinaryReader { } BinaryFile* _file; - OLAPTablePtr _table; + TabletSharedPtr _tablet; size_t _content_len; size_t _curr; uint32_t _adler_checksum; @@ -239,7 +163,7 @@ class BinaryReader: public IBinaryReader { finalize(); } - virtual OLAPStatus init(OLAPTablePtr table, BinaryFile* file); + virtual OLAPStatus init(TabletSharedPtr tablet, BinaryFile* file); virtual OLAPStatus finalize(); virtual OLAPStatus next(RowCursor* row, MemPool* mem_pool); @@ -260,7 +184,7 @@ class LzoBinaryReader: public IBinaryReader { finalize(); } - virtual OLAPStatus init(OLAPTablePtr table, BinaryFile* file); + virtual OLAPStatus init(TabletSharedPtr tablet, BinaryFile* file); virtual OLAPStatus finalize(); virtual OLAPStatus next(RowCursor* row, MemPool* mem_pool); diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 9ff0badaf6f23c..28fdc10eb0845c 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -17,8 +17,8 @@ #include "olap/reader.h" -#include "olap/column_data.h" -#include "olap/olap_table.h" +#include "olap/rowset/column_data.h" +#include "olap/tablet.h" #include "olap/row_block.h" #include "olap/row_cursor.h" 
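
With the header change above, the only public entry point left on PushHandler is process_streaming_ingestion(), and TabletVars now pairs each affected tablet with the single rowset to add. A possible call site is sketched below; TTabletInfo as the vector element type (the template argument is lost in the extract above) and the PUSH_NORMAL enum value are assumptions.

// Hypothetical call site for the reshaped PushHandler entry point.
#include <vector>

OLAPStatus run_push_task(TabletSharedPtr tablet, const TPushReq& request) {
    PushHandler handler;
    std::vector<TTabletInfo> tablet_infos;  // assumed element type; reported back on success
    OLAPStatus res = handler.process_streaming_ingestion(
            tablet, request, PUSH_NORMAL /* assumed enum value */, &tablet_infos);
    if (res == OLAP_SUCCESS) {
        LOG(INFO) << "push finished. written_rows=" << handler.write_rows()
                  << ", written_bytes=" << handler.write_bytes();
    }
    return res;
}
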
#include "util/date_func.h" @@ -30,6 +30,7 @@ #include "olap/comparison_predicate.h" #include "olap/in_list_predicate.h" #include "olap/null_predicate.h" +#include "olap/storage_engine.h" using std::nothrow; using std::set; @@ -41,11 +42,11 @@ class CollectIterator { public: ~CollectIterator(); - // Hold reader point to get reader params, + // Hold reader point to get reader params, // set reverse to true if need read in reverse order. OLAPStatus init(Reader* reader); - OLAPStatus add_child(ColumnData* data, RowBlock* block); + OLAPStatus add_child(RowsetReaderSharedPtr rs_reader); // Get top row of the heap, NULL if reach end. const RowCursor* current_row(bool* delete_flag) const { @@ -55,7 +56,7 @@ class CollectIterator { return nullptr; } - // Pop the top element and rebuild the heap to + // Pop the top element and rebuild the heap to // get the next row cursor. inline OLAPStatus next(const RowCursor** row, bool* delete_flag); @@ -65,24 +66,18 @@ class CollectIterator { private: class ChildCtx { public: - ChildCtx(ColumnData* data, RowBlock* block, Reader* reader) - : _data(data), - _is_delete(data->delete_flag()), - _reader(reader), - _row_block(block) { - } + ChildCtx(RowsetReaderSharedPtr rs_reader, Reader* reader) + : _rs_reader(rs_reader), + _is_delete(rs_reader->delete_flag()), + _reader(reader) { } OLAPStatus init() { - auto res = _row_cursor.init(_data->segment_group()->table()->tablet_schema(), - _data->seek_columns()); + auto res = _row_cursor.init(_reader->_tablet->tablet_schema(), _reader->_seek_columns); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to init row cursor, res=" << res; return res; } - res = _refresh_current_row(); - if (res != OLAP_SUCCESS) { - return res; - } + RETURN_NOT_OK(_refresh_current_row()); return OLAP_SUCCESS; } @@ -96,7 +91,7 @@ class CollectIterator { } int32_t version() const { - return _data->version().second; + return _rs_reader->version().second; } OLAPStatus next(const RowCursor** row, bool* delete_flag) { @@ -108,15 +103,14 @@ class CollectIterator { } private: - // refresh _current_row, + // refresh_current_row OLAPStatus _refresh_current_row() { - DCHECK(_row_block != nullptr); do { - if (_row_block->has_remaining()) { + if (_row_block != nullptr && _row_block->has_remaining()) { size_t pos = _row_block->pos(); _row_block->get_row(pos, &_row_cursor); if (_row_block->block_status() == DEL_PARTIAL_SATISFIED && - _reader->_delete_handler.is_filter_data(_data->version().second, _row_cursor)) { + _reader->_delete_handler.is_filter_data(_rs_reader->version().second, _row_cursor)) { _reader->_stats.rows_del_filtered++; _row_block->pos_inc(); continue; @@ -124,7 +118,7 @@ class CollectIterator { _current_row = &_row_cursor; return OLAP_SUCCESS; } else { - auto res = _data->get_next_block(&_row_block); + auto res = _rs_reader->next_block(&_row_block); if (res != OLAP_SUCCESS) { _current_row = nullptr; return res; @@ -135,7 +129,7 @@ class CollectIterator { return OLAP_ERR_DATA_EOF; } - ColumnData* _data = nullptr; + RowsetReaderSharedPtr _rs_reader; const RowCursor* _current_row = nullptr; bool _is_delete = false; Reader* _reader; @@ -182,14 +176,14 @@ OLAPStatus CollectIterator::init(Reader* reader) { // multiple data to aggregate for performance in user fetch if (_reader->_reader_type == READER_QUERY && (_reader->_aggregation || - _reader->_olap_table->keys_type() == KeysType::DUP_KEYS)) { + _reader->_tablet->keys_type() == KeysType::DUP_KEYS)) { _merge = false; } return OLAP_SUCCESS; } -OLAPStatus CollectIterator::add_child(ColumnData* 
data, RowBlock* block) { - std::unique_ptr child(new ChildCtx(data, block, _reader)); +OLAPStatus CollectIterator::add_child(RowsetReaderSharedPtr rs_reader) { + std::unique_ptr child(new ChildCtx(rs_reader, _reader)); RETURN_NOT_OK(child->init()); if (child->current_row() == nullptr) { return OLAP_SUCCESS; @@ -246,7 +240,7 @@ inline OLAPStatus CollectIterator::_normal_next(const RowCursor** row, bool* del // this child has been read, to read next _child_idx++; if (_child_idx < _children.size()) { - _cur_child = _children[_child_idx]; + _cur_child = _children[_child_idx]; *row = _cur_child->current_row(delete_flag); return OLAP_SUCCESS; } else { @@ -305,27 +299,25 @@ OLAPStatus Reader::init(const ReaderParams& read_params) { res = _init_params(read_params); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init reader when init params.[res=%d]", res); + LOG(WARNING) << "fail to init reader when init params.res" << res + << ", tablet_id :" << read_params.tablet->tablet_id() + << ", schema_hash:" << read_params.tablet->schema_hash() + << ", reader type:" << read_params.reader_type + << ", version:" << read_params.version.first << "-" << read_params.version.second; return res; } - res = _acquire_data_sources(read_params); + res = _capture_rs_readers(read_params); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init reader when acquire data sources.[res=%d]", res); - return res; - } - - for (auto i_data: _data_sources) { - i_data->set_stats(&_stats); - } - - bool eof = false; - if (OLAP_SUCCESS != (res = _attach_data_to_merge_set(true, &eof))) { - OLAP_LOG_WARNING("failed to attaching data to merge set. [res=%d]", res); + LOG(WARNING) << "fail to init reader when _capture_rs_readers.res" << res + << ", tablet_id :" << read_params.tablet->tablet_id() + << ", schema_hash:" << read_params.tablet->schema_hash() + << ", reader_type:" << read_params.reader_type + << ", version:" << read_params.version.first << "-" << read_params.version.second; return res; } - switch (_olap_table->keys_type()) { + switch (_tablet->keys_type()) { case KeysType::DUP_KEYS: _next_row_func = &Reader::_dup_key_next_row; break; @@ -339,22 +331,15 @@ OLAPStatus Reader::init(const ReaderParams& read_params) { break; } DCHECK(_next_row_func != nullptr) << "No next row function for type:" - << _olap_table->keys_type(); + << _tablet->keys_type(); return OLAP_SUCCESS; } OLAPStatus Reader::_dup_key_next_row(RowCursor* row_cursor, bool* eof) { - *eof = false; - if (_next_key == nullptr) { - auto res = _attach_data_to_merge_set(false, eof); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("failed to attach data to merge set."); - return res; - } - if (*eof) { - return OLAP_SUCCESS; - } + if (UNLIKELY(_next_key == nullptr)) { + *eof = true; + return OLAP_SUCCESS; } row_cursor->copy_without_pool(*_next_key); auto res = _collect_iter->next(&_next_key, &_next_delete_flag); @@ -367,17 +352,9 @@ OLAPStatus Reader::_dup_key_next_row(RowCursor* row_cursor, bool* eof) { } OLAPStatus Reader::_agg_key_next_row(RowCursor* row_cursor, bool* eof) { - *eof = false; - - if (NULL == _next_key) { - auto res = _attach_data_to_merge_set(false, eof); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("failed to attach data to merge set."); - return res; - } - if (*eof) { - return OLAP_SUCCESS; - } + if (UNLIKELY(_next_key == nullptr)) { + *eof = true; + return OLAP_SUCCESS; } row_cursor->agg_init(*_next_key); int64_t merged_count = 0; @@ -385,6 +362,7 @@ OLAPStatus Reader::_agg_key_next_row(RowCursor* row_cursor, bool* eof) { auto res = 
_collect_iter->next(&_next_key, &_next_delete_flag); if (res != OLAP_SUCCESS) { if (res != OLAP_ERR_DATA_EOF) { + LOG(WARNING) << "next failed:" << res; return res; } break; @@ -393,11 +371,11 @@ OLAPStatus Reader::_agg_key_next_row(RowCursor* row_cursor, bool* eof) { if (_aggregation && merged_count > config::doris_scanner_row_num) { break; } + // break while can NOT doing aggregation if (!RowCursor::equal(_key_cids, row_cursor, _next_key)) { break; } - RowCursor::aggregate(_value_cids, row_cursor, _next_key); ++merged_count; } while (true); @@ -410,15 +388,9 @@ OLAPStatus Reader::_unique_key_next_row(RowCursor* row_cursor, bool* eof) { *eof = false; bool cur_delete_flag = false; do { - if (NULL == _next_key) { - auto res = _attach_data_to_merge_set(false, eof); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("failed to attach data to merge set."); - return res; - } - if (*eof) { - return OLAP_SUCCESS; - } + if (UNLIKELY(_next_key == nullptr)) { + *eof = true; + return OLAP_SUCCESS; } cur_delete_flag = _next_delete_flag; @@ -437,8 +409,7 @@ OLAPStatus Reader::_unique_key_next_row(RowCursor* row_cursor, bool* eof) { // we will not do aggregation in two case: // 1. DUP_KEYS keys type has no semantic to aggregate, // 2. to make cost of each scan round reasonable, we will control merged_count. - if (_olap_table->keys_type() == KeysType::DUP_KEYS - || (_aggregation && merged_count > config::doris_scanner_row_num)) { + if (_aggregation && merged_count > config::doris_scanner_row_num) { row_cursor->finalize_one_merge(_value_cids); break; } @@ -469,7 +440,6 @@ void Reader::close() { VLOG(3) << "merged rows:" << _merged_rows; _conditions.finalize(); _delete_handler.finalize(); - _olap_table->release_data_sources(&_own_data_sources); for (auto pred : _col_predicates) { delete pred; @@ -478,23 +448,12 @@ void Reader::close() { delete _collect_iter; } -OLAPStatus Reader::_acquire_data_sources(const ReaderParams& read_params) { - const std::vector* data_sources; - if (read_params.reader_type == READER_ALTER_TABLE - || read_params.reader_type == READER_BASE_COMPACTION - || read_params.reader_type == READER_CUMULATIVE_COMPACTION) { - data_sources = &read_params.olap_data_arr; - } else { - _olap_table->obtain_header_rdlock(); - _olap_table->acquire_data_sources(_version, &_own_data_sources); - _olap_table->release_header_lock(); - - if (_own_data_sources.size() < 1) { - LOG(WARNING) << "fail to acquire data sources. [table_name='" << _olap_table->full_name() - << "' version=" << _version.first << "-" << _version.second << "]"; - return OLAP_ERR_VERSION_NOT_EXIST; - } - data_sources = &_own_data_sources; +OLAPStatus Reader::_capture_rs_readers(const ReaderParams& read_params) { + const std::vector* rs_readers = &read_params.rs_readers; + if (rs_readers->size() < 1) { + LOG(WARNING) << "fail to acquire data sources. 
tablet=" << _tablet->full_name() + << ", version=" << _version.first << "-" << _version.second; + return OLAP_ERR_VERSION_NOT_EXIST; } // do not use index stream cache when be/ce/alter/checksum, @@ -504,53 +463,98 @@ OLAPStatus Reader::_acquire_data_sources(const ReaderParams& read_params) { is_using_cache = false; } - for (auto i_data: *data_sources) { - // skip empty version - if (i_data->empty() || i_data->zero_num_rows()) { - continue; - } - i_data->set_delete_handler(_delete_handler); - i_data->set_read_params(_return_columns, - _seek_columns, - _load_bf_columns, - _conditions, - _col_predicates, - is_using_cache, - read_params.runtime_state); - if (i_data->delta_pruning_filter()) { - VLOG(3) << "filter delta in query in condition:" - << i_data->version().first << ", " << i_data->version().second; - _stats.rows_stats_filtered += i_data->num_rows(); - continue; + bool eof = false; + RowCursor* start_key = nullptr; + RowCursor* end_key = nullptr; + bool is_lower_key_included = false; + bool is_upper_key_included = false; + for (int i = 0; i < _keys_param.start_keys.size(); ++i) { + start_key = _keys_param.start_keys[i]; + end_key = _keys_param.end_keys[i]; + if (_keys_param.end_range.compare("lt") == 0) { + is_upper_key_included = false; + } else if (_keys_param.end_range.compare("le") == 0) { + is_upper_key_included = true; + } else { + LOG(WARNING) << "reader params end_range is error. " + << "range=" << _keys_param.to_string(); + return OLAP_ERR_READER_GET_ITERATOR_ERROR; } - int ret = i_data->delete_pruning_filter(); - if (ret == DEL_SATISFIED) { - VLOG(3) << "filter delta in delete predicate:" - << i_data->version().first << ", " << i_data->version().second; - _stats.rows_del_filtered += i_data->num_rows(); - continue; - } else if (ret == DEL_PARTIAL_SATISFIED) { - VLOG(3) << "filter delta partially in delete predicate:" - << i_data->version().first << ", " << i_data->version().second; - i_data->set_delete_status(DEL_PARTIAL_SATISFIED); + + if (_keys_param.range.compare("gt") == 0) { + if (end_key != nullptr && start_key->cmp(*end_key) >= 0) { + VLOG(3) << "return EOF when range=" << _keys_param.range + << ", start_key=" << start_key->to_string() + << ", end_key=" << end_key->to_string(); + eof = true; + break; + } + is_lower_key_included = true; + } else if (_keys_param.range.compare("ge") == 0) { + if (end_key != nullptr && start_key->cmp(*end_key) > 0) { + VLOG(3) << "return EOF when range=" << _keys_param.range + << ", start_key=" << start_key->to_string() + << ", end_key=" << end_key->to_string(); + eof = true; + break; + } + is_lower_key_included = false; + } else if (_keys_param.range.compare("eq") == 0) { + is_lower_key_included = false; + is_upper_key_included = true; } else { - VLOG(3) << "not filter delta in delete predicate:" - << i_data->version().first << ", " << i_data->version().second; - i_data->set_delete_status(DEL_NOT_SATISFIED); + LOG(WARNING) << "reader params range is error. 
" + << "range=" << _keys_param.to_string(); + return OLAP_ERR_READER_GET_ITERATOR_ERROR; } - _data_sources.push_back(i_data); + _is_lower_keys_included.push_back(is_lower_key_included); + _is_upper_keys_included.push_back(is_upper_key_included); } + if (eof) { return OLAP_SUCCESS; } + + _reader_context.reader_type = read_params.reader_type; + _reader_context.tablet_schema = &_tablet->tablet_schema(); + _reader_context.preaggregation = _aggregation; + _reader_context.return_columns = &_return_columns; + _reader_context.seek_columns = &_seek_columns; + _reader_context.load_bf_columns = &_load_bf_columns; + _reader_context.conditions = &_conditions; + _reader_context.predicates = &_col_predicates; + _reader_context.lower_bound_keys = &_keys_param.start_keys; + _reader_context.is_lower_keys_included = &_is_lower_keys_included; + _reader_context.upper_bound_keys = &_keys_param.end_keys; + _reader_context.is_upper_keys_included = &_is_upper_keys_included; + _reader_context.delete_handler = &_delete_handler; + _reader_context.stats = &_stats; + _reader_context.is_using_cache = is_using_cache; + _reader_context.lru_cache = StorageEngine::instance()->index_stream_lru_cache(); + _reader_context.runtime_state = read_params.runtime_state; + for (auto& rs_reader : *rs_readers) { + rs_reader->init(&_reader_context); + _rs_readers.push_back(rs_reader); + } + + for (auto& rs_reader : _rs_readers) { + OLAPStatus res = _collect_iter->add_child(rs_reader); + if (res != OLAP_SUCCESS && res != OLAP_ERR_DATA_EOF) { + LOG(WARNING) << "failed to add child to iterator"; + return res; + } + } + + _next_key = _collect_iter->current_row(&_next_delete_flag); return OLAP_SUCCESS; } OLAPStatus Reader::_init_params(const ReaderParams& read_params) { + read_params.check_validation(); OLAPStatus res = OLAP_SUCCESS; _aggregation = read_params.aggregation; _reader_type = read_params.reader_type; - _olap_table = read_params.olap_table; + _tablet = read_params.tablet; _version = read_params.version; - + res = _init_conditions_param(read_params); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to init conditions param. [res=%d]", res); @@ -577,7 +581,7 @@ OLAPStatus Reader::_init_params(const ReaderParams& read_params) { res = _init_keys_param(read_params); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init keys param. [res=%d]", res); + LOG(WARNING) << "fail to init keys param. 
res=" << res; return res; } @@ -609,16 +613,16 @@ OLAPStatus Reader::_init_return_columns(const ReaderParams& read_params) { } } for (auto id : read_params.return_columns) { - if (_olap_table->tablet_schema()[id].is_key) { + if (_tablet->tablet_schema().column(id).is_key()) { _key_cids.push_back(id); } else { _value_cids.push_back(id); } } } else if (read_params.return_columns.size() == 0) { - for (size_t i = 0; i < _olap_table->tablet_schema().size(); ++i) { + for (size_t i = 0; i < _tablet->tablet_schema().num_columns(); ++i) { _return_columns.push_back(i); - if (_olap_table->tablet_schema()[i].is_key) { + if (_tablet->tablet_schema().column(i).is_key()) { _key_cids.push_back(i); } else { _value_cids.push_back(i); @@ -628,7 +632,7 @@ OLAPStatus Reader::_init_return_columns(const ReaderParams& read_params) { } else if (read_params.reader_type == READER_CHECKSUM) { _return_columns = read_params.return_columns; for (auto id : read_params.return_columns) { - if (_olap_table->tablet_schema()[id].is_key) { + if (_tablet->tablet_schema().column(id).is_key()) { _key_cids.push_back(id); } else { _value_cids.push_back(id); @@ -645,6 +649,7 @@ OLAPStatus Reader::_init_return_columns(const ReaderParams& read_params) { return OLAP_SUCCESS; } + OLAPStatus Reader::_init_seek_columns() { std::unordered_set column_set(_return_columns.begin(), _return_columns.end()); for (auto& it : _conditions.columns()) { @@ -661,7 +666,7 @@ OLAPStatus Reader::_init_seek_columns() { max_key_column_count = key->field_count(); } } - for (uint32_t i = 0; i < _olap_table->tablet_schema().size(); i++) { + for (uint32_t i = 0; i < _tablet->tablet_schema().num_columns(); i++) { if (i < max_key_column_count || column_set.find(i) != column_set.end()) { _seek_columns.push_back(i); } @@ -669,108 +674,6 @@ OLAPStatus Reader::_init_seek_columns() { return OLAP_SUCCESS; } -OLAPStatus Reader::_attach_data_to_merge_set(bool first, bool *eof) { - OLAPStatus res = OLAP_SUCCESS; - *eof = false; - - do { - RowCursor *start_key = NULL; - RowCursor *end_key = NULL; - bool find_last_row = false; - bool end_key_find_last_row = false; - _collect_iter->clear(); - - if (_keys_param.start_keys.size() > 0) { - if (_next_key_index >= _keys_param.start_keys.size()) { - *eof = true; - VLOG(3) << "can NOT attach while start_key has been used."; - return res; - } - auto cur_key_index = _next_key_index++; - - start_key = _keys_param.start_keys[cur_key_index]; - - if (0 != _keys_param.end_keys.size()) { - end_key = _keys_param.end_keys[cur_key_index]; - if (0 == _keys_param.end_range.compare("lt")) { - end_key_find_last_row = false; - } else if (0 == _keys_param.end_range.compare("le")) { - end_key_find_last_row = true; - } else { - OLAP_LOG_WARNING("reader params end_range is error. 
[range='%s']", - _keys_param.to_string().c_str()); - res = OLAP_ERR_READER_GET_ITERATOR_ERROR; - return res; - } - } - - if (0 == _keys_param.range.compare("gt")) { - if (NULL != end_key - && start_key->cmp(*end_key) >= 0) { - VLOG(10) << "return EOF when range=" << _keys_param.range - << ", start_key=" << start_key->to_string() - << ", end_key=" << end_key->to_string(); - *eof = true; - return res; - } - - find_last_row = true; - } else if (0 == _keys_param.range.compare("ge")) { - if (NULL != end_key - && start_key->cmp(*end_key) > 0) { - VLOG(10) << "return EOF when range=" << _keys_param.range - << ", start_key=" << start_key->to_string() - << ", end_key=" << end_key->to_string(); - *eof = true; - return res; - } - - find_last_row = false; - } else if (0 == _keys_param.range.compare("eq")) { - find_last_row = false; - end_key = start_key; - end_key_find_last_row = true; - } else { - OLAP_LOG_WARNING( - "reader params range is error. [range='%s']", - _keys_param.to_string().c_str()); - res = OLAP_ERR_READER_GET_ITERATOR_ERROR; - return res; - } - } else if (false == first) { - *eof = true; - return res; - } - - for (auto data : _data_sources) { - RowBlock* block = nullptr; - auto res = data->prepare_block_read( - start_key, find_last_row, end_key, end_key_find_last_row, &block); - if (res == OLAP_SUCCESS) { - res = _collect_iter->add_child(data, block); - if (res != OLAP_SUCCESS && res != OLAP_ERR_DATA_EOF) { - LOG(WARNING) << "failed to add child to iterator"; - return res; - } - } else if (res == OLAP_ERR_DATA_EOF) { - continue; - } else { - LOG(WARNING) << "prepare block failed, res=" << res; - return res; - } - } - - _next_key = _collect_iter->current_row(&_next_delete_flag); - if (_next_key != NULL) { - break; - } - - first = false; - } while (NULL == _next_key); - - return res; -} - OLAPStatus Reader::_init_keys_param(const ReaderParams& read_params) { OLAPStatus res = OLAP_SUCCESS; @@ -791,7 +694,7 @@ OLAPStatus Reader::_init_keys_param(const ReaderParams& read_params) { return OLAP_ERR_MALLOC_ERROR; } - res = _keys_param.start_keys[i]->init_scan_key(_olap_table->tablet_schema(), + res = _keys_param.start_keys[i]->init_scan_key(_tablet->tablet_schema(), read_params.start_key[i].values()); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to init row cursor. [res=%d]", res); @@ -813,7 +716,7 @@ OLAPStatus Reader::_init_keys_param(const ReaderParams& read_params) { return OLAP_ERR_MALLOC_ERROR; } - res = _keys_param.end_keys[i]->init_scan_key(_olap_table->tablet_schema(), + res = _keys_param.end_keys[i]->init_scan_key(_tablet->tablet_schema(), read_params.end_key[i].values()); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to init row cursor. 
[res=%d]", res); @@ -835,7 +738,7 @@ OLAPStatus Reader::_init_keys_param(const ReaderParams& read_params) { OLAPStatus Reader::_init_conditions_param(const ReaderParams& read_params) { OLAPStatus res = OLAP_SUCCESS; - _conditions.set_table(_olap_table); + _conditions.set_tablet_schema(&_tablet->tablet_schema()); for (int i = 0; i < read_params.conditions.size(); ++i) { _conditions.append_condition(read_params.conditions[i]); ColumnPredicate* predicate = _parse_to_predicate(read_params.conditions[i]); @@ -848,9 +751,9 @@ OLAPStatus Reader::_init_conditions_param(const ReaderParams& read_params) { } #define COMPARISON_PREDICATE_CONDITION_VALUE(NAME, PREDICATE) \ -ColumnPredicate* Reader::_new_##NAME##_pred(FieldInfo& fi, int index, const std::string& cond) { \ +ColumnPredicate* Reader::_new_##NAME##_pred(const TabletColumn& column, int index, const std::string& cond) { \ ColumnPredicate* predicate = NULL; \ - switch (fi.type) { \ + switch (column.type()) { \ case OLAP_FIELD_TYPE_TINYINT: { \ std::stringstream ss(cond); \ int32_t value = 0; \ @@ -894,7 +797,7 @@ ColumnPredicate* Reader::_new_##NAME##_pred(FieldInfo& fi, int index, const std: } \ case OLAP_FIELD_TYPE_CHAR: {\ StringValue value; \ - size_t length = std::max(static_cast(fi.length), cond.length());\ + size_t length = std::max(static_cast(column.length()), cond.length());\ char* buffer = reinterpret_cast(_predicate_mem_pool->allocate(length)); \ memset(buffer, 0, length); \ memory_copy(buffer, cond.c_str(), cond.length()); \ @@ -938,26 +841,26 @@ COMPARISON_PREDICATE_CONDITION_VALUE(ge, GreaterEqualPredicate) ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition) { // TODO: not equal and not in predicate is not pushed down - int index = _olap_table->get_field_index(condition.column_name); - FieldInfo fi = _olap_table->tablet_schema()[index]; - if (fi.aggregation != FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) { + int index = _tablet->field_index(condition.column_name); + const TabletColumn& column = _tablet->tablet_schema().column(index); + if (column.aggregation() != FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) { return nullptr; } ColumnPredicate* predicate = NULL; if (condition.condition_op == "*=" && condition.condition_values.size() == 1) { - predicate = _new_eq_pred(fi, index, condition.condition_values[0]); + predicate = _new_eq_pred(column, index, condition.condition_values[0]); } else if (condition.condition_op == "<<") { - predicate = _new_lt_pred(fi, index, condition.condition_values[0]); + predicate = _new_lt_pred(column, index, condition.condition_values[0]); } else if (condition.condition_op == "<=") { - predicate = _new_le_pred(fi, index, condition.condition_values[0]); + predicate = _new_le_pred(column, index, condition.condition_values[0]); } else if (condition.condition_op == ">>") { - predicate = _new_gt_pred(fi, index, condition.condition_values[0]); + predicate = _new_gt_pred(column, index, condition.condition_values[0]); } else if (condition.condition_op == ">=") { - predicate = _new_ge_pred(fi, index, condition.condition_values[0]); + predicate = _new_ge_pred(column, index, condition.condition_values[0]); } else if (condition.condition_op == "*=" && condition.condition_values.size() > 1) { - switch (fi.type) { + switch (column.type()) { case OLAP_FIELD_TYPE_TINYINT: { std::set values; for (auto& cond_val : condition.condition_values) { @@ -1027,7 +930,7 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition) { std::set values; for (auto& cond_val : 
condition.condition_values) { StringValue value; - size_t length = std::max(static_cast(fi.length), cond_val.length()); + size_t length = std::max(static_cast(column.length()), cond_val.length()); char* buffer = reinterpret_cast(_predicate_mem_pool->allocate(length)); memset(buffer, 0, length); memory_copy(buffer, cond_val.c_str(), cond_val.length()); @@ -1098,14 +1001,14 @@ OLAPStatus Reader::_init_load_bf_columns(const ReaderParams& read_params) { } // remove columns which have no bf stream - for (int i = 0; i < _olap_table->tablet_schema().size(); ++i) { - if (!_olap_table->tablet_schema()[i].is_bf_column) { + for (int i = 0; i < _tablet->tablet_schema().num_columns(); ++i) { + if (!_tablet->tablet_schema().column(i).is_bf_column()) { _load_bf_columns.erase(i); } } // remove columns which have same value between start_key and end_key - int min_scan_key_len = _olap_table->tablet_schema().size(); + int min_scan_key_len = _tablet->tablet_schema().num_columns(); for (int i = 0; i < read_params.start_key.size(); ++i) { if (read_params.start_key[i].size() < min_scan_key_len) { min_scan_key_len = read_params.start_key[i].size(); @@ -1138,9 +1041,13 @@ OLAPStatus Reader::_init_load_bf_columns(const ReaderParams& read_params) { // remove the max_equal_index column when it's not varchar // or longer than number of short key fields - FieldType type = _olap_table->get_field_type_by_index(max_equal_index); + FieldType type = OLAP_FIELD_TYPE_NONE; + if (max_equal_index == -1) { + return res; + } + type = _tablet->tablet_schema().column(max_equal_index).type(); if ((type != OLAP_FIELD_TYPE_VARCHAR && type != OLAP_FIELD_TYPE_HLL) - || max_equal_index + 1 > _olap_table->num_short_key_fields()) { + || max_equal_index + 1 > _tablet->num_short_key_columns()) { _load_bf_columns.erase(max_equal_index); } @@ -1149,9 +1056,11 @@ OLAPStatus Reader::_init_load_bf_columns(const ReaderParams& read_params) { OLAPStatus Reader::_init_delete_condition(const ReaderParams& read_params) { if (read_params.reader_type != READER_CUMULATIVE_COMPACTION) { - _olap_table->obtain_header_rdlock(); - OLAPStatus ret = _delete_handler.init(_olap_table, read_params.version.second); - _olap_table->release_header_lock(); + _tablet->obtain_header_rdlock(); + OLAPStatus ret = _delete_handler.init(_tablet->tablet_schema(), + _tablet->delete_predicates(), + read_params.version.second); + _tablet->release_header_lock(); return ret; } else { diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 53f08b1bff1033..2ae14ccfef2295 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -36,19 +36,22 @@ #include "util/runtime_profile.h" #include "olap/column_predicate.h" +#include "olap/tablet.h" +#include "olap/rowset/rowset_reader.h" namespace doris { -class OLAPTable; +class Tablet; class RowCursor; class RowBlock; class CollectIterator; class RuntimeState; +class ColumnData; // Params for Reader, // mainly include tablet, data version and fetch range. struct ReaderParams { - OLAPTablePtr olap_table; + TabletSharedPtr tablet; ReaderType reader_type; bool aggregation; Version version; @@ -58,26 +61,33 @@ struct ReaderParams { std::vector end_key; std::vector conditions; // The ColumnData will be set when using Merger, eg Cumulative, BE. 
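
To summarize the reader-side change: ReaderParams no longer carries ColumnData handles; the caller captures RowsetReaderSharedPtrs for the version range and passes them in rs_readers, and Reader::_capture_rs_readers() wires one shared RowsetReaderContext (schema, return/seek columns, conditions, predicates, key bounds with their inclusion flags, delete handler, stats, caches) into every reader before feeding them to the CollectIterator. A sketch of filling the params follows; the element types of return_columns/conditions/rs_readers are inferred from the surrounding code, and how the rowset readers are captured from the tablet is left out because that call is not part of this hunk.

// Sketch only: element types inferred from the diff; capture of rs_readers elided.
OLAPStatus open_reader(TabletSharedPtr tablet,
                       const Version& version,
                       const std::vector<uint32_t>& return_column_ids,
                       const std::vector<TCondition>& conditions,
                       const std::vector<RowsetReaderSharedPtr>& rs_readers,
                       Reader* reader) {
    ReaderParams params;
    params.tablet = tablet;
    params.reader_type = READER_QUERY;
    params.aggregation = true;
    params.version = version;                   // must be set: check_validation() LOG(FATAL)s on -1
    params.return_columns = return_column_ids;  // empty means "all columns" in _init_return_columns()
    params.conditions = conditions;             // pushed down as Conditions + ColumnPredicates
    params.rs_readers = rs_readers;             // one RowsetReaderSharedPtr per rowset to scan
    return reader->init(params);                // rows are then fetched via the reader's next-row API
}
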
- std::vector olap_data_arr; + std::vector rs_readers; std::vector return_columns; RuntimeProfile* profile; RuntimeState* runtime_state; ReaderParams() : reader_type(READER_QUERY), - aggregation(true), + aggregation(false), + version(-1, 0), profile(NULL), runtime_state(NULL) { start_key.clear(); end_key.clear(); conditions.clear(); - olap_data_arr.clear(); + rs_readers.clear(); + } + + void check_validation() const { + if (version.first == -1) { + LOG(FATAL) << "version is not set. tablet=" << tablet->full_name(); + } } std::string to_string() { std::stringstream ss; - ss << "table=" << olap_table->full_name() + ss << "tablet=" << tablet->full_name() << " reader_type=" << reader_type << " aggregation=" << aggregation << " version=" << version.first << "-" << version.second @@ -119,7 +129,7 @@ class Reader { return _merged_rows; } - uint64_t filted_rows() const { + uint64_t filtered_rows() const { return _stats.rows_del_filtered; } @@ -164,18 +174,18 @@ class Reader { OLAPStatus _init_params(const ReaderParams& read_params); - OLAPStatus _acquire_data_sources(const ReaderParams& read_params); + OLAPStatus _capture_rs_readers(const ReaderParams& read_params); OLAPStatus _init_keys_param(const ReaderParams& read_params); OLAPStatus _init_conditions_param(const ReaderParams& read_params); - ColumnPredicate* _new_eq_pred(FieldInfo& type, int index, const std::string& cond); - ColumnPredicate* _new_ne_pred(FieldInfo& type, int index, const std::string& cond); - ColumnPredicate* _new_lt_pred(FieldInfo& type, int index, const std::string& cond); - ColumnPredicate* _new_le_pred(FieldInfo& type, int index, const std::string& cond); - ColumnPredicate* _new_gt_pred(FieldInfo& type, int index, const std::string& cond); - ColumnPredicate* _new_ge_pred(FieldInfo& type, int index, const std::string& cond); + ColumnPredicate* _new_eq_pred(const TabletColumn& column, int index, const std::string& cond); + ColumnPredicate* _new_ne_pred(const TabletColumn& column, int index, const std::string& cond); + ColumnPredicate* _new_lt_pred(const TabletColumn& column, int index, const std::string& cond); + ColumnPredicate* _new_le_pred(const TabletColumn& column, int index, const std::string& cond); + ColumnPredicate* _new_gt_pred(const TabletColumn& column, int index, const std::string& cond); + ColumnPredicate* _new_ge_pred(const TabletColumn& column, int index, const std::string& cond); ColumnPredicate* _parse_to_predicate(const TCondition& condition); @@ -186,12 +196,12 @@ class Reader { OLAPStatus _init_load_bf_columns(const ReaderParams& read_params); - OLAPStatus _attach_data_to_merge_set(bool first, bool *eof); - OLAPStatus _dup_key_next_row(RowCursor* row_cursor, bool* eof); OLAPStatus _agg_key_next_row(RowCursor* row_cursor, bool* eof); OLAPStatus _unique_key_next_row(RowCursor* row_cursor, bool* eof); + TabletSharedPtr tablet() { return _tablet; } + private: std::unique_ptr _tracker; std::unique_ptr _predicate_mem_pool; @@ -201,14 +211,17 @@ class Reader { Version _version; - OLAPTablePtr _olap_table; + TabletSharedPtr _tablet; - // _own_data_sources is data source that reader aquire from olap_table, so we need to + // _own_rs_readers are data sources that the reader acquires from the tablet, so we need to // release these when the reader closes - std::vector _own_data_sources; - std::vector _data_sources; + std::vector _own_rs_readers; + std::vector _rs_readers; + RowsetReaderContext _reader_context; KeysParam _keys_param; + std::vector _is_lower_keys_included; + std::vector _is_upper_keys_included; int32_t 
_next_key_index; Conditions _conditions; diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index d0f46584fc0d4b..4d34fdf754f492 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -37,9 +37,9 @@ using std::vector; namespace doris { -RowBlock::RowBlock(const vector& tablet_schema) : +RowBlock::RowBlock(const TabletSchema* schema) : _capacity(0), - _tablet_schema(tablet_schema) { + _schema(schema) { _tracker.reset(new MemTracker(-1)); _mem_pool.reset(new MemPool(_tracker.get())); } @@ -49,9 +49,8 @@ RowBlock::~RowBlock() { } OLAPStatus RowBlock::init(const RowBlockInfo& block_info) { - _field_count = _tablet_schema.size(); + _field_count = _schema->num_columns(); _info = block_info; - _data_file_type = block_info.data_file_type; _null_supported = block_info.null_supported; _capacity = _info.row_num; _compute_layout(); @@ -81,7 +80,7 @@ OLAPStatus RowBlock::find_row(const RowCursor& key, OLAPStatus res = OLAP_SUCCESS; RowCursor helper_cursor; - if ((res = helper_cursor.init(_tablet_schema)) != OLAP_SUCCESS) { + if ((res = helper_cursor.init(*_schema)) != OLAP_SUCCESS) { OLAP_LOG_WARNING("Init helper cursor fail. [res=%d]", res); return OLAP_ERR_INIT_FAILED; } @@ -111,15 +110,17 @@ OLAPStatus RowBlock::find_row(const RowCursor& key, void RowBlock::clear() { _info.row_num = _capacity; _info.checksum = 0; + _pos = 0; + _limit = 0; _mem_pool->clear(); } void RowBlock::_compute_layout() { std::unordered_set column_set(_info.column_ids.begin(), _info.column_ids.end()); size_t memory_size = 0; - for (int i = 0; i < _tablet_schema.size(); ++i) { - auto& field = _tablet_schema[i]; - if (!column_set.empty() && column_set.find(i) == std::end(column_set)) { + for (int col_id = 0; col_id < _schema->num_columns(); ++col_id) { + const TabletColumn& column = _schema->column(col_id); + if (!column_set.empty() && column_set.find(col_id) == std::end(column_set)) { // which may lead BE crash _field_offset_in_memory.push_back(std::numeric_limits::max()); continue; @@ -128,12 +129,12 @@ void RowBlock::_compute_layout() { _field_offset_in_memory.push_back(memory_size); // All field has a nullbyte in memory - if (field.type == OLAP_FIELD_TYPE_VARCHAR || field.type == OLAP_FIELD_TYPE_HLL - || field.type == OLAP_FIELD_TYPE_CHAR) { + if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL + || column.type() == OLAP_FIELD_TYPE_CHAR) { // 变长部分额外计算下实际最大的字符串长度(此处length已经包括记录Length的2个字节) memory_size += sizeof(Slice) + sizeof(char); } else { - memory_size += field.length + sizeof(char); + memory_size += column.length() + sizeof(char); } } _mem_row_bytes = memory_size; diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h index b66e5de429e1bb..4a9c40250ebcfd 100644 --- a/be/src/olap/row_block.h +++ b/be/src/olap/row_block.h @@ -39,7 +39,6 @@ struct RowBlockInfo { uint32_t checksum; uint32_t row_num; // block最大数据行数 - DataFileType data_file_type; bool null_supported; std::vector column_ids; }; @@ -57,7 +56,7 @@ class RowBlock { friend class RowBlockChanger; friend class VectorizedRowBatch; public: - RowBlock(const std::vector& tablet_schema); + RowBlock(const TabletSchema* schema); // 注意回收内部buffer ~RowBlock(); @@ -91,8 +90,7 @@ class RowBlock { const uint32_t row_num() const { return _info.row_num; } const RowBlockInfo& row_block_info() const { return _info; } - const std::vector& tablet_schema() const { return _tablet_schema; } - + const TabletSchema& tablet_schema() const { return *_schema; } size_t capacity() const { return _capacity; } // Return 
field pointer, this pointer point to the nullbyte before the field @@ -155,7 +153,7 @@ class RowBlock { }; bool has_nullbyte() { - return _data_file_type == COLUMN_ORIENTED_FILE || _null_supported; + return _null_supported; } // Compute layout for storage buffer and memory buffer @@ -163,10 +161,9 @@ class RowBlock { uint32_t _capacity; RowBlockInfo _info; - const std::vector& _tablet_schema; // 内部保存的schema句柄 - + const TabletSchema* _schema; // 内部保存的schema句柄 + bool _null_supported; - DataFileType _data_file_type; size_t _field_count = 0; bool _need_checksum = true; diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index e0a9876e1923ad..5be35cbc12bdf3 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -50,26 +50,28 @@ RowCursor::~RowCursor() { } } -OLAPStatus RowCursor::_init(const std::vector& tablet_schema, +OLAPStatus RowCursor::_init(const std::vector& schema, const std::vector& columns) { - _field_array.resize(tablet_schema.size(), nullptr); + _field_array.resize(schema.size(), nullptr); _columns = columns; std::vector field_buf_lens; - for (size_t i = 0; i < tablet_schema.size(); ++i) { - FieldType type = tablet_schema[i].type; + for (size_t i = 0; i < schema.size(); ++i) { + const TabletColumn& column = schema[i]; + FieldType type = column.type(); if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_HLL) { field_buf_lens.push_back(sizeof(Slice)); } else { - field_buf_lens.push_back(tablet_schema[i].length); + field_buf_lens.push_back(column.length()); } } - _key_column_num = tablet_schema.size(); - for (size_t i = tablet_schema.size() - 1; i >= 0; --i) { - if (tablet_schema[i].is_key) { + _key_column_num = schema.size(); + for (size_t i = schema.size() - 1; i >= 0; --i) { + const TabletColumn& column = schema[i]; + if (column.is_key()) { _key_column_num = i + 1; break; } @@ -78,17 +80,18 @@ OLAPStatus RowCursor::_init(const std::vector& tablet_schema, _fixed_len = 0; _variable_len = 0; for (auto cid : _columns) { - _field_array[cid] = Field::create(tablet_schema[cid]); - if (_field_array[cid] == NULL) { - OLAP_LOG_WARNING("Fail to create field."); + const TabletColumn& column = schema[cid]; + _field_array[cid] = Field::create(column); + if (_field_array[cid] == nullptr) { + LOG(WARNING) << "Fail to create field."; return OLAP_ERR_INIT_FAILED; } _fixed_len += field_buf_lens[cid] + 1; //1 for null byte - FieldType type = tablet_schema[cid].type; + FieldType type = column.type(); if (type == OLAP_FIELD_TYPE_VARCHAR) { - _variable_len += tablet_schema[cid].length - OLAP_STRING_MAX_BYTES; + _variable_len += column.length() - OLAP_STRING_MAX_BYTES; } else if (type == OLAP_FIELD_TYPE_CHAR) { - _variable_len += tablet_schema[cid].length; + _variable_len += column.length(); } else if (type == OLAP_FIELD_TYPE_HLL) { _variable_len += HLL_COLUMN_DEFAULT_LEN + sizeof(HllContext*); } @@ -97,7 +100,7 @@ OLAPStatus RowCursor::_init(const std::vector& tablet_schema, _fixed_buf = new (nothrow) char[_fixed_len]; if (_fixed_buf == nullptr) { - OLAP_LOG_WARNING("Fail to malloc _fixed_buf."); + LOG(WARNING) << "Fail to malloc _fixed_buf."; return OLAP_ERR_MALLOC_ERROR; } _owned_fixed_buf = _fixed_buf; @@ -105,9 +108,9 @@ OLAPStatus RowCursor::_init(const std::vector& tablet_schema, // we must make sure that the offset is the same with RowBlock's std::unordered_set column_set(_columns.begin(), _columns.end()); - _field_offsets.resize(tablet_schema.size(), -1); + _field_offsets.resize(schema.size(), -1); size_t offset = 0; 
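The offset loop that follows, like RowBlock::_compute_layout above, lays a row out as one null byte per selected column followed by either the column's fixed length or a Slice-style header for the variable-length types (CHAR/VARCHAR/HLL). A minimal standalone sketch of that layout computation, using hypothetical stand-in types rather than the real TabletColumn/Field classes:

#include <cstddef>
#include <vector>

// Hypothetical stand-in for TabletColumn: only a type tag and a declared length.
enum class ColType { kInt, kBigInt, kChar, kVarchar };
struct ColumnDesc {
    ColType type;
    size_t length;   // declared length for fixed-size types
};

// Variable-length values keep only a {pointer, size} header inline, like Slice.
struct SliceHeader { char* data; size_t size; };

// Each field is stored as: 1 null byte + payload. Returns per-column offsets
// (pointing at the null byte) and the total bytes needed for one row.
std::vector<size_t> compute_offsets(const std::vector<ColumnDesc>& cols, size_t* row_bytes) {
    std::vector<size_t> offsets;
    size_t offset = 0;
    for (const auto& col : cols) {
        offsets.push_back(offset);
        size_t payload = (col.type == ColType::kChar || col.type == ColType::kVarchar)
                ? sizeof(SliceHeader)
                : col.length;
        offset += 1 /* null byte */ + payload;
    }
    *row_bytes = offset;
    return offsets;
}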
- for (int cid = 0; cid < tablet_schema.size(); ++cid) { + for (int cid = 0; cid < schema.size(); ++cid) { if (column_set.find(cid) != std::end(column_set)) { _field_offsets[cid] = offset; _field_array[cid]->set_offset(offset); @@ -118,16 +121,35 @@ OLAPStatus RowCursor::_init(const std::vector& tablet_schema, return OLAP_SUCCESS; } -OLAPStatus RowCursor::init(const vector& tablet_schema) { - return init(tablet_schema, tablet_schema.size()); +OLAPStatus RowCursor::init(const TabletSchema& schema) { + return init(schema.columns(), schema.num_columns()); } -OLAPStatus RowCursor::init(const vector& tablet_schema, size_t column_count) { - if (column_count > tablet_schema.size()) { - OLAP_LOG_WARNING("input param are invalid. Column count is bigger than table schema size." - "[column_count=%lu tablet_schema.size=%lu]", - column_count, - tablet_schema.size()); +OLAPStatus RowCursor::init(const std::vector& schema) { + return init(schema, schema.size()); +} + +OLAPStatus RowCursor::init(const TabletSchema& schema, size_t column_count) { + if (column_count > schema.num_columns()) { + LOG(WARNING) << "Input param are invalid. Column count is bigger than num_columns of schema. " + << "column_count=" << column_count + << ", schema.num_columns=" << schema.num_columns(); + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + std::vector columns; + for (size_t i = 0; i < column_count; ++i) { + columns.push_back(i); + } + RETURN_NOT_OK(_init(schema.columns(), columns)); + return OLAP_SUCCESS; +} + +OLAPStatus RowCursor::init(const std::vector& schema, size_t column_count) { + if (column_count > schema.size()) { + LOG(WARNING) << "Input param are invalid. Column count is bigger than num_columns of schema. " + << "column_count=" << column_count + << ", schema.num_columns=" << schema.size(); return OLAP_ERR_INPUT_PARAMETER_ERROR; } @@ -135,25 +157,23 @@ OLAPStatus RowCursor::init(const vector& tablet_schema, size_t column for (size_t i = 0; i < column_count; ++i) { columns.push_back(i); } - RETURN_NOT_OK(_init(tablet_schema, columns)); + RETURN_NOT_OK(_init(schema, columns)); return OLAP_SUCCESS; } -OLAPStatus RowCursor::init( - const vector& tablet_schema, - const vector& columns) { - RETURN_NOT_OK(_init(tablet_schema, columns)); +OLAPStatus RowCursor::init(const TabletSchema& schema, + const vector& columns) { + RETURN_NOT_OK(_init(schema.columns(), columns)); return OLAP_SUCCESS; } -OLAPStatus RowCursor::init_scan_key(const std::vector& tablet_schema, +OLAPStatus RowCursor::init_scan_key(const TabletSchema& schema, const std::vector& scan_keys) { size_t scan_key_size = scan_keys.size(); - if (scan_key_size > tablet_schema.size()) { - OLAP_LOG_WARNING("input param are invalid. Column count is bigger than table schema size." - "[column_count=%lu tablet_schema.size=%lu]", - scan_key_size , - tablet_schema.size()); + if (scan_key_size > schema.num_columns()) { + LOG(WARNING) << "Input param are invalid. Column count is bigger than num_columns of schema. 
" + << "column_count=" << scan_key_size + << ", schema.num_columns=" << schema.num_columns(); return OLAP_ERR_INPUT_PARAMETER_ERROR; } @@ -162,24 +182,25 @@ OLAPStatus RowCursor::init_scan_key(const std::vector& tablet_schema, columns.push_back(i); } - RETURN_NOT_OK(_init(tablet_schema, columns)); + RETURN_NOT_OK(_init(schema.columns(), columns)); // NOTE: cid equal with column index // Hyperloglog cannot be key, no need to handle it _variable_len = 0; for (auto cid : _columns) { - FieldType type = tablet_schema[cid].type; + const TabletColumn& column = schema.column(cid); + FieldType type = column.type(); if (type == OLAP_FIELD_TYPE_VARCHAR) { _variable_len += scan_keys[cid].length(); } else if (type == OLAP_FIELD_TYPE_CHAR) { _variable_len += std::max( - scan_keys[cid].length(), (size_t)(tablet_schema[cid].length)); + scan_keys[cid].length(), column.length()); } } // variable_len for null bytes - _variable_buf = new (nothrow) char[_variable_len]; - if (_variable_buf == NULL) { + _variable_buf = new(nothrow) char[_variable_len]; + if (_variable_buf == nullptr) { OLAP_LOG_WARNING("Fail to malloc _variable_buf."); return OLAP_ERR_MALLOC_ERROR; } @@ -187,8 +208,9 @@ OLAPStatus RowCursor::init_scan_key(const std::vector& tablet_schema, char* fixed_ptr = _fixed_buf; char* variable_ptr = _variable_buf; for (auto cid : _columns) { + const TabletColumn& column = schema.column(cid); fixed_ptr = _fixed_buf + _field_array[cid]->get_offset(); - FieldType type = tablet_schema[cid].type; + FieldType type = column.type(); if (type == OLAP_FIELD_TYPE_VARCHAR) { Slice* slice = reinterpret_cast(fixed_ptr + 1); slice->data = variable_ptr; @@ -197,7 +219,7 @@ OLAPStatus RowCursor::init_scan_key(const std::vector& tablet_schema, } else if (type == OLAP_FIELD_TYPE_CHAR) { Slice* slice = reinterpret_cast(fixed_ptr + 1); slice->data = variable_ptr; - slice->size = std::max(scan_keys[cid].length(), (size_t)(tablet_schema[cid].length)); + slice->size = std::max(scan_keys[cid].length(), column.length()); variable_ptr += slice->size; } } @@ -206,7 +228,7 @@ OLAPStatus RowCursor::init_scan_key(const std::vector& tablet_schema, } OLAPStatus RowCursor::allocate_memory_for_string_type( - const std::vector& tablet_schema, + const TabletSchema& schema, MemPool* mem_pool) { // allocate memory for string type(char, varchar, hll) // The memory allocated in this function is used in aggregate and copy function @@ -230,17 +252,18 @@ OLAPStatus RowCursor::allocate_memory_for_string_type( char* fixed_ptr = _fixed_buf; char* variable_ptr = _variable_buf; for (auto cid : _columns) { + const TabletColumn& column = schema.column(cid); fixed_ptr = _fixed_buf + _field_array[cid]->get_offset(); - FieldType type = tablet_schema[cid].type; + FieldType type = column.type(); if (type == OLAP_FIELD_TYPE_VARCHAR) { Slice* slice = reinterpret_cast(fixed_ptr + 1); slice->data = variable_ptr; - slice->size = tablet_schema[cid].length - OLAP_STRING_MAX_BYTES; + slice->size = column.length() - OLAP_STRING_MAX_BYTES; variable_ptr += slice->size; } else if (type == OLAP_FIELD_TYPE_CHAR) { Slice* slice = reinterpret_cast(fixed_ptr + 1); slice->data = variable_ptr; - slice->size = tablet_schema[cid].length; + slice->size = column.length(); variable_ptr += slice->size; } else if (type == OLAP_FIELD_TYPE_HLL) { Slice* slice = reinterpret_cast(fixed_ptr + 1); @@ -286,7 +309,7 @@ int RowCursor::cmp(const RowCursor& other) const { size_t common_prefix_count = min(_key_column_num, other._key_column_num); // 只有key column才会参与比较 for (size_t i = 0; i < 
common_prefix_count; ++i) { - if (_field_array[i] == NULL || other._field_array[i] == NULL) { + if (_field_array[i] == nullptr || other._field_array[i] == nullptr) { continue; } @@ -307,7 +330,7 @@ int RowCursor::index_cmp(const RowCursor& other) const { size_t common_prefix_count = min(_columns.size(), other._key_column_num); // 只有key column才会参与比较 for (size_t i = 0; i < common_prefix_count; ++i) { - if (_field_array[i] == NULL || other._field_array[i] == NULL) { + if (_field_array[i] == nullptr || other._field_array[i] == nullptr) { continue; } char* left = _field_array[i]->get_field_ptr(_fixed_buf); @@ -325,7 +348,7 @@ bool RowCursor::equal(const RowCursor& other) const { // 按field顺序从后往前比较,有利于尽快发现不同,提升比较性能 size_t common_prefix_count = min(_key_column_num, other._key_column_num); for (int i = common_prefix_count - 1; i >= 0; --i) { - if (_field_array[i] == NULL || other._field_array[i] == NULL) { + if (_field_array[i] == nullptr || other._field_array[i] == nullptr) { continue; } char* left = _field_array[i]->get_field_ptr(_fixed_buf); @@ -339,7 +362,7 @@ bool RowCursor::equal(const RowCursor& other) const { void RowCursor::finalize_one_merge() { for (size_t i = _key_column_num; i < _field_array.size(); ++i) { - if (_field_array[i] == NULL) { + if (_field_array[i] == nullptr) { continue; } char* dest = _field_array[i]->get_ptr(_fixed_buf); @@ -350,7 +373,7 @@ void RowCursor::finalize_one_merge() { void RowCursor::aggregate(const RowCursor& other) { // 只有value column才会参与aggregate for (size_t i = _key_column_num; i < _field_array.size(); ++i) { - if (_field_array[i] == NULL || other._field_array[i] == NULL) { + if (_field_array[i] == nullptr || other._field_array[i] == nullptr) { continue; } @@ -411,7 +434,7 @@ OlapTuple RowCursor::to_tuple() const { OlapTuple tuple; for (auto cid : _columns) { - if (_field_array[cid] != NULL) { + if (_field_array[cid] != nullptr) { Field* field = _field_array[cid]; char* src = field->get_ptr(_fixed_buf); if (field->is_null(_fixed_buf)) { @@ -458,7 +481,7 @@ string RowCursor::to_string(string sep) const { } Field* field = _field_array[cid]; - if (field != NULL) { + if (field != nullptr) { char* src = field->get_ptr(_fixed_buf); result.append(field->to_string(src)); } else { @@ -471,7 +494,7 @@ string RowCursor::to_string(string sep) const { OLAPStatus RowCursor::get_first_different_column_id(const RowCursor& other, size_t* first_diff_id) const { - if (first_diff_id == NULL) { + if (first_diff_id == nullptr) { OLAP_LOG_WARNING("input parameter 'first_diff_id' is NULL."); return OLAP_ERR_INPUT_PARAMETER_ERROR; } @@ -483,7 +506,7 @@ OLAPStatus RowCursor::get_first_different_column_id(const RowCursor& other, size_t i = 0; for (; i < _field_array.size(); ++i) { - if (_field_array[i] == NULL || other._field_array[i] == NULL) { + if (_field_array[i] == nullptr || other._field_array[i] == nullptr) { continue; } diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h index 36769f68836084..3066a4213988f4 100644 --- a/be/src/olap/row_cursor.h +++ b/be/src/olap/row_cursor.h @@ -60,29 +60,28 @@ class RowCursor { ~RowCursor(); // 根据传入schema的创建RowCursor - OLAPStatus init(const std::vector& tablet_schema); + OLAPStatus init(const TabletSchema& schema); + OLAPStatus init(const std::vector& schema); // 根据传入schema的前n列创建RowCursor - OLAPStatus init(const std::vector& tablet_schema, + OLAPStatus init(const std::vector& schema, size_t column_count); + OLAPStatus init(const TabletSchema& schema, size_t column_count); // 根据传入schema和column id list创建RowCursor, // 
用于计算过程只使用部分非前缀连续列的场景 - OLAPStatus init(const std::vector& tablet_schema, + OLAPStatus init(const TabletSchema& schema, const std::vector& columns); // 用传入的key的size来初始化 // 目前仅用在拆分key区间的时候 - OLAPStatus init_scan_key(const std::vector& tablet_schema, + OLAPStatus init_scan_key(const TabletSchema& schema, const std::vector& keys); - OLAPStatus init_scan_key(const std::vector& tablet_schema, - const std::vector& field_lengths); - //allocate memory for string type, which include char, varchar, hyperloglog - OLAPStatus allocate_memory_for_string_type(const std::vector& tablet_schema, + OLAPStatus allocate_memory_for_string_type(const TabletSchema& schema, MemPool* mem_pool = nullptr); - + // 两个RowCurosr做比较,返回-1,0,1 int cmp(const RowCursor& other) const; @@ -179,7 +178,7 @@ class RowCursor { inline uint32_t hash_code(uint32_t seed) const; private: // common init function - OLAPStatus _init(const std::vector& tablet_schema, + OLAPStatus _init(const std::vector& schema, const std::vector& columns); std::vector _field_array; // store point array of field diff --git a/be/src/olap/rowset/CMakeLists.txt b/be/src/olap/rowset/CMakeLists.txt new file mode 100644 index 00000000000000..494ad09118eedb --- /dev/null +++ b/be/src/olap/rowset/CMakeLists.txt @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# where to put generated libraries +set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/olap/rowset") + +# where to put generated binaries +set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap/rowset") + +add_library(Rowset STATIC + bit_field_reader.cpp + bit_field_writer.cpp + column_data.cpp + column_reader.cpp + column_writer.cpp + column_data_writer.cpp + segment_group.cpp + run_length_byte_reader.cpp + run_length_byte_writer.cpp + run_length_integer_reader.cpp + run_length_integer_writer.cpp + segment_reader.cpp + segment_writer.cpp + rowset_id_generator.cpp + rowset_meta_manager.cpp + alpha_rowset.cpp + alpha_rowset_reader.cpp + alpha_rowset_writer.cpp + alpha_rowset_meta.cpp + rowset_id_generator.cpp +) diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp new file mode 100644 index 00000000000000..b017ce48b6435c --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset.cpp @@ -0,0 +1,523 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "util/hash_util.hpp" + +namespace doris { + +AlphaRowset::AlphaRowset(const TabletSchema* schema, + const std::string rowset_path, + DataDir* data_dir, + RowsetMetaSharedPtr rowset_meta) + : _schema(schema), + _rowset_path(rowset_path), + _data_dir(data_dir), + _rowset_meta(rowset_meta), + _is_cumulative_rowset(false), + _is_pending_rowset(false) { + if (!_rowset_meta->has_version()) { + _is_pending_rowset = true; + } + if (!_is_pending_rowset) { + Version version = _rowset_meta->version(); + if (version.first == version.second) { + _is_cumulative_rowset = false; + } else { + _is_cumulative_rowset = true; + } + } + _ref_count = 0; +} + +OLAPStatus AlphaRowset::init() { + if (is_inited()) { + return OLAP_SUCCESS; + } + OLAPStatus status = _init_segment_groups(); + set_inited(true); + return status; +} + +OLAPStatus AlphaRowset::load() { + // load is depend on init, so that check if init here and do init if not + if (!is_inited()) { + OLAPStatus res = init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to init rowset before load" + << " rowset id " << rowset_id(); + return res; + } + } + if (is_loaded()) { + return OLAP_SUCCESS; + } + for (auto& segment_group: _segment_groups) { + // validate segment group + if (segment_group->validate() != OLAP_SUCCESS) { + LOG(WARNING) << "fail to validate segment_group. [version="<< start_version() + << "-" << end_version() << " version_hash=" << version_hash(); + // if load segment group failed, rowset init failed + return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; + } + OLAPStatus res = segment_group->load(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load segment_group. res=" << res << ", " + << "version=" << start_version() << "-" + << end_version() << ", " + << "version_hash=" << version_hash(); + return res; + } + } + set_loaded(true); + return OLAP_SUCCESS; +} + +std::shared_ptr AlphaRowset::create_reader() { + if (!is_loaded()) { + OLAPStatus status = load(); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "alpha rowset load failed. rowset path:" << _rowset_path; + return nullptr; + } + set_loaded(true); + } + return std::shared_ptr(new AlphaRowsetReader( + _schema->num_rows_per_row_block(), shared_from_this())); +} + +OLAPStatus AlphaRowset::remove() { + LOG(INFO) << "begin to remove rowset: " << rowset_id(); + for (auto segment_group : _segment_groups) { + bool ret = segment_group->delete_all_files(); + if (!ret) { + LOG(WARNING) << "delete segment group files failed." 
+ << " tablet id:" << segment_group->get_tablet_id() + << ", rowset path:" << segment_group->rowset_path_prefix(); + return OLAP_ERR_ROWSET_DELETE_SEGMENT_GROUP_FILE_FAILED; + } + } + return OLAP_SUCCESS; +} + +void AlphaRowset::to_rowset_pb(RowsetMetaPB* rs_meta) { + return _rowset_meta->to_rowset_pb(rs_meta); +} + +RowsetMetaSharedPtr AlphaRowset::rowset_meta() const { + return _rowset_meta; +} + +size_t AlphaRowset::data_disk_size() const { + return _rowset_meta->total_disk_size(); +} + +size_t AlphaRowset::index_disk_size() const { + return _rowset_meta->index_disk_size(); +} + +bool AlphaRowset::empty() const { + return _rowset_meta->empty(); +} + +bool AlphaRowset::zero_num_rows() const { + return _rowset_meta->num_rows() == 0; +} + +size_t AlphaRowset::num_rows() const { + return _rowset_meta->num_rows(); +} + +Version AlphaRowset::version() const { + return _rowset_meta->version(); +} + +void AlphaRowset::set_version_and_version_hash(Version version, VersionHash version_hash) { + _rowset_meta->set_version(version); + _rowset_meta->set_version_hash(version_hash); + // set the rowset state to VISIBLE + _rowset_meta->set_rowset_state(VISIBLE); + + if (rowset_meta()->has_delete_predicate()) { + rowset_meta()->mutable_delete_predicate()->set_version(version.first); + return; + } + + AlphaRowsetMetaSharedPtr alpha_rowset_meta = + std::dynamic_pointer_cast(_rowset_meta); + vector published_segment_groups; + alpha_rowset_meta->get_segment_groups(&published_segment_groups); + int32_t segment_group_idx = 0; + for (auto segment_group : _segment_groups) { + segment_group->set_version(version); + segment_group->set_version_hash(version_hash); + segment_group->set_pending_finished(); + published_segment_groups.at(segment_group_idx).clear_load_id(); + ++segment_group_idx; + } + alpha_rowset_meta->clear_segment_group(); + for (auto& segment_group_meta : published_segment_groups) { + alpha_rowset_meta->add_segment_group(segment_group_meta); + } + + _is_pending_rowset = false; +} + +int64_t AlphaRowset::start_version() const { + return _rowset_meta->version().first; +} + +int64_t AlphaRowset::end_version() const { + return _rowset_meta->version().second; +} + +VersionHash AlphaRowset::version_hash() const { + return _rowset_meta->version_hash(); +} + +bool AlphaRowset::in_use() const { + return _ref_count > 0; +} + +void AlphaRowset::acquire() { + atomic_inc(&_ref_count); +} + +void AlphaRowset::release() { + atomic_dec(&_ref_count); +} + +int64_t AlphaRowset::ref_count() const { + return _ref_count; +} + +OLAPStatus AlphaRowset::make_snapshot(const std::string& snapshot_path, + std::vector* success_links) { + for (auto& segment_group : _segment_groups) { + OLAPStatus status = segment_group->make_snapshot(snapshot_path, success_links); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "create hard links failed for segment group:" + << segment_group->segment_group_id(); + return status; + } + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowset::copy_files_to_path(const std::string& dest_path, + std::vector* success_files) { + for (auto& segment_group : _segment_groups) { + OLAPStatus status = segment_group->copy_files_to_path(dest_path, success_files); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "copy files failed for segment group." 
+ << " segment_group_id:" << segment_group->segment_group_id() + << ", dest_path:" << dest_path; + return status; + } + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowset::convert_from_old_files(const std::string& snapshot_path, + std::vector* success_files) { + for (auto& segment_group : _segment_groups) { + OLAPStatus status = segment_group->convert_from_old_files(snapshot_path, success_files); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "create hard links failed for segment group:" + << segment_group->segment_group_id(); + return status; + } + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowset::convert_to_old_files(const std::string& snapshot_path, + std::vector* success_files) { + for (auto& segment_group : _segment_groups) { + OLAPStatus status = segment_group->convert_to_old_files(snapshot_path, success_files); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "create hard links failed for segment group:" + << segment_group->segment_group_id(); + return status; + } + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowset::remove_old_files(std::vector* files_to_remove) { + for (auto& segment_group : _segment_groups) { + OLAPStatus status = segment_group->remove_old_files(files_to_remove); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "remove old files failed for segment group:" + << segment_group->segment_group_id(); + return status; + } + } + return OLAP_SUCCESS; +} + +RowsetId AlphaRowset::rowset_id() const { + return _rowset_meta->rowset_id(); +} + +int64_t AlphaRowset::creation_time() { + return _rowset_meta->creation_time(); +} + +bool AlphaRowset::is_pending() const { + return _is_pending_rowset; +} + +PUniqueId AlphaRowset::load_id() const { + return _rowset_meta->load_id(); +} + +int64_t AlphaRowset::txn_id() const { + return _rowset_meta->txn_id(); +} + +int64_t AlphaRowset::partition_id() const { + return _rowset_meta->partition_id(); +} + +bool AlphaRowset::delete_flag() { + return _rowset_meta->delete_flag(); +} + +OLAPStatus AlphaRowset::split_range( + const RowCursor& start_key, + const RowCursor& end_key, + uint64_t request_block_row_count, + vector* ranges) { + EntrySlice entry; + RowBlockPosition start_pos; + RowBlockPosition end_pos; + RowBlockPosition step_pos; + + std::shared_ptr largest_segment_group = _segment_group_with_largest_size(); + if (largest_segment_group == nullptr) { + ranges->emplace_back(start_key.to_tuple()); + ranges->emplace_back(end_key.to_tuple()); + return OLAP_SUCCESS; + } + uint64_t expected_rows = request_block_row_count + / largest_segment_group->current_num_rows_per_row_block(); + if (expected_rows == 0) { + LOG(WARNING) << "expected_rows less than 1. 
[request_block_row_count = " + << request_block_row_count << "]"; + return OLAP_ERR_TABLE_NOT_FOUND; + } + + // 找到startkey对应的起始位置 + RowCursor helper_cursor; + if (helper_cursor.init(*_schema, _schema->num_short_key_columns()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to parse strings to key with RowCursor type."; + return OLAP_ERR_INVALID_SCHEMA; + } + if (largest_segment_group->find_short_key(start_key, &helper_cursor, false, &start_pos) != OLAP_SUCCESS) { + if (largest_segment_group->find_first_row_block(&start_pos) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to get first block pos"; + return OLAP_ERR_TABLE_INDEX_FIND_ERROR; + } + } + + step_pos = start_pos; + VLOG(3) << "start_pos=" << start_pos.segment << ", " << start_pos.index_offset; + + //find last row_block is end_key is given, or using last_row_block + if (largest_segment_group->find_short_key(end_key, &helper_cursor, false, &end_pos) != OLAP_SUCCESS) { + if (largest_segment_group->find_last_row_block(&end_pos) != OLAP_SUCCESS) { + LOG(WARNING) << "fail find last row block."; + return OLAP_ERR_TABLE_INDEX_FIND_ERROR; + } + } + + VLOG(3) << "end_pos=" << end_pos.segment << ", " << end_pos.index_offset; + + //get rows between first and last + OLAPStatus res = OLAP_SUCCESS; + RowCursor cur_start_key; + RowCursor last_start_key; + + if (cur_start_key.init(*_schema, _schema->num_short_key_columns()) != OLAP_SUCCESS + || last_start_key.init(*_schema, _schema->num_short_key_columns()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init cursor"; + return OLAP_ERR_INIT_FAILED; + } + + if (largest_segment_group->get_row_block_entry(start_pos, &entry) != OLAP_SUCCESS) { + LOG(WARNING) << "get block entry failed."; + return OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION; + } + + cur_start_key.attach(entry.data); + last_start_key.allocate_memory_for_string_type(*_schema); + last_start_key.copy_without_pool(cur_start_key); + // start_key是last start_key, 但返回的实际上是查询层给出的key + ranges->emplace_back(start_key.to_tuple()); + + while (end_pos > step_pos) { + res = largest_segment_group->advance_row_block(expected_rows, &step_pos); + if (res == OLAP_ERR_INDEX_EOF || !(end_pos > step_pos)) { + break; + } else if (res != OLAP_SUCCESS) { + LOG(WARNING) << "advance_row_block failed."; + return OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION; + } + + if (largest_segment_group->get_row_block_entry(step_pos, &entry) != OLAP_SUCCESS) { + LOG(WARNING) << "get block entry failed."; + return OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION; + } + cur_start_key.attach(entry.data); + + if (cur_start_key.cmp(last_start_key) != 0) { + ranges->emplace_back(cur_start_key.to_tuple()); // end of last section + ranges->emplace_back(cur_start_key.to_tuple()); // start a new section + last_start_key.copy_without_pool(cur_start_key); + } + } + + ranges->emplace_back(end_key.to_tuple()); + return OLAP_SUCCESS; +} + +bool AlphaRowset::check_path(const std::string& path) { + std::set valid_paths; + for (auto segment_group : _segment_groups) { + for (int i = 0; i < segment_group->num_segments(); ++i) { + std::string data_path = segment_group->construct_data_file_path(i); + std::string index_path = segment_group->construct_index_file_path(i); + valid_paths.insert(data_path); + valid_paths.insert(index_path); + } + } + return valid_paths.find(path) != valid_paths.end(); +} + +OLAPStatus AlphaRowset::_init_segment_groups() { + std::vector segment_group_metas; + AlphaRowsetMetaSharedPtr _alpha_rowset_meta = std::dynamic_pointer_cast(_rowset_meta); + _alpha_rowset_meta->get_segment_groups(&segment_group_metas); + for 
(auto& segment_group_meta : segment_group_metas) { + std::shared_ptr segment_group; + if (_is_pending_rowset) { + segment_group.reset(new SegmentGroup(_rowset_meta->tablet_id(), + _rowset_meta->rowset_id(), _schema, _rowset_path, false, segment_group_meta.segment_group_id(), + segment_group_meta.num_segments(), true, + _rowset_meta->partition_id(), _rowset_meta->txn_id())); + } else { + segment_group.reset(new SegmentGroup(_rowset_meta->tablet_id(), + _rowset_meta->rowset_id(), _schema, _rowset_path, + _rowset_meta->version(), _rowset_meta->version_hash(), + false, segment_group_meta.segment_group_id(), segment_group_meta.num_segments())); + } + if (segment_group == nullptr) { + LOG(WARNING) << "fail to create olap segment_group. rowset_id='" << _rowset_meta->rowset_id(); + return OLAP_ERR_CREATE_FILE_ERROR; + } + if (segment_group_meta.has_empty()) { + segment_group->set_empty(segment_group_meta.empty()); + } + + if (segment_group_meta.zone_maps_size() != 0) { + size_t zone_maps_size = segment_group_meta.zone_maps_size(); + size_t num_key_columns = _schema->num_key_columns(); + if (num_key_columns != zone_maps_size) { + LOG(ERROR) << "column pruning size is error." + << "zone_maps_size=" << zone_maps_size << ", " + << "num_key_columns=" << _schema->num_key_columns(); + return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; + } + std::vector> zone_map_strings(num_key_columns); + std::vector null_vec(num_key_columns); + for (size_t j = 0; j < num_key_columns; ++j) { + const ZoneMap& zone_map = segment_group_meta.zone_maps(j); + zone_map_strings[j].first = zone_map.min(); + zone_map_strings[j].second = zone_map.max(); + if (zone_map.has_null_flag()) { + null_vec[j] = zone_map.null_flag(); + } else { + null_vec[j] = false; + } + } + OLAPStatus status = segment_group->add_zone_maps(zone_map_strings, null_vec); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "segment group add column statistics failed, status:" << status; + return status; + } + } + _segment_groups.push_back(segment_group); + } + if (_is_cumulative_rowset && _segment_groups.size() > 1) { + LOG(WARNING) << "invalid segment group meta for cumulative rowset. 
segment group size:" + << _segment_groups.size(); + return OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; + } + return OLAP_SUCCESS; +} + +std::shared_ptr AlphaRowset::_segment_group_with_largest_size() { + std::shared_ptr largest_segment_group = nullptr; + size_t largest_segment_group_sizes = 0; + + for (auto segment_group : _segment_groups) { + if (segment_group->empty() || segment_group->zero_num_rows()) { + continue; + } + if (segment_group->index_size() > largest_segment_group_sizes) { + largest_segment_group = segment_group; + largest_segment_group_sizes = segment_group->index_size(); + } + } + return largest_segment_group; +} + +OLAPStatus AlphaRowset::reset_sizeinfo() { + if (!is_loaded()) { + RETURN_NOT_OK(load()); + } + std::vector segment_group_metas; + AlphaRowsetMetaSharedPtr alpha_rowset_meta = std::dynamic_pointer_cast(_rowset_meta); + alpha_rowset_meta->get_segment_groups(&segment_group_metas); + int32_t segment_group_idx = 0; + for (auto segment_group : _segment_groups) { + alpha_rowset_meta->set_data_disk_size(alpha_rowset_meta->data_disk_size() + segment_group->data_size()); + alpha_rowset_meta->set_index_disk_size(alpha_rowset_meta->index_disk_size() + segment_group->index_size()); + alpha_rowset_meta->set_total_disk_size(alpha_rowset_meta->total_disk_size() + + segment_group->index_size() + segment_group->data_size()); + alpha_rowset_meta->set_num_rows(alpha_rowset_meta->num_rows() + segment_group->num_rows()); + segment_group_metas.at(segment_group_idx).set_index_size(segment_group->index_size()); + segment_group_metas.at(segment_group_idx).set_data_size(segment_group->data_size()); + segment_group_metas.at(segment_group_idx).set_num_rows(segment_group->num_rows()); + ++segment_group_idx; + } + alpha_rowset_meta->clear_segment_group(); + for (auto& segment_group_meta : segment_group_metas) { + alpha_rowset_meta->add_segment_group(segment_group_meta); + } + return OLAP_SUCCESS; +} + +std::string AlphaRowset::unique_id() { + // rowset path + rowset_id is unique for a rowset + return _rowset_path + "/" + std::to_string(rowset_id()); +} + +} // namespace doris diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h new file mode 100644 index 00000000000000..8a65b66918974d --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset.h @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
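AlphaRowset::split_range above steps through the short-key index of the largest segment group in increments of expected_rows and emits a pair of range boundaries whenever the key at the step position changes, with the caller-provided start_key and end_key as the outer bounds. A reduced sketch of that stepping idea over a plain sorted key column (illustrative names, not the Doris index API):

#include <string>
#include <vector>

// Split a sorted key column into chunks of roughly `step` rows; steps whose
// key repeats the previous boundary are merged into the same range.
std::vector<std::string> split_ranges(const std::vector<std::string>& sorted_keys, size_t step) {
    std::vector<std::string> bounds;
    if (sorted_keys.empty() || step == 0) {
        return bounds;
    }
    bounds.push_back(sorted_keys.front());       // start of the first range
    std::string last = sorted_keys.front();
    for (size_t pos = step; pos < sorted_keys.size(); pos += step) {
        if (sorted_keys[pos] != last) {
            bounds.push_back(sorted_keys[pos]);  // end of the previous range
            bounds.push_back(sorted_keys[pos]);  // start of the next range
            last = sorted_keys[pos];
        }
    }
    bounds.push_back(sorted_keys.back());        // end of the last range
    return bounds;
}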
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_H +#define DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_H + +#include "olap/rowset/rowset.h" +#include "olap/rowset/segment_group.h" +#include "olap/rowset/alpha_rowset_reader.h" +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/rowset_meta.h" +#include "olap/data_dir.h" +#include "olap/tuple.h" + +#include +#include + +namespace doris { + +class AlphaRowset; +using AlphaRowsetSharedPtr = std::shared_ptr; + +class AlphaRowset : public Rowset { +public: + AlphaRowset(const TabletSchema* schema, const std::string rowset_path, + DataDir* data_dir, RowsetMetaSharedPtr rowset_meta); + virtual ~AlphaRowset() {} + + static bool is_valid_rowset_path(std::string path); + + OLAPStatus init() override; + + // this api is for lazy loading data + // always means that there are some io + OLAPStatus load() override; + + std::shared_ptr create_reader() override; + + OLAPStatus remove() override; + + void to_rowset_pb(RowsetMetaPB* rs_meta) override; + + RowsetMetaSharedPtr rowset_meta() const override; + + size_t data_disk_size() const override; + + size_t index_disk_size() const override; + + bool empty() const override; + + bool zero_num_rows() const override; + + size_t num_rows() const override; + + Version version() const override; + + void set_version_and_version_hash(Version version, VersionHash version_hash) override; + + int64_t end_version() const override; + + int64_t start_version() const override; + + VersionHash version_hash() const override; + + bool in_use() const override; + + void acquire() override; + + void release() override; + + int64_t ref_count() const override; + + OLAPStatus make_snapshot(const std::string& snapshot_path, + std::vector* success_links) override; + OLAPStatus copy_files_to_path(const std::string& dest_path, + std::vector* success_files) override; + + OLAPStatus convert_from_old_files(const std::string& snapshot_path, + std::vector* success_files); + + OLAPStatus convert_to_old_files(const std::string& snapshot_path, + std::vector* success_files); + + OLAPStatus remove_old_files(std::vector* files_to_remove) override; + + RowsetId rowset_id() const override; + + int64_t creation_time() override; + + bool is_pending() const override; + + PUniqueId load_id() const override; + + int64_t txn_id() const override; + + int64_t partition_id() const override; + + // flag for push delete rowset + bool delete_flag() override; + + OLAPStatus split_range( + const RowCursor& start_key, + const RowCursor& end_key, + uint64_t request_block_row_count, + vector* ranges); + + bool check_path(const std::string& path) override; + + // when convert from old be, should set row num, index size, data size + // info by using segment's info + OLAPStatus reset_sizeinfo(); + + std::string unique_id() override; + +private: + OLAPStatus _init_segment_groups(); + + std::shared_ptr _segment_group_with_largest_size(); + +private: + friend class AlphaRowsetWriter; + friend class AlphaRowsetReader; + const TabletSchema* _schema; + std::string _rowset_path; + DataDir* _data_dir; + RowsetMetaSharedPtr _rowset_meta; + std::vector> _segment_groups; + bool _is_cumulative_rowset; + bool _is_pending_rowset; + atomic_t _ref_count; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_H diff --git a/be/src/olap/rowset/alpha_rowset_meta.cpp b/be/src/olap/rowset/alpha_rowset_meta.cpp new file mode 100644 index 00000000000000..e54936241a7e93 --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset_meta.cpp @@ -0,0 +1,48 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/alpha_rowset_meta.h" + +#include "common/logging.h" + +namespace doris { + +void AlphaRowsetMeta::get_segment_groups(std::vector* segment_groups) { + if (!_has_alpha_rowset_extra_meta_pb()) { + return; + } + const AlphaRowsetExtraMetaPB& alpha_rowset_extra_meta_pb = _alpha_rowset_extra_meta_pb(); + for (auto& segment_group : alpha_rowset_extra_meta_pb.segment_groups()) { + segment_groups->push_back(segment_group); + } +} + +void AlphaRowsetMeta::add_segment_group(const SegmentGroupPB& segment_group) { + AlphaRowsetExtraMetaPB* alpha_rowset_extra_meta_pb = _mutable_alpha_rowset_extra_meta_pb(); + SegmentGroupPB* new_segment_group = alpha_rowset_extra_meta_pb->add_segment_groups(); + *new_segment_group = segment_group; +} + +void AlphaRowsetMeta::clear_segment_group() { + if (!_has_alpha_rowset_extra_meta_pb()) { + return; + } + AlphaRowsetExtraMetaPB* alpha_rowset_extra_meta_pb = _mutable_alpha_rowset_extra_meta_pb(); + alpha_rowset_extra_meta_pb->clear_segment_groups(); +} + +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/rowset/alpha_rowset_meta.h b/be/src/olap/rowset/alpha_rowset_meta.h new file mode 100644 index 00000000000000..455fb1ff627e95 --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset_meta.h @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
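The three AlphaRowsetMeta helpers above are the whole read-modify-write surface over the repeated SegmentGroupPB field: callers copy the groups out, edit the copies, then clear and re-add them, which is how AlphaRowset::set_version_and_version_hash publishes a pending rowset. A condensed sketch of that pattern, templated so the concrete protobuf types stay assumptions rather than claims about the real API:

#include <vector>

// MetaT is assumed to expose the three helpers above; SegmentGroupT is assumed
// to expose clear_load_id(), as SegmentGroupPB does in this patch.
template <typename MetaT, typename SegmentGroupT>
void publish_segment_groups(MetaT* meta) {
    std::vector<SegmentGroupT> groups;
    meta->get_segment_groups(&groups);   // copy the repeated field out
    for (auto& group : groups) {
        group.clear_load_id();           // pending -> published: drop the load id
    }
    meta->clear_segment_group();         // drop the old entries
    for (const auto& group : groups) {
        meta->add_segment_group(group);  // write the edited copies back
    }
}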
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_META_H +#define DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_META_H + +#include "olap/rowset/rowset_meta.h" + +#include +#include +#include + +namespace doris { + +class AlphaRowsetMeta; +using AlphaRowsetMetaSharedPtr = std::shared_ptr; + +class AlphaRowsetMeta : public RowsetMeta { +public: + + void get_segment_groups(std::vector* segment_groups); + + void add_segment_group(const SegmentGroupPB& segment_group); + + void clear_segment_group(); +}; + +} + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_META_H diff --git a/be/src/olap/rowset/alpha_rowset_reader.cpp b/be/src/olap/rowset/alpha_rowset_reader.cpp new file mode 100644 index 00000000000000..4d67284caf3b24 --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset_reader.cpp @@ -0,0 +1,346 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/alpha_rowset_reader.h" +#include "olap/rowset/alpha_rowset.h" + +namespace doris { + +AlphaRowsetReader::AlphaRowsetReader( + int num_rows_per_row_block, + RowsetSharedPtr rowset) + : _num_rows_per_row_block(num_rows_per_row_block), + _rowset(rowset), + _alpha_rowset_meta(nullptr), + _segment_groups(std::dynamic_pointer_cast(rowset)->_segment_groups), + _key_range_size(0) { + RowsetMetaSharedPtr rowset_meta_ptr = (std::dynamic_pointer_cast(rowset)->_rowset_meta); + _alpha_rowset_meta = reinterpret_cast(rowset_meta_ptr.get()); +} + +AlphaRowsetReader::~AlphaRowsetReader() { + delete _dst_cursor; +} + +OLAPStatus AlphaRowsetReader::init(RowsetReaderContext* read_context) { + if (read_context == nullptr) { + return OLAP_ERR_INIT_FAILED; + } + _current_read_context = read_context; + if (_current_read_context->stats != nullptr) { + _stats = _current_read_context->stats; + } + + Version version = _alpha_rowset_meta->version(); + _is_singleton_rowset = (version.first == version.second); + _ordinal = 0; + bool merge = false; + /* + * For a singleton rowset, there are three situations. + * 1. A QUERY task will set preaggregation. + * If preaggregation is set to true, + * there is no need to merge rows in advance. + * 2. A QUERY task for a DUP_KEYS tablet has no need + * to merge rows in advance. + * 3. A COMPACTION/CHECKSUM/ALTER_TABLET task should merge + * rows in advance. + * For a cumulative rowset, there is no need to merge rows in advance. + */ + RETURN_NOT_OK(_init_merge_ctxs(read_context)); + if (_is_singleton_rowset && _merge_ctxs.size() > 1) { + if (_current_read_context->reader_type == READER_QUERY + && _current_read_context->preaggregation) { + // 1. 
QUERY task which set pregaggregation to be true + _next_block = &AlphaRowsetReader::_union_block; + } else if (_current_read_context->reader_type == READER_QUERY + && _current_read_context->tablet_schema->keys_type() == DUP_KEYS) { + // 2. QUERY task for DUP_KEYS tablet + _next_block = &AlphaRowsetReader::_union_block; + } else { + // 3. COMPACTION/CHECKSUM/ALTER_TABLET task + _next_block = &AlphaRowsetReader::_merge_block; + merge = true; + } + } else { + // query task to scan cumulative rowset + _next_block = &AlphaRowsetReader::_union_block; + } + + if (merge) { + _read_block.reset(new (std::nothrow) RowBlock(_current_read_context->tablet_schema)); + if (_read_block == nullptr) { + LOG(WARNING) << "new row block failed in reader"; + return OLAP_ERR_MALLOC_ERROR; + } + RowBlockInfo block_info; + block_info.row_num = _current_read_context->tablet_schema->num_rows_per_row_block(); + block_info.null_supported = true; + _read_block->init(block_info); + _dst_cursor = new (std::nothrow) RowCursor(); + if (_dst_cursor == nullptr) { + LOG(WARNING) << "allocate memory for row cursor failed"; + return OLAP_ERR_MALLOC_ERROR; + } + _dst_cursor->init(*(_current_read_context->tablet_schema), + *(_current_read_context->seek_columns)); + for (size_t i = 0; i < _merge_ctxs.size(); ++i) { + _merge_ctxs[i].row_cursor.reset(new (std::nothrow) RowCursor()); + _merge_ctxs[i].row_cursor->init(*(_current_read_context->tablet_schema), + *(_current_read_context->seek_columns)); + } + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetReader::next_block(RowBlock** block) { + return (this->*_next_block)(block); +} + +bool AlphaRowsetReader::delete_flag() { + return _alpha_rowset_meta->delete_flag(); +} + +Version AlphaRowsetReader::version() { + return _alpha_rowset_meta->version(); +} + +VersionHash AlphaRowsetReader::version_hash() { + return _alpha_rowset_meta->version_hash(); +} + +void AlphaRowsetReader::close() { + _merge_ctxs.clear(); +} + +int64_t AlphaRowsetReader::filtered_rows() { + return _stats->rows_del_filtered; +} + +OLAPStatus AlphaRowsetReader::_union_block(RowBlock** block) { + while (_ordinal < _merge_ctxs.size()) { + // union block only use one block to store + OLAPStatus status = _pull_next_block(&(_merge_ctxs[_ordinal])); + if (status == OLAP_ERR_DATA_EOF) { + _ordinal++; + continue; + } else if (status != OLAP_SUCCESS) { + return status; + } else { + (*block) = _merge_ctxs[_ordinal].row_block; + return OLAP_SUCCESS; + } + } + if (_ordinal == _merge_ctxs.size()) { + *block = nullptr; + return OLAP_ERR_DATA_EOF; + } + + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetReader::_merge_block(RowBlock** block) { + // Row among different segment groups may overlap with each other. + // Iterate all row_blocks to fetch min row each round. 
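The merge path that follows is a k-way merge: each MergeContext holds the current block of one segment group, and on every round the reader scans all contexts, takes the smallest current row, and advances only that context (_pull_next_row_for_merge_rowset). The selection step, sketched over plain sorted integer streams instead of RowBlock/RowCursor:

#include <vector>

// Stand-in for MergeContext: one sorted input stream and a read position.
struct Stream {
    std::vector<int> rows;   // already sorted ascending
    size_t pos = 0;
    bool exhausted() const { return pos >= rows.size(); }
};

// Return the index of the stream whose current row is smallest, or -1 when all
// streams are exhausted; the caller consumes rows[pos] and then bumps pos.
int pick_min_stream(const std::vector<Stream>& streams) {
    int min_idx = -1;
    for (int i = 0; i < static_cast<int>(streams.size()); ++i) {
        if (streams[i].exhausted()) {
            continue;
        }
        if (min_idx == -1 ||
                streams[i].rows[streams[i].pos] < streams[min_idx].rows[streams[min_idx].pos]) {
            min_idx = i;
        }
    }
    return min_idx;
}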
+ OLAPStatus status = OLAP_SUCCESS; + _read_block->clear(); + size_t num_rows_in_block = 0; + while (_read_block->pos() < _num_rows_per_row_block) { + RowCursor* row_cursor = nullptr; + status = _pull_next_row_for_merge_rowset(&row_cursor); + if (status == OLAP_ERR_DATA_EOF && _read_block->pos() > 0) { + status = OLAP_SUCCESS; + break; + } else if (status != OLAP_SUCCESS) { + return status; + } + _read_block->get_row(_read_block->pos(), _dst_cursor); + _dst_cursor->copy(*row_cursor, _read_block->mem_pool()); + _read_block->pos_inc(); + num_rows_in_block++; + } + _read_block->set_pos(0); + _read_block->set_limit(num_rows_in_block); + _read_block->finalize(num_rows_in_block); + *block = _read_block.get(); + return status; +} + +OLAPStatus AlphaRowsetReader::_pull_next_row_for_merge_rowset(RowCursor** row) { + RowCursor* min_row = nullptr; + int min_index = -1; + + size_t ordinal = 0; + while (ordinal < _merge_ctxs.size()) { + MergeContext* merge_ctx = &(_merge_ctxs[ordinal]); + if (merge_ctx->row_block == nullptr || !merge_ctx->row_block->has_remaining()) { + OLAPStatus status = _pull_next_block(merge_ctx); + if (status == OLAP_ERR_DATA_EOF) { + _merge_ctxs.erase(_merge_ctxs.begin() + ordinal); + continue; + } else if (status != OLAP_SUCCESS) { + LOG(WARNING) << "read next row of singleton rowset failed:" << status; + return status; + } + } + RowCursor* current_row = merge_ctx->row_cursor.get(); + merge_ctx->row_block->get_row(merge_ctx->row_block->pos(), current_row); + if (min_row == nullptr || min_row->cmp(*current_row) > 0) { + min_row = current_row; + min_index = ordinal; + } + ordinal++; + } + if (min_row == nullptr || min_index == -1) { + return OLAP_ERR_DATA_EOF; + } + *row = min_row; + _merge_ctxs[min_index].row_block->pos_inc(); + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetReader::_pull_next_block(MergeContext* merge_ctx) { + OLAPStatus status = OLAP_SUCCESS; + if (OLAP_UNLIKELY(merge_ctx->first_read_symbol)) { + if (_key_range_size > 0) { + status = _pull_first_block(merge_ctx); + } else { + status = merge_ctx->column_data->get_first_row_block(&(merge_ctx->row_block)); + if (status != OLAP_SUCCESS && status != OLAP_ERR_DATA_EOF) { + LOG(WARNING) << "get first row block failed, status:" << status; + } + } + merge_ctx->first_read_symbol = false; + return status; + } else { + // get next block + status = merge_ctx->column_data->get_next_block(&(merge_ctx->row_block)); + if (status == OLAP_ERR_DATA_EOF && _key_range_size > 0) { + // reach the end of one predicate + // currently, SegmentReader can only support filter one key range a time + // refresh the predicate and continue read + return _pull_first_block(merge_ctx); + } + } + return status; +} + +OLAPStatus AlphaRowsetReader::_pull_first_block(MergeContext* merge_ctx) { + OLAPStatus status = OLAP_SUCCESS; + merge_ctx->key_range_index++; + while (merge_ctx->key_range_index < _key_range_size) { + status = merge_ctx->column_data->prepare_block_read( + _current_read_context->lower_bound_keys->at(merge_ctx->key_range_index), + _current_read_context->is_lower_keys_included->at(merge_ctx->key_range_index), + _current_read_context->upper_bound_keys->at(merge_ctx->key_range_index), + _current_read_context->is_upper_keys_included->at(merge_ctx->key_range_index), + &(merge_ctx->row_block)); + if (status == OLAP_ERR_DATA_EOF) { + merge_ctx->key_range_index++; + continue; + } else if (status != OLAP_SUCCESS) { + LOG(WARNING) << "prepare block read failed. 
status=" << status; + return status; + } else { + break; + } + } + if (merge_ctx->key_range_index >= _key_range_size) { + merge_ctx->row_block = nullptr; + return OLAP_ERR_DATA_EOF; + } + return status; +} + +OLAPStatus AlphaRowsetReader::_init_merge_ctxs(RowsetReaderContext* read_context) { + if (read_context->reader_type == READER_QUERY) { + if (read_context->lower_bound_keys->size() != read_context->is_lower_keys_included->size() + || read_context->lower_bound_keys->size() != read_context->upper_bound_keys->size() + || read_context->upper_bound_keys->size() != read_context->is_upper_keys_included->size()) { + std::string error_msg = "invalid key range arguments"; + LOG(WARNING) << error_msg; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + _key_range_size = read_context->lower_bound_keys->size(); + } + + for (auto& segment_group : _segment_groups) { + std::unique_ptr new_column_data(ColumnData::create(segment_group.get())); + OLAPStatus status = new_column_data->init(); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "init column data failed"; + return OLAP_ERR_READER_READING_ERROR; + } + new_column_data->set_delete_handler(read_context->delete_handler); + new_column_data->set_stats(_stats); + new_column_data->set_lru_cache(read_context->lru_cache); + if (read_context->reader_type == READER_ALTER_TABLE) { + new_column_data->schema_change_init(); + new_column_data->set_using_cache(read_context->is_using_cache); + if (new_column_data->empty() && new_column_data->zero_num_rows()) { + continue; + } + } else { + new_column_data->set_read_params(*read_context->return_columns, + *read_context->seek_columns, + *read_context->load_bf_columns, + *read_context->conditions, + *read_context->predicates, + read_context->is_using_cache, + read_context->runtime_state); + // filter + if (new_column_data->rowset_pruning_filter()) { + _stats->rows_stats_filtered += new_column_data->num_rows(); + VLOG(3) << "filter segment group in query in condition. version=" + << new_column_data->version().first + << "-" << new_column_data->version().second; + continue; + } + } + + int ret = new_column_data->delete_pruning_filter(); + if (ret == DEL_SATISFIED) { + _stats->rows_del_filtered += new_column_data->num_rows(); + VLOG(3) << "filter segment group in delete predicate:" + << new_column_data->version().first << ", " << new_column_data->version().second; + continue; + } else if (ret == DEL_PARTIAL_SATISFIED) { + VLOG(3) << "filter segment group partially in delete predicate:" + << new_column_data->version().first << ", " << new_column_data->version().second; + new_column_data->set_delete_status(DEL_PARTIAL_SATISFIED); + } else { + VLOG(3) << "not filter segment group in delete predicate:" + << new_column_data->version().first << ", " << new_column_data->version().second; + new_column_data->set_delete_status(DEL_NOT_SATISFIED); + } + MergeContext merge_ctx; + merge_ctx.column_data = std::move(new_column_data); + _merge_ctxs.emplace_back(std::move(merge_ctx)); + } + + if (!_is_singleton_rowset && _merge_ctxs.size() > 1) { + LOG(WARNING) << "invalid column_datas for cumulative rowset. 
column_datas size:" +                << _merge_ctxs.size(); +        return OLAP_ERR_READER_READING_ERROR; +    } +    return OLAP_SUCCESS; +} + +RowsetSharedPtr AlphaRowsetReader::rowset() { +    return _rowset; +} + +} // namespace doris diff --git a/be/src/olap/rowset/alpha_rowset_reader.h b/be/src/olap/rowset/alpha_rowset_reader.h new file mode 100644 index 00000000000000..444db67e629fcf --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset_reader.h @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.  See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.  The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.  You may obtain a copy of the License at +// +//   http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied.  See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_READER_H + +#include "olap/rowset/rowset_reader.h" +#include "olap/rowset/segment_group.h" +#include "olap/rowset/column_data.h" +#include "olap/rowset/alpha_rowset_meta.h" + +#include  + +namespace doris { + +struct MergeContext { +    std::unique_ptr<ColumnData> column_data = nullptr; + +    int key_range_index = -1; + +    // Read data from ColumnData for the first time. +    // ScanKey should be sought in this case. +    bool first_read_symbol = true; + +    // For a singleton rowset, there are several SegmentGroups. +    // Each SegmentGroup corresponds to a row_block upon scan. +    RowBlock* row_block = nullptr; + +    // For a singleton rowset, there are several SegmentGroups. +    // Each SegmentGroup corresponds to a row_cursor. +    std::unique_ptr<RowCursor> row_cursor = nullptr; +}; + +class AlphaRowsetReader : public RowsetReader { +public: +    AlphaRowsetReader(int num_rows_per_row_block, RowsetSharedPtr rowset); + +    ~AlphaRowsetReader(); + +    // reader init +    virtual OLAPStatus init(RowsetReaderContext* read_context); + +    // read next block data +    virtual OLAPStatus next_block(RowBlock** block); + +    virtual bool delete_flag(); + +    virtual Version version(); + +    virtual VersionHash version_hash(); + +    // close reader +    virtual void close(); + +    virtual RowsetSharedPtr rowset(); + +    virtual int64_t filtered_rows(); + +private: + +    OLAPStatus _init_merge_ctxs(RowsetReaderContext* read_context); + +    OLAPStatus _union_block(RowBlock** block); +    OLAPStatus _merge_block(RowBlock** block); +    OLAPStatus _pull_next_row_for_merge_rowset(RowCursor** row); +    OLAPStatus _pull_next_block(MergeContext* merge_ctx); + +    // Doris splits query predicates into several scan keys. +    // This function is used to fetch a block when advancing +    // from the current scan key to the next one.
+    OLAPStatus _pull_first_block(MergeContext* merge_ctx); + +private: +    int _num_rows_per_row_block; +    RowsetSharedPtr _rowset; +    std::string _rowset_path; +    AlphaRowsetMeta* _alpha_rowset_meta; +    const std::vector<std::shared_ptr<SegmentGroup>>& _segment_groups; + +    std::vector<MergeContext> _merge_ctxs; +    std::unique_ptr<RowBlock> _read_block; +    OLAPStatus (AlphaRowsetReader::*_next_block)(RowBlock** block) = nullptr; +    RowCursor* _dst_cursor = nullptr; +    int _key_range_size; + +    // A singleton rowset is a rowset whose start version +    // and end version are equal. +    // In streaming ingestion, rows among different segment +    // groups may overlap, and this must be taken +    // into consideration deliberately. +    bool _is_singleton_rowset; + +    // ordinal of ColumnData upon reading +    size_t _ordinal; + +    RowsetReaderContext* _current_read_context; +    OlapReaderStatistics _owned_stats; +    OlapReaderStatistics* _stats = &_owned_stats; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_READER_H diff --git a/be/src/olap/rowset/alpha_rowset_writer.cpp b/be/src/olap/rowset/alpha_rowset_writer.cpp new file mode 100644 index 00000000000000..fc42f362a2a7ef --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset_writer.cpp @@ -0,0 +1,346 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.  See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.  The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.  You may obtain a copy of the License at +// +//   http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied.  See the License for the +// specific language governing permissions and limitations +// under the License.
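+
+// Intended call sequence (enforced by the _writer_state checks below):
+//   AlphaRowsetWriter writer;
+//   writer.init(rowset_writer_context);        // fill the rowset meta from the context
+//   writer.add_row(...);                       // or add_row_block()/add_rowset();
+//                                              // re-runs _init() when not WRITER_INITED
+//   writer.flush();                            // finalize and release the ColumnDataWriter
+//   RowsetSharedPtr rowset = writer.build();   // returns nullptr on failure
+// If build() never succeeds, the destructor calls garbage_collection() to remove
+// the segment files written so far.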
+ +#include "olap/rowset/alpha_rowset.h" + +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/rowset/rowset_meta_manager.h" + +namespace doris { + +AlphaRowsetWriter::AlphaRowsetWriter() : + _segment_group_id(0), + _cur_segment_group(nullptr), + _column_data_writer(nullptr), + _current_rowset_meta(nullptr), + _is_pending_rowset(false), + _num_rows_written(0), + _rowset_build(false), + _writer_state(WRITER_CREATED), + _need_column_data_writer(true) { } + +AlphaRowsetWriter::~AlphaRowsetWriter() { + SAFE_DELETE(_column_data_writer); + if (!_rowset_build) { + garbage_collection(); + } + for (auto& segment_group : _segment_groups) { + segment_group->release(); + delete segment_group; + } + _segment_groups.clear(); +} + +OLAPStatus AlphaRowsetWriter::init(const RowsetWriterContext& rowset_writer_context) { + _rowset_writer_context = rowset_writer_context; + _current_rowset_meta.reset(new(std::nothrow) AlphaRowsetMeta()); + _current_rowset_meta->set_rowset_id(_rowset_writer_context.rowset_id); + _current_rowset_meta->set_partition_id(_rowset_writer_context.partition_id); + _current_rowset_meta->set_tablet_uid(_rowset_writer_context.tablet_uid); + _current_rowset_meta->set_tablet_id(_rowset_writer_context.tablet_id); + _current_rowset_meta->set_tablet_schema_hash(_rowset_writer_context.tablet_schema_hash); + _current_rowset_meta->set_rowset_type(_rowset_writer_context.rowset_type); + _current_rowset_meta->set_rowset_state(rowset_writer_context.rowset_state); + RowsetStatePB rowset_state = _rowset_writer_context.rowset_state; + if (rowset_state == PREPARED + || rowset_state == COMMITTED) { + _is_pending_rowset = true; + } + if (_is_pending_rowset) { + _current_rowset_meta->set_txn_id(_rowset_writer_context.txn_id); + _current_rowset_meta->set_load_id(_rowset_writer_context.load_id); + } else { + _current_rowset_meta->set_version(_rowset_writer_context.version); + _current_rowset_meta->set_version_hash(_rowset_writer_context.version_hash); + } + RETURN_NOT_OK(_init()); + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetWriter::add_row(RowCursor* row) { + if (_writer_state != WRITER_INITED) { + RETURN_NOT_OK(_init()); + } + OLAPStatus status = _column_data_writer->write(row); + if (status != OLAP_SUCCESS) { + std::string error_msg = "add row failed"; + LOG(WARNING) << error_msg; + return status; + } + _num_rows_written++; + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetWriter::add_row(const char* row, Schema* schema) { + if (_writer_state != WRITER_INITED) { + RETURN_NOT_OK(_init()); + } + OLAPStatus status = _column_data_writer->write(row, schema); + if (status != OLAP_SUCCESS) { + std::string error_msg = "add row failed"; + LOG(WARNING) << error_msg; + return status; + } + ++_num_rows_written; + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetWriter::add_row_block(RowBlock* row_block) { + if (_writer_state != WRITER_INITED) { + RETURN_NOT_OK(_init()); + } + size_t pos = 0; + row_block->set_pos(pos); + RowCursor row_cursor; + row_cursor.init(*(_rowset_writer_context.tablet_schema)); + while (pos < row_block->limit()) { + row_block->get_row(pos, &row_cursor); + add_row(&row_cursor); + row_block->pos_inc(); + pos = row_block->pos(); + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { + _need_column_data_writer = false; + // this api is for LinkedSchemaChange + // use create hard link to copy rowset for performance + // this is feasible because LinkedSchemaChange is done on the same disk + 
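+    // (A hard link, e.g. POSIX ::link(src, dst), adds another directory entry for
+    // an existing file instead of copying its bytes; it can only be created within
+    // a single filesystem, hence the same-disk requirement above.)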
AlphaRowsetSharedPtr alpha_rowset = std::dynamic_pointer_cast(rowset); + for (auto& segment_group : alpha_rowset->_segment_groups) { + RETURN_NOT_OK(_init()); + RETURN_NOT_OK(segment_group->link_segments_to_path(_rowset_writer_context.rowset_path_prefix, + _rowset_writer_context.rowset_id)); + _cur_segment_group->set_empty(segment_group->empty()); + _cur_segment_group->set_num_segments(segment_group->num_segments()); + _cur_segment_group->add_zone_maps(segment_group->get_zone_maps()); + RETURN_NOT_OK(flush()); + _num_rows_written += segment_group->num_rows(); + } + LOG(INFO) << "clone add_rowset:" << _num_rows_written; + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetWriter::add_rowset_for_linked_schema_change( + RowsetSharedPtr rowset, const SchemaMapping& schema_mapping) { + _need_column_data_writer = false; + // this api is for LinkedSchemaChange + // use create hard link to copy rowset for performance + // this is feasible because LinkedSchemaChange is done on the same disk + AlphaRowsetSharedPtr alpha_rowset = std::dynamic_pointer_cast(rowset); + for (auto& segment_group : alpha_rowset->_segment_groups) { + RETURN_NOT_OK(_init()); + RETURN_NOT_OK(segment_group->link_segments_to_path(_rowset_writer_context.rowset_path_prefix, + _rowset_writer_context.rowset_id)); + _cur_segment_group->set_empty(segment_group->empty()); + _cur_segment_group->set_num_segments(segment_group->num_segments()); + _cur_segment_group->add_zone_maps_for_linked_schema_change(segment_group->get_zone_maps(), + schema_mapping); + RETURN_NOT_OK(flush()); + _num_rows_written += segment_group->num_rows(); + } + return OLAP_SUCCESS; +} + +OLAPStatus AlphaRowsetWriter::flush() { + if (_writer_state == WRITER_FLUSHED) { + return OLAP_SUCCESS; + } + DCHECK(_writer_state == WRITER_INITED); + if (_need_column_data_writer) { + // column_data_writer finalize will call segment_group->set_empty() + RETURN_NOT_OK(_column_data_writer->finalize()); + } + SAFE_DELETE(_column_data_writer); + _writer_state = WRITER_FLUSHED; + return OLAP_SUCCESS; +} + +RowsetSharedPtr AlphaRowsetWriter::build() { + if (_writer_state != WRITER_FLUSHED) { + LOG(WARNING) << "invalid writer state before build, state:" << _writer_state; + return nullptr; + } + for (auto& segment_group : _segment_groups) { + if (segment_group->load() != OLAP_SUCCESS) { + return nullptr; + } + if (!segment_group->check()) { + return nullptr; + } + _current_rowset_meta->set_data_disk_size(_current_rowset_meta->data_disk_size() + segment_group->data_size()); + _current_rowset_meta->set_index_disk_size(_current_rowset_meta->index_disk_size() + segment_group->index_size()); + _current_rowset_meta->set_total_disk_size(_current_rowset_meta->total_disk_size() + + segment_group->index_size() + segment_group->data_size()); + SegmentGroupPB segment_group_pb; + segment_group_pb.set_segment_group_id(segment_group->segment_group_id()); + segment_group_pb.set_num_segments(segment_group->num_segments()); + segment_group_pb.set_index_size(segment_group->index_size()); + segment_group_pb.set_data_size(segment_group->data_size()); + segment_group_pb.set_num_rows(segment_group->num_rows()); + const std::vector& zone_maps = segment_group->get_zone_maps(); + if (!zone_maps.empty()) { + for (size_t i = 0; i < zone_maps.size(); ++i) { + ZoneMap* new_zone_map = segment_group_pb.add_zone_maps(); + new_zone_map->set_min(zone_maps.at(i).first->to_string()); + new_zone_map->set_max(zone_maps.at(i).second->to_string()); + new_zone_map->set_null_flag(zone_maps.at(i).first->is_null()); + } + } + if 
(_is_pending_rowset) { + PUniqueId* unique_id = segment_group_pb.mutable_load_id(); + unique_id->set_hi(_rowset_writer_context.load_id.hi()); + unique_id->set_lo(_rowset_writer_context.load_id.lo()); + } + segment_group_pb.set_empty(segment_group->empty()); + AlphaRowsetMetaSharedPtr alpha_rowset_meta + = std::dynamic_pointer_cast(_current_rowset_meta); + alpha_rowset_meta->add_segment_group(segment_group_pb); + } + if (_is_pending_rowset) { + _current_rowset_meta->set_rowset_state(COMMITTED); + } else { + _current_rowset_meta->set_rowset_state(VISIBLE); + } + + _current_rowset_meta->set_empty(_num_rows_written == 0); + _current_rowset_meta->set_num_rows(_num_rows_written); + _current_rowset_meta->set_creation_time(time(nullptr)); + + // validate rowset arguments before create rowset + bool ret = _validate_rowset(); + if (!ret) { + LOG(FATAL) << "validate rowset arguments failed"; + return nullptr; + } + + RowsetSharedPtr rowset(new(std::nothrow) AlphaRowset(_rowset_writer_context.tablet_schema, + _rowset_writer_context.rowset_path_prefix, + _rowset_writer_context.data_dir, _current_rowset_meta)); + DCHECK(rowset != nullptr) << "new rowset failed when build new rowset"; + + OLAPStatus status = rowset->init(); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "rowset init failed when build new rowset"; + return nullptr; + } + _rowset_build = true; + return rowset; +} + +MemPool* AlphaRowsetWriter::mem_pool() { + if (_column_data_writer != nullptr) { + return _column_data_writer->mem_pool(); + } else { + return nullptr; + } +} + +Version AlphaRowsetWriter::version() { + return _rowset_writer_context.version; +} + +int32_t AlphaRowsetWriter::num_rows() { + return _num_rows_written; +} + +OLAPStatus AlphaRowsetWriter::garbage_collection() { + for (auto& segment_group : _segment_groups) { + bool ret = segment_group->delete_all_files(); + if (!ret) { + LOG(WARNING) << "delete segment group files failed." 
+ << " tablet id:" << segment_group->get_tablet_id() + << ", rowset path:" << segment_group->rowset_path_prefix(); + return OLAP_ERR_ROWSET_DELETE_SEGMENT_GROUP_FILE_FAILED; + } + } + return OLAP_SUCCESS; +} + +DataDir* AlphaRowsetWriter::data_dir() { + return _rowset_writer_context.data_dir; +} + +OLAPStatus AlphaRowsetWriter::_init() { + if (_writer_state == WRITER_INITED) { + return OLAP_SUCCESS; + } + if (_is_pending_rowset) { + _cur_segment_group = new(std::nothrow) SegmentGroup( + _rowset_writer_context.tablet_id, + _rowset_writer_context.rowset_id, + _rowset_writer_context.tablet_schema, + _rowset_writer_context.rowset_path_prefix, + false, _segment_group_id, 0, true, + _rowset_writer_context.partition_id, _rowset_writer_context.txn_id); + } else { + _cur_segment_group = new(std::nothrow) SegmentGroup( + _rowset_writer_context.tablet_id, + _rowset_writer_context.rowset_id, + _rowset_writer_context.tablet_schema, + _rowset_writer_context.rowset_path_prefix, + _rowset_writer_context.version, + _rowset_writer_context.version_hash, + false, _segment_group_id, 0); + } + DCHECK(_cur_segment_group != nullptr) << "failed to malloc SegmentGroup"; + _cur_segment_group->acquire(); + //_cur_segment_group->set_load_id(_rowset_writer_context.load_id); + _segment_groups.push_back(_cur_segment_group); + + _column_data_writer = ColumnDataWriter::create(_cur_segment_group, true, + _rowset_writer_context.tablet_schema->compress_kind(), + _rowset_writer_context.tablet_schema->bloom_filter_fpp()); + DCHECK(_column_data_writer != nullptr) << "memory error occurs when creating writer"; + OLAPStatus res = _column_data_writer->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "column data writer init failed"; + return res; + } + + _segment_group_id++; + _writer_state = WRITER_INITED; + return OLAP_SUCCESS; +} + +bool AlphaRowsetWriter::_validate_rowset() { + if (_is_pending_rowset) { + int64_t partition_id = _current_rowset_meta->partition_id(); + if (partition_id <= 0) { + LOG(WARNING) << "invalid partition id:" << partition_id << " for pending rowset." + << ", rowset_id:" << _current_rowset_meta->rowset_id() + << ", tablet_id:" << _current_rowset_meta->tablet_id() + << ", schema_hash:" << _current_rowset_meta->tablet_schema_hash(); + return false; + } + } + int64_t num_rows = 0; + for (auto& segment_group : _segment_groups) { + num_rows += segment_group->num_rows(); + } + if (num_rows != _current_rowset_meta->num_rows()) { + LOG(WARNING) << "num_rows between rowset and segment_groups do not match. " + << "num_rows of segment_groups:" << num_rows + << ", num_rows of rowset:" << _current_rowset_meta->num_rows(); + return false; + } + return true; +} + +} // namespace doris diff --git a/be/src/olap/rowset/alpha_rowset_writer.h b/be/src/olap/rowset/alpha_rowset_writer.h new file mode 100644 index 00000000000000..c66a646647cbaf --- /dev/null +++ b/be/src/olap/rowset/alpha_rowset_writer.h @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License") override; you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_WRITER_H + +#include "olap/rowset/rowset_writer.h" +#include "olap/rowset/segment_group.h" +#include "olap/rowset/column_data_writer.h" + +#include + +namespace doris { + +enum WriterState { + WRITER_CREATED, + WRITER_INITED, + WRITER_FLUSHED +}; + +class AlphaRowsetWriter : public RowsetWriter { +public: + AlphaRowsetWriter(); + virtual ~AlphaRowsetWriter(); + + OLAPStatus init(const RowsetWriterContext& rowset_writer_context) override; + + // add a row block to rowset + OLAPStatus add_row(RowCursor* row) override; + + OLAPStatus add_row(const char* row, Schema* schema) override; + + OLAPStatus add_row_block(RowBlock* row_block) override; + + // add rowset by create hard link + OLAPStatus add_rowset(RowsetSharedPtr rowset) override; + OLAPStatus add_rowset_for_linked_schema_change( + RowsetSharedPtr rowset, const SchemaMapping& schema_mapping) override; + + OLAPStatus flush() override; + + // get a rowset + RowsetSharedPtr build() override; + + MemPool* mem_pool() override; + + Version version() override; + + int32_t num_rows() override; + + RowsetId rowset_id() override { + return _rowset_writer_context.rowset_id; + } + + OLAPStatus garbage_collection() override; + + DataDir* data_dir() override; + +private: + OLAPStatus _init(); + + // validate rowset build arguments before create rowset to make sure correctness + bool _validate_rowset(); + +private: + int32_t _segment_group_id; + SegmentGroup* _cur_segment_group; + ColumnDataWriter* _column_data_writer; + std::shared_ptr _current_rowset_meta; + bool _is_pending_rowset; + int _num_rows_written; + RowsetWriterContext _rowset_writer_context; + std::vector _segment_groups; + bool _rowset_build; + WriterState _writer_state; + // add_rowset does not need to call column_data_writer.finalize() + bool _need_column_data_writer; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ALPHA_ROWSET_WRITER_H diff --git a/be/src/olap/bit_field_reader.cpp b/be/src/olap/rowset/bit_field_reader.cpp similarity index 95% rename from be/src/olap/bit_field_reader.cpp rename to be/src/olap/rowset/bit_field_reader.cpp index 0252ec70c5f6cb..2042d37b29de06 100644 --- a/be/src/olap/bit_field_reader.cpp +++ b/be/src/olap/rowset/bit_field_reader.cpp @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#include "olap/bit_field_reader.h" +#include "olap/rowset/bit_field_reader.h" -#include "olap/column_reader.h" +#include "olap/rowset/column_reader.h" #include "olap/in_stream.h" -#include "olap/run_length_byte_reader.h" +#include "olap/rowset/run_length_byte_reader.h" namespace doris { @@ -127,5 +127,4 @@ OLAPStatus BitFieldReader::skip(uint64_t num_values) { return OLAP_SUCCESS; } -} // namespace doris - +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/bit_field_reader.h b/be/src/olap/rowset/bit_field_reader.h similarity index 89% rename from be/src/olap/bit_field_reader.h rename to be/src/olap/rowset/bit_field_reader.h index abc8a47e75833a..e1fd5a4f475fa4 100644 --- a/be/src/olap/bit_field_reader.h +++ b/be/src/olap/rowset/bit_field_reader.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_BIT_FIELD_READER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_BIT_FIELD_READER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_BIT_FIELD_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_BIT_FIELD_READER_H #include "olap/stream_index_reader.h" #include "olap/olap_define.h" @@ -50,4 +50,4 @@ class BitFieldReader { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_BIT_FIELD_READER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_BIT_FIELD_READER_H \ No newline at end of file diff --git a/be/src/olap/bit_field_writer.cpp b/be/src/olap/rowset/bit_field_writer.cpp similarity index 95% rename from be/src/olap/bit_field_writer.cpp rename to be/src/olap/rowset/bit_field_writer.cpp index 81c098ebab80cd..424c560d5fef71 100644 --- a/be/src/olap/bit_field_writer.cpp +++ b/be/src/olap/rowset/bit_field_writer.cpp @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "bit_field_writer.h" +#include "olap/rowset/bit_field_writer.h" #include -#include "olap/run_length_byte_writer.h" +#include "olap/rowset/run_length_byte_writer.h" namespace doris { @@ -99,4 +99,4 @@ void BitFieldWriter::get_position(PositionEntryWriter* index_entry) const { index_entry->add_position(8 - _bits_left); } -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/bit_field_writer.h b/be/src/olap/rowset/bit_field_writer.h similarity index 89% rename from be/src/olap/bit_field_writer.h rename to be/src/olap/rowset/bit_field_writer.h index 02a6e95aac0a41..4c8dacbdd5830b 100644 --- a/be/src/olap/bit_field_writer.h +++ b/be/src/olap/rowset/bit_field_writer.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_BIT_FIELD_WRITER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_BIT_FIELD_WRITER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_BIT_FIELD_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_BIT_FIELD_WRITER_H #include "olap/stream_index_writer.h" #include "olap/olap_define.h" @@ -48,4 +48,4 @@ class BitFieldWriter { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_BIT_FIELD_WRITER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_BIT_FIELD_WRITER_H \ No newline at end of file diff --git a/be/src/olap/column_data.cpp b/be/src/olap/rowset/column_data.cpp similarity index 88% rename from be/src/olap/column_data.cpp rename to be/src/olap/rowset/column_data.cpp index 17ad6f41da5e80..f00e195973b54b 100644 --- a/be/src/olap/column_data.cpp +++ b/be/src/olap/rowset/column_data.cpp @@ -15,43 +15,29 @@ // specific language governing permissions and limitations // under the License. 
-#include "column_data.h" +#include "olap/rowset/column_data.h" -#include "olap/segment_reader.h" +#include "olap/rowset/segment_reader.h" #include "olap/olap_cond.h" -#include "olap/olap_table.h" #include "olap/row_block.h" namespace doris { ColumnData* ColumnData::create(SegmentGroup* segment_group) { - ColumnData* data = NULL; - DataFileType file_type = segment_group->table()->data_file_type(); - - switch (file_type) { - case COLUMN_ORIENTED_FILE: - data = new(std::nothrow) ColumnData(segment_group); - break; - - default: - LOG(WARNING) << "unknown data file type. type=" << DataFileType_Name(file_type).c_str(); - } - + ColumnData* data = new(std::nothrow) ColumnData(segment_group); return data; } ColumnData::ColumnData(SegmentGroup* segment_group) - : _data_file_type(COLUMN_ORIENTED_FILE), - _segment_group(segment_group), + : _segment_group(segment_group), _eof(false), - _conditions(NULL), - _col_predicates(NULL), + _conditions(nullptr), + _col_predicates(nullptr), _delete_status(DEL_NOT_SATISFIED), - _runtime_state(NULL), + _runtime_state(nullptr), _is_using_cache(false), - _segment_reader(NULL) { - _table = segment_group->table(); - _num_rows_per_block = _table->num_rows_per_row_block(); + _segment_reader(nullptr) { + _num_rows_per_block = _segment_group->get_num_rows_per_row_block(); } ColumnData::~ColumnData() { @@ -62,10 +48,9 @@ ColumnData::~ColumnData() { OLAPStatus ColumnData::init() { _segment_group->acquire(); - auto res = _short_key_cursor.init(_segment_group->short_key_fields()); + auto res = _short_key_cursor.init(_segment_group->short_key_columns()); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "key cursor init failed, table:" << _table->id() - << ", res:" << res; + LOG(WARNING) << "key cursor init failed, res:" << res; return res; } return res; @@ -104,7 +89,7 @@ OLAPStatus ColumnData::_next_row(const RowCursor** row, bool without_filter) { return OLAP_SUCCESS; } else { DCHECK(_read_block->block_status() == DEL_PARTIAL_SATISFIED); - bool row_del_filter = _delete_handler.is_filter_data( + bool row_del_filter = _delete_handler->is_filter_data( _segment_group->version().second, _cursor); if (!row_del_filter) { *row = &_cursor; @@ -137,11 +122,11 @@ OLAPStatus ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool wi } SAFE_DELETE(_segment_reader); std::string file_name; - file_name = segment_group()->construct_data_file_path(segment_group()->segment_group_id(), block_pos.segment); + file_name = segment_group()->construct_data_file_path(block_pos.segment); _segment_reader = new(std::nothrow) SegmentReader( - file_name, _table, segment_group(), block_pos.segment, + file_name, segment_group(), block_pos.segment, _seek_columns, _load_bf_columns, _conditions, - _col_predicates, _delete_handler, _delete_status, _runtime_state, _stats); + _delete_handler, _delete_status, _lru_cache, _runtime_state, _stats); if (_segment_reader == nullptr) { OLAP_LOG_WARNING("fail to malloc segment reader."); return OLAP_ERR_MALLOC_ERROR; @@ -285,8 +270,9 @@ OLAPStatus ColumnData::_find_position_by_full_key( OLAPStatus ColumnData::_seek_to_row(const RowCursor& key, bool find_last_key, bool is_end_key) { RowBlockPosition position; OLAPStatus res = OLAP_SUCCESS; - FieldType type = _table->get_field_type_by_index(key.field_count() - 1); - if (key.field_count() > _table->num_short_key_fields() || OLAP_FIELD_TYPE_VARCHAR == type) { + const TabletSchema& tablet_schema = _segment_group->get_tablet_schema(); + FieldType type = tablet_schema.column(key.field_count() - 1).type(); + if 
(key.field_count() > _segment_group->get_num_short_key_columns() || OLAP_FIELD_TYPE_VARCHAR == type) { res = _find_position_by_full_key(key, find_last_key, &position); } else { res = _find_position_by_short_key(key, find_last_key, &position); @@ -420,7 +406,7 @@ OLAPStatus ColumnData::prepare_block_read( // 在这个函数里,合并上述几种情况 void ColumnData::set_read_params( const std::vector& return_columns, - const std::vector& seek_solumns, + const std::vector& seek_columns, const std::set& load_bf_columns, const Conditions& conditions, const std::vector& col_predicates, @@ -432,21 +418,21 @@ void ColumnData::set_read_params( _is_using_cache = is_using_cache; _runtime_state = runtime_state; _return_columns = return_columns; - _seek_columns = seek_solumns; + _seek_columns = seek_columns; _load_bf_columns = load_bf_columns; - auto res = _cursor.init(_table->tablet_schema(), _seek_columns); + auto res = _cursor.init(_segment_group->get_tablet_schema(), _seek_columns); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init row_cursor"); + LOG(WARNING) << "fail to init row_cursor"; } _read_vector_batch.reset(new VectorizedRowBatch( - _table->tablet_schema(), _return_columns, _num_rows_per_block)); + &(_segment_group->get_tablet_schema()), _return_columns, _num_rows_per_block)); _seek_vector_batch.reset(new VectorizedRowBatch( - _table->tablet_schema(), _seek_columns, _num_rows_per_block)); + &(_segment_group->get_tablet_schema()), _seek_columns, _num_rows_per_block)); - _read_block.reset(new RowBlock(_table->tablet_schema())); + _read_block.reset(new RowBlock(&(_segment_group->get_tablet_schema()))); RowBlockInfo block_info; block_info.row_num = _num_rows_per_block; block_info.null_supported = true; @@ -459,15 +445,9 @@ OLAPStatus ColumnData::get_first_row_block(RowBlock** row_block) { _is_normal_read = true; _eof = false; - auto res = _schema_change_init(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "failed to initial for schema change block read, res:" << res; - return res; - } - // to be same with OLAPData, we use segment_group. RowBlockPosition block_pos; - res = segment_group()->find_first_row_block(&block_pos); + OLAPStatus res = segment_group()->find_first_row_block(&block_pos); if (res != OLAP_SUCCESS) { if (res == OLAP_ERR_INDEX_EOF) { *row_block = nullptr; @@ -515,26 +495,26 @@ OLAPStatus ColumnData::get_next_row_block(RowBlock** row_block) { return OLAP_SUCCESS; } -bool ColumnData::delta_pruning_filter() { +bool ColumnData::rowset_pruning_filter() { if (empty() || zero_num_rows()) { return true; } - if (!_segment_group->has_column_statistics()) { + if (!_segment_group->has_zone_maps()) { return false; } - return _conditions->delta_pruning_filter(_segment_group->get_column_statistics()); + return _conditions->rowset_pruning_filter(_segment_group->get_zone_maps()); } int ColumnData::delete_pruning_filter() { if (empty() || zero_num_rows()) { - // should return DEL_NOT_SATISFIED, because that when creating rollup table, + // should return DEL_NOT_SATISFIED, because that when creating rollup tablet, // the delete version file should preserved for filter data. 
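+        // (In this patch DEL_SATISFIED means the whole segment group can be filtered
+        // out by the delete conditions, DEL_PARTIAL_SATISFIED means rows still have
+        // to be checked one by one against the delete handler, and DEL_NOT_SATISFIED
+        // means the delete conditions filter nothing for this segment group.)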
return DEL_NOT_SATISFIED; } - if (false == _segment_group->has_column_statistics()) { + if (false == _segment_group->has_zone_maps()) { /* * if segment_group has no column statistics, we cannot judge whether the data can be filtered or not */ @@ -550,13 +530,13 @@ int ColumnData::delete_pruning_filter() { int ret = DEL_PARTIAL_SATISFIED; bool del_partial_stastified = false; bool del_stastified = false; - for (auto& delete_condtion : _delete_handler.get_delete_conditions()) { + for (auto& delete_condtion : _delete_handler->get_delete_conditions()) { if (delete_condtion.filter_version <= _segment_group->version().first) { continue; } Conditions* del_cond = delete_condtion.del_cond; - int del_ret = del_cond->delete_pruning_filter(_segment_group->get_column_statistics()); + int del_ret = del_cond->delete_pruning_filter(_segment_group->get_zone_maps()); if (DEL_SATISFIED == del_ret) { del_stastified = true; break; @@ -582,24 +562,24 @@ uint64_t ColumnData::get_filted_rows() { return _stats->rows_del_filtered; } -OLAPStatus ColumnData::_schema_change_init() { +OLAPStatus ColumnData::schema_change_init() { _is_using_cache = false; - for (int i = 0; i < _table->tablet_schema().size(); ++i) { + for (int i = 0; i < _segment_group->get_tablet_schema().num_columns(); ++i) { _return_columns.push_back(i); _seek_columns.push_back(i); } - auto res = _cursor.init(_table->tablet_schema()); + auto res = _cursor.init(_segment_group->get_tablet_schema()); if (res != OLAP_SUCCESS) { OLAP_LOG_WARNING("fail to init row_cursor"); return res; } _read_vector_batch.reset(new VectorizedRowBatch( - _table->tablet_schema(), _return_columns, _num_rows_per_block)); + &(_segment_group->get_tablet_schema()), _return_columns, _num_rows_per_block)); - _read_block.reset(new RowBlock(_table->tablet_schema())); + _read_block.reset(new RowBlock(&(_segment_group->get_tablet_schema()))); RowBlockInfo block_info; block_info.row_num = _num_rows_per_block; diff --git a/be/src/olap/column_data.h b/be/src/olap/rowset/column_data.h similarity index 92% rename from be/src/olap/column_data.h rename to be/src/olap/rowset/column_data.h index 9e1dfa6836950c..19b9158b3c7a49 100644 --- a/be/src/olap/column_data.h +++ b/be/src/olap/rowset/column_data.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_DATA_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_DATA_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_COLUMN_DATA_H +#define DORIS_BE_SRC_OLAP_ROWSET_COLUMN_DATA_H #include #include @@ -26,16 +26,14 @@ #include "olap/delete_handler.h" #include "olap/olap_common.h" #include "olap/olap_cond.h" -#include "olap/segment_group.h" +#include "olap/rowset/segment_group.h" #include "olap/row_block.h" #include "olap/row_cursor.h" #include "util/runtime_profile.h" namespace doris { -class OLAPTable; - - +class Tablet; class SegmentReader; // This class is column data reader. this class will be used in two case. 
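For orientation, the query-read path in this patch drives ColumnData roughly as in the condensed sketch below. This is an illustration based on AlphaRowsetReader::_init_merge_ctxs() and _pull_next_block(), not code from the patch itself; error handling and the schema-change path (which calls schema_change_init() instead of set_read_params()) are omitted, and the variable names are placeholders.

    std::unique_ptr<ColumnData> data(ColumnData::create(segment_group));
    data->init();
    data->set_delete_handler(delete_handler);   // const DeleteHandler*
    data->set_stats(&stats);                    // OlapReaderStatistics*
    data->set_lru_cache(lru_cache);
    data->set_read_params(return_columns, seek_columns, load_bf_columns,
                          conditions, predicates, is_using_cache, runtime_state);
    RowBlock* block = nullptr;
    OLAPStatus st = data->get_first_row_block(&block);
    while (st == OLAP_SUCCESS) {
        // consume rows in [block->pos(), block->limit()) ...
        st = data->get_next_row_block(&block);
    }
    // any status other than OLAP_SUCCESS (e.g. an EOF code) ends the scan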
@@ -59,11 +57,6 @@ class ColumnData { return _segment_group->num_segments(); } - // 查询数据文件类型 - DataFileType data_file_type() { - return _data_file_type; - } - OLAPStatus init(); OLAPStatus prepare_block_read( @@ -88,12 +81,19 @@ class ColumnData { // Only used to binary search in full-key find row const RowCursor* seek_and_get_current_row(const RowBlockPosition& position); + void set_using_cache(bool is_using_cache) { + _is_using_cache = is_using_cache; + } + + void set_lru_cache(Cache* lru_cache) { + _lru_cache = lru_cache; + } void set_stats(OlapReaderStatistics* stats) { _stats = stats; } - void set_delete_handler(const DeleteHandler& delete_handler) { + void set_delete_handler(const DeleteHandler* delete_handler) { _delete_handler = delete_handler; } @@ -110,21 +110,21 @@ class ColumnData { bool empty() const { return _segment_group->empty(); } bool zero_num_rows() const { return _segment_group->zero_num_rows(); } - bool delta_pruning_filter(); + bool rowset_pruning_filter(); int delete_pruning_filter(); uint64_t get_filted_rows(); SegmentGroup* segment_group() const { return _segment_group; } void set_segment_group(SegmentGroup* segment_group) { _segment_group = segment_group; } int64_t num_rows() const { return _segment_group->num_rows(); } - - const std::vector& seek_columns() const { return _seek_columns; } -private: - DISALLOW_COPY_AND_ASSIGN(ColumnData); + Tablet* tablet() const { return _tablet; } // To compatable with schmea change read, use this function to init column data // for schema change read. Only called in get_first_row_block - OLAPStatus _schema_change_init(); + OLAPStatus schema_change_init(); + +private: + DISALLOW_COPY_AND_ASSIGN(ColumnData); // Try to seek to 'key'. If this funciton returned with OLAP_SUCCESS, current_row() // point to the first row meet the requirement. @@ -159,19 +159,17 @@ class ColumnData { return &_cursor; } private: - DataFileType _data_file_type; SegmentGroup* _segment_group; // 当到达文件末尾或者到达end key时设置此标志 bool _eof; const Conditions* _conditions; const std::vector* _col_predicates; - DeleteHandler _delete_handler; + const DeleteHandler*_delete_handler = nullptr; DelCondSatisfied _delete_status; RuntimeState* _runtime_state; - OlapReaderStatistics _owned_stats; - OlapReaderStatistics* _stats = &_owned_stats; + OlapReaderStatistics* _stats; - OLAPTable* _table; + Tablet* _tablet; // whether in normal read, use return columns to load block bool _is_normal_read = false; bool _end_key_is_set = false; @@ -182,7 +180,7 @@ class ColumnData { std::vector _return_columns; std::vector _seek_columns; std::set _load_bf_columns; - + SegmentReader* _segment_reader; std::unique_ptr _seek_vector_batch; @@ -202,6 +200,7 @@ class ColumnData { int64_t _end_row_index = 0; size_t _num_rows_per_block; + Cache* _lru_cache; }; class ColumnDataComparator { @@ -256,4 +255,4 @@ class ColumnDataComparator { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_DATA_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_COLUMN_DATA_H diff --git a/be/src/olap/data_writer.cpp b/be/src/olap/rowset/column_data_writer.cpp similarity index 62% rename from be/src/olap/data_writer.cpp rename to be/src/olap/rowset/column_data_writer.cpp index 7fd7e039fb37cb..e8b5dd5b90cee4 100644 --- a/be/src/olap/data_writer.cpp +++ b/be/src/olap/rowset/column_data_writer.cpp @@ -15,38 +15,33 @@ // specific language governing permissions and limitations // under the License. 
-#include "olap/data_writer.h" +#include "olap/rowset/column_data_writer.h" #include -#include "olap/segment_writer.h" -#include "olap/segment_group.h" +#include "olap/rowset/segment_writer.h" +#include "olap/rowset/segment_group.h" #include "olap/row_block.h" namespace doris { -ColumnDataWriter* ColumnDataWriter::create(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write) { - ColumnDataWriter* writer = NULL; - switch (table->data_file_type()) { - case COLUMN_ORIENTED_FILE: - writer = new (std::nothrow) ColumnDataWriter(table, segment_group, is_push_write); - break; - default: - LOG(WARNING) << "unknown data file type. type=" << DataFileType_Name(table->data_file_type()); - break; - } - +ColumnDataWriter* ColumnDataWriter::create(SegmentGroup* segment_group, bool is_push_write, + CompressKind compress_kind, double bloom_filter_fpp) { + ColumnDataWriter* writer = new (std::nothrow) ColumnDataWriter(segment_group, is_push_write, + compress_kind, bloom_filter_fpp); return writer; } -ColumnDataWriter::ColumnDataWriter(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write) - : _is_push_write(is_push_write), - _table(table), - _column_statistics(_table->num_key_fields(), - std::pair(NULL, NULL)), +ColumnDataWriter::ColumnDataWriter(SegmentGroup* segment_group, + bool is_push_write, CompressKind compress_kind, + double bloom_filter_fpp) + : _segment_group(segment_group), + _is_push_write(is_push_write), + _compress_kind(compress_kind), + _bloom_filter_fpp(bloom_filter_fpp), + _zone_maps(segment_group->get_num_key_columns(), KeyRange(NULL, NULL)), _row_index(0), - _segment_group(segment_group), _row_block(NULL), _segment_writer(NULL), _num_rows(0), @@ -55,14 +50,12 @@ ColumnDataWriter::ColumnDataWriter(OLAPTablePtr table, SegmentGroup* segment_gro _segment(0), _all_num_rows(0), _new_segment_created(false) -{ - init(); -} +{ } ColumnDataWriter::~ColumnDataWriter() { - for (size_t i = 0; i < _column_statistics.size(); ++i) { - SAFE_DELETE(_column_statistics[i].first); - SAFE_DELETE(_column_statistics[i].second); + for (size_t i = 0; i < _zone_maps.size(); ++i) { + SAFE_DELETE(_zone_maps[i].first); + SAFE_DELETE(_zone_maps[i].second); } SAFE_DELETE(_row_block); SAFE_DELETE(_segment_writer); @@ -71,43 +64,42 @@ ColumnDataWriter::~ColumnDataWriter() { OLAPStatus ColumnDataWriter::init() { OLAPStatus res = OLAP_SUCCESS; - for (size_t i = 0; i < _column_statistics.size(); ++i) { - _column_statistics[i].first = WrapperField::create(_table->tablet_schema()[i]); - DCHECK(_column_statistics[i].first != nullptr) << "fail to create column statistics field."; - _column_statistics[i].first->set_to_max(); + for (size_t i = 0; i < _zone_maps.size(); ++i) { + _zone_maps[i].first = WrapperField::create(_segment_group->get_tablet_schema().column(i)); + DCHECK(_zone_maps[i].first != nullptr) << "fail to create column statistics field."; + _zone_maps[i].first->set_to_max(); - _column_statistics[i].second = WrapperField::create(_table->tablet_schema()[i]); - DCHECK(_column_statistics[i].second != nullptr) << "fail to create column statistics field."; - _column_statistics[i].second->set_null(); - _column_statistics[i].second->set_to_min(); + _zone_maps[i].second = WrapperField::create(_segment_group->get_tablet_schema().column(i)); + DCHECK(_zone_maps[i].second != nullptr) << "fail to create column statistics field."; + _zone_maps[i].second->set_null(); + _zone_maps[i].second->set_to_min(); } - double size = static_cast(_table->segment_size()); + double size = 
static_cast(OLAP_MAX_COLUMN_SEGMENT_FILE_SIZE); size *= OLAP_COLUMN_FILE_SEGMENT_SIZE_SCALE; _max_segment_size = static_cast(lround(size)); - _row_block = new(std::nothrow) RowBlock(_table->tablet_schema()); + _row_block = new(std::nothrow) RowBlock(&(_segment_group->get_tablet_schema())); if (NULL == _row_block) { - LOG(WARNING) << "fail to new RowBlock. [table='" << _table->full_name() << "']"; + LOG(WARNING) << "fail to new RowBlock."; return OLAP_ERR_MALLOC_ERROR; } - res = _cursor.init(_table->tablet_schema()); + res = _cursor.init(_segment_group->get_tablet_schema()); if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("fail to initiate row cursor. [res=%d]", res); + LOG(WARNING) << "fail to initiate row cursor. [res=" << res << "]"; return res; } - VLOG(3) << "init ColumnData writer. [table='" << _table->full_name() - << "' block_row_size=" << _table->num_rows_per_row_block() << "]"; - RowBlockInfo block_info(0U, _table->num_rows_per_row_block()); - block_info.data_file_type = DataFileType::COLUMN_ORIENTED_FILE; + VLOG(3) << "init ColumnData writer. segment_group_id=" << _segment_group->segment_group_id() + << ", block_row_number=" << _segment_group->get_num_rows_per_row_block(); + RowBlockInfo block_info(0U, _segment_group->get_num_rows_per_row_block()); block_info.null_supported = true; res = _row_block->init(block_info); if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("fail to initiate row block. [res=%d]", res); + LOG(WARNING) << "fail to initiate row block. [res=" << res << "]"; return res; } return OLAP_SUCCESS; @@ -116,13 +108,13 @@ OLAPStatus ColumnDataWriter::init() { OLAPStatus ColumnDataWriter::_init_segment() { OLAPStatus res = _add_segment(); if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("fail to add segment. [res=%d]", res); + LOG(WARNING) << "fail to add segment. [res=" << res << "]"; return res; } res = _segment_group->add_segment(); if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("fail to add index segment. [res=%d]", res); + LOG(WARNING) << "fail to add index segment. 
[res=" << res << "]"; return res; } @@ -130,40 +122,42 @@ OLAPStatus ColumnDataWriter::_init_segment() { return res; } -OLAPStatus ColumnDataWriter::attached_by(RowCursor* row_cursor) { - if (_row_index >= _table->num_rows_per_row_block()) { +OLAPStatus ColumnDataWriter::write(RowCursor* row_cursor) { + _row_block->set_row(_row_index, *row_cursor); + next(*row_cursor); + if (_row_index >= _segment_group->get_num_rows_per_row_block()) { if (OLAP_SUCCESS != _flush_row_block(false)) { - OLAP_LOG_WARNING("failed to flush data while attaching row cursor."); + LOG(WARNING) << "failed to flush data while attaching row cursor."; return OLAP_ERR_OTHER_ERROR; } RETURN_NOT_OK(_flush_segment_with_verfication()); } - _row_block->get_row(_row_index, row_cursor); return OLAP_SUCCESS; } -OLAPStatus ColumnDataWriter::write(const char* row) { - if (_row_index >= _table->num_rows_per_row_block()) { +OLAPStatus ColumnDataWriter::write(const char* row, const Schema* schema) { + _row_block->set_row(_row_index, row); + next(row, schema); + if (_row_index >= _segment_group->get_num_rows_per_row_block()) { if (OLAP_SUCCESS != _flush_row_block(false)) { - OLAP_LOG_WARNING("failed to flush data while attaching row cursor."); + LOG(WARNING) << "failed to flush data while attaching row cursor."; return OLAP_ERR_OTHER_ERROR; } RETURN_NOT_OK(_flush_segment_with_verfication()); } - _row_block->set_row(_row_index, row); return OLAP_SUCCESS; } void ColumnDataWriter::next(const RowCursor& row_cursor) { - for (size_t i = 0; i < _table->num_key_fields(); ++i) { + for (size_t i = 0; i < _segment_group->get_num_key_columns(); ++i) { char* right = row_cursor.get_field_by_index(i)->get_field_ptr(row_cursor.get_buf()); - if (_column_statistics[i].first->cmp(right) > 0) { - _column_statistics[i].first->copy(right); + if (_zone_maps[i].first->cmp(right) > 0) { + _zone_maps[i].first->copy(right); } - if (_column_statistics[i].second->cmp(right) < 0) { - _column_statistics[i].second->copy(right); + if (_zone_maps[i].second->cmp(right) < 0) { + _zone_maps[i].second->copy(right); } } @@ -171,14 +165,14 @@ void ColumnDataWriter::next(const RowCursor& row_cursor) { } void ColumnDataWriter::next(const char* row, const Schema* schema) { - for (size_t i = 0; i < _table->num_key_fields(); ++i) { + for (size_t i = 0; i < _segment_group->get_num_key_columns(); ++i) { char* right = const_cast(row + schema->get_col_offset(i)); - if (_column_statistics[i].first->cmp(right) > 0) { - _column_statistics[i].first->copy(right); + if (_zone_maps[i].first->cmp(right) > 0) { + _zone_maps[i].first->copy(right); } - if (_column_statistics[i].second->cmp(right) < 0) { - _column_statistics[i].second->copy(right); + if (_zone_maps[i].second->cmp(right) < 0) { + _zone_maps[i].second->copy(right); } } @@ -190,6 +184,21 @@ OLAPStatus ColumnDataWriter::finalize() { _segment_group->set_empty(true); return OLAP_SUCCESS; } + + // Segment which size reaches OLAP_MAX_COLUMN_SEGMENT_FILE_SIZE + // will be flushed into disk. If the previous segment reach + // the threshold just right, and been flushed into disk. + // The following finalize() when closing ColumnDataWriter + // will generate a non-sense segment. + // In this scenario, undefined behavior will happens. + if (_num_rows == 0 && _row_index == 0) { + // If the two conditions are all satisfied, + // it dedicates that there is no necessity + // to generate segment object and file. + // Return OLAP_SUCCESS is OK. 
+ return OLAP_SUCCESS; + } + OLAPStatus res = _flush_row_block(true); if (OLAP_SUCCESS != res) { OLAP_LOG_WARNING("failed to flush data while attaching row cursor.[res=%d]", res); @@ -198,13 +207,15 @@ OLAPStatus ColumnDataWriter::finalize() { res = _finalize_segment(); if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("fail to finalize segment.[res=%d]", res); + LOG(WARNING) << "fail to finalize segment. res=" << res + << ", _row_index=" << _row_index + << ", _all_num_rows=" << _all_num_rows; return res; } - res = _segment_group->add_column_statistics(_column_statistics); + res = _segment_group->add_zone_maps(_zone_maps); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("Fail to set delta pruning![res=%d]", res); + LOG(WARNING) << "Fail to set zone_map! res=" << res; return res; } @@ -244,7 +255,6 @@ OLAPStatus ColumnDataWriter::_flush_row_block(bool finalize) { _num_rows += _row_index; _all_num_rows += _row_index; _row_index = 0; - return OLAP_SUCCESS; } @@ -256,9 +266,9 @@ OLAPStatus ColumnDataWriter::_add_segment() { return OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED; } - file_name = _segment_group->construct_data_file_path(_segment_group->segment_group_id(), _segment); - _segment_writer = new(std::nothrow) SegmentWriter(file_name, _table, - OLAP_DEFAULT_COLUMN_STREAM_BUFFER_SIZE); + file_name = _segment_group->construct_data_file_path(_segment); + _segment_writer = new(std::nothrow) SegmentWriter(file_name, _segment_group, + OLAP_DEFAULT_COLUMN_STREAM_BUFFER_SIZE, _compress_kind, _bloom_filter_fpp); if (NULL == _segment_writer) { OLAP_LOG_WARNING("fail to allocate SegmentWriter"); @@ -327,5 +337,8 @@ MemPool* ColumnDataWriter::mem_pool() { return _row_block->mem_pool(); } -} // namespace doris +CompressKind ColumnDataWriter::compress_kind() { + return _compress_kind; +} +} // namespace doris diff --git a/be/src/olap/data_writer.h b/be/src/olap/rowset/column_data_writer.h similarity index 70% rename from be/src/olap/data_writer.h rename to be/src/olap/rowset/column_data_writer.h index d27c006daa911f..0f583981259f0e 100644 --- a/be/src/olap/data_writer.h +++ b/be/src/olap/rowset/column_data_writer.h @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_DATA_WRITER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_DATA_WRITER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_COLUMN_DATA_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_COLUMN_DATA_WRITER_H -#include "olap/olap_table.h" +#include "olap/rowset/segment_group.h" #include "olap/row_block.h" #include "olap/schema.h" #include "olap/wrapper_field.h" +#include "gen_cpp/olap_common.pb.h" namespace doris { class RowBlock; @@ -31,17 +32,21 @@ class ColumnDataWriter { public: // Factory function // 调用者获得新建的对象, 并负责delete释放 - static ColumnDataWriter* create(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write); - ColumnDataWriter(OLAPTablePtr table, SegmentGroup* segment_group, bool is_push_write); + static ColumnDataWriter* create(SegmentGroup* segment_group, bool is_push_write, + CompressKind compress_kind, double bloom_filter_fpp); + ColumnDataWriter(SegmentGroup* segment_group, bool is_push_write, + CompressKind compress_kind, double bloom_filter_fpp); ~ColumnDataWriter(); OLAPStatus init(); - OLAPStatus attached_by(RowCursor* row_cursor); - OLAPStatus write(const char* row); + OLAPStatus write(RowCursor* row_cursor); + OLAPStatus write(const char* row, const Schema* schema); void next(const RowCursor& row_cursor); void next(const char* row, const Schema* schema); OLAPStatus finalize(); uint64_t written_bytes(); MemPool* mem_pool(); + CompressKind compress_kind(); + private: OLAPStatus _add_segment(); OLAPStatus _flush_segment_with_verfication(); @@ -49,13 +54,15 @@ class ColumnDataWriter { OLAPStatus _flush_row_block(bool finalize); OLAPStatus _init_segment(); +private: + SegmentGroup* _segment_group; bool _is_push_write; - OLAPTablePtr _table; + CompressKind _compress_kind; + double _bloom_filter_fpp; // first is min, second is max - std::vector> _column_statistics; + std::vector> _zone_maps; uint32_t _row_index; - SegmentGroup* _segment_group; RowBlock* _row_block; // 使用RowBlcok缓存要写入的数据 RowCursor _cursor; SegmentWriter* _segment_writer; @@ -69,4 +76,4 @@ class ColumnDataWriter { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_DATA_WRITER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_COLUMN_DATA_WRITER_H diff --git a/be/src/olap/column_reader.cpp b/be/src/olap/rowset/column_reader.cpp similarity index 83% rename from be/src/olap/column_reader.cpp rename to be/src/olap/rowset/column_reader.cpp index 7cee100491882d..6a262a06d9a335 100644 --- a/be/src/olap/column_reader.cpp +++ b/be/src/olap/rowset/column_reader.cpp @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. +#include "olap/rowset/column_reader.h" + #include -#include "olap/bit_field_reader.h" -#include "olap/column_reader.h" +#include "olap/rowset/bit_field_reader.h" #include "olap/file_stream.h" #include "olap/olap_define.h" @@ -289,97 +290,6 @@ StringColumnDictionaryReader::~StringColumnDictionaryReader() { SAFE_DELETE_ARRAY(_read_buffer); } -/* - -// TODO.改为先解析成字典,不过看起来也不会太快,因为这里会全部解析完,而放在后边解析可能能省点资源 -// 后边再测,先保留代码 - -OLAPStatus StringColumnDictionaryReader::init(std::map *streams, - UniqueIdEncodingMap* encodings, - RuntimeProfile* profile) { - ReadOnlyFileStream* dictionary_data_stream = extract_stream(_column_unique_id, - StreamInfoMessage::DICTIONARY_DATA, - streams); - if (NULL == dictionary_data_stream) { - OLAP_LOG_WARNING("dictionary data stream not found. 
[unique id = %u]", - _column_unique_id); - return OLAP_ERR_COLUMN_STREAM_NOT_EXIST; - } - if (dictionary_data_stream->stream_length() > 0) { - _dictionary_data_buffer = StorageByteBuffer::create( - dictionary_data_stream->estimate_uncompressed_length()); - size_t offset = 0; - size_t length = 0; - // TODO. stream 还需要修改,使之真正能够方便的读取 - while (0 != (length = dictionary_data_stream->available())) { - dictionary_data_stream->read(_dictionary_data_buffer->array() + offset, &length); - offset += length; - } - } else { - _dictionary_data_buffer = NULL; - } - - UniqueIdEncodingMap::iterator it = encodings->find(_column_unique_id); - if (it == encodings->end()) { - OLAP_LOG_WARNING("encoding not found. [unique id = %u]", _column_unique_id); - return OLAP_ERR_COLUMN_STREAM_NOT_EXIST; - } - uint64_t dictionary_size = (*it).second.dictionary_size(); - // 建立字典偏移列表 - ReadOnlyFileStream* dictionary_length_stream = extract_stream(_column_unique_id, - StreamInfoMessage::LENGTH, - streams); - if (NULL == dictionary_length_stream) { - OLAP_LOG_WARNING("dictionary length stream not found. [unique id = %u]", - _column_unique_id); - return OLAP_ERR_COLUMN_STREAM_NOT_EXIST; - } - RunLengthIntegerReader* dictionary_length_reader = - new (std::nothrow) RunLengthIntegerReader(dictionary_length_stream, true); - uint64_t offset = 0; - // 如果上次分配的空间足够多,这次可以不分配 - if (dictionary_size + 1 > _dictionary_size || NULL == _offset_dictionary) { - SAFE_DELETE_ARRAY(_offset_dictionary); - _dictionary_size = dictionary_size + 1; - _offset_dictionary = new (std::nothrow) uint64_t[_dictionary_size]; - if (NULL == _offset_dictionary) { - OLAP_LOG_WARNING("fail to allocate dictionary buffer"); - return OLAP_ERR_MALLOC_ERROR; - } - } - // 应该只有dictionary_size 项,最后一个单位保存一个“不存在的”位置, - // 也就是最后一个字符串的终止位置,这样做是为了支持偏移计算的算法不用处理边界 - int64_t value = 0; - OLAPStatus res = OLAP_SUCCESS; - size_t dictionary_entry = 0; - for (; dictionary_entry < dictionary_size; ++dictionary_entry) { - _offset_dictionary[dictionary_entry] = offset; - res = dictionary_length_reader->next(&value); - // 理论上应该足够读,读出eof也是不对的。 - if (OLAP_SUCCESS != res && OLAP_ERR_DATA_EOF != res) { - OLAP_LOG_WARNING("build offset dictionary failed. [res = %d]", res); - return res; - } - offset += value; - } - _offset_dictionary[dictionary_entry] = offset; - // 建立数据流读取器 - ReadOnlyFileStream* data_stream = extract_stream(_column_unique_id, - StreamInfoMessage::DATA, - streams); - if (NULL == data_stream) { - OLAP_LOG_WARNING("data stream not found. 
[unique id = %u]", _column_unique_id); - return OLAP_ERR_COLUMN_STREAM_NOT_EXIST; - } - _data_reader = new (std::nothrow) RunLengthIntegerReader(data_stream, true); - if (NULL == _data_reader) { - OLAP_LOG_WARNING("fail to malloc data reader"); - return OLAP_ERR_MALLOC_ERROR; - } - return OLAP_SUCCESS; -} -*/ - OLAPStatus StringColumnDictionaryReader::init( std::map* streams, int size, MemPool* mem_pool) { @@ -407,43 +317,6 @@ OLAPStatus StringColumnDictionaryReader::init( new(std::nothrow) RunLengthIntegerReader(dictionary_length_stream, false); OLAPStatus res = OLAP_SUCCESS; - /* - uint64_t offset = 0; - int64_t value = 0; - size_t length_remain = 0; - size_t length_to_read = 0; - size_t read_buffer_size = 1024; - StorageByteBuffer* read_buffer = StorageByteBuffer::create(read_buffer_size); - if (NULL == read_buffer) { - OLAP_LOG_WARNING("fail to malloc StorageByteBuffer"); - return OLAP_ERR_MALLOC_ERROR; - } - - for (size_t dictionary_entry = 0; dictionary_entry < dictionary_size; ++dictionary_entry) { - res = dictionary_length_reader->next(&value); - // 理论上应该足够读,读出eof也是不对的。 - if (OLAP_SUCCESS != res && OLAP_ERR_DATA_EOF != res) { - OLAP_LOG_WARNING("build offset dictionary failed. [res = %d]", res); - return res; - } - // 其实为offset,长度为value的string - length_remain = value; - std::string dictionary_item; - while (length_remain != 0) { - length_to_read = std::min(length_remain, read_buffer_size); - res = dictionary_data_stream->read(read_buffer->array(), &length_to_read); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("read dictionary content failed"); - return res; - } - dictionary_item.append(read_buffer->array(), length_to_read); - length_remain -= length_to_read; - } - _dictionary.push_back(dictionary_item); - offset += value; - } - */ - _values = reinterpret_cast(mem_pool->allocate(size * sizeof(Slice))); int64_t read_buffer_size = 1024; char* _read_buffer = new(std::nothrow) char[read_buffer_size]; @@ -620,35 +493,44 @@ ColumnReader::ColumnReader(uint32_t column_id, uint32_t column_unique_id) : _present_reader(NULL) { } + ColumnReader* ColumnReader::create(uint32_t column_id, + const TabletSchema& schema, + const UniqueIdToColumnIdMap& included, + UniqueIdToColumnIdMap& segment_included, + const UniqueIdEncodingMap& encodings) { + return create(column_id, schema.columns(), included, segment_included, encodings); +} + ColumnReader* ColumnReader::create(uint32_t column_id, - const std::vector& columns, + const std::vector& schema, const UniqueIdToColumnIdMap& included, UniqueIdToColumnIdMap& segment_included, const UniqueIdEncodingMap& encodings) { - if (column_id >= columns.size()) { - OLAP_LOG_WARNING("invalid column_id, column_id=%u, columns_size=%lu", - column_id, columns.size()); + if (column_id >= schema.size()) { + LOG(WARNING) << "invalid column_id, column_id=" << column_id + << ", columns_size=" << schema.size(); return NULL; } - const FieldInfo& field_info = columns[column_id]; + const TabletColumn& column = schema[column_id]; ColumnReader* reader = NULL; - uint32_t column_unique_id = field_info.unique_id; + int32_t column_unique_id = column.unique_id(); if (0 == included.count(column_unique_id)) { return NULL; } if (0 == segment_included.count(column_unique_id)) { - if (field_info.has_default_value) { - if (0 == strcasecmp("NULL", field_info.default_value.c_str()) - && field_info.is_allow_null) { + if (column.has_default_value()) { + if (0 == strcasecmp("NULL", column.default_value().c_str()) + && column.is_nullable()) { return new(std::nothrow) NullValueReader(column_id, 
column_unique_id); } else { return new(std::nothrow) DefaultValueReader(column_id, column_unique_id, - field_info.default_value, field_info.type, field_info.length); + column.default_value(), column.type(), column.length()); } - } else if (field_info.is_allow_null) { + } else if (column.is_nullable()) { + LOG(WARNING) << "create NullValueReader: " << column.name(); return new(std::nothrow) NullValueReader(column_id, column_unique_id); } else { OLAP_LOG_WARNING("not null field has no default value"); @@ -665,7 +547,7 @@ ColumnReader* ColumnReader::create(uint32_t column_id, dictionary_size = (*it).second.dictionary_size(); } - switch (field_info.type) { + switch (column.type()) { case OLAP_FIELD_TYPE_TINYINT: case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: { reader = new(std::nothrow) TinyColumnReader(column_id, column_unique_id); @@ -726,10 +608,10 @@ ColumnReader* ColumnReader::create(uint32_t column_id, case OLAP_FIELD_TYPE_CHAR: { if (ColumnEncodingMessage::DIRECT == encode_kind) { reader = new(std::nothrow) FixLengthStringColumnReader( - column_id, column_unique_id, field_info.length, dictionary_size); + column_id, column_unique_id, column.length(), dictionary_size); } else if (ColumnEncodingMessage::DICTIONARY == encode_kind) { reader = new(std::nothrow) FixLengthStringColumnReader( - column_id, column_unique_id, field_info.length, dictionary_size); + column_id, column_unique_id, column.length(), dictionary_size); } else { OLAP_LOG_WARNING("known encoding format. data may be generated by higher version," "try updating olap/ngine binary to solve this problem"); @@ -765,13 +647,13 @@ ColumnReader* ColumnReader::create(uint32_t column_id, case OLAP_FIELD_TYPE_HLL: { if (ColumnEncodingMessage::DIRECT == encode_kind) { reader = new(std::nothrow) VarStringColumnReader( - column_id, column_unique_id, field_info.length, dictionary_size); + column_id, column_unique_id, column.length(), dictionary_size); } else if (ColumnEncodingMessage::DICTIONARY == encode_kind) { reader = new(std::nothrow) VarStringColumnReader( - column_id, column_unique_id, field_info.length, dictionary_size); + column_id, column_unique_id, column.length(), dictionary_size); } else { - OLAP_LOG_WARNING("known encoding format. data may be generated by higher version, " - "try updating olap/ngine binary to solve this problem"); + LOG(WARNING) << "known encoding format. data may be generated by higher version, " + << "try updating olap/ngine binary to solve this problem"; // TODO. define a new return code return NULL; } @@ -783,29 +665,12 @@ ColumnReader* ColumnReader::create(uint32_t column_id, case OLAP_FIELD_TYPE_LIST: case OLAP_FIELD_TYPE_MAP: default: { - LOG(WARNING) << "unspported filed type. [field=" << field_info.name - << " type=" << field_info.type << "]"; + LOG(WARNING) << "unspported filed type. 
field=" << column.name() + << ", type=" << column.type(); break; } } - if (NULL != reader) { - std::vector::const_iterator it; - - for (it = field_info.sub_columns.begin(); it != field_info.sub_columns.end(); ++it) { - ColumnReader* sub_reader = create((*it), columns, included, - segment_included, encodings); - - if (NULL == sub_reader) { - OLAP_LOG_WARNING("fail to create sub column reader."); - SAFE_DELETE(reader); - return NULL; - } - - reader->_sub_readers.push_back(sub_reader); - } - } - return reader; } diff --git a/be/src/olap/column_reader.h b/be/src/olap/rowset/column_reader.h similarity index 98% rename from be/src/olap/column_reader.h rename to be/src/olap/rowset/column_reader.h index 106976342e1d74..468bdeed933f27 100644 --- a/be/src/olap/column_reader.h +++ b/be/src/olap/rowset/column_reader.h @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_READER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_READER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_COLUMN_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_COLUMN_READER_H #include "olap/byte_buffer.h" #include "olap/file_stream.h" -#include "olap/run_length_byte_reader.h" -#include "olap/run_length_integer_reader.h" +#include "olap/rowset/run_length_byte_reader.h" +#include "olap/rowset/run_length_integer_reader.h" #include "olap/stream_name.h" #include "olap/field.h" #include "olap/olap_common.h" @@ -183,7 +183,13 @@ class ColumnReader { // segment_columns - segment中所有column的unique id组成的集合 // encodings - 列的编码信息, 使用encodings[_column_unique_id]访问 static ColumnReader* create(uint32_t column_id, - const std::vector& columns, + const TabletSchema& schema, + const UniqueIdToColumnIdMap& included, + UniqueIdToColumnIdMap& segment_included, + const UniqueIdEncodingMap& encodings); + + static ColumnReader* create(uint32_t column_id, + const std::vector& schema, const UniqueIdToColumnIdMap& included, UniqueIdToColumnIdMap& segment_included, const UniqueIdEncodingMap& encodings); @@ -921,4 +927,4 @@ typedef IntegerColumnReaderWrapper DateTimeColumnReader; } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_READER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_COLUMN_READER_H diff --git a/be/src/olap/column_writer.cpp b/be/src/olap/rowset/column_writer.cpp similarity index 85% rename from be/src/olap/column_writer.cpp rename to be/src/olap/rowset/column_writer.cpp index 91ed92919d815a..b8fa9ef2b00614 100755 --- a/be/src/olap/column_writer.cpp +++ b/be/src/olap/rowset/column_writer.cpp @@ -15,149 +15,130 @@ // specific language governing permissions and limitations // under the License. 
-#include "olap/column_writer.h" +#include "olap/rowset/column_writer.h" -#include "olap/bit_field_writer.h" +#include "olap/rowset/bit_field_writer.h" #include "olap/file_helper.h" namespace doris { ColumnWriter* ColumnWriter::create(uint32_t column_id, - const std::vector& columns, + const TabletSchema& schema, OutStreamFactory* stream_factory, size_t num_rows_per_row_block, double bf_fpp) { - ColumnWriter* column_writer = NULL; - const FieldInfo& field_info = columns[column_id]; + ColumnWriter* column_writer = nullptr; + const TabletColumn& column = schema.column(column_id); - switch (columns[column_id].type) { + switch (column.type()) { case OLAP_FIELD_TYPE_TINYINT: case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: { column_writer = new(std::nothrow) ByteColumnWriter(column_id, stream_factory, - field_info, + column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_SMALLINT: { column_writer = new(std::nothrow) IntegerColumnWriterWrapper( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: { column_writer = new(std::nothrow) IntegerColumnWriterWrapper( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_INT: { column_writer = new(std::nothrow) IntegerColumnWriterWrapper( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_UNSIGNED_INT: { column_writer = new(std::nothrow) IntegerColumnWriterWrapper( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_BIGINT: { column_writer = new(std::nothrow) IntegerColumnWriterWrapper( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: { column_writer = new(std::nothrow) IntegerColumnWriterWrapper( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_FLOAT: { column_writer = new(std::nothrow) FloatColumnWriter(column_id, - stream_factory, field_info, num_rows_per_row_block, bf_fpp); + stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_DOUBLE: { column_writer = new(std::nothrow) DoubleColumnWriter(column_id, - stream_factory, field_info, num_rows_per_row_block, bf_fpp); + stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_DISCRETE_DOUBLE: { column_writer = new(std::nothrow) DiscreteDoubleColumnWriter( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } - case OLAP_FIELD_TYPE_CHAR: { + case OLAP_FIELD_TYPE_CHAR: { column_writer = new(std::nothrow) FixLengthStringColumnWriter( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_DATETIME: { column_writer = new(std::nothrow) DateTimeColumnWriter( - column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp); + column_id, stream_factory, column, 
num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_DATE: { column_writer = new(std::nothrow) DateColumnWriter(column_id, - stream_factory, field_info, num_rows_per_row_block, bf_fpp); + stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_DECIMAL: { column_writer = new(std::nothrow) DecimalColumnWriter(column_id, - stream_factory, field_info, num_rows_per_row_block, bf_fpp); + stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_LARGEINT: { column_writer = new(std::nothrow) LargeIntColumnWriter(column_id, - stream_factory, field_info, num_rows_per_row_block, bf_fpp); + stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_VARCHAR: case OLAP_FIELD_TYPE_HLL: { column_writer = new(std::nothrow) VarStringColumnWriter(column_id, - stream_factory, field_info, num_rows_per_row_block, bf_fpp); + stream_factory, column, num_rows_per_row_block, bf_fpp); break; } case OLAP_FIELD_TYPE_STRUCT: case OLAP_FIELD_TYPE_LIST: case OLAP_FIELD_TYPE_MAP: default: { - OLAP_LOG_WARNING("Unspported filed type. [field=%s type=%d]", - columns[column_id].name.c_str(), - columns[column_id].type); + LOG(WARNING) << "Unspported filed type. field=" << column.name() + << ", type=" << column.type(); break; } } - if (NULL != column_writer) { - std::vector::const_iterator it; - - for (it = columns[column_id].sub_columns.begin(); - it != columns[column_id].sub_columns.end(); ++it) { - ColumnWriter* sub_writer = create(*it, columns, stream_factory, - num_rows_per_row_block, bf_fpp); - - if (NULL == sub_writer) { - OLAP_LOG_WARNING("fail to create sub column writer."); - SAFE_DELETE(column_writer); - return NULL; - } - - column_writer->_sub_writers.push_back(sub_writer); - } - } - return column_writer; } ColumnWriter::ColumnWriter( uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) : _column_id(column_id), - _field_info(field_info), + _column(column), _stream_factory(stream_factory), - _index(field_info.type), + _index(column.type()), _is_present(NULL), _is_present_stream(NULL), _index_stream(NULL), @@ -177,7 +158,7 @@ ColumnWriter::~ColumnWriter() { } OLAPStatus ColumnWriter::init() { - if (_field_info.is_allow_null) { + if (_column.is_nullable()) { _is_present_stream = _stream_factory->create_stream( unique_column_id(), StreamInfoMessage::PRESENT); @@ -199,14 +180,14 @@ OLAPStatus ColumnWriter::init() { } } - OLAPStatus res = _block_statistics.init(_field_info.type, true); + OLAPStatus res = _block_statistics.init(_column.type(), true); if (OLAP_SUCCESS != res) { OLAP_LOG_WARNING("init block statistic failed"); return res; } - res = _segment_statistics.init(_field_info.type, true); + res = _segment_statistics.init(_column.type(), true); if (OLAP_SUCCESS != res) { OLAP_LOG_WARNING("init segment statistic failed"); @@ -262,9 +243,9 @@ OLAPStatus ColumnWriter::write(RowCursor* row_cursor) { if (is_bf_column()) { if (!is_null) { - if (_field_info.type == OLAP_FIELD_TYPE_CHAR || - _field_info.type == OLAP_FIELD_TYPE_VARCHAR || - _field_info.type == OLAP_FIELD_TYPE_HLL) + if (_column.type() == OLAP_FIELD_TYPE_CHAR || + _column.type() == OLAP_FIELD_TYPE_VARCHAR || + _column.type() == OLAP_FIELD_TYPE_HLL) { Slice* slice = reinterpret_cast(buf); _bf->add_bytes(slice->data, slice->size); @@ -406,19 +387,18 @@ OLAPStatus ColumnWriter::finalize(ColumnDataHeaderMessage* header) { // 在Segment头中记录一份Schema信息 // 
这样使得修改表的Schema后不影响对已存在的Segment中的数据读取 column = header->add_column(); - column->set_name(_field_info.name); - column->set_type(FieldInfo::get_string_by_field_type(_field_info.type)); + column->set_name(_column.name()); + column->set_type(FieldInfo::get_string_by_field_type(_column.type())); column->set_aggregation(FieldInfo::get_string_by_aggregation_type( - _field_info.aggregation)); - column->set_length(_field_info.length); - column->set_is_key(_field_info.is_key); - column->set_precision(_field_info.precision); - column->set_frac(_field_info.frac); - column->set_unique_id(_field_info.unique_id); + _column.aggregation())); + column->set_length(_column.length()); + column->set_is_key(_column.is_key()); + column->set_precision(_column.precision()); + column->set_frac(_column.frac()); + column->set_unique_id(_column.unique_id()); column->set_is_bf_column(is_bf_column()); save_encoding(header->add_column_encoding()); - //segment_statistics()->save(header->add_column_statistics()); FINALIZE_EXIT: SAFE_DELETE_ARRAY(index_buf); @@ -458,10 +438,10 @@ void ColumnWriter::get_bloom_filter_info(bool* has_bf_column, //////////////////////////////////////////////////////////////////////////////// ByteColumnWriter::ByteColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) - : ColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), + : ColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), _writer(NULL) {} ByteColumnWriter::~ByteColumnWriter() { @@ -557,10 +537,10 @@ OLAPStatus IntegerColumnWriter::init() { VarStringColumnWriter::VarStringColumnWriter( uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) : - ColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), + ColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), _use_dictionary_encoding(false), _dict_total_size(0), _dict_stream(NULL), @@ -620,22 +600,6 @@ OLAPStatus VarStringColumnWriter::write(const char* str, uint32_t len) { return res; } -#if 0 - std::string key(str, len); - StringDict::iterator it; - it = _string_dict.find(DictKey(key)); - - if (it == _string_dict.end()) { - uint32_t key_id = _string_keys.size(); - _string_keys.push_back(key); - _string_dict[DictKey(_string_keys.back())] = key_id; - _string_id.push_back(key_id); - _dict_total_size += key.length(); - } else { - _string_id.push_back(it->second); - } - -#endif return OLAP_SUCCESS; } @@ -802,11 +766,11 @@ void VarStringColumnWriter::record_position() { FixLengthStringColumnWriter::FixLengthStringColumnWriter( uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) - : VarStringColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), - _length(field_info.length) {} + : VarStringColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), + _length(column.length()) {} FixLengthStringColumnWriter::~FixLengthStringColumnWriter() {} @@ -814,10 +778,10 @@ FixLengthStringColumnWriter::~FixLengthStringColumnWriter() {} DecimalColumnWriter::DecimalColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, 
double bf_fpp) - : ColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), + : ColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), _int_writer(NULL), _frac_writer(NULL) {} @@ -890,10 +854,10 @@ void DecimalColumnWriter::record_position() { LargeIntColumnWriter::LargeIntColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) - : ColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), + : ColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), _high_writer(NULL), _low_writer(NULL) {} diff --git a/be/src/olap/column_writer.h b/be/src/olap/rowset/column_writer.h similarity index 94% rename from be/src/olap/column_writer.h rename to be/src/olap/rowset/column_writer.h index 4ec0e53816bcde..fa3fb77b922830 100644 --- a/be/src/olap/column_writer.h +++ b/be/src/olap/rowset/column_writer.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_WRITER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_WRITER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_COLUMN_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_COLUMN_WRITER_H #include @@ -26,8 +26,8 @@ #include "olap/bloom_filter_writer.h" #include "olap/out_stream.h" #include "olap/stream_index_writer.h" -#include "olap/run_length_byte_writer.h" -#include "olap/run_length_integer_writer.h" +#include "olap/rowset/run_length_byte_writer.h" +#include "olap/rowset/run_length_integer_writer.h" #include "olap/field.h" #include "olap/olap_common.h" #include "olap/olap_define.h" @@ -53,10 +53,11 @@ class ColumnWriter { // stream_factory: 用于创建输出流的工厂对象, 该对象的生命期由调用者所有 static ColumnWriter* create( uint32_t column_id, - const std::vector& columns, + const TabletSchema& schema, OutStreamFactory* stream_factory, size_t num_rows_per_row_block, double bf_fpp); + virtual ~ColumnWriter(); virtual OLAPStatus init(); @@ -73,7 +74,7 @@ class ColumnWriter { // * column_unique_id // * column_type // * column_encoding - // * column_statistics + // * zone_maps virtual OLAPStatus finalize(ColumnDataHeaderMessage* header); virtual void save_encoding(ColumnEncodingMessage* encoding); uint32_t column_id() const { @@ -81,7 +82,7 @@ class ColumnWriter { } uint32_t unique_column_id() const { - return _field_info.unique_id; + return _column.unique_id(); } virtual void get_bloom_filter_info(bool* has_bf_column, @@ -98,7 +99,7 @@ class ColumnWriter { protected: ColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp); @@ -122,11 +123,11 @@ class ColumnWriter { void _remove_is_present_positions(); bool is_bf_column() { - return _field_info.is_bf_column; + return _column.is_bf_column(); } uint32_t _column_id; - const FieldInfo& _field_info; + const TabletColumn& _column; OutStreamFactory* _stream_factory; // 该对象由外部调用者所有 std::vector _sub_writers; // 保存子列的writer PositionEntryWriter _index_entry; @@ -147,7 +148,7 @@ class ColumnWriter { class ByteColumnWriter : public ColumnWriter { public: ByteColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, size_t num_rows_per_row_block, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp); virtual ~ByteColumnWriter(); virtual OLAPStatus init(); @@ -228,11 +229,11 @@ class 
IntegerColumnWriterWrapper : public ColumnWriter { IntegerColumnWriterWrapper( uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) : - ColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), - _writer(column_id, field_info.unique_id, stream_factory, is_singed) {} + ColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), + _writer(column_id, column.unique_id(), stream_factory, is_singed) {} virtual ~IntegerColumnWriterWrapper() {} @@ -325,10 +326,10 @@ class DoubleColumnWriterBase: public ColumnWriter { DoubleColumnWriterBase( uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp) : - ColumnWriter(column_id, stream_factory, field_info, num_rows_per_row_block, bf_fpp), + ColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp), _stream(NULL) {} virtual ~DoubleColumnWriterBase() {} @@ -421,7 +422,7 @@ typedef IntegerColumnWriterWrapper DiscreteDoubleColumnWriter; class VarStringColumnWriter : public ColumnWriter { public: VarStringColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, size_t num_rows_per_row_block, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp); virtual ~VarStringColumnWriter(); virtual OLAPStatus init(); @@ -500,7 +501,7 @@ class FixLengthStringColumnWriter : public VarStringColumnWriter { public: FixLengthStringColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp); virtual ~FixLengthStringColumnWriter(); @@ -550,7 +551,7 @@ typedef IntegerColumnWriterWrapper DateTimeColumnWriter; class DecimalColumnWriter : public ColumnWriter { public: DecimalColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, size_t num_rows_per_row_block, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp); virtual ~DecimalColumnWriter(); virtual OLAPStatus init(); @@ -600,7 +601,7 @@ class DecimalColumnWriter : public ColumnWriter { class LargeIntColumnWriter : public ColumnWriter { public: LargeIntColumnWriter(uint32_t column_id, OutStreamFactory* stream_factory, - const FieldInfo& field_info, size_t num_rows_per_row_block, + const TabletColumn& column, size_t num_rows_per_row_block, double bf_fpp); virtual ~LargeIntColumnWriter(); virtual OLAPStatus init(); @@ -648,4 +649,4 @@ class LargeIntColumnWriter : public ColumnWriter { }; } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_COLUMN_WRITER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_COLUMN_WRITER_H diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h new file mode 100644 index 00000000000000..9eeb36fcf7f388 --- /dev/null +++ b/be/src/olap/rowset/rowset.h @@ -0,0 +1,146 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_H + +#include "gen_cpp/olap_file.pb.h" +#include "olap/new_status.h" +#include "olap/rowset/rowset_meta.h" + +#include + +namespace doris { + +class Rowset; +using RowsetSharedPtr = std::shared_ptr; + +class RowsetWriter; +class RowsetReader; + +class Rowset : public std::enable_shared_from_this { +public: + Rowset() : _is_inited(false), _is_loaded(false), _need_delete_file(false) { + } + + virtual ~Rowset() { } + + // this api is for init related objects in memory + virtual OLAPStatus init() = 0; + + virtual bool is_inited() { + return _is_inited; + } + + virtual void set_inited(bool inited) { + _is_inited = inited; + } + + virtual bool is_loaded() { + return _is_loaded; + } + + void set_loaded(bool loaded) { + _is_loaded= loaded; + } + + // this api is for lazy loading data + // always means that there are some io + virtual OLAPStatus load() = 0; + + virtual std::shared_ptr create_reader() = 0; + + virtual OLAPStatus remove() = 0; + + virtual void to_rowset_pb(RowsetMetaPB* rs_meta) = 0; + + virtual RowsetMetaSharedPtr rowset_meta() const = 0; + + virtual size_t data_disk_size() const = 0; + + virtual size_t index_disk_size() const = 0; + + virtual bool empty() const = 0; + + virtual bool zero_num_rows() const = 0; + + virtual size_t num_rows() const = 0; + + virtual Version version() const = 0; + + virtual void set_version_and_version_hash(Version version, VersionHash version_hash) = 0; + + virtual int64_t end_version() const = 0; + + virtual int64_t start_version() const = 0; + + virtual VersionHash version_hash() const = 0; + + virtual bool in_use() const = 0; + + virtual void acquire() = 0; + + virtual void release() = 0; + + virtual int64_t ref_count() const = 0; + + virtual OLAPStatus make_snapshot(const std::string& snapshot_path, + std::vector* success_links) = 0; + virtual OLAPStatus copy_files_to_path(const std::string& dest_path, + std::vector* success_files) = 0; + + virtual OLAPStatus remove_old_files(std::vector* files_to_remove) = 0; + + virtual RowsetId rowset_id() const = 0; + + virtual int64_t creation_time() = 0; + + virtual bool is_pending() const = 0; + + virtual PUniqueId load_id() const = 0; + + virtual int64_t txn_id() const = 0; + + virtual int64_t partition_id() const = 0; + + // flag for push delete rowset + virtual bool delete_flag() = 0; + + virtual bool check_path(const std::string& path) = 0; + + virtual std::string unique_id() = 0; + + bool need_delete_file() { + return _need_delete_file; + } + + void set_need_delete_file(bool need_delete_file) { + if (_need_delete_file == true) { + return; + } + _need_delete_file = need_delete_file; + } + +private: + bool _is_inited; + bool _is_loaded; + bool _need_delete_file; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_H diff --git a/be/src/olap/rowset/rowset_id_generator.cpp b/be/src/olap/rowset/rowset_id_generator.cpp new file mode 100644 index 00000000000000..55aca7f05253b7 --- /dev/null +++ b/be/src/olap/rowset/rowset_id_generator.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software 
Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include +#include + +#include "olap/rowset/rowset_id_generator.h" +#include "olap/olap_meta.h" + +namespace doris { + +static RowsetId k_batch_interval = 10000; + +OLAPStatus RowsetIdGenerator::init() { + _next_id = k_batch_interval; + _id_batch_end = (k_batch_interval << 1); + // get last stored value from meta + std::string value; + OLAPStatus s = _meta->get(DEFAULT_COLUMN_FAMILY_INDEX, END_ROWSET_ID, &value); + if (s == OLAP_SUCCESS) { + _next_id = std::stol(value); + _id_batch_end = _next_id + k_batch_interval; + } else if (s != OLAP_ERR_META_KEY_NOT_FOUND) { + return s; + } + // else: meta-key not found, we will initialize a initial state + s = _meta->put(DEFAULT_COLUMN_FAMILY_INDEX, END_ROWSET_ID, std::to_string(_id_batch_end)); + if (s != OLAP_SUCCESS) { + return s; + } + return OLAP_SUCCESS; +} + +OLAPStatus RowsetIdGenerator::get_next_id(RowsetId* gen_rowset_id) { + std::lock_guard l(_lock); + if (_next_id >= _id_batch_end) { + _id_batch_end += k_batch_interval; + auto s = _meta->put(DEFAULT_COLUMN_FAMILY_INDEX, END_ROWSET_ID, std::to_string(_id_batch_end)); + if (s != OLAP_SUCCESS) { + return s; + } + } + *gen_rowset_id = _next_id; + ++_next_id; + return OLAP_SUCCESS; +} + +OLAPStatus RowsetIdGenerator::set_next_id(RowsetId new_rowset_id) { + std::lock_guard l(_lock); + // must be < not <= + if (new_rowset_id < _next_id) { + return OLAP_SUCCESS; + } + if (new_rowset_id >= _id_batch_end) { + _id_batch_end = new_rowset_id + k_batch_interval; + auto s = _meta->put(DEFAULT_COLUMN_FAMILY_INDEX, END_ROWSET_ID, std::to_string(_id_batch_end)); + RETURN_NOT_OK(s); + } + _next_id = new_rowset_id + 1; + return OLAP_SUCCESS; +} + +} // doris diff --git a/be/src/olap/rowset/rowset_id_generator.h b/be/src/olap/rowset/rowset_id_generator.h new file mode 100644 index 00000000000000..c6a1436713a3c7 --- /dev/null +++ b/be/src/olap/rowset/rowset_id_generator.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#pragma once
+
+#include <mutex>
+
+#include "olap/olap_define.h"
+#include "olap/olap_common.h"
+
+namespace doris {
+
+class OlapMeta;
+
+class RowsetIdGenerator {
+public:
+    RowsetIdGenerator(OlapMeta* meta) : _meta(meta) { }
+    ~RowsetIdGenerator() {}
+
+    // This function tries to restore state from meta first.
+    // If there is no such state, it initializes one and stores
+    // it into meta.
+    OLAPStatus init();
+
+    // Generates an id for this data dir.
+    // A rowset id is not globally unique; it is unique per data dir.
+    // The end of the current id batch is persisted into the meta store.
+    OLAPStatus get_next_id(RowsetId* rowset_id);
+
+    OLAPStatus set_next_id(RowsetId new_rowset_id);
+
+private:
+    OlapMeta* _meta = nullptr;
+
+    std::mutex _lock;
+    RowsetId _next_id = -1;
+    RowsetId _id_batch_end = -1;
+}; // RowsetIdGenerator
+
+} // namespace doris
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
new file mode 100644
index 00000000000000..1bf060cea2371e
--- /dev/null
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -0,0 +1,313 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
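To make the batching behaviour concrete, here is a self-contained sketch of the same scheme: ids are served from memory and only the end of the current batch is persisted, so a restart resumes past everything previously handed out, at the cost of skipping the unused tail of the last batch. The std::map below stands in for OlapMeta/RocksDB, and the key name and batch size are illustrative only.

#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>
#include <string>

class ToyIdGenerator {
public:
    explicit ToyIdGenerator(std::map<std::string, int64_t>* meta) : _meta(meta) {}

    void init() {
        auto it = _meta->find("end_rowset_id");
        _next_id = (it != _meta->end()) ? it->second : kBatch;
        _id_batch_end = _next_id + kBatch;
        (*_meta)["end_rowset_id"] = _id_batch_end;   // persist the new batch end
    }

    int64_t next_id() {
        std::lock_guard<std::mutex> l(_lock);
        if (_next_id >= _id_batch_end) {             // current batch exhausted
            _id_batch_end += kBatch;
            (*_meta)["end_rowset_id"] = _id_batch_end;
        }
        return _next_id++;
    }

private:
    static constexpr int64_t kBatch = 10000;
    std::map<std::string, int64_t>* _meta;
    std::mutex _lock;
    int64_t _next_id = 0;
    int64_t _id_batch_end = 0;
};

int main() {
    std::map<std::string, int64_t> meta;             // pretend persistent store
    ToyIdGenerator gen(&meta);
    gen.init();
    std::cout << gen.next_id() << " " << gen.next_id() << std::endl;  // 10000 10001

    ToyIdGenerator restarted(&meta);                 // simulate a restart
    restarted.init();
    std::cout << restarted.next_id() << std::endl;   // >= 20000, never reuses old ids
    return 0;
}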
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_H + +#include "gen_cpp/olap_file.pb.h" + +#include +#include +#include + +#include "olap/new_status.h" +#include "olap/olap_common.h" +#include "json2pb/json_to_pb.h" +#include "json2pb/pb_to_json.h" +#include "common/logging.h" + +namespace doris { + +class RowsetMeta; +using RowsetMetaSharedPtr = std::shared_ptr; + +class RowsetMeta { +public: + virtual ~RowsetMeta() { } + + virtual bool init(const std::string& pb_rowset_meta) { + bool ret = _deserialize_from_pb(pb_rowset_meta); + if (!ret) { + return false; + } + return true; + } + + virtual bool init_from_pb(const RowsetMetaPB& rowset_meta_pb) { + _rowset_meta_pb = rowset_meta_pb; + return true; + } + + virtual bool init_from_json(const std::string& json_rowset_meta) { + bool ret = json2pb::JsonToProtoMessage(json_rowset_meta, &_rowset_meta_pb); + if (!ret) { + return false; + } + return true; + } + + virtual bool serialize(std::string* value) { + return _serialize_to_pb(value); + } + + virtual bool json_rowset_meta(std::string* json_rowset_meta) { + json2pb::Pb2JsonOptions json_options; + json_options.pretty_json = true; + bool ret = json2pb::ProtoMessageToJson(_rowset_meta_pb, json_rowset_meta, json_options); + return ret; + } + + int64_t rowset_id() { + return _rowset_meta_pb.rowset_id(); + } + + void set_rowset_id(int64_t rowset_id) { + _rowset_meta_pb.set_rowset_id(rowset_id); + } + + int64_t tablet_id() { + return _rowset_meta_pb.tablet_id(); + } + + void set_tablet_id(int64_t tablet_id) { + _rowset_meta_pb.set_tablet_id(tablet_id); + } + + TabletUid tablet_uid() { + return _rowset_meta_pb.tablet_uid(); + } + + void set_tablet_uid(TabletUid tablet_uid) { + *(_rowset_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto(); + } + + int64_t txn_id() { + return _rowset_meta_pb.txn_id(); + } + + void set_txn_id(int64_t txn_id) { + _rowset_meta_pb.set_txn_id(txn_id); + } + + int32_t tablet_schema_hash() { + return _rowset_meta_pb.tablet_schema_hash(); + } + + void set_tablet_schema_hash(int64_t tablet_schema_hash) { + _rowset_meta_pb.set_tablet_schema_hash(tablet_schema_hash); + } + + RowsetTypePB rowset_type() { + return _rowset_meta_pb.rowset_type(); + } + + void set_rowset_type(RowsetTypePB rowset_type) { + _rowset_meta_pb.set_rowset_type(rowset_type); + } + + RowsetStatePB rowset_state() { + return _rowset_meta_pb.rowset_state(); + } + + void set_rowset_state(RowsetStatePB rowset_state) { + _rowset_meta_pb.set_rowset_state(rowset_state); + } + + Version version() { + return { _rowset_meta_pb.start_version(), + _rowset_meta_pb.end_version() }; + } + + void set_version(Version version) { + _rowset_meta_pb.set_start_version(version.first); + _rowset_meta_pb.set_end_version(version.second); + } + + bool has_version() { + return _rowset_meta_pb.has_start_version() + && _rowset_meta_pb.has_end_version(); + } + + int64_t start_version() const { + return _rowset_meta_pb.start_version(); + } + + void set_start_version(int64_t start_version) { + _rowset_meta_pb.set_start_version(start_version); + } + + int64_t end_version() const { + return _rowset_meta_pb.end_version(); + } + + void set_end_version(int64_t end_version) { + _rowset_meta_pb.set_end_version(end_version); + } + + VersionHash version_hash() { + return _rowset_meta_pb.version_hash(); + } + + void set_version_hash(VersionHash version_hash) { + _rowset_meta_pb.set_version_hash(version_hash); + } + + int64_t num_rows() { + return _rowset_meta_pb.num_rows(); + } + + void set_num_rows(int64_t 
num_rows) { + _rowset_meta_pb.set_num_rows(num_rows); + } + + size_t total_disk_size() { + return _rowset_meta_pb.total_disk_size(); + } + + void set_total_disk_size(size_t total_disk_size) { + _rowset_meta_pb.set_total_disk_size(total_disk_size); + } + + size_t data_disk_size() { + return _rowset_meta_pb.data_disk_size(); + } + + void set_data_disk_size(size_t data_disk_size) { + _rowset_meta_pb.set_data_disk_size(data_disk_size); + } + + size_t index_disk_size() { + return _rowset_meta_pb.index_disk_size(); + } + + void set_index_disk_size(size_t index_disk_size) { + _rowset_meta_pb.set_index_disk_size(index_disk_size); + } + + void zone_maps(std::vector* zone_maps) { + for (const ZoneMap& zone_map: _rowset_meta_pb.zone_maps()) { + zone_maps->push_back(zone_map); + } + } + + void set_zone_maps(const std::vector& zone_maps) { + for (const ZoneMap& zone_map : zone_maps) { + ZoneMap* new_zone_map = _rowset_meta_pb.add_zone_maps(); + *new_zone_map = zone_map; + } + } + + void add_zone_map(const ZoneMap& zone_map) { + ZoneMap* new_zone_map = _rowset_meta_pb.add_zone_maps(); + *new_zone_map = zone_map; + } + + bool has_delete_predicate() { + return _rowset_meta_pb.has_delete_predicate(); + } + + const DeletePredicatePB& delete_predicate() { + return _rowset_meta_pb.delete_predicate(); + } + + DeletePredicatePB* mutable_delete_predicate() { + return _rowset_meta_pb.mutable_delete_predicate(); + } + + void set_delete_predicate(DeletePredicatePB& delete_predicate) { + DeletePredicatePB* new_delete_condition = _rowset_meta_pb.mutable_delete_predicate(); + *new_delete_condition = delete_predicate; + } + + bool empty() { + return _rowset_meta_pb.empty(); + } + + void set_empty(bool empty) { + _rowset_meta_pb.set_empty(empty); + } + + PUniqueId load_id() { + return _rowset_meta_pb.load_id(); + } + + void set_load_id(PUniqueId load_id) { + PUniqueId* new_load_id = _rowset_meta_pb.mutable_load_id(); + new_load_id->set_hi(load_id.hi()); + new_load_id->set_lo(load_id.lo()); + } + + bool delete_flag() { + return _rowset_meta_pb.delete_flag(); + } + + void set_delete_flag(bool delete_flag) { + _rowset_meta_pb.set_delete_flag(delete_flag); + } + + int64_t creation_time() const { + return _rowset_meta_pb.creation_time(); + } + + void set_creation_time(int64_t creation_time) { + return _rowset_meta_pb.set_creation_time(creation_time); + } + + int64_t partition_id() { + return _rowset_meta_pb.partition_id(); + } + + void set_partition_id(int64_t partition_id) { + return _rowset_meta_pb.set_partition_id(partition_id); + } + + void to_rowset_pb(RowsetMetaPB* rs_meta_pb) { + *rs_meta_pb = _rowset_meta_pb; + } + +private: + friend class AlphaRowsetMeta; + bool _deserialize_from_pb(const std::string& value) { + return _rowset_meta_pb.ParseFromString(value); + } + + bool _serialize_to_pb(std::string* value) { + if (value == nullptr) { + return false; + } + return _rowset_meta_pb.SerializeToString(value); + } + + bool _has_alpha_rowset_extra_meta_pb() { + return _rowset_meta_pb.has_alpha_rowset_extra_meta_pb(); + } + + const AlphaRowsetExtraMetaPB& _alpha_rowset_extra_meta_pb() { + return _rowset_meta_pb.alpha_rowset_extra_meta_pb(); + } + + AlphaRowsetExtraMetaPB* _mutable_alpha_rowset_extra_meta_pb() { + return _rowset_meta_pb.mutable_alpha_rowset_extra_meta_pb(); + } + +private: + RowsetMetaPB _rowset_meta_pb; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_H diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp b/be/src/olap/rowset/rowset_meta_manager.cpp new file mode 100644 index 
00000000000000..abc6792e293a54 --- /dev/null +++ b/be/src/olap/rowset/rowset_meta_manager.cpp @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/rowset_meta_manager.h" + +#include +#include +#include +#include +#include + +#include "olap/olap_define.h" +#include "olap/utils.h" +#include "common/logging.h" +#include "json2pb/json_to_pb.h" +#include "json2pb/pb_to_json.h" + +namespace doris { + +const std::string ROWSET_PREFIX = "rst_"; + +bool RowsetMetaManager::check_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id) { + std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + std::to_string(rowset_id); + std::string value; + OLAPStatus s = meta->get(META_COLUMN_FAMILY_INDEX, key, &value); + if (s != OLAP_SUCCESS) { + return false; + } + return true; +} + +OLAPStatus RowsetMetaManager::get_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, RowsetMetaSharedPtr rowset_meta) { + std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + std::to_string(rowset_id); + std::string value; + OLAPStatus s = meta->get(META_COLUMN_FAMILY_INDEX, key, &value); + if (s == OLAP_ERR_META_KEY_NOT_FOUND) { + std::string error_msg = "rowset id:" + key + " not found."; + LOG(WARNING) << error_msg; + return OLAP_ERR_META_KEY_NOT_FOUND; + } else if (s != OLAP_SUCCESS) { + std::string error_msg = "load rowset id:" + key + " failed."; + LOG(WARNING) << error_msg; + return OLAP_ERR_IO_ERROR; + } + bool ret = rowset_meta->init(value); + if (!ret) { + std::string error_msg = "parse rowset meta failed. rowset id:" + key; + return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; + } + return OLAP_SUCCESS; +} + +OLAPStatus RowsetMetaManager::get_json_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, std::string* json_rowset_meta) { + RowsetMetaSharedPtr rowset_meta_ptr(new(std::nothrow) RowsetMeta()); + OLAPStatus status = get_rowset_meta(meta, tablet_uid, rowset_id, rowset_meta_ptr); + if (status != OLAP_SUCCESS) { + return status; + } + bool ret = rowset_meta_ptr->json_rowset_meta(json_rowset_meta); + if (!ret) { + std::string error_msg = "get json rowset meta failed. rowset id:" + std::to_string(rowset_id); + return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; + } + return OLAP_SUCCESS; +} + +OLAPStatus RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, RowsetMeta* rowset_meta) { + std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + std::to_string(rowset_id); + std::string value; + bool ret = rowset_meta->serialize(&value); + if (!ret) { + std::string error_msg = "serialize rowset pb failed. 
rowset id:" + key; + LOG(WARNING) << error_msg; + return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; + } + OLAPStatus status = meta->put(META_COLUMN_FAMILY_INDEX, key, value); + return status; +} + +OLAPStatus RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, const string& meta_binary) { + std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + std::to_string(rowset_id); + OLAPStatus status = meta->put(META_COLUMN_FAMILY_INDEX, key, meta_binary); + return status; +} + +OLAPStatus RowsetMetaManager::remove(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id) { + std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + std::to_string(rowset_id); + LOG(INFO) << "start to remove rowset, key:" << key; + OLAPStatus status = meta->remove(META_COLUMN_FAMILY_INDEX, key); + LOG(INFO) << "remove rowset key:" << key << " finished"; + return status; +} + +OLAPStatus RowsetMetaManager::traverse_rowset_metas(OlapMeta* meta, + std::function const& func) { + auto traverse_rowset_meta_func = [&func](const std::string& key, const std::string& value) -> bool { + std::vector parts; + // key format: rst_uuid_rowset_id + split_string(key, '_', &parts); + if (parts.size() != 3) { + LOG(WARNING) << "invalid rowset key:" << key << ", splitted size:" << parts.size(); + return true; + } + uint64_t rowset_id = std::stol(parts[2].c_str(), NULL, 10); + // TODO(ygl): parset tablet id from parts[1] + std::vector uid_parts; + split_string(parts[1], '-', &uid_parts); + TabletUid tablet_uid(uid_parts[0], uid_parts[1]); + return func(tablet_uid, rowset_id, value); + }; + OLAPStatus status = meta->iterate(META_COLUMN_FAMILY_INDEX, ROWSET_PREFIX, traverse_rowset_meta_func); + return status; +} + +OLAPStatus RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta, const std::string& rowset_meta_path) { + std::ifstream infile(rowset_meta_path); + char buffer[1024]; + std::string json_rowset_meta; + while (!infile.eof()) { + infile.getline(buffer, 1024); + json_rowset_meta = json_rowset_meta + buffer; + } + boost::algorithm::trim(json_rowset_meta); + RowsetMeta rowset_meta; + bool ret = rowset_meta.init_from_json(json_rowset_meta); + if (!ret) { + std::string error_msg = "parse json rowset meta failed."; + LOG(WARNING) << error_msg; + return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; + } + uint64_t rowset_id = rowset_meta.rowset_id(); + TabletUid tablet_uid = rowset_meta.tablet_uid(); + OLAPStatus status = save(meta, tablet_uid, rowset_id, &rowset_meta); + return status; +} + +} // namespace doris diff --git a/be/src/olap/rowset/rowset_meta_manager.h b/be/src/olap/rowset/rowset_meta_manager.h new file mode 100644 index 00000000000000..0411243d7bbadb --- /dev/null +++ b/be/src/olap/rowset/rowset_meta_manager.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_MANAGER_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_MANAGER_H + +#include + +#include "olap/rowset/rowset_meta.h" +#include "olap/olap_meta.h" +#include "olap/new_status.h" + +using std::string; + +namespace doris { + +// Helper class for managing rowset meta of one root path. +class RowsetMetaManager { +public: + static bool check_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id); + + static OLAPStatus get_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, RowsetMetaSharedPtr rowset_meta); + + static OLAPStatus get_json_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, std::string* json_rowset_meta); + + static OLAPStatus save(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, RowsetMeta* rowset_meta); + + static OLAPStatus save(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id, const string& meta_binary); + + static OLAPStatus remove(OlapMeta* meta, TabletUid tablet_uid, int64_t rowset_id); + + static OLAPStatus traverse_rowset_metas(OlapMeta* meta, + std::function const& func); + + static OLAPStatus load_json_rowset_meta(OlapMeta* meta, const std::string& rowset_meta_path); +}; + +} + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_MANAGER_H diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h new file mode 100644 index 00000000000000..0f60fc8e344a6b --- /dev/null +++ b/be/src/olap/rowset/rowset_reader.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
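RowsetMetaManager stores one serialized RowsetMetaPB per rowset in the data dir's KV meta, keyed as "rst_<tablet_uid>_<rowset_id>", and recovers them with a prefix scan. The sketch below reproduces that key scheme and the prefix iteration over a plain std::map standing in for the meta column family; the uid and values are made up.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

static std::string rowset_key(const std::string& tablet_uid, int64_t rowset_id) {
    return "rst_" + tablet_uid + "_" + std::to_string(rowset_id);
}

int main() {
    std::map<std::string, std::string> meta;  // stand-in for the meta column family
    meta[rowset_key("8b3c-11", 17)] = "serialized RowsetMetaPB";
    meta[rowset_key("8b3c-11", 18)] = "serialized RowsetMetaPB";
    meta["tbl_123"] = "unrelated entry";      // other prefixes are skipped

    const std::string prefix = "rst_";
    for (auto it = meta.lower_bound(prefix); it != meta.end(); ++it) {
        const std::string& key = it->first;
        if (key.compare(0, prefix.size(), prefix) != 0) {
            break;                             // past the "rst_" range
        }
        // key format: rst_<uid>_<rowset_id>; the id sits after the last '_'
        size_t pos = key.rfind('_');
        int64_t rowset_id = std::stoll(key.substr(pos + 1));
        std::cout << "tablet_uid=" << key.substr(prefix.size(), pos - prefix.size())
                  << " rowset_id=" << rowset_id << std::endl;
    }
    return 0;
}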
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_H + +#include "olap/rowset/rowset_reader_context.h" +#include "olap/rowset/rowset.h" + +#include +#include + +namespace doris { + +class RowsetReader; +using RowsetReaderSharedPtr = std::shared_ptr; + +class RowsetReader { +public: + static RowsetReader* create(); + + virtual ~RowsetReader() { } + + // reader init + virtual OLAPStatus init(RowsetReaderContext* read_context) = 0; + + // read next block data + virtual OLAPStatus next_block(RowBlock** block) = 0; + + virtual bool delete_flag() = 0; + + virtual Version version() = 0; + + virtual VersionHash version_hash() = 0; + + virtual RowsetSharedPtr rowset() = 0; + + // close reader + virtual void close() = 0; + + virtual int64_t filtered_rows() = 0; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_H diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h new file mode 100644 index 00000000000000..9d516242ee6ee2 --- /dev/null +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
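RowsetReader hands data back one RowBlock at a time, so the caller's shape is: init(), loop on next_block() until an end-of-data status, then close(). A self-contained toy version of that loop follows; Status, Block and ToyReader are stand-ins rather than the Doris types.

#include <iostream>
#include <vector>

enum class Status { OK, END_OF_DATA };

struct Block { int num_rows; };

class ToyReader {
public:
    explicit ToyReader(std::vector<Block> blocks) : _blocks(std::move(blocks)) {}
    Status init() { _pos = 0; return Status::OK; }
    Status next_block(Block** out) {
        if (_pos >= _blocks.size()) { return Status::END_OF_DATA; }
        *out = &_blocks[_pos++];
        return Status::OK;
    }
    void close() { _blocks.clear(); }

private:
    std::vector<Block> _blocks;
    size_t _pos = 0;
};

int main() {
    ToyReader reader({{1024}, {512}});
    if (reader.init() != Status::OK) { return 1; }
    Block* block = nullptr;
    int total_rows = 0;
    while (reader.next_block(&block) == Status::OK) {   // pull blocks until EOF
        total_rows += block->num_rows;
    }
    reader.close();
    std::cout << "read " << total_rows << " rows" << std::endl;  // 1536
    return 0;
}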
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_H + +#include "olap/schema.h" +#include "olap/column_predicate.h" +#include "olap/row_cursor.h" +#include "olap/row_block.h" +#include "olap/lru_cache.h" +#include "olap/olap_cond.h" +#include "olap/delete_handler.h" +#include "runtime/runtime_state.h" + +namespace doris { + +struct RowsetReaderContext { + RowsetReaderContext() : reader_type(READER_QUERY), + tablet_schema(nullptr), + preaggregation(false), + return_columns(nullptr), + seek_columns(nullptr), + load_bf_columns(nullptr), + conditions(nullptr), + predicates(nullptr), + lower_bound_keys(nullptr), + is_lower_keys_included(nullptr), + upper_bound_keys(nullptr), + is_upper_keys_included(nullptr), + delete_handler(nullptr), + stats(nullptr), + is_using_cache(false), + lru_cache(nullptr), + runtime_state(nullptr) { } + + ReaderType reader_type; + const TabletSchema* tablet_schema; + bool preaggregation; + // projection columns + const std::vector* return_columns; + const std::vector* seek_columns; + // columns to load bloom filter index + // including columns in "=" or "in" conditions + const std::set* load_bf_columns; + // column filter conditions by delete sql + const Conditions* conditions; + // column name -> column predicate + // adding column_name for predicate to make use of column selectivity + const std::vector* predicates; + const std::vector* lower_bound_keys; + const std::vector* is_lower_keys_included; + const std::vector* upper_bound_keys; + const std::vector* is_upper_keys_included; + const DeleteHandler* delete_handler; + OlapReaderStatistics* stats; + bool is_using_cache; + Cache* lru_cache; + RuntimeState* runtime_state; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_H diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h new file mode 100644 index 00000000000000..4a9351c61fad44 --- /dev/null +++ b/be/src/olap/rowset/rowset_writer.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
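RowsetReaderContext is deliberately just a bag of optional pointers: a scan fills in only the pieces it needs (projection columns, key range, predicates, delete handler) and leaves everything else nullptr. A minimal analogue of that convention, with invented field names, looks like this.

#include <iostream>
#include <string>
#include <vector>

struct ToyReadContext {
    const std::vector<int>* return_columns = nullptr;   // projection, optional
    const std::string* lower_key = nullptr;             // key range, optional
    const std::string* upper_key = nullptr;
    bool prefer_aggregation = false;
};

void describe(const ToyReadContext& ctx) {
    std::cout << "projection: "
              << (ctx.return_columns ? std::to_string(ctx.return_columns->size()) + " columns" : "all")
              << ", key range: "
              << (ctx.lower_key && ctx.upper_key ? *ctx.lower_key + ".." + *ctx.upper_key : "full scan")
              << std::endl;
}

int main() {
    std::vector<int> cols = {0, 2, 5};
    std::string lo = "2019-01-01";
    std::string hi = "2019-02-01";

    ToyReadContext full_scan;                 // everything left unset
    ToyReadContext pruned;                    // projection + key range only
    pruned.return_columns = &cols;
    pruned.lower_key = &lo;
    pruned.upper_key = &hi;

    describe(full_scan);
    describe(pruned);
    return 0;
}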
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_H + +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_writer_context.h" +#include "olap/schema.h" +#include "olap/row_block.h" +#include "gen_cpp/types.pb.h" +#include "runtime/mem_pool.h" + +namespace doris { + +class RowsetWriter; +using RowsetWriterSharedPtr = std::shared_ptr; + +class RowsetWriter { +public: + virtual ~RowsetWriter() { } + + virtual OLAPStatus init(const RowsetWriterContext& rowset_writer_context) = 0; + + // add a row to rowset + virtual OLAPStatus add_row(RowCursor* row_block) = 0; + + virtual OLAPStatus add_row(const char* row, Schema* schema) = 0; + + virtual OLAPStatus add_row_block(RowBlock* row_block) = 0; + + virtual OLAPStatus add_rowset(RowsetSharedPtr rowset) = 0; + virtual OLAPStatus add_rowset_for_linked_schema_change( + RowsetSharedPtr rowset, const SchemaMapping& schema_mapping) = 0; + + virtual OLAPStatus flush() = 0; + + // get a rowset + virtual RowsetSharedPtr build() = 0; + + // TODO(hkp): this interface should be optimized! + virtual MemPool* mem_pool() = 0; + + virtual Version version() = 0; + + virtual int32_t num_rows() = 0; + + virtual RowsetId rowset_id() = 0; + + virtual OLAPStatus garbage_collection() = 0; + + virtual DataDir* data_dir() = 0; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_H diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h new file mode 100644 index 00000000000000..28002a6fb48b24 --- /dev/null +++ b/be/src/olap/rowset/rowset_writer_context.h @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
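The RowsetWriter contract above is build-oriented: configure the writer via init(), append rows or whole row blocks, flush(), and finally call build() to obtain the finished rowset as a shared pointer. A toy illustration of that lifecycle follows; ToyRowsetWriter and ToyRowset are stand-ins, and the "disk" is just a second vector.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct ToyRowset {
    std::vector<std::string> rows;
    size_t num_rows() const { return rows.size(); }
};

class ToyRowsetWriter {
public:
    void add_row(const std::string& row) { _buffer.push_back(row); }

    void flush() {                       // pretend to write buffered rows to disk
        _flushed.insert(_flushed.end(), _buffer.begin(), _buffer.end());
        _buffer.clear();
    }

    std::shared_ptr<ToyRowset> build() { // hand out the finished, immutable rowset
        flush();
        auto rowset = std::make_shared<ToyRowset>();
        rowset->rows = std::move(_flushed);
        return rowset;
    }

private:
    std::vector<std::string> _buffer;
    std::vector<std::string> _flushed;
};

int main() {
    ToyRowsetWriter writer;
    writer.add_row("k1|v1");
    writer.add_row("k2|v2");
    writer.flush();
    writer.add_row("k3|v3");
    auto rowset = writer.build();
    std::cout << "built rowset with " << rowset->num_rows() << " rows" << std::endl;  // 3
    return 0;
}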
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H + +#include "gen_cpp/olap_file.pb.h" +#include "olap/data_dir.h" +#include "olap/tablet_schema.h" + +namespace doris { + +class RowsetWriterContextBuilder; +using RowsetWriterContextBuilderSharedPtr = std::shared_ptr; + +struct RowsetWriterContext { + RowsetWriterContext() : + rowset_id(0), + tablet_id(0), + tablet_schema_hash(0), + partition_id(0), + rowset_type(ALPHA_ROWSET), + rowset_path_prefix(""), + tablet_schema(nullptr), + rowset_state(PREPARED), + data_dir(nullptr), + version(Version(0, 0)), + version_hash(0), + txn_id(0) { + load_id.set_hi(0); + load_id.set_lo(0); + tablet_uid.hi = 0; + tablet_uid.lo = 0; + } + int64_t rowset_id; + int64_t tablet_id; + int64_t tablet_schema_hash; + int64_t partition_id; + RowsetTypePB rowset_type; + std::string rowset_path_prefix; + const TabletSchema* tablet_schema; + // PREPARED/COMMITTED for pending rowset + // VISIBLE for non-pending rowset + RowsetStatePB rowset_state; + DataDir* data_dir; + // properties for non-pending rowset + Version version; + VersionHash version_hash; + + // properties for pending rowset + int64_t txn_id; + PUniqueId load_id; + TabletUid tablet_uid; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H diff --git a/be/src/olap/run_length_byte_reader.cpp b/be/src/olap/rowset/run_length_byte_reader.cpp similarity index 97% rename from be/src/olap/run_length_byte_reader.cpp rename to be/src/olap/rowset/run_length_byte_reader.cpp index b4e17f1a5d28cd..287fa425dc2324 100644 --- a/be/src/olap/run_length_byte_reader.cpp +++ b/be/src/olap/rowset/run_length_byte_reader.cpp @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "olap/run_length_byte_reader.h" +#include "olap/rowset/run_length_byte_reader.h" -#include "olap/column_reader.h" +#include "olap/rowset/column_reader.h" #include "olap/in_stream.h" namespace doris { @@ -145,4 +145,4 @@ OLAPStatus RunLengthByteReader::skip(uint64_t num_values) { return res; } -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/run_length_byte_reader.h b/be/src/olap/rowset/run_length_byte_reader.h similarity index 88% rename from be/src/olap/run_length_byte_reader.h rename to be/src/olap/rowset/run_length_byte_reader.h index f20e2a344a5b24..c3e45d1a2fe5a2 100644 --- a/be/src/olap/run_length_byte_reader.h +++ b/be/src/olap/rowset/run_length_byte_reader.h @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
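// Illustrative sketch (not part of this patch): filling the RowsetWriterContext
// from rowset_writer_context.h above for a pending (transactional load) rowset.
// Per the comments in the struct, a pending rowset carries txn_id/load_id and
// stays in a PREPARED/COMMITTED state until it becomes visible, at which point
// version and version_hash matter instead. All concrete values are placeholders.
static RowsetWriterContext make_pending_writer_context(int64_t rowset_id,
                                                       int64_t tablet_id,
                                                       int64_t txn_id,
                                                       const TabletSchema* schema,
                                                       DataDir* data_dir,
                                                       const std::string& path_prefix) {
    RowsetWriterContext context;            // defaults: ALPHA_ROWSET, PREPARED, zeroed ids
    context.rowset_id = rowset_id;
    context.tablet_id = tablet_id;
    context.rowset_path_prefix = path_prefix;
    context.tablet_schema = schema;
    context.data_dir = data_dir;
    context.txn_id = txn_id;                // pending-rowset property
    context.load_id.set_hi(0);              // placeholder load id
    context.load_id.set_lo(0);
    return context;                         // version/version_hash keep their defaults
}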
-#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_BYTE_READER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_BYTE_READER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_BYTE_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_BYTE_READER_H #include "olap/file_stream.h" -#include "olap/run_length_byte_writer.h" +#include "olap/rowset/run_length_byte_writer.h" #include "olap/olap_define.h" namespace doris { @@ -54,4 +54,4 @@ class RunLengthByteReader { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_BYTE_READER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_BYTE_READER_H \ No newline at end of file diff --git a/be/src/olap/run_length_byte_writer.cpp b/be/src/olap/rowset/run_length_byte_writer.cpp similarity index 98% rename from be/src/olap/run_length_byte_writer.cpp rename to be/src/olap/rowset/run_length_byte_writer.cpp index b9f9ef580d315b..330259d2b76b4f 100644 --- a/be/src/olap/run_length_byte_writer.cpp +++ b/be/src/olap/rowset/run_length_byte_writer.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "olap/run_length_byte_writer.h" +#include "olap/rowset/run_length_byte_writer.h" #include "olap/out_stream.h" @@ -142,5 +142,4 @@ void RunLengthByteWriter::get_position(PositionEntryWriter* index_entry) const { index_entry->add_position(_num_literals); } -} // namespace doris - +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/run_length_byte_writer.h b/be/src/olap/rowset/run_length_byte_writer.h similarity index 90% rename from be/src/olap/run_length_byte_writer.h rename to be/src/olap/rowset/run_length_byte_writer.h index 6002ed65767dea..ac083357fc04af 100644 --- a/be/src/olap/run_length_byte_writer.h +++ b/be/src/olap/rowset/run_length_byte_writer.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_BYTE_WRITER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_BYTE_WRITER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_BYTE_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_BYTE_WRITER_H #include "olap/stream_index_writer.h" #include "olap/olap_define.h" @@ -53,4 +53,4 @@ class RunLengthByteWriter { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_BYTE_WRITER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_BYTE_WRITER_H \ No newline at end of file diff --git a/be/src/olap/run_length_integer_reader.cpp b/be/src/olap/rowset/run_length_integer_reader.cpp similarity index 99% rename from be/src/olap/run_length_integer_reader.cpp rename to be/src/olap/rowset/run_length_integer_reader.cpp index e7e34f6b6819f6..2afa5362d615e1 100644 --- a/be/src/olap/run_length_integer_reader.cpp +++ b/be/src/olap/rowset/run_length_integer_reader.cpp @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-#include "olap/run_length_integer_reader.h" +#include "olap/rowset/run_length_integer_reader.h" -#include "olap/column_reader.h" +#include "olap/rowset/column_reader.h" #include "olap/in_stream.h" #include "olap/serialize.h" @@ -432,5 +432,4 @@ OLAPStatus RunLengthIntegerReader::skip(uint64_t num_values) { return res; } -} // namespace doris - +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/run_length_integer_reader.h b/be/src/olap/rowset/run_length_integer_reader.h similarity index 90% rename from be/src/olap/run_length_integer_reader.h rename to be/src/olap/rowset/run_length_integer_reader.h index 1e16f8b96e5725..41581b71b54a59 100644 --- a/be/src/olap/run_length_integer_reader.h +++ b/be/src/olap/rowset/run_length_integer_reader.h @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_INTEGER_READER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_INTEGER_READER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_INTEGER_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_INTEGER_READER_H #include "olap/file_stream.h" -#include "olap/run_length_integer_writer.h" +#include "olap/rowset/run_length_integer_writer.h" #include "olap/stream_index_reader.h" #include "olap/olap_define.h" #include "util/runtime_profile.h" @@ -74,4 +74,4 @@ class RunLengthIntegerReader { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_INTEGER_READER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_INTEGER_READER_H \ No newline at end of file diff --git a/be/src/olap/run_length_integer_writer.cpp b/be/src/olap/rowset/run_length_integer_writer.cpp similarity index 99% rename from be/src/olap/run_length_integer_writer.cpp rename to be/src/olap/rowset/run_length_integer_writer.cpp index f58ece504d0aa2..44cb6feaff2aff 100644 --- a/be/src/olap/run_length_integer_writer.cpp +++ b/be/src/olap/rowset/run_length_integer_writer.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "olap/run_length_integer_writer.h" +#include "olap/rowset/run_length_integer_writer.h" #include @@ -730,5 +730,4 @@ void RunLengthIntegerWriter::get_position(PositionEntryWriter* index_entry, bool } } -} // namespace doris - +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/run_length_integer_writer.h b/be/src/olap/rowset/run_length_integer_writer.h similarity index 98% rename from be/src/olap/run_length_integer_writer.h rename to be/src/olap/rowset/run_length_integer_writer.h index 60d61ed071f58b..df8462564ca4d8 100644 --- a/be/src/olap/run_length_integer_writer.h +++ b/be/src/olap/rowset/run_length_integer_writer.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_INTEGER_WRITER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_INTEGER_WRITER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_INTEGER_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_INTEGER_WRITER_H #include @@ -296,4 +296,4 @@ class RunLengthIntegerWriter { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_RUN_LENGTH_INTEGER_WRITER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_RUN_LENGTH_INTEGER_WRITER_H \ No newline at end of file diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp new file mode 100644 index 00000000000000..edbf65c490223b --- /dev/null +++ b/be/src/olap/rowset/segment_group.cpp @@ -0,0 +1,1014 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_group.h" + +#include +#include +#include +#include +#include + +#include "olap/data_dir.h" +#include "olap/column_mapping.h" +#include "olap/rowset/column_data.h" +#include "olap/row_block.h" +#include "olap/row_cursor.h" +#include "olap/schema.h" +#include "olap/storage_engine.h" +#include "olap/utils.h" +#include "olap/wrapper_field.h" +#include "util/stack_util.h" + +using std::ifstream; +using std::string; +using std::vector; + +namespace doris { + +#define SEGMENT_GROUP_PARAM_VALIDATE() \ + do { \ + if (!_index_loaded) { \ + OLAP_LOG_WARNING("fail to find, index is not loaded. 
[segment_group_id=%d]", \ + _segment_group_id); \ + return OLAP_ERR_NOT_INITED; \ + } \ + } while (0); + +#define POS_PARAM_VALIDATE(pos) \ + do { \ + if (NULL == pos) { \ + OLAP_LOG_WARNING("fail to find, NULL position parameter."); \ + return OLAP_ERR_INPUT_PARAMETER_ERROR; \ + } \ + } while (0); + +#define SLICE_PARAM_VALIDATE(slice) \ + do { \ + if (NULL == slice) { \ + OLAP_LOG_WARNING("fail to find, NULL slice parameter."); \ + return OLAP_ERR_INPUT_PARAMETER_ERROR; \ + } \ + } while (0); + +SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const TabletSchema* schema, + const std::string& rowset_path_prefix, Version version, VersionHash version_hash, + bool delete_flag, int32_t segment_group_id, int32_t num_segments) + : _tablet_id(tablet_id), + _rowset_id(rowset_id), + _schema(schema), + _rowset_path_prefix(rowset_path_prefix), + _version(version), + _version_hash(version_hash), + _delete_flag(delete_flag), + _segment_group_id(segment_group_id), + _num_segments(num_segments) { + _index_loaded = false; + _ref_count = 0; + _is_pending = false; + _partition_id = 0; + _txn_id = 0; + _short_key_length = 0; + _new_short_key_length = 0; + _short_key_buf = nullptr; + _new_segment_created = false; + _empty = false; + + for (size_t i = 0; i < _schema->num_short_key_columns(); ++i) { + const TabletColumn& column = _schema->column(i); + _short_key_columns.push_back(column); + _short_key_length += column.index_length() + 1;// 1 for null byte + if (column.type() == OLAP_FIELD_TYPE_CHAR || + column.type() == OLAP_FIELD_TYPE_VARCHAR) { + _new_short_key_length += sizeof(Slice) + 1; + } else { + _new_short_key_length += column.index_length() + 1; + } + } +} + +SegmentGroup::SegmentGroup(int64_t tablet_id, int64_t rowset_id, const TabletSchema* schema, + const std::string& rowset_path_prefix, bool delete_flag, + int32_t segment_group_id, int32_t num_segments, bool is_pending, + TPartitionId partition_id, TTransactionId transaction_id) : _tablet_id(tablet_id), + _rowset_id(rowset_id), + _schema(schema), + _rowset_path_prefix(rowset_path_prefix), + _delete_flag(delete_flag), + _segment_group_id(segment_group_id), _num_segments(num_segments), + _is_pending(is_pending), _partition_id(partition_id), + _txn_id(transaction_id) { + _version = {-1, -1}; + _version_hash = 0; + _load_id.set_hi(0); + _load_id.set_lo(0); + _index_loaded = false; + _ref_count = 0; + _short_key_length = 0; + _new_short_key_length = 0; + _short_key_buf = NULL; + _new_segment_created = false; + _empty = false; + + for (size_t i = 0; i < _schema->num_short_key_columns(); ++i) { + const TabletColumn& column = _schema->column(i); + _short_key_columns.push_back(column); + _short_key_length += column.index_length() + 1;// 1 for null byte + if (column.type() == OLAP_FIELD_TYPE_CHAR + || column.type() == OLAP_FIELD_TYPE_VARCHAR) { + _new_short_key_length += sizeof(Slice) + 1; + } else { + _new_short_key_length += column.index_length() + 1; + } + } +} + +SegmentGroup::~SegmentGroup() { + delete [] _short_key_buf; + _current_file_handler.close(); + + for (size_t i = 0; i < _zone_maps.size(); ++i) { + SAFE_DELETE(_zone_maps[i].first); + SAFE_DELETE(_zone_maps[i].second); + } + _seg_pb_map.clear(); +} + +std::string SegmentGroup::_construct_file_name(int32_t segment_id, const string& suffix) const { + // during convert from old files, the segment group id == -1, but we want to convert + // it to 0 + int32_t tmp_sg_id = 0; + if (_segment_group_id > 0) { + tmp_sg_id = _segment_group_id; + } + std::string file_name = 
std::to_string(_rowset_id) + "_" + + std::to_string(tmp_sg_id) + "_" + std::to_string(segment_id) + suffix; + return file_name; +} + +std::string SegmentGroup::_construct_file_name(int64_t rowset_id, int32_t segment_id, const string& suffix) const { + std::string file_name = std::to_string(rowset_id) + "_" + + std::to_string(_segment_group_id) + "_" + std::to_string(segment_id) + suffix; + return file_name; +} + +std::string SegmentGroup::construct_index_file_path(const std::string& snapshot_path, int32_t segment_id) const { + std::string file_path = snapshot_path; + file_path.append("/"); + file_path.append(_construct_file_name(segment_id, ".idx")); + return file_path; +} + +std::string SegmentGroup::construct_index_file_path(int32_t segment_id) const { + return construct_index_file_path(_rowset_path_prefix, segment_id); +} + +std::string SegmentGroup::construct_data_file_path(const std::string& snapshot_path, int32_t segment_id) const { + std::string file_path = snapshot_path; + file_path.append("/"); + file_path.append(_construct_file_name(segment_id, ".dat")); + return file_path; +} + +std::string SegmentGroup::construct_data_file_path(int32_t segment_id) const { + return construct_data_file_path(_rowset_path_prefix, segment_id); +} + +void SegmentGroup::acquire() { + atomic_inc(&_ref_count); +} + +int64_t SegmentGroup::ref_count() { + return _ref_count; +} + +void SegmentGroup::release() { + atomic_dec(&_ref_count); +} + +bool SegmentGroup::is_in_use() { + return _ref_count > 0; +} + +// you can not use SegmentGroup after delete_all_files(), or else unknown behavior occurs. +bool SegmentGroup::delete_all_files() { + bool success = true; + if (_empty) { + return success; + } + for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { + // get full path for one segment + string index_path = construct_index_file_path(seg_id); + string data_path = construct_data_file_path(seg_id); + + LOG(INFO) << "delete index file. path=" << index_path; + if (remove(index_path.c_str()) != 0) { + char errmsg[64]; + LOG(WARNING) << "fail to delete index file. err=" << strerror_r(errno, errmsg, 64) + << ", path=" << index_path; + success = false; + } + + LOG(INFO) << "delete data file. path=" << data_path; + if (remove(data_path.c_str()) != 0) { + char errmsg[64]; + LOG(WARNING) << "fail to delete data file. err=" << strerror_r(errno, errmsg, 64) + << ", path=" << data_path; + success = false; + } + } + return success; +} + +OLAPStatus SegmentGroup::add_zone_maps_for_linked_schema_change( + const std::vector>& zone_map_fields, + const SchemaMapping& schema_mapping) { + //When add rollup tablet, the base tablet index maybe empty + if (zone_map_fields.size() == 0) { + return OLAP_SUCCESS; + } + + // 1. rollup tablet num_key_columns() will less than base tablet zone_map_fields.size(). + // For LinkedSchemaChange, the rollup tablet keys order is the same as base tablet + // 2. 
adding column to existed table, num_key_columns() will larger than + // zone_map_fields.size() + + int num_new_keys = 0; + for (size_t i = 0; i < _schema->num_key_columns(); ++i) { + const TabletColumn& column = _schema->column(i); + + WrapperField* first = WrapperField::create(column); + DCHECK(first != NULL) << "failed to allocate memory for field: " << i; + + WrapperField* second = WrapperField::create(column); + DCHECK(second != NULL) << "failed to allocate memory for field: " << i; + + // for new key column, use default value to fill into column_statistics + if (schema_mapping[i].ref_column == -1) { + num_new_keys++; + + first->copy(schema_mapping[i].default_value); + second->copy(schema_mapping[i].default_value); + } else { + first->copy(zone_map_fields[i - num_new_keys].first); + second->copy(zone_map_fields[i - num_new_keys].second); + } + + _zone_maps.push_back(std::make_pair(first, second)); + } + + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::add_zone_maps( + const std::vector>& zone_map_fields) { + DCHECK(zone_map_fields.size() == _schema->num_key_columns()); + for (size_t i = 0; i < zone_map_fields.size(); ++i) { + const TabletColumn& column = _schema->column(i); + WrapperField* first = WrapperField::create(column); + DCHECK(first != NULL) << "failed to allocate memory for field: " << i; + first->copy(zone_map_fields[i].first); + + WrapperField* second = WrapperField::create(column); + DCHECK(second != NULL) << "failed to allocate memory for field: " << i; + second->copy(zone_map_fields[i].second); + + _zone_maps.push_back(std::make_pair(first, second)); + } + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::add_zone_maps( + std::vector > &zone_map_strings, + std::vector &null_vec) { + DCHECK(zone_map_strings.size() == _schema->num_key_columns()); + for (size_t i = 0; i < zone_map_strings.size(); ++i) { + const TabletColumn& column = _schema->column(i); + WrapperField* first = WrapperField::create(column); + DCHECK(first != NULL) << "failed to allocate memory for field: " << i ; + RETURN_NOT_OK(first->from_string(zone_map_strings[i].first)); + if (null_vec[i]) { + //[min, max] -> [NULL, max] + first->set_null(); + } + WrapperField* second = WrapperField::create(column); + DCHECK(first != NULL) << "failed to allocate memory for field: " << i ; + RETURN_NOT_OK(second->from_string(zone_map_strings[i].second)); + _zone_maps.push_back(std::make_pair(first, second)); + } + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::load() { + if (_empty) { + _index_loaded = true; + return OLAP_SUCCESS; + } + OLAPStatus res = OLAP_ERR_INDEX_LOAD_ERROR; + boost::lock_guard guard(_index_load_lock); + + if (_index_loaded) { + return OLAP_SUCCESS; + } + + if (_num_segments == 0) { + LOG(WARNING) << "fail to load index, segments number is 0."; + return res; + } + + if (_index.init(_short_key_length, _new_short_key_length, + _schema->num_short_key_columns(), &_short_key_columns) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create MemIndex. num_segment=" << _num_segments; + return res; + } + + // for each segment + for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { + string seg_path = construct_data_file_path(seg_id); + if (OLAP_SUCCESS != (res = load_pb(seg_path.c_str(), seg_id))) { + LOG(WARNING) << "failed to load pb structures. 
[seg_path='" << seg_path << "']"; + + return res; + } + + // get full path for one segment + std::string path = construct_index_file_path(seg_id); + if ((res = _index.load_segment(path.c_str(), &_current_num_rows_per_row_block)) + != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load segment. [path='" << path << "']"; + + return res; + } + } + + _delete_flag = _index.delete_flag(); + _index_loaded = true; + + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::load_pb(const char* file, uint32_t seg_id) { + OLAPStatus res = OLAP_SUCCESS; + + FileHeader seg_file_header; + FileHandler seg_file_handler; + res = seg_file_handler.open(file, O_RDONLY); + if (OLAP_SUCCESS != res) { + OLAP_LOG_WARNING("failed to open segment file. [err=%d, file=%s]", res, file); + return res; + } + + res = seg_file_header.unserialize(&seg_file_handler); + if (OLAP_SUCCESS != res) { + seg_file_handler.close(); + OLAP_LOG_WARNING("fail to unserialize header. [err=%d, path='%s']", res, file); + return res; + } + + _seg_pb_map[seg_id] = seg_file_header; + seg_file_handler.close(); + return OLAP_SUCCESS; +} + +bool SegmentGroup::index_loaded() { + return _index_loaded; +} + +OLAPStatus SegmentGroup::validate() { + if (_empty) { + return OLAP_SUCCESS; + } + + OLAPStatus res = OLAP_SUCCESS; + for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { + FileHeader index_file_header; + FileHeader data_file_header; + + // get full path for one segment + string index_path = construct_index_file_path(seg_id); + string data_path = construct_data_file_path(seg_id); + + // 检查index文件头 + if ((res = index_file_header.validate(index_path)) != OLAP_SUCCESS) { + LOG(WARNING) << "validate index file error. [file='" << index_path << "']"; + return res; + } + + // 检查data文件头 + if ((res = data_file_header.validate(data_path)) != OLAP_SUCCESS) { + LOG(WARNING) << "validate data file error. [file='" << data_path << "']"; + return res; + } + } + + return OLAP_SUCCESS; +} + +bool SegmentGroup::check() { + // if the segment group is converted from old files, _empty == false but _num_segments == 0 + if (_empty && (_num_segments > 0 || !zero_num_rows())) { + LOG(WARNING) << "invalid num segments for empty segment group, _num_segments:" << _num_segments + << ",num rows:" << num_rows(); + return false; + } + return true; +} + +OLAPStatus SegmentGroup::find_short_key(const RowCursor& key, + RowCursor* helper_cursor, + bool find_last, + RowBlockPosition* pos) const { + SEGMENT_GROUP_PARAM_VALIDATE(); + POS_PARAM_VALIDATE(pos); + + // 由于find会从前一个segment找起,如果前一个segment中恰好没有该key, + // 就用前移后移来移动segment的位置. 
+ OLAPIndexOffset offset = _index.find(key, helper_cursor, find_last); + if (offset.offset > 0) { + offset.offset = offset.offset - 1; + + OLAPIndexOffset next_offset = _index.next(offset); + if (!(next_offset == _index.end())) { + offset = next_offset; + } + } + + VLOG(3) << "seg=" << offset.segment << ", offset=" << offset.offset; + return _index.get_row_block_position(offset, pos); +} + +OLAPStatus SegmentGroup::get_row_block_entry(const RowBlockPosition& pos, EntrySlice* entry) const { + SEGMENT_GROUP_PARAM_VALIDATE(); + SLICE_PARAM_VALIDATE(entry); + + return _index.get_entry(_index.get_offset(pos), entry); +} + +OLAPStatus SegmentGroup::find_first_row_block(RowBlockPosition* position) const { + SEGMENT_GROUP_PARAM_VALIDATE(); + POS_PARAM_VALIDATE(position); + + return _index.get_row_block_position(_index.find_first(), position); +} + +OLAPStatus SegmentGroup::find_last_row_block(RowBlockPosition* position) const { + SEGMENT_GROUP_PARAM_VALIDATE(); + POS_PARAM_VALIDATE(position); + + return _index.get_row_block_position(_index.find_last(), position); +} + +OLAPStatus SegmentGroup::find_next_row_block(RowBlockPosition* pos, bool* eof) const { + SEGMENT_GROUP_PARAM_VALIDATE(); + POS_PARAM_VALIDATE(pos); + POS_PARAM_VALIDATE(eof); + + OLAPIndexOffset current = _index.get_offset(*pos); + *eof = false; + + OLAPIndexOffset next = _index.next(current); + if (next == _index.end()) { + *eof = true; + return OLAP_ERR_INDEX_EOF; + } + + return _index.get_row_block_position(next, pos); +} + +OLAPStatus SegmentGroup::find_mid_point(const RowBlockPosition& low, + const RowBlockPosition& high, + RowBlockPosition* output, + uint32_t* dis) const { + *dis = compute_distance(low, high); + if (*dis >= _index.count()) { + return OLAP_ERR_INDEX_EOF; + } else { + *output = low; + if (advance_row_block(*dis / 2, output) != OLAP_SUCCESS) { + return OLAP_ERR_INDEX_EOF; + } + + return OLAP_SUCCESS; + } +} + +OLAPStatus SegmentGroup::find_prev_point( + const RowBlockPosition& current, RowBlockPosition* prev) const { + OLAPIndexOffset current_offset = _index.get_offset(current); + OLAPIndexOffset prev_offset = _index.prev(current_offset); + + return _index.get_row_block_position(prev_offset, prev); +} + +OLAPStatus SegmentGroup::advance_row_block(int64_t num_row_blocks, RowBlockPosition* position) const { + SEGMENT_GROUP_PARAM_VALIDATE(); + POS_PARAM_VALIDATE(position); + + OLAPIndexOffset off = _index.get_offset(*position); + iterator_offset_t absolute_offset = _index.get_absolute_offset(off) + num_row_blocks; + if (absolute_offset >= _index.count()) { + return OLAP_ERR_INDEX_EOF; + } + + return _index.get_row_block_position(_index.get_relative_offset(absolute_offset), position); +} + +// PRECONDITION position1 < position2 +uint32_t SegmentGroup::compute_distance(const RowBlockPosition& position1, + const RowBlockPosition& position2) const { + iterator_offset_t offset1 = _index.get_absolute_offset(_index.get_offset(position1)); + iterator_offset_t offset2 = _index.get_absolute_offset(_index.get_offset(position2)); + + return offset2 > offset1 ? 
offset2 - offset1 : 0; +} + +OLAPStatus SegmentGroup::add_segment() { + // 打开文件 + ++_num_segments; + + OLAPIndexHeaderMessage* index_header = NULL; + // 构造Proto格式的Header + index_header = _file_header.mutable_message(); + index_header->set_start_version(_version.first); + index_header->set_end_version(_version.second); + index_header->set_cumulative_version_hash(_version_hash); + index_header->set_segment(_num_segments - 1); + index_header->set_num_rows_per_block(_schema->num_rows_per_row_block()); + index_header->set_delete_flag(_delete_flag); + index_header->set_null_supported(true); + + // 分配一段存储short key的内存, 初始化index_row + if (_short_key_buf == NULL) { + _short_key_buf = new(std::nothrow) char[_short_key_length]; + if (_short_key_buf == NULL) { + OLAP_LOG_WARNING("malloc short_key_buf error."); + return OLAP_ERR_MALLOC_ERROR; + } + + if (_current_index_row.init(*_schema) != OLAP_SUCCESS) { + OLAP_LOG_WARNING("init _current_index_row fail."); + return OLAP_ERR_INIT_FAILED; + } + } + + // 初始化checksum + _checksum = ADLER32_INIT; + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::add_row_block(const RowBlock& row_block, const uint32_t data_offset) { + // get first row of the row_block to distill index item. + row_block.get_row(0, &_current_index_row); + return add_short_key(_current_index_row, data_offset); +} + +OLAPStatus SegmentGroup::add_short_key(const RowCursor& short_key, const uint32_t data_offset) { + OLAPStatus res = OLAP_SUCCESS; + if (!_new_segment_created) { + string file_path = construct_index_file_path(_num_segments - 1); + boost::filesystem::path tablet_path(_rowset_path_prefix); + boost::filesystem::path data_dir_path = tablet_path.parent_path().parent_path().parent_path().parent_path(); + std::string data_dir_string = data_dir_path.string(); + DataDir* data_dir = StorageEngine::instance()->get_store(data_dir_string); + data_dir->add_pending_ids(ROWSET_ID_PREFIX + std::to_string(_rowset_id)); + res = _current_file_handler.open_with_mode( + file_path.c_str(), O_CREAT | O_EXCL | O_WRONLY, S_IRUSR | S_IWUSR); + if (res != OLAP_SUCCESS) { + char errmsg[64]; + LOG(WARNING) << "can not create file. file_path=" << file_path + << ", err='" << strerror_r(errno, errmsg, 64); + return res; + } + _new_segment_created = true; + + // 准备FileHeader + if ((res = _file_header.prepare(&_current_file_handler)) != OLAP_SUCCESS) { + OLAP_LOG_WARNING("write file header error. [err=%m]"); + return res; + } + + // 跳过FileHeader + if (_current_file_handler.seek(_file_header.size(), SEEK_SET) == -1) { + OLAP_LOG_WARNING("lseek header file error. [err=%m]"); + res = OLAP_ERR_IO_ERROR; + return res; + } + } + + // 将short key的内容写入_short_key_buf + size_t offset = 0; + + //short_key.write_null_array(_short_key_buf); + //offset += short_key.get_num_null_byte(); + for (size_t i = 0; i < _short_key_columns.size(); i++) { + short_key.write_index_by_index(i, _short_key_buf + offset); + offset += short_key.get_index_size(i) + 1; + } + + // 写入Short Key对应的数据 + if ((res = _current_file_handler.write(_short_key_buf, _short_key_length)) != OLAP_SUCCESS) { + OLAP_LOG_WARNING("write short key failed. [err=%m]"); + + return res; + } + + // 写入对应的数据文件偏移量 + if ((res = _current_file_handler.write(&data_offset, sizeof(data_offset))) != OLAP_SUCCESS) { + OLAP_LOG_WARNING("write data_offset failed. 
[err=%m]"); + return res; + } + + _checksum = olap_adler32(_checksum, _short_key_buf, _short_key_length); + _checksum = olap_adler32(_checksum, + reinterpret_cast(&data_offset), + sizeof(data_offset)); + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::finalize_segment(uint32_t data_segment_size, int64_t num_rows) { + // 准备FileHeader + OLAPStatus res = OLAP_SUCCESS; + + int file_length = _current_file_handler.tell(); + if (file_length == -1) { + LOG(WARNING) << "get file_length error. err=" << Errno::no() + << ", _new_segment_created=" << _new_segment_created; + return OLAP_ERR_IO_ERROR; + } + + _file_header.set_file_length(file_length); + _file_header.set_checksum(_checksum); + _file_header.mutable_extra()->data_length = data_segment_size; + _file_header.mutable_extra()->num_rows = num_rows; + + // 写入更新之后的FileHeader + if ((res = _file_header.serialize(&_current_file_handler)) != OLAP_SUCCESS) { + OLAP_LOG_WARNING("write file header error. [err=%m]"); + + return res; + } + + VLOG(3) << "finalize_segment. file_name=" << _current_file_handler.file_name() + << ", file_length=" << file_length; + + if ((res = _current_file_handler.close()) != OLAP_SUCCESS) { + OLAP_LOG_WARNING("close file error. [err=%m]"); + + return res; + } + + _new_segment_created = false; + return OLAP_SUCCESS; +} + +uint64_t SegmentGroup::num_index_entries() const { + return _index.count(); +} + +size_t SegmentGroup::current_num_rows_per_row_block() const { + return _current_num_rows_per_row_block; +} + +const TabletSchema& SegmentGroup::get_tablet_schema() { + return *_schema; +} + +int SegmentGroup::get_num_key_columns() { + return _schema->num_key_columns(); +} + +int SegmentGroup::get_num_short_key_columns() { + return _schema->num_short_key_columns(); +} + +size_t SegmentGroup::get_num_rows_per_row_block() { + return _schema->num_rows_per_row_block(); +} + +std::string SegmentGroup::rowset_path_prefix() { + return _rowset_path_prefix; +} + +int64_t SegmentGroup::get_tablet_id() { + return _tablet_id; +} + +OLAPStatus SegmentGroup::make_snapshot(const std::string& snapshot_path, + std::vector* success_links) { + if (_empty) { + return OLAP_SUCCESS; + } + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string snapshot_data_file_name = construct_data_file_path(snapshot_path, segment_id); + if (check_dir_existed(snapshot_data_file_name)) { + LOG(WARNING) << "snapshot dest file already exist, fail to make snapshot." + << " file=" << snapshot_data_file_name; + return OLAP_ERR_FILE_ALREADY_EXIST; + } + std::string cur_data_file_name = construct_data_file_path(segment_id); + if (link(cur_data_file_name.c_str(), snapshot_data_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. from=" << cur_data_file_name << ", " + << "to=" << snapshot_data_file_name << ", " << "errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } + success_links->push_back(snapshot_data_file_name); + std::string snapshot_index_file_name = construct_index_file_path(snapshot_path, segment_id); + if (check_dir_existed(snapshot_index_file_name)) { + LOG(WARNING) << "snapshot dest file already exist, fail to make snapshot." + << " file=" << snapshot_index_file_name; + return OLAP_ERR_FILE_ALREADY_EXIST; + } + std::string cur_index_file_name = construct_index_file_path(segment_id); + if (link(cur_index_file_name.c_str(), snapshot_index_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. 
from=" << cur_index_file_name << ", " + << "to=" << snapshot_index_file_name << ", " << "errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } + success_links->push_back(snapshot_index_file_name); + } + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::copy_files_to_path(const std::string& dest_path, + std::vector* success_files) { + if (_empty) { + return OLAP_SUCCESS; + } + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string dest_data_file = construct_data_file_path(dest_path, segment_id); + if (check_dir_existed(dest_data_file)) { + LOG(WARNING) << "file already exists:" << dest_data_file; + return OLAP_ERR_FILE_ALREADY_EXIST; + } + std::string data_file_to_copy = construct_data_file_path(segment_id); + if (copy_file(data_file_to_copy, dest_data_file) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to copy data file. from=" << data_file_to_copy + << ", to=" << dest_data_file + << ", errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } + success_files->push_back(dest_data_file); + std::string dest_index_file = construct_index_file_path(dest_path, segment_id); + if (check_dir_existed(dest_index_file)) { + LOG(WARNING) << "file already exists:" << dest_index_file; + return OLAP_ERR_FILE_ALREADY_EXIST; + } + std::string index_file_to_copy = construct_index_file_path(segment_id); + if (copy_file(index_file_to_copy, dest_index_file) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to copy index file. from=" << index_file_to_copy + << ", to=" << dest_index_file + << ", errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } + success_files->push_back(dest_index_file); + } + return OLAP_SUCCESS; +} + +// when convert from old files, remove existing files +// convert from old files in 2 cases: +// case 1: clone from old version be +// case 2: upgrade to new version be +OLAPStatus SegmentGroup::convert_from_old_files(const std::string& snapshot_path, + std::vector* success_links) { + if (_empty) { + // the segment group is empty, it does not have files, just return + return OLAP_SUCCESS; + } + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string new_data_file_name = construct_data_file_path(_rowset_path_prefix, segment_id); + // if file exist should remove it because same file name does not mean same data + if (check_dir_existed(new_data_file_name)) { + LOG(INFO) << "file already exist, remove it. file=" << new_data_file_name; + RETURN_NOT_OK(remove_dir(new_data_file_name)); + } + std::string old_data_file_name = construct_old_data_file_path(snapshot_path, segment_id); + if (link(old_data_file_name.c_str(), new_data_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. from=" << old_data_file_name + << ", to=" << new_data_file_name << ", errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } else { + VLOG(3) << "link data file from " << old_data_file_name + << " to " << new_data_file_name << " successfully"; + } + success_links->push_back(new_data_file_name); + std::string new_index_file_name = construct_index_file_path(_rowset_path_prefix, segment_id); + if (check_dir_existed(new_index_file_name)) { + LOG(INFO) << "file already exist, remove it. file=" << new_index_file_name; + RETURN_NOT_OK(remove_dir(new_index_file_name)); + } + std::string old_index_file_name = construct_old_index_file_path(snapshot_path, segment_id); + if (link(old_index_file_name.c_str(), new_index_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. 
from=" << old_index_file_name + << ", to=" << new_index_file_name + << ", errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } else { + VLOG(3) << "link index file from " << old_index_file_name + << " to " << new_index_file_name << " successfully"; + } + success_links->push_back(new_index_file_name); + } + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::convert_to_old_files(const std::string& snapshot_path, + std::vector* success_links) { + if (_empty) { + return OLAP_SUCCESS; + } + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string new_data_file_name = construct_data_file_path(_rowset_path_prefix, segment_id); + std::string old_data_file_name = construct_old_data_file_path(snapshot_path, segment_id); + if (!check_dir_existed(old_data_file_name)) { + if (link(new_data_file_name.c_str(), old_data_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. from=" << new_data_file_name << ", " + << "to=" << old_data_file_name << ", " << "errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } + success_links->push_back(old_data_file_name); + } + VLOG(3) << "create hard link. from=" << new_data_file_name << ", " + << "to=" << old_data_file_name; + std::string new_index_file_name = construct_index_file_path(_rowset_path_prefix, segment_id); + std::string old_index_file_name = construct_old_index_file_path(snapshot_path, segment_id); + if (!check_dir_existed(old_index_file_name)) { + if (link(new_index_file_name.c_str(), old_index_file_name.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. from=" << new_index_file_name << ", " + << "to=" << old_index_file_name << ", " << "errno=" << Errno::no(); + return OLAP_ERR_OS_ERROR; + } + success_links->push_back(old_index_file_name); + } + VLOG(3) << "create hard link. 
from=" << new_index_file_name << ", " + << "to=" << old_index_file_name; + } + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::remove_old_files(std::vector* links_to_remove) { + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string old_data_file_name = construct_old_data_file_path(_rowset_path_prefix, segment_id); + if (check_dir_existed(old_data_file_name)) { + RETURN_NOT_OK(remove_dir(old_data_file_name)); + links_to_remove->push_back(old_data_file_name); + } + std::string old_index_file_name = construct_old_index_file_path(_rowset_path_prefix, segment_id); + if (check_dir_existed(old_index_file_name)) { + RETURN_NOT_OK(remove_dir(old_index_file_name)); + links_to_remove->push_back(old_index_file_name); + } + // if segment group id == 0, it maybe convert from old files which do not have segment group id in file path + if (_segment_group_id == 0) { + old_data_file_name = _construct_err_sg_data_file_path(_rowset_path_prefix, segment_id); + if (check_dir_existed(old_data_file_name)) { + RETURN_NOT_OK(remove_dir(old_data_file_name)); + links_to_remove->push_back(old_data_file_name); + } + old_index_file_name = _construct_err_sg_index_file_path(_rowset_path_prefix, segment_id); + if (check_dir_existed(old_index_file_name)) { + RETURN_NOT_OK(remove_dir(old_index_file_name)); + links_to_remove->push_back(old_index_file_name); + } + } + } + std::string pending_delta_path = _rowset_path_prefix + PENDING_DELTA_PREFIX; + if (check_dir_existed(pending_delta_path)) { + LOG(INFO) << "remove pending delta path:" << pending_delta_path; + RETURN_NOT_OK(remove_all_dir(pending_delta_path)); + } + return OLAP_SUCCESS; +} + +OLAPStatus SegmentGroup::link_segments_to_path(const std::string& dest_path, int64_t rowset_id) { + if (dest_path.empty()) { + LOG(WARNING) << "dest path is empty, return error"; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + for (int segment_id = 0; segment_id < _num_segments; segment_id++) { + std::string data_file_name = _construct_file_name(rowset_id, segment_id, ".dat"); + std::string new_data_file_path = dest_path + "/" + data_file_name; + if (!check_dir_existed(new_data_file_path)) { + std::string origin_data_file_path = construct_data_file_path(_rowset_path_prefix, segment_id); + if (link(origin_data_file_path.c_str(), new_data_file_path.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. from=" << origin_data_file_path + << ", to=" << new_data_file_path << ", error=" << strerror(errno); + return OLAP_ERR_OS_ERROR; + } + } + std::string index_file_name = _construct_file_name(rowset_id, segment_id, ".idx"); + std::string new_index_file_path = dest_path + "/" + index_file_name; + if (!check_dir_existed(new_index_file_path)) { + std::string origin_idx_file_path = construct_index_file_path(_rowset_path_prefix, segment_id); + if (link(origin_idx_file_path.c_str(), new_index_file_path.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link. 
from=" << origin_idx_file_path + << ", to=" << new_index_file_path << ", error=" << strerror(errno); + return OLAP_ERR_OS_ERROR; + } + } + } + return OLAP_SUCCESS; +} + +std::string SegmentGroup::construct_old_index_file_path(const std::string& path_prefix, int32_t segment_id) const { + if (_is_pending) { + return _construct_old_pending_file_path(path_prefix, segment_id, ".idx"); + } else { + return _construct_old_file_path(path_prefix, segment_id, ".idx"); + } +} + +std::string SegmentGroup::construct_old_data_file_path(const std::string& path_prefix, int32_t segment_id) const { + if (_is_pending) { + return _construct_old_pending_file_path(path_prefix, segment_id, ".dat"); + } else { + return _construct_old_file_path(path_prefix, segment_id, ".dat"); + } +} + +std::string SegmentGroup::_construct_err_sg_index_file_path(const std::string& path_prefix, int32_t segment_id) const { + if (_is_pending) { + return _construct_old_pending_file_path(path_prefix, segment_id, ".idx"); + } else { + return _construct_err_sg_file_path(path_prefix, segment_id, ".idx"); + } +} + +std::string SegmentGroup::_construct_err_sg_data_file_path(const std::string& path_prefix, int32_t segment_id) const { + if (_is_pending) { + return _construct_old_pending_file_path(path_prefix, segment_id, ".dat"); + } else { + return _construct_err_sg_file_path(path_prefix, segment_id, ".dat"); + } +} + +std::string SegmentGroup::_construct_old_pending_file_path(const std::string& path_prefix, int32_t segment_id, + const std::string& suffix) const { + std::stringstream file_path; + file_path << path_prefix << "/" << PENDING_DELTA_PREFIX << "/" + << _txn_id << "_" + << _segment_group_id << "_" << segment_id << suffix; + return file_path.str(); +} + +std::string SegmentGroup::_construct_old_file_path(const std::string& path_prefix, int32_t segment_id, const std::string& suffix) const { + char file_path[OLAP_MAX_PATH_LEN]; + if (_segment_group_id == -1) { + snprintf(file_path, + sizeof(file_path), + "%s/%ld_%ld_%ld_%ld_%d%s", + path_prefix.c_str(), + _tablet_id, + _version.first, + _version.second, + _version_hash, + segment_id, + suffix.c_str()); + } else { + snprintf(file_path, + sizeof(file_path), + "%s/%ld_%ld_%ld_%ld_%d_%d%s", + path_prefix.c_str(), + _tablet_id, + _version.first, + _version.second, + _version_hash, + _segment_group_id, segment_id, + suffix.c_str()); + } + + return file_path; +} + +// construct file path for sg_id == -1 +std::string SegmentGroup::_construct_err_sg_file_path(const std::string& path_prefix, int32_t segment_id, const std::string& suffix) const { + char file_path[OLAP_MAX_PATH_LEN]; + snprintf(file_path, + sizeof(file_path), + "%s/%ld_%ld_%ld_%ld_%d%s", + path_prefix.c_str(), + _tablet_id, + _version.first, + _version.second, + _version_hash, + segment_id, + suffix.c_str()); + + return file_path; +} + +} // namespace doris diff --git a/be/src/olap/segment_group.h b/be/src/olap/rowset/segment_group.h similarity index 65% rename from be/src/olap/segment_group.h rename to be/src/olap/rowset/segment_group.h index ef9f641418cad7..aba3a0d07cf0c5 100644 --- a/be/src/olap/segment_group.h +++ b/be/src/olap/rowset/segment_group.h @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#pragma once +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_GROUP_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_GROUP_H #include #include @@ -32,7 +33,6 @@ #include "olap/file_helper.h" #include "olap/olap_common.h" #include "olap/olap_define.h" -#include "olap/olap_table.h" #include "olap/row_cursor.h" #include "olap/olap_index.h" #include "olap/utils.h" @@ -48,14 +48,14 @@ namespace doris { class SegmentGroup { friend class MemIndex; public: - typedef std::vector SchemaMapping; + SegmentGroup(int64_t tablet_id, int64_t rowset_id, const TabletSchema* tablet_schema, + const std::string& rowset_path_prefix, Version version, + VersionHash version_hash, bool delete_flag, int segment_group_id, int32_t num_segments); - SegmentGroup(OLAPTable* table, Version version, VersionHash version_hash, - bool delete_flag, int segment_group_id, int32_t num_segments); - - SegmentGroup(OLAPTable* table, bool delete_flag, int32_t segment_group_id, - int32_t num_segments, bool is_pending, - TPartitionId partition_id, TTransactionId transaction_id); + SegmentGroup(int64_t tablet_id, int64_t rowset_id, const TabletSchema* tablet_schema, + const std::string& rowset_path_prefix, bool delete_flag, + int32_t segment_group_id, int32_t num_segments, bool is_pending, + TPartitionId partition_id, TTransactionId transaction_id); virtual ~SegmentGroup(); @@ -64,37 +64,30 @@ class SegmentGroup { bool index_loaded(); OLAPStatus load_pb(const char* file, uint32_t seg_id); - bool has_column_statistics() { - return _column_statistics.size() != 0; + bool has_zone_maps() { + return _zone_maps.size() != 0; } - OLAPStatus add_column_statistics_for_linked_schema_change( - const std::vector>& column_statistic_fields, + OLAPStatus add_zone_maps_for_linked_schema_change( + const std::vector>& zone_map_fields, const SchemaMapping& schema_mapping); - OLAPStatus add_column_statistics( - const std::vector>& column_statistic_fields); + OLAPStatus add_zone_maps( + const std::vector>& zone_map_fields); - OLAPStatus add_column_statistics( - std::vector> &column_statistic_strings, + OLAPStatus add_zone_maps( + std::vector> &zone_map_strings, std::vector &null_vec); - const std::vector>& get_column_statistics() { - return _column_statistics; + const std::vector>& get_zone_maps() { + return _zone_maps; } // 检查index文件和data文件的有效性 OLAPStatus validate(); - // Finds position of the first (or last if find_last is set) row - // block that may contain the smallest key equal to or greater than - // 'key'. Returns true on success. If find_last is set, note that - // the position is the last block that can possibly contain the - // given key. - OLAPStatus find_row_block(const RowCursor& key, - RowCursor* helper_cursor, - bool find_last, - RowBlockPosition* position) const; + // this function should be called after load + bool check(); // Finds position of first row block contain the smallest key equal // to or greater than 'key'. Returns true on success. @@ -144,7 +137,6 @@ class SegmentGroup { OLAPStatus add_short_key(const RowCursor& short_key, const uint32_t data_offset); OLAPStatus add_row_block(const RowBlock& row_block, const uint32_t data_offset); OLAPStatus finalize_segment(uint32_t data_segment_size, int64_t num_rows); - void sync(); // reference count void acquire(); @@ -153,21 +145,18 @@ class SegmentGroup { int64_t ref_count(); // delete all files (*.idx; *.dat) - void delete_all_files(); - - // getters and setters. 
- // get associated OLAPTable pointer - inline OLAPTable* table() const { return _table; } - inline void set_table(OLAPTable* table) { _table = table; } + bool delete_all_files(); inline Version version() const { return _version; } + inline void set_version(Version version) { _version = version; } inline VersionHash version_hash() const { return _version_hash; } + inline void set_version_hash(VersionHash version_hash) { _version_hash = version_hash; } inline bool is_pending() const { return _is_pending; } inline void set_pending_finished() { _is_pending = false; } inline TPartitionId partition_id() const { return _partition_id; } - inline TTransactionId transaction_id() const { return _transaction_id; } + inline TTransactionId transaction_id() const { return _txn_id; } inline bool delete_flag() const { return _delete_flag; } @@ -200,8 +189,8 @@ class SegmentGroup { return _new_short_key_length; } - const RowFields& short_key_fields() const { - return _short_key_info_list; + const std::vector& short_key_columns() const { + return _short_key_columns; } bool empty() const { @@ -221,10 +210,6 @@ class SegmentGroup { // return count of entries in MemIndex uint64_t num_index_entries() const; - size_t current_num_rows_per_row_block() const { - return _current_num_rows_per_row_block; - } - OLAPStatus get_row_block_position(const OLAPIndexOffset& pos, RowBlockPosition* rbp) const { return _index.get_row_block_position(pos, rbp); } @@ -237,14 +222,71 @@ class SegmentGroup { return _index.get_null_supported(seg_id); } - std::string construct_index_file_path(int32_t segment_group_id, int32_t segment) const; - std::string construct_data_file_path(int32_t segment_group_id, int32_t segment) const; - void publish_version(Version version, VersionHash version_hash); + std::string construct_index_file_path(const std::string& snapshot_path, + int32_t segment_id) const; + std::string construct_index_file_path(int32_t segment_id) const; + std::string construct_data_file_path(const std::string& snapshot_path, + int32_t segment_id) const; + std::string construct_data_file_path(int32_t segment_id) const; + + // these two functions are for compatible, and will be deleted later + // so it is better not to use it. 
+ std::string construct_old_index_file_path(const std::string& path_prefix, int32_t segment_id) const; + std::string construct_old_data_file_path(const std::string& path_prefix, int32_t segment_id) const; + + size_t current_num_rows_per_row_block() const; + + const TabletSchema& get_tablet_schema(); + + int get_num_key_columns(); + + int get_num_short_key_columns(); + + size_t get_num_rows_per_row_block(); + + std::string rowset_path_prefix(); + + int64_t get_tablet_id(); + + int64_t rowset_id() { + return _rowset_id; + } + + OLAPStatus convert_from_old_files(const std::string& snapshot_path, + std::vector* success_links); + + OLAPStatus convert_to_old_files(const std::string& snapshot_path, + std::vector* success_links); + + OLAPStatus remove_old_files(std::vector* linkes_to_remove); + + OLAPStatus make_snapshot(const std::string& snapshot_path, + std::vector* success_links); + OLAPStatus copy_files_to_path(const std::string& dest_path, + std::vector* success_files); + + OLAPStatus link_segments_to_path(const std::string& dest_path, int64_t rowset_id); private: - void _check_io_error(OLAPStatus res); + + std::string _construct_file_name(int32_t segment_id, const std::string& suffix) const; + std::string _construct_file_name(int64_t rowset_id, int32_t segment_id, const std::string& suffix) const; - OLAPTable* _table; // table definition for this segmentgroup + std::string _construct_old_pending_file_path(const std::string& path_prefix, int32_t segment_id, const std::string& suffix) const; + + std::string _construct_old_file_path(const std::string& path_prefix, int32_t segment_id, const std::string& suffix) const; + + std::string _construct_err_sg_file_path(const std::string& path_prefix, int32_t segment_id, const std::string& suffix) const; + + std::string _construct_err_sg_index_file_path(const std::string& path_prefix, int32_t segment_id) const; + + std::string _construct_err_sg_data_file_path(const std::string& path_prefix, int32_t segment_id) const; + +private: + int64_t _tablet_id; + int64_t _rowset_id; + const TabletSchema* _schema; + std::string _rowset_path_prefix; // path of rowset Version _version; // version of associated data file VersionHash _version_hash; // version hash for this segmentgroup bool _delete_flag; @@ -256,10 +298,10 @@ class SegmentGroup { MemIndex _index; bool _is_pending; TPartitionId _partition_id; - TTransactionId _transaction_id; + TTransactionId _txn_id; - // short key对应的field_info数组 - RowFields _short_key_info_list; + // short key对应的column information + std::vector _short_key_columns; // short key对应的总长度 size_t _short_key_length; size_t _new_short_key_length; @@ -273,7 +315,6 @@ class SegmentGroup { // 以下是写入流程时需要的一些中间状态 // 当前写入文件的FileHandler FileHandler _current_file_handler; - bool _file_created; bool _new_segment_created; // 当前写入的FileHeader FileHeader _file_header; @@ -282,13 +323,13 @@ class SegmentGroup { // Lock held while loading the index. 
mutable boost::mutex _index_load_lock; - size_t _current_num_rows_per_row_block; - std::vector> _column_statistics; + std::vector> _zone_maps; std::unordered_map > _seg_pb_map; - DISALLOW_COPY_AND_ASSIGN(SegmentGroup); }; -} +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_GROUP_H diff --git a/be/src/olap/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp similarity index 90% rename from be/src/olap/segment_reader.cpp rename to be/src/olap/rowset/segment_reader.cpp index 2c74199a2b630e..5e847801f916d4 100644 --- a/be/src/olap/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "olap/segment_reader.h" +#include "olap/rowset/segment_reader.h" #include @@ -26,7 +26,7 @@ #include "olap/out_stream.h" #include "olap/olap_cond.h" #include "olap/row_block.h" -#include "olap/segment_group.h" +#include "olap/rowset/segment_group.h" namespace doris { @@ -34,21 +34,21 @@ static const uint32_t MIN_FILTER_BLOCK_NUM = 10; SegmentReader::SegmentReader( const std::string file, - OLAPTable* table, SegmentGroup* segment_group, uint32_t segment_id, const std::vector& used_columns, const std::set& load_bf_columns, const Conditions* conditions, - const std::vector* col_predicates, - const DeleteHandler& delete_handler, + const DeleteHandler* delete_handler, const DelCondSatisfied delete_status, + Cache* lru_cache, RuntimeState* runtime_state, OlapReaderStatistics* stats) : _file_name(file), - _table(table), _segment_group(segment_group), _segment_id(segment_id), + _used_columns(used_columns), + _load_bf_columns(load_bf_columns), _conditions(conditions), _delete_handler(delete_handler), _delete_status(delete_status), @@ -58,18 +58,15 @@ SegmentReader::SegmentReader( _block_count(0), _num_rows_in_block(0), _null_supported(false), - _used_columns(used_columns), - _load_bf_columns(load_bf_columns), _mmap_buffer(NULL), _include_blocks(NULL), _is_using_mmap(false), _is_data_loaded(false), _buffer_size(0), - _lru_cache(NULL), - _runtime_state(runtime_state), _shared_buffer(NULL), + _lru_cache(lru_cache), + _runtime_state(runtime_state), _stats(stats) { - _lru_cache = OLAPEngine::get_instance()->index_stream_lru_cache(); _tracker.reset(new MemTracker(-1)); _mem_pool.reset(new MemPool(_tracker.get())); } @@ -189,7 +186,7 @@ OLAPStatus SegmentReader::_set_decompressor() { OLAPStatus SegmentReader::_set_segment_info() { _num_rows_in_block = _header_message().num_rows_per_block(); if (_num_rows_in_block == 0) { - _num_rows_in_block = _table->num_rows_per_row_block(); + _num_rows_in_block = _segment_group->get_num_rows_per_row_block(); } _set_column_map(); @@ -314,7 +311,7 @@ OLAPStatus SegmentReader::get_block( auto res = _load_to_vectorized_row_batch(batch, num_rows_load); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to load block to vectorized_row_batch. [res=%d]", res); + LOG(WARNING) << "fail to load block to vectorized_row_batch. 
res:" << res; return res; } @@ -327,20 +324,20 @@ OLAPStatus SegmentReader::get_block( void SegmentReader::_set_column_map() { _encodings_map.clear(); - _table_id_to_unique_id_map.clear(); - _unique_id_to_table_id_map.clear(); + _tablet_id_to_unique_id_map.clear(); + _unique_id_to_tablet_id_map.clear(); _unique_id_to_segment_id_map.clear(); for (ColumnId table_column_id : _used_columns) { - ColumnId unique_column_id = tablet_schema()[table_column_id].unique_id; - _table_id_to_unique_id_map[table_column_id] = unique_column_id; - _unique_id_to_table_id_map[unique_column_id] = table_column_id; + ColumnId unique_column_id = tablet_schema().column(table_column_id).unique_id(); + _tablet_id_to_unique_id_map[table_column_id] = unique_column_id; + _unique_id_to_tablet_id_map[unique_column_id] = table_column_id; } for (ColumnId table_column_id : _load_bf_columns) { - ColumnId unique_column_id = tablet_schema()[table_column_id].unique_id; - _table_id_to_unique_id_map[table_column_id] = unique_column_id; - _unique_id_to_table_id_map[unique_column_id] = table_column_id; + ColumnId unique_column_id = tablet_schema().column(table_column_id).unique_id(); + _tablet_id_to_unique_id_map[table_column_id] = unique_column_id; + _unique_id_to_tablet_id_map[unique_column_id] = table_column_id; } size_t segment_column_size = _header_message().column_size(); @@ -348,7 +345,7 @@ void SegmentReader::_set_column_map() { ++segment_column_id) { // 如果找得到,建立映射表 ColumnId unique_column_id = _header_message().column(segment_column_id).unique_id(); - if (_unique_id_to_table_id_map.find(unique_column_id) != _unique_id_to_table_id_map.end()) { + if (_unique_id_to_tablet_id_map.find(unique_column_id) != _unique_id_to_tablet_id_map.end()) { _unique_id_to_segment_id_map[unique_column_id] = segment_column_id; // encoding 应该和segment schema序一致。 _encodings_map[unique_column_id] = @@ -359,12 +356,12 @@ void SegmentReader::_set_column_map() { OLAPStatus SegmentReader::_pick_columns() { for (uint32_t i : _used_columns) { - ColumnId unique_column_id = _table_id_to_unique_id_map[i]; + ColumnId unique_column_id = _tablet_id_to_unique_id_map[i]; _include_columns.insert(unique_column_id); } for (uint32_t i : _load_bf_columns) { - ColumnId unique_column_id = _table_id_to_unique_id_map[i]; + ColumnId unique_column_id = _tablet_id_to_unique_id_map[i]; _include_bf_columns.insert(unique_column_id); } @@ -374,7 +371,7 @@ OLAPStatus SegmentReader::_pick_columns() { OLAPStatus SegmentReader::_pick_delete_row_groups(uint32_t first_block, uint32_t last_block) { VLOG(3) << "pick for " << first_block << " to " << last_block << " for delete_condition"; - if (_delete_handler.empty()) { + if (_delete_handler->empty()) { return OLAP_SUCCESS; } @@ -383,7 +380,7 @@ OLAPStatus SegmentReader::_pick_delete_row_groups(uint32_t first_block, uint32_t return OLAP_SUCCESS; } - for (auto& delete_condition : _delete_handler.get_delete_conditions()) { + for (auto& delete_condition : _delete_handler->get_delete_conditions()) { if (delete_condition.filter_version <= _segment_group->version().first) { continue; } @@ -398,7 +395,7 @@ OLAPStatus SegmentReader::_pick_delete_row_groups(uint32_t first_block, uint32_t bool del_not_satisfied = false; for (auto& i : delete_condition.del_cond->columns()) { ColumnId table_column_id = i.first; - ColumnId unique_column_id = _table_id_to_unique_id_map[table_column_id]; + ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id]; if (0 == _unique_id_to_segment_id_map.count(unique_column_id)) { continue; } @@ -484,7 +481,7 
@@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b timer.reset(); for (auto& i : _conditions->columns()) { - FieldAggregationMethod aggregation = _table->get_aggregation_by_index(i.first); + FieldAggregationMethod aggregation = _get_aggregation_by_index(i.first); bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE || (aggregation == OLAP_FIELD_AGGREGATION_REPLACE && _segment_group->version().first == 0)); @@ -493,7 +490,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b } ColumnId table_column_id = i.first; - ColumnId unique_column_id = _table_id_to_unique_id_map[table_column_id]; + ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id]; if (0 == _unique_id_to_segment_id_map.count(unique_column_id)) { continue; } @@ -525,7 +522,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b } for (uint32_t i : _load_bf_columns) { - FieldAggregationMethod aggregation = _table->get_aggregation_by_index(i); + FieldAggregationMethod aggregation = _get_aggregation_by_index(i); bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE || (aggregation == OLAP_FIELD_AGGREGATION_REPLACE && _segment_group->version().first == 0)); @@ -534,7 +531,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b } ColumnId table_column_id = i; - ColumnId unique_column_id = _table_id_to_unique_id_map[table_column_id]; + ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id]; if (0 == _unique_id_to_segment_id_map.count(unique_column_id)) { continue; } @@ -625,8 +622,8 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) { continue; } - ColumnId table_column_id = _unique_id_to_table_id_map[unique_column_id]; - FieldType type = _table->get_field_type_by_index(table_column_id); + ColumnId table_column_id = _unique_id_to_tablet_id_map[unique_column_id]; + FieldType type = _get_field_type_by_index(table_column_id); char* stream_buffer = NULL; char key_buf[OLAP_LRU_CACHE_MAX_KEY_LENTH]; @@ -711,14 +708,12 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) { } if (_block_count != expected_blocks) { - OLAP_LOG_WARNING("something wrong while reading index, expected=%lu, actual=%lu", - expected_blocks, _block_count); - OLAP_LOG_WARNING("_header_message().number_of_rows()=%d," - "_header_message().num_rows_per_block()=%d, table='%s', version='%d-%d'", - _header_message().number_of_rows(), _header_message().num_rows_per_block(), - _segment_group->table()->full_name().c_str(), - _segment_group->version().first, _segment_group->version().second); - LOG(WARNING) << "version:" << _segment_group->version().first << "-" << _segment_group->version().second; + LOG(WARNING) << "something wrong while reading index, expected=" << expected_blocks << ", actual=" << _block_count; - _column_readers.resize(_table->tablet_schema().size(), nullptr); - _column_indices.resize(_table->tablet_schema().size(), nullptr); + _column_readers.resize(_segment_group->get_tablet_schema().num_columns(), nullptr); + _column_indices.resize(_segment_group->get_tablet_schema().num_columns(), nullptr); for (auto table_column_id : _used_columns) { - ColumnId unique_column_id = _table_id_to_unique_id_map[table_column_id]; + ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id]; // 当前是不会出现table和segment的schema不一致的情况的 std::unique_ptr<ColumnReader> reader(ColumnReader::create(table_column_id, - _table->tablet_schema(), - _unique_id_to_table_id_map, + _segment_group->get_tablet_schema(), + _unique_id_to_tablet_id_map, _unique_id_to_segment_id_map, 
_encodings_map)); if (reader == nullptr) { @@ -853,14 +848,14 @@ OLAPStatus SegmentReader::_seek_to_block_directly( PositionProvider position(&_column_indices[cid]->entry(block_id)); if (OLAP_SUCCESS != (res = _column_readers[cid]->seek(&position))) { if (OLAP_ERR_COLUMN_STREAM_EOF == res) { - VLOG(3) << "Stream EOF. tablet_id=" << _table->tablet_id() + VLOG(3) << "Stream EOF. tablet_id=" << _segment_group->get_tablet_id() << ", column_id=" << _column_readers[cid]->column_unique_id() << ", block_id=" << block_id; return OLAP_ERR_DATA_EOF; } else { OLAP_LOG_WARNING("fail to seek to block. " "[tablet_id=%ld column_id=%u block_id=%lu]", - _table->tablet_id(), _column_readers[cid]->column_unique_id(), block_id); + _segment_group->get_tablet_id(), _column_readers[cid]->column_unique_id(), block_id); return OLAP_ERR_COLUMN_SEEK_ERROR; } } diff --git a/be/src/olap/segment_reader.h b/be/src/olap/rowset/segment_reader.h similarity index 91% rename from be/src/olap/segment_reader.h rename to be/src/olap/rowset/segment_reader.h index 5f292669f8da96..8bdf961ce59fc1 100644 --- a/be/src/olap/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_SEGMENT_READER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_SEGMENT_READER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_READER_H #include #include @@ -26,7 +26,8 @@ #include #include "olap/bloom_filter_reader.h" -#include "olap/column_reader.h" +#include "olap/rowset/column_reader.h" +#include "olap/rowset/segment_group.h" #include "olap/compress.h" #include "olap/file_stream.h" #include "olap/in_stream.h" @@ -36,8 +37,6 @@ #include "olap/lru_cache.h" #include "olap/olap_cond.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" -#include "olap/olap_table.h" #include "olap/row_cursor.h" #include "runtime/runtime_state.h" #include "runtime/mem_pool.h" @@ -46,24 +45,18 @@ namespace doris { -class SegmentGroup; - - -class ColumnReader; - // SegmentReader 用于读取一个Segment文件 class SegmentReader { public: explicit SegmentReader(const std::string file, - OLAPTable* table, SegmentGroup* segment_group, uint32_t segment_id, - const std::vector& return_columns, + const std::vector& used_columns, const std::set& load_bf_columns, const Conditions* conditions, - const std::vector* col_predicates, - const DeleteHandler& delete_handler, + const DeleteHandler* delete_handler, const DelCondSatisfied delete_status, + Cache* lru_cache, RuntimeState* runtime_state, OlapReaderStatistics* stats); @@ -231,8 +224,8 @@ class SegmentReader { OLAPStatus _reset_readers(); // 获取当前的table级schema。 - inline const std::vector& tablet_schema() { - return _table->tablet_schema(); + inline const TabletSchema& tablet_schema() { + return _segment_group->get_tablet_schema(); } inline const ColumnDataHeaderMessage& _header_message() { @@ -263,6 +256,24 @@ class SegmentReader { OLAPStatus _load_to_vectorized_row_batch( VectorizedRowBatch* batch, size_t size); + FieldAggregationMethod _get_aggregation_by_index(uint32_t index) { + const TabletSchema& tablet_schema = _segment_group->get_tablet_schema(); + if (index < tablet_schema.num_columns()) { + return tablet_schema.column(index).aggregation(); + } + + return OLAP_FIELD_AGGREGATION_UNKNOWN; + } + + FieldType _get_field_type_by_index(uint32_t index) { + const TabletSchema& tablet_schema = _segment_group->get_tablet_schema(); + if (index < 
tablet_schema.num_columns()) { + return tablet_schema.column(index).type(); + } + + return OLAP_FIELD_TYPE_NONE; + } + private: static const int32_t BYTE_STREAM_POSITIONS = 1; static const int32_t RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1; @@ -275,14 +286,22 @@ class SegmentReader { static const uint32_t CURRENT_COLUMN_DATA_VERSION = 1; std::string _file_name; // 文件名 - doris::FileHandler _file_handler; // 文件handler - - OLAPTable* _table; SegmentGroup* _segment_group; uint32_t _segment_id; - + // columns that can be used by client. when client seek to range's start or end, + // client may read more columns than normal read. + // For example: + // table1's schema is 'k1, k2, v1'. which k1, k2 is key column, v1 is value column. + // for query 'select sum(v1) from table1', client split all data to sub-range in logical, + // so, one sub-range need to seek to right position with k1 and k2; then only read v1. + // In this situation, _used_columns contains (k1, k2, v1) + std::vector _used_columns; + UniqueIdSet _load_bf_columns; const Conditions* _conditions; // 列过滤条件 - DeleteHandler _delete_handler; + doris::FileHandler _file_handler; // 文件handler + + + const DeleteHandler* _delete_handler = nullptr; DelCondSatisfied _delete_status; bool _eof; // eof标志 @@ -303,22 +322,13 @@ class SegmentReader { bool _null_supported; uint64_t _header_length; // Header(FixHeader+PB)大小,读数据时需要偏移 - // columns that can be used by client. when client seek to range's start or end, - // client may read more columns than normal read. - // For example: - // table1's schema is 'k1, k2, v1'. which k1, k2 is key column, v1 is value column. - // for query 'select sum(v1) from table1', client split all data to sub-range in logical, - // so, one sub-range need to seek to right position with k1 and k2; then only read v1. - // In this situation, _used_columns contains (k1, k2, v1) - std::vector _used_columns; std::vector _column_readers; // 实际的数据读取器 std::vector _column_indices; // 保存column的index UniqueIdSet _include_columns; // 用于判断该列是不是被包含 - UniqueIdSet _load_bf_columns; UniqueIdSet _include_bf_columns; - UniqueIdToColumnIdMap _table_id_to_unique_id_map; // table id到unique id的映射 - UniqueIdToColumnIdMap _unique_id_to_table_id_map; // unique id到table id的映射 + UniqueIdToColumnIdMap _tablet_id_to_unique_id_map; // tablet id到unique id的映射 + UniqueIdToColumnIdMap _unique_id_to_tablet_id_map; // unique id到tablet id的映射 UniqueIdToColumnIdMap _unique_id_to_segment_id_map; // uniqid到segment id的映射 std::map _indices; @@ -344,24 +354,23 @@ class SegmentReader { bool _is_data_loaded; size_t _buffer_size; - Cache* _lru_cache; std::vector _cache_handle; const FileHeader* _file_header; std::unique_ptr _tracker; std::unique_ptr _mem_pool; - RuntimeState* _runtime_state; // 用于统计内存消耗等运行时信息 StorageByteBuffer* _shared_buffer; + Cache* _lru_cache; + RuntimeState* _runtime_state; // 用于统计内存消耗等运行时信息 + OlapReaderStatistics* _stats; // Set when seek_to_block is called, valid until next seek_to_block is called. 
bool _without_filter = false; - OlapReaderStatistics* _stats; - DISALLOW_COPY_AND_ASSIGN(SegmentReader); }; } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_SEGMENT_READER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_READER_H diff --git a/be/src/olap/segment_writer.cpp b/be/src/olap/rowset/segment_writer.cpp similarity index 80% rename from be/src/olap/segment_writer.cpp rename to be/src/olap/rowset/segment_writer.cpp index d801d6379f88db..09a77ecd367c19 100644 --- a/be/src/olap/segment_writer.cpp +++ b/be/src/olap/rowset/segment_writer.cpp @@ -15,23 +15,28 @@ // specific language governing permissions and limitations // under the License. -#include "olap/segment_writer.h" +#include "olap/rowset/segment_writer.h" -#include "olap/column_writer.h" +#include "olap/rowset/column_writer.h" #include "olap/out_stream.h" #include "olap/file_helper.h" #include "olap/utils.h" - +#include "olap/storage_engine.h" +#include "olap/data_dir.h" namespace doris { SegmentWriter::SegmentWriter( const std::string& file_name, - OLAPTablePtr table, - uint32_t stream_buffer_size) : + SegmentGroup* segment_group, + uint32_t stream_buffer_size, + CompressKind compress_kind, + double bloom_filter_fpp) : _file_name(file_name), - _table(table), + _segment_group(segment_group), _stream_buffer_size(stream_buffer_size), + _compress_kind(compress_kind), + _bloom_filter_fpp(bloom_filter_fpp), _stream_factory(NULL), _row_count(0), _block_count(0) {} @@ -49,7 +54,7 @@ OLAPStatus SegmentWriter::init(uint32_t write_mbytes_per_sec) { OLAPStatus res = OLAP_SUCCESS; // 创建factory _stream_factory = - new(std::nothrow) OutStreamFactory(_table->compress_kind(), _stream_buffer_size); + new(std::nothrow) OutStreamFactory(_compress_kind, _stream_buffer_size); if (NULL == _stream_factory) { OLAP_LOG_WARNING("fail to allocate out stream factory"); @@ -57,25 +62,23 @@ OLAPStatus SegmentWriter::init(uint32_t write_mbytes_per_sec) { } // 创建writer - for (uint32_t i = 0; i < _table->tablet_schema().size(); i++) { - if (_table->tablet_schema()[i].is_root_column) { - ColumnWriter* writer = ColumnWriter::create(i, _table->tablet_schema(), - _stream_factory, - _table->num_rows_per_row_block(), - _table->bloom_filter_fpp()); - - if (NULL == writer) { - OLAP_LOG_WARNING("fail to create writer"); - return OLAP_ERR_MALLOC_ERROR; - } else { - _root_writers.push_back(writer); - } + for (uint32_t i = 0; i < _segment_group->get_tablet_schema().num_columns(); i++) { + ColumnWriter* writer = ColumnWriter::create(i, _segment_group->get_tablet_schema(), + _stream_factory, + _segment_group->get_num_rows_per_row_block(), + _bloom_filter_fpp); + + if (NULL == writer) { + OLAP_LOG_WARNING("fail to create writer"); + return OLAP_ERR_MALLOC_ERROR; + } else { + _root_writers.push_back(writer); + } - res = writer->init(); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("fail to initialize ColumnWriter. [res=%d]", res); - return res; - } + res = writer->init(); + if (OLAP_SUCCESS != res) { + OLAP_LOG_WARNING("fail to initialize ColumnWriter. 
[res=%d]", res); + return res; } } @@ -85,9 +88,9 @@ OLAPStatus SegmentWriter::init(uint32_t write_mbytes_per_sec) { } OLAPStatus SegmentWriter::write_batch(RowBlock* block, RowCursor* cursor, bool is_finalize) { - DCHECK(block->row_block_info().row_num == _table->num_rows_per_row_block() || is_finalize) + DCHECK(block->row_block_info().row_num == _segment_group->get_num_rows_per_row_block() || is_finalize) << "write block not empty, num_rows=" << block->row_block_info().row_num - << ", table_num_rows=" << _table->num_rows_per_row_block(); + << ", table_num_rows=" << _segment_group->get_num_rows_per_row_block(); OLAPStatus res = OLAP_SUCCESS; for (auto col_writer : _root_writers) { res = col_writer->write_batch(block, cursor); @@ -125,12 +128,12 @@ uint64_t SegmentWriter::estimate_segment_size() { OLAPStatus SegmentWriter::_make_file_header(ColumnDataHeaderMessage* file_header) { OLAPStatus res = OLAP_SUCCESS; file_header->set_number_of_rows(_row_count); - file_header->set_compress_kind(_table->compress_kind()); + file_header->set_compress_kind(_compress_kind); file_header->set_stream_buffer_size(_stream_buffer_size); // TODO. 之前没设置 file_header->set_magic_string("COLUMN DATA"); file_header->set_version(1); - file_header->set_num_rows_per_block(_table->num_rows_per_row_block()); + file_header->set_num_rows_per_block(_segment_group->get_num_rows_per_row_block()); // check if has bloom filter columns bool has_bf_column = false; @@ -152,7 +155,7 @@ OLAPStatus SegmentWriter::_make_file_header(ColumnDataHeaderMessage* file_header // * column_unique_id // * column_type // * column_encoding - // * column_statistics + // * zone_maps res = (*it)->finalize(file_header); if (OLAP_UNLIKELY(OLAP_SUCCESS != res)) { @@ -206,7 +209,11 @@ OLAPStatus SegmentWriter::finalize(uint32_t* segment_file_size) { OLAPStatus res = OLAP_SUCCESS; FileHandler file_handle; FileHeader file_header; - + boost::filesystem::path tablet_path(_segment_group->rowset_path_prefix()); + boost::filesystem::path data_dir_path = tablet_path.parent_path().parent_path().parent_path().parent_path(); + std::string data_dir_string = data_dir_path.string(); + DataDir* data_dir = StorageEngine::instance()->get_store(data_dir_string); + data_dir->add_pending_ids(ROWSET_ID_PREFIX + std::to_string(_segment_group->rowset_id())); if (OLAP_SUCCESS != (res = file_handle.open_with_mode( _file_name, O_CREAT | O_EXCL | O_WRONLY , S_IRUSR | S_IWUSR))) { LOG(WARNING) << "fail to open file. [file_name=" << _file_name << "]"; diff --git a/be/src/olap/segment_writer.h b/be/src/olap/rowset/segment_writer.h similarity index 80% rename from be/src/olap/segment_writer.h rename to be/src/olap/rowset/segment_writer.h index c63038881e9cdf..7dca00100a3189 100644 --- a/be/src/olap/segment_writer.h +++ b/be/src/olap/rowset/segment_writer.h @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_OLAP_COLUMN_FILE_SEGMENT_WRITER_H -#define DORIS_BE_SRC_OLAP_COLUMN_FILE_SEGMENT_WRITER_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_WRITER_H #include "olap/olap_define.h" -#include "olap/data_writer.h" +#include "olap/rowset/column_data_writer.h" namespace doris { @@ -30,8 +30,10 @@ class ColumnDataHeaderMessage; class SegmentWriter { public: explicit SegmentWriter(const std::string& file_name, - OLAPTablePtr table, - uint32_t stream_buffer_size); + SegmentGroup* segment_group, + uint32_t stream_buffer_size, + CompressKind compress_kind, + double bloom_filter_fpp); ~SegmentWriter(); OLAPStatus init(uint32_t write_mbytes_per_sec); OLAPStatus write_batch(RowBlock* block, RowCursor* cursor, bool is_finalize); @@ -44,9 +46,12 @@ class SegmentWriter { // Helper: 生成最终的PB文件头 OLAPStatus _make_file_header(ColumnDataHeaderMessage* file_header); +private: std::string _file_name; - OLAPTablePtr _table; + SegmentGroup* _segment_group; uint32_t _stream_buffer_size; // 输出缓冲区大小 + CompressKind _compress_kind; + double _bloom_filter_fpp; std::vector _root_writers; OutStreamFactory* _stream_factory; uint64_t _row_count; // 已经写入的行总数 @@ -60,4 +65,4 @@ class SegmentWriter { } // namespace doris -#endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_SEGMENT_WRITER_H +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_WRITER_H \ No newline at end of file diff --git a/be/src/olap/rowset_factory.cpp b/be/src/olap/rowset_factory.cpp new file mode 100644 index 00000000000000..897d1365117eb7 --- /dev/null +++ b/be/src/olap/rowset_factory.cpp @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset_factory.h" +#include "gen_cpp/olap_file.pb.h" +#include "olap/rowset/alpha_rowset.h" + +namespace doris { + +OLAPStatus RowsetFactory::load_rowset(const TabletSchema& schema, + const std::string& rowset_path, + DataDir* data_dir, + RowsetMetaSharedPtr rowset_meta, + RowsetSharedPtr* rowset) { + + if (rowset_meta->rowset_type() == RowsetTypePB::ALPHA_ROWSET) { + rowset->reset(new AlphaRowset(&schema, rowset_path, data_dir, rowset_meta)); + return (*rowset)->init(); + } else { + return OLAP_ERR_ROWSET_TYPE_NOT_FOUND; + } +} + +} // namespace doris diff --git a/be/test/olap/mock_olap_rootpath.h b/be/src/olap/rowset_factory.h similarity index 60% rename from be/test/olap/mock_olap_rootpath.h rename to be/src/olap/rowset_factory.h index da81cbccfb894d..261fe4f9f7a51e 100644 --- a/be/test/olap/mock_olap_rootpath.h +++ b/be/src/olap/rowset_factory.h @@ -15,14 +15,24 @@ // specific language governing permissions and limitations // under the License. 
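rowset_factory.cpp above is the whole dispatch surface for loading rowsets: given the tablet schema, the rowset path, the DataDir and a RowsetMeta, it creates an AlphaRowset for ALPHA_ROWSET metas, calls init() on it, and returns OLAP_ERR_ROWSET_TYPE_NOT_FOUND for anything else. A hedged usage sketch (header locations other than olap/rowset_factory.h are assumptions about this tree, and open_rowset is a hypothetical caller, not code from this patch):

    #include "olap/rowset_factory.h"
    #include "olap/rowset/rowset.h"        // RowsetSharedPtr (assumed location)
    #include "olap/rowset/rowset_meta.h"   // RowsetMetaSharedPtr (assumed location)
    #include "olap/tablet_schema.h"        // TabletSchema (assumed location)

    namespace doris {

    // Open one rowset whose meta has already been loaded elsewhere.
    OLAPStatus open_rowset(const TabletSchema& schema,
                           const std::string& rowset_path,
                           DataDir* data_dir,
                           RowsetMetaSharedPtr rowset_meta,
                           RowsetSharedPtr* out) {
        OLAPStatus res = RowsetFactory::load_rowset(schema, rowset_path, data_dir,
                                                    rowset_meta, out);
        if (res != OLAP_SUCCESS) {
            out->reset();   // leave the caller with an empty shared_ptr on failure
        }
        return res;
    }

    } // namespace doris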
-#ifndef DORIS_BE_TEST_OLAP_MOCK_OLAP_ROOTPATH_H -#define DORIS_BE_TEST_OLAP_MOCK_OLAP_ROOTPATH_H +#ifndef DORIS_BE_SRC_OLAP_ROWSET_FACTORY_H +#define DORIS_BE_SRC_OLAP_ROWSET_FACTORY_H + +#include "gen_cpp/olap_file.pb.h" +#include "olap/data_dir.h" namespace doris { -class MockOLAPRootPath : public OLAPRootPath { +class RowsetFactory { + public: - MOCK_METHOD1(set_cluster_id, OLAPStatus(int32_t cluster_id)); -}; // class MockOLAPRootPath -} // namespace doris -#endif // DORIS_BE_TEST_OLAP_MOCK_OLAP_ROOTPATH_H + static OLAPStatus load_rowset(const TabletSchema& schema, + const std::string& rowset_path, + DataDir* data_dir, + RowsetMetaSharedPtr rowset_meta, + RowsetSharedPtr* rowset); +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_FACTORY_H diff --git a/be/src/olap/rowset_graph.cpp b/be/src/olap/rowset_graph.cpp new file mode 100644 index 00000000000000..6076adc004883d --- /dev/null +++ b/be/src/olap/rowset_graph.cpp @@ -0,0 +1,267 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset_graph.h" +#include "common/logging.h" +#include + +namespace doris { + +OLAPStatus RowsetGraph::construct_rowset_graph(const std::vector& rs_metas) { + if (rs_metas.empty()) { + VLOG(3) << "there is no version in the header."; + return OLAP_SUCCESS; + } + + // Distill vertex values from versions in TabletMeta. + std::vector vertex_values; + vertex_values.reserve(2 * rs_metas.size()); + + for (size_t i = 0; i < rs_metas.size(); ++i) { + vertex_values.push_back(rs_metas[i]->start_version()); + vertex_values.push_back(rs_metas[i]->end_version() + 1); + } + + sort(vertex_values.begin(), vertex_values.end()); + + // Items in vertex_values are sorted, but not unique. + // we choose unique items in vertex_values to create vertexes. + int64_t last_vertex_value = -1; + for (size_t i = 0; i < vertex_values.size(); ++i) { + if (i != 0 && vertex_values[i] == last_vertex_value) { + continue; + } + + // Add vertex to graph. + OLAPStatus status = _add_vertex_to_graph(vertex_values[i]); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "fail to add vertex to version graph. vertex=" << vertex_values[i]; + return status; + } + + last_vertex_value = vertex_values[i]; + } + + // Create edges for version graph according to TabletMeta's versions. + for (size_t i = 0; i < rs_metas.size(); ++i) { + // Versions in header are unique. + // We ensure _vertex_index_map has its start_version. + int64_t start_vertex_index = _vertex_index_map[rs_metas[i]->start_version()]; + int64_t end_vertex_index = _vertex_index_map[rs_metas[i]->end_version() + 1]; + // Add one edge from start_version to end_version. 
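(Worked example for the construction above, with made-up versions: rowsets [0,4], [5,5] and [6,9] produce the sorted vertex set {0, 5, 6, 10}; the two list inserts below run once per rowset and connect 0<->5, 5<->6 and 6<->10, so capture_consistent_versions({0,9}) can later walk 0 -> 5 -> 6 -> 10 and turn that path back into the three versions.)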
+ std::list* edges = _version_graph[start_vertex_index].edges; + edges->insert(edges->begin(), end_vertex_index); + // Add reverse edge from end_version to start_version. + std::list* r_edges = _version_graph[end_vertex_index].edges; + r_edges->insert(r_edges->begin(), start_vertex_index); + } + return OLAP_SUCCESS; +} + +OLAPStatus RowsetGraph::reconstruct_rowset_graph(const std::vector& rs_metas) { + for (auto& vertex : _version_graph) { + SAFE_DELETE(vertex.edges); + } + _version_graph.clear(); + _vertex_index_map.clear(); + return construct_rowset_graph(rs_metas); +} + +OLAPStatus RowsetGraph::add_version_to_graph(const Version& version) { + // Add version.first as new vertex of version graph if not exist. + int64_t start_vertex_value = version.first; + int64_t end_vertex_value = version.second + 1; + + // Add vertex to graph. + OLAPStatus status = _add_vertex_to_graph(start_vertex_value); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "fail to add vertex to version graph. vertex=" << start_vertex_value; + return status; + } + + status = _add_vertex_to_graph(end_vertex_value); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "fail to add vertex to version graph. vertex=" << end_vertex_value; + return status; + } + + int64_t start_vertex_index = _vertex_index_map[start_vertex_value]; + int64_t end_vertex_index = _vertex_index_map[end_vertex_value]; + + // We assume this version is new version, so we just add two edges + // into version graph. add one edge from start_version to end_version + std::list* edges = _version_graph[start_vertex_index].edges; + edges->insert(edges->begin(), end_vertex_index); + + // We add reverse edge(from end_version to start_version) to graph + std::list* r_edges = _version_graph[end_vertex_index].edges; + r_edges->insert(r_edges->begin(), start_vertex_index); + + return OLAP_SUCCESS; +} + +OLAPStatus RowsetGraph::delete_version_from_graph(const Version& version) { + int64_t start_vertex_value = version.first; + int64_t end_vertex_value = version.second + 1; + + if (_vertex_index_map.find(start_vertex_value) == _vertex_index_map.end() + || _vertex_index_map.find(end_vertex_value) == _vertex_index_map.end()) { + LOG(WARNING) << "vertex for version does not exists. " + << "version=" << version.first << "-" << version.second; + return OLAP_ERR_HEADER_DELETE_VERSION; + } + + int64_t start_vertex_index = _vertex_index_map[start_vertex_value]; + int64_t end_vertex_index = _vertex_index_map[end_vertex_value]; + // Remove edge and its reverse edge. + _version_graph[start_vertex_index].edges->remove(end_vertex_index); + _version_graph[end_vertex_index].edges->remove(start_vertex_index); + + return OLAP_SUCCESS; +} + +OLAPStatus RowsetGraph::_add_vertex_to_graph(int64_t vertex_value) { + // Vertex with vertex_value already exists. + if (_vertex_index_map.find(vertex_value) != _vertex_index_map.end()) { + VLOG(3) << "vertex with vertex value already exists. value=" << vertex_value; + return OLAP_SUCCESS; + } + + std::list* edges = new std::list(); + if (edges == nullptr) { + LOG(WARNING) << "fail to malloc edge list."; + return OLAP_ERR_OTHER_ERROR; + } + + Vertex vertex = {vertex_value, edges}; + _version_graph.push_back(vertex); + _vertex_index_map[vertex_value] = _version_graph.size() - 1; + return OLAP_SUCCESS; +} + +OLAPStatus RowsetGraph::capture_consistent_versions( + const Version& spec_version, + std::vector* version_path) const { + if (spec_version.first > spec_version.second) { + LOG(WARNING) << "invalid specfied version. 
" + << "spec_version=" << spec_version.first << "-" << spec_version.second; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + if (version_path == nullptr) { + LOG(WARNING) << "param version_path is nullptr."; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + // bfs_queue's element is vertex_index. + std::queue bfs_queue; + // predecessor[i] means the predecessor of vertex_index 'i'. + std::vector predecessor(_version_graph.size()); + // visited[int64_t]==true means it had entered bfs_queue. + std::vector visited(_version_graph.size()); + // [start_vertex_value, end_vertex_value) + int64_t start_vertex_value = spec_version.first; + int64_t end_vertex_value = spec_version.second + 1; + // -1 is invalid vertex index. + int64_t start_vertex_index = -1; + // -1 is valid vertex index. + int64_t end_vertex_index = -1; + + for (size_t i = 0; i < _version_graph.size(); ++i) { + if (_version_graph[i].value == start_vertex_value) { + start_vertex_index = i; + } + if (_version_graph[i].value == end_vertex_value) { + end_vertex_index = i; + } + } + + if (start_vertex_index < 0 || end_vertex_index < 0) { + LOG(WARNING) << "fail to find path in version_graph. " + << "spec_version: " << spec_version.first << "-" << spec_version.second; + return OLAP_ERR_VERSION_NOT_EXIST; + } + + for (size_t i = 0; i < _version_graph.size(); ++i) { + visited[i] = false; + } + + bfs_queue.push(start_vertex_index); + visited[start_vertex_index] = true; + // The predecessor of root is itself. + predecessor[start_vertex_index] = start_vertex_index; + + while (bfs_queue.empty() == false && visited[end_vertex_index] == false) { + int64_t top_vertex_index = bfs_queue.front(); + bfs_queue.pop(); + auto it = _version_graph[top_vertex_index].edges->begin(); + for (; it != _version_graph[top_vertex_index].edges->end(); ++it) { + if (visited[*it] == false) { + // If we don't support reverse version in the path, and start vertex + // value is larger than the end vertex value, we skip this edge. + if (_version_graph[top_vertex_index].value > _version_graph[*it].value) { + continue; + } + + visited[*it] = true; + predecessor[*it] = top_vertex_index; + bfs_queue.push(*it); + } + } + } + + if (!visited[end_vertex_index]) { + LOG(WARNING) << "fail to find path in version_graph. " + << "spec_version: " << spec_version.first << "-" << spec_version.second; + return OLAP_ERR_VERSION_NOT_EXIST; + } + + std::vector reversed_path; + int64_t tmp_vertex_index = end_vertex_index; + reversed_path.push_back(tmp_vertex_index); + + // For start_vertex_index, its predecessor must be itself. + while (predecessor[tmp_vertex_index] != tmp_vertex_index) { + tmp_vertex_index = predecessor[tmp_vertex_index]; + reversed_path.push_back(tmp_vertex_index); + } + + // Make version_path from reversed_path. 
+ std::stringstream shortest_path_for_debug; + for (size_t path_id = reversed_path.size() - 1; path_id > 0; --path_id) { + int64_t tmp_start_vertex_value = _version_graph[reversed_path[path_id]].value; + int64_t tmp_end_vertex_value = _version_graph[reversed_path[path_id - 1]].value; + + // tmp_start_vertex_value mustn't be equal to tmp_end_vertex_value + if (tmp_start_vertex_value <= tmp_end_vertex_value) { + version_path->push_back(std::make_pair(tmp_start_vertex_value, tmp_end_vertex_value - 1)); + } else { + version_path->push_back(std::make_pair(tmp_end_vertex_value, tmp_start_vertex_value - 1)); + } + + shortest_path_for_debug << (*version_path)[version_path->size() - 1].first << '-' + << (*version_path)[version_path->size() - 1].second << ' '; + } + + VLOG(3) << "success to find path for spec_version. " + << "spec_version=" << spec_version.first << "-" << spec_version.second + << ", path=" << shortest_path_for_debug.str(); + + return OLAP_SUCCESS; +} + +} // namespace doris diff --git a/be/src/olap/rowset_graph.h b/be/src/olap/rowset_graph.h new file mode 100644 index 00000000000000..ab46f0683e165d --- /dev/null +++ b/be/src/olap/rowset_graph.h @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_GRAPH_H +#define DORIS_BE_SRC_OLAP_ROWSET_GRAPH_H + +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/rowset/rowset_meta.h" + +namespace doris { + +class RowsetGraph { +public: + OLAPStatus construct_rowset_graph(const std::vector& rs_metas); + OLAPStatus reconstruct_rowset_graph(const std::vector& rs_metas); + OLAPStatus add_version_to_graph(const Version& version); + OLAPStatus delete_version_from_graph(const Version& version); + OLAPStatus capture_consistent_versions(const Version& spec_version, + std::vector* version_path) const; +private: + OLAPStatus _add_vertex_to_graph(int64_t vertex_value); + + // OLAP version contains two parts, [start_version, end_version]. In order + // to construct graph, the OLAP version has two corresponding vertex, one + // vertex's value is version.start_version, the other is + // version.end_version + 1. + // Use adjacency list to describe version graph. + std::vector _version_graph; + + // vertex value --> vertex_index of _version_graph + // It is easy to find vertex index according to vertex value. 
+ std::unordered_map _vertex_index_map; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_OLAP_ROWSET_GRAPH_H diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index a43cc7345d1701..98dd2c49ea7d48 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -21,6 +21,7 @@ #include #include "olap/aggregate_func.h" +#include "olap/tablet_schema.h" #include "olap/types.h" #include "runtime/descriptors.h" @@ -90,18 +91,18 @@ class ColumnSchema { class Schema { public: - Schema(const std::vector& field_infos) { + Schema(const TabletSchema& schema) { int offset = 0; _num_key_columns = 0; - for (int i = 0; i < field_infos.size(); ++i) { - FieldInfo field_info = field_infos[i]; - ColumnSchema col_schema(field_info.aggregation, field_info.type); + for (int i = 0; i < schema.num_columns(); ++i) { + const TabletColumn& column = schema.column(i); + ColumnSchema col_schema(column.aggregation(), column.type()); col_schema.set_col_offset(offset); offset += col_schema.size() + 1; // 1 for null byte - if (field_info.is_key) { + if (column.is_key()) { _num_key_columns++; } - if (field_info.type == OLAP_FIELD_TYPE_HLL) { + if (column.type() == OLAP_FIELD_TYPE_HLL) { _hll_col_ids.push_back(i); } _cols.push_back(col_schema); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index b60f8fb30a7191..208323540de33c 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -23,15 +23,14 @@ #include #include -#include "olap/column_data.h" #include "olap/merger.h" -#include "olap/column_data.h" -#include "olap/olap_engine.h" -#include "olap/olap_table.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" #include "olap/row_block.h" #include "olap/row_cursor.h" -#include "olap/data_writer.h" #include "olap/wrapper_field.h" +#include "olap/rowset/rowset_id_generator.h" +#include "olap/rowset/alpha_rowset_writer.h" #include "common/resource_tls.h" #include "agent/cgroups_mgr.h" @@ -45,23 +44,15 @@ using std::vector; namespace doris { -bool version_entity_sorter(const VersionEntity& a, const VersionEntity& b) { - if (a.version.first != b.version.first) { - return a.version.first < b.version.first; - } else { - return a.version.second < b.version.second; - } -} - -RowBlockChanger::RowBlockChanger(const std::vector &tablet_schema, - const OLAPTablePtr &ref_olap_table) { - _schema_mapping.resize(tablet_schema.size()); +RowBlockChanger::RowBlockChanger(const TabletSchema& tablet_schema, + const TabletSharedPtr &base_tablet) { + _schema_mapping.resize(tablet_schema.num_columns()); } -RowBlockChanger::RowBlockChanger(const vector& tablet_schema, - const OLAPTablePtr& ref_olap_table, +RowBlockChanger::RowBlockChanger(const TabletSchema& tablet_schema, + const TabletSharedPtr& base_tablet, const DeleteHandler& delete_handler) { - _schema_mapping.resize(tablet_schema.size()); + _schema_mapping.resize(tablet_schema.num_columns()); _delete_handler = delete_handler; } @@ -77,7 +68,7 @@ RowBlockChanger::~RowBlockChanger() { ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) { if (column_index >= _schema_mapping.size()) { - return NULL; + return nullptr; } return &(_schema_mapping[column_index]); @@ -85,10 +76,10 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) #define TYPE_REINTERPRET_CAST(FromType, ToType) \ { \ - size_t row_num = ref_block.row_block_info().row_num; \ + size_t row_num = ref_block->row_block_info().row_num; \ for (size_t row = 0, mutable_row = 0; row < row_num; ++row) { \ if 
(is_data_left_vec[row] != 0) { \ - char* ref_ptr = ref_block.field_ptr(row, ref_column); \ + char* ref_ptr = ref_block->field_ptr(row, ref_column); \ char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \ *new_ptr = *ref_ptr; \ *(ToType*)(new_ptr + 1) = *(FromType*)(ref_ptr + 1); \ @@ -99,10 +90,10 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) #define LARGEINT_REINTERPRET_CAST(FromType, ToType) \ { \ - size_t row_num = ref_block.row_block_info().row_num; \ + size_t row_num = ref_block->row_block_info().row_num; \ for (size_t row = 0, mutable_row = 0; row < row_num; ++row) { \ if (is_data_left_vec[row] != 0) { \ - char* ref_ptr = ref_block.field_ptr(row, ref_column); \ + char* ref_ptr = ref_block->field_ptr(row, ref_column); \ char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \ *new_ptr = *ref_ptr; \ ToType new_value = *(FromType*)(ref_ptr + 1); \ @@ -114,7 +105,7 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) #define CONVERT_FROM_TYPE(from_type) \ { \ - switch (mutable_block->_tablet_schema[i].type) {\ + switch (mutable_block->tablet_schema().column(i).type()) {\ case OLAP_FIELD_TYPE_TINYINT: \ TYPE_REINTERPRET_CAST(from_type, int8_t); \ case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: \ @@ -136,10 +127,9 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) case OLAP_FIELD_TYPE_DOUBLE: \ TYPE_REINTERPRET_CAST(from_type, double); \ default: \ - OLAP_LOG_WARNING("the column type which was altered to was" \ - " unsupported. [origin_type=%d alter_type=%d]", \ - ref_block._tablet_schema[ref_column].type, \ - mutable_block->_tablet_schema[i].type); \ + LOG(WARNING) << "the column type which was altered to was unsupported." \ + << " origin_type=" << ref_block->tablet_schema().column(ref_column).type() \ + << ", alter_type=" << mutable_block->tablet_schema().column(i).type(); \ return false; \ } \ break; \ @@ -155,53 +145,50 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) } bool RowBlockChanger::change_row_block( - const DataFileType df_type, - const RowBlock& ref_block, + const RowBlock* ref_block, int32_t data_version, RowBlock* mutable_block, - uint64_t* filted_rows) const { - if (mutable_block == NULL) { + uint64_t* filtered_rows) const { + if (mutable_block == nullptr) { LOG(FATAL) << "mutable block is uninitialized."; return false; - } else if (mutable_block->_tablet_schema.size() != _schema_mapping.size()) { - OLAP_LOG_WARNING("mutable block does not match with schema mapping rules. " - "[block_schema_size=%ld, mapping_schema_size=%ld]", - mutable_block->_tablet_schema.size(), - _schema_mapping.size()); + } else if (mutable_block->tablet_schema().num_columns() != _schema_mapping.size()) { + LOG(WARNING) << "mutable block does not match with schema mapping rules. " + << "block_schema_size=" << mutable_block->tablet_schema().num_columns() + << ", mapping_schema_size=" << _schema_mapping.size(); return false; } - if (mutable_block->capacity() < ref_block.row_block_info().row_num) { - OLAP_LOG_WARNING("mutable block is not large enough for storing the changed block. " - "[mutable_block_size=%ld, ref_block_size=%u]", - mutable_block->capacity(), - ref_block.row_block_info().row_num); + if (mutable_block->capacity() < ref_block->row_block_info().row_num) { + LOG(WARNING) << "mutable block is not large enough for storing the changed block. 
" + << "mutable_block_size=" << mutable_block->capacity() + << ", ref_block_row_num=" << ref_block->row_block_info().row_num; return false; } mutable_block->clear(); RowCursor write_helper; - if (write_helper.init(mutable_block->_tablet_schema) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init rowcursor."); + if (write_helper.init(mutable_block->tablet_schema()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init rowcursor."; return false; } RowCursor read_helper; - if (read_helper.init(ref_block._tablet_schema) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init rowcursor."); + if (read_helper.init(ref_block->tablet_schema()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init rowcursor."; return false; } // a.1 先判断数据是否需要过滤,最终只有标记为1的才是留下需要的 // 对于没有filter的来说,相当于全部设置为1后留下 - const uint32_t row_num = ref_block.row_block_info().row_num; + const uint32_t row_num = ref_block->row_block_info().row_num; // (0表示过滤掉不要,1表示要,过程中2表示此row要切后续不需要再比较其他列) vector is_data_left_vec(row_num, 1); // 一行一行地进行比较 for (size_t row_index = 0; row_index < row_num; ++row_index) { - ref_block.get_row(row_index, &read_helper); + ref_block->get_row(row_index, &read_helper); // filter data according to delete conditions specified in DeleteData command if (is_data_left_vec[row_index] == 1) { @@ -218,24 +205,24 @@ bool RowBlockChanger::change_row_block( ++new_row_num; } } - *filted_rows = row_num - new_row_num; + *filtered_rows = row_num - new_row_num; const bool need_filter_data = (new_row_num != row_num); const bool filter_all = (new_row_num == 0); MemPool* mem_pool = mutable_block->mem_pool(); // b. 根据前面的过滤信息,只对还标记为1的处理 - for (size_t i = 0, len = mutable_block->tablet_schema().size(); !filter_all && i < len; ++i) { + for (size_t i = 0, len = mutable_block->tablet_schema().num_columns(); !filter_all && i < len; ++i) { int32_t ref_column = _schema_mapping[i].ref_column; if (_schema_mapping[i].ref_column >= 0) { // new column will be assigned as referenced column // check if the type of new column is equal to the older's. - if (mutable_block->tablet_schema()[i].type - == ref_block.tablet_schema()[ref_column].type) { + if (mutable_block->tablet_schema().column(i).type() + == ref_block->tablet_schema().column(ref_column).type()) { // 效率低下,也可以直接计算变长域拷贝,但仍然会破坏封装 - for (size_t row_index = 0, new_row_index = 0; - row_index < ref_block.row_block_info().row_num; ++row_index) { + for (size_t row_index = 0, new_row_index = 0; + row_index < ref_block->row_block_info().row_num; ++row_index) { // 不需要的row,每次处理到这个row时就跳过 if (need_filter_data && is_data_left_vec[row_index] == 0) { continue; @@ -243,23 +230,23 @@ bool RowBlockChanger::change_row_block( // 指定新的要写入的row index(不同于读的row_index) mutable_block->get_row(new_row_index++, &write_helper); - ref_block.get_row(row_index, &read_helper); + ref_block->get_row(row_index, &read_helper); if (true == read_helper.is_null(ref_column)) { write_helper.set_null(i); } else { const Field* field_to_read = read_helper.get_field_by_index(ref_column); - if (NULL == field_to_read) { - OLAP_LOG_WARNING("faile to get ref field.[index=%d]", ref_column); + if (nullptr == field_to_read) { + LOG(WARNING) << "failed to get ref field. index=" << ref_column; return false; } - + write_helper.set_not_null(i); - if (mutable_block->tablet_schema()[i].type == OLAP_FIELD_TYPE_CHAR) { + if (mutable_block->tablet_schema().column(i).type() == OLAP_FIELD_TYPE_CHAR) { // if modify length of CHAR type, the size of slice should be equal // to new length. 
Slice* src = (Slice*)(field_to_read->get_ptr(read_helper.get_buf())); - size_t size = mutable_block->tablet_schema()[i].length; + size_t size = mutable_block->tablet_schema().column(i).length(); char* buf = reinterpret_cast(mem_pool->allocate(size)); memset(buf, 0, size); size_t copy_size = (size < src->size) ? size : src->size; @@ -274,11 +261,11 @@ bool RowBlockChanger::change_row_block( } // 从ref_column 写入 i列。 - } else if (mutable_block->tablet_schema()[i].type == OLAP_FIELD_TYPE_VARCHAR - && ref_block.tablet_schema()[ref_column].type == OLAP_FIELD_TYPE_CHAR) { + } else if (mutable_block->tablet_schema().column(i).type() == OLAP_FIELD_TYPE_VARCHAR + && ref_block->tablet_schema().column(ref_column).type() == OLAP_FIELD_TYPE_CHAR) { // 效率低下,也可以直接计算变长域拷贝,但仍然会破坏封装 for (size_t row_index = 0, new_row_index = 0; - row_index < ref_block.row_block_info().row_num; ++row_index) { + row_index < ref_block->row_block_info().row_num; ++row_index) { // 不需要的row,每次处理到这个row时就跳过 if (need_filter_data && is_data_left_vec[row_index] == 0) { continue; @@ -287,20 +274,20 @@ bool RowBlockChanger::change_row_block( // 指定新的要写入的row index(不同于读的row_index) mutable_block->get_row(new_row_index++, &write_helper); - ref_block.get_row(row_index, &read_helper); + ref_block->get_row(row_index, &read_helper); if (true == read_helper.is_null(ref_column)) { write_helper.set_null(i); } else { // 要写入的 const Field* field_to_read = read_helper.get_field_by_index(ref_column); - if (NULL == field_to_read) { - OLAP_LOG_WARNING("faile to get ref field.[index=%d]", ref_column); + if (nullptr == field_to_read) { + LOG(WARNING) << "failed to get ref field. index=" << ref_column; return false; } write_helper.set_not_null(i); - int p = ref_block.tablet_schema()[ref_column].length - 1; + int p = ref_block->tablet_schema().column(ref_column).length() - 1; Slice* slice = reinterpret_cast(field_to_read->get_ptr(read_helper.get_buf())); char* buf = slice->data; while (p >= 0 && buf[p] == '\0') { @@ -315,7 +302,7 @@ bool RowBlockChanger::change_row_block( } else { // copy and alter the field // 此处可以暂时不动,新类型暂时不涉及类型转换 - switch (ref_block._tablet_schema[ref_column].type) { + switch (ref_block->tablet_schema().column(ref_column).type()) { case OLAP_FIELD_TYPE_TINYINT: CONVERT_FROM_TYPE(int8_t); case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: @@ -333,24 +320,23 @@ bool RowBlockChanger::change_row_block( case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: CONVERT_FROM_TYPE(uint64_t); default: - OLAP_LOG_WARNING("the column type which was altered from was" - " unsupported. [from_type=%d]", - ref_block._tablet_schema[ref_column].type); + LOG(WARNING) << "the column type which was altered from was unsupported." + << " from_type=" << ref_block->tablet_schema().column(ref_column).type(); return false; } - if (mutable_block->tablet_schema()[i].type < - ref_block.tablet_schema()[ref_column].type) { + if (mutable_block->tablet_schema().column(i).type() < + ref_block->tablet_schema().column(ref_column).type()) { VLOG(3) << "type degraded while altering column. 
" - << "column=" << mutable_block->tablet_schema()[i].name - << ", origin_type=" << ref_block._tablet_schema[ref_column].type - << ", alter_type=" << mutable_block->_tablet_schema[i].type; + << "column=" << mutable_block->tablet_schema().column(i).name() + << ", origin_type=" << ref_block->tablet_schema().column(ref_column).type() + << ", alter_type=" << mutable_block->tablet_schema().column(i).type(); } } } else { // 新增列,写入默认值 for (size_t row_index = 0, new_row_index = 0; - row_index < ref_block.row_block_info().row_num; ++row_index) { + row_index < ref_block->row_block_info().row_num; ++row_index) { // 不需要的row,每次处理到这个row时就跳过 if (need_filter_data && is_data_left_vec[row_index] == 0) { continue; @@ -382,30 +368,28 @@ bool RowBlockChanger::change_row_block( RowBlockSorter::RowBlockSorter(RowBlockAllocator* row_block_allocator) : _row_block_allocator(row_block_allocator), - _swap_row_block(NULL) {} + _swap_row_block(nullptr) {} RowBlockSorter::~RowBlockSorter() { if (_swap_row_block) { _row_block_allocator->release(_swap_row_block); - _swap_row_block = NULL; + _swap_row_block = nullptr; } } bool RowBlockSorter::sort(RowBlock** row_block) { uint32_t row_num = (*row_block)->row_block_info().row_num; - DataFileType data_file_type = (*row_block)->row_block_info().data_file_type; bool null_supported = (*row_block)->row_block_info().null_supported; - if (_swap_row_block == NULL || _swap_row_block->capacity() < row_num) { - if (_swap_row_block != NULL) { + if (_swap_row_block == nullptr || _swap_row_block->capacity() < row_num) { + if (_swap_row_block != nullptr) { _row_block_allocator->release(_swap_row_block); - _swap_row_block = NULL; + _swap_row_block = nullptr; } - if (_row_block_allocator->allocate(&_swap_row_block, row_num, - data_file_type, null_supported) != OLAP_SUCCESS - || _swap_row_block == NULL) { - OLAP_LOG_WARNING("fail to allocate memory."); + if (_row_block_allocator->allocate(&_swap_row_block, row_num, null_supported) != OLAP_SUCCESS + || _swap_row_block == nullptr) { + LOG(WARNING) << "fail to allocate memory."; return false; } } @@ -417,13 +401,13 @@ bool RowBlockSorter::sort(RowBlock** row_block) { return false; } - RowBlock* temp = NULL; - vector row_cursor_list((*row_block)->row_block_info().row_num, NULL); + RowBlock* temp = nullptr; + vector row_cursor_list((*row_block)->row_block_info().row_num, nullptr); // create an list of row cursor as long as the number of rows in data block. for (size_t i = 0; i < (*row_block)->row_block_info().row_num; ++i) { - if ((row_cursor_list[i] = new(nothrow) RowCursor()) == NULL) { - OLAP_LOG_WARNING("failed to malloc RowCursor. [size=%ld]", sizeof(RowCursor)); + if ((row_cursor_list[i] = new(nothrow) RowCursor()) == nullptr) { + LOG(WARNING) << "failed to malloc RowCursor. size=" << sizeof(RowCursor); goto SORT_ERR_EXIT; } @@ -442,7 +426,7 @@ bool RowBlockSorter::sort(RowBlock** row_block) { for (size_t i = 0; i < row_cursor_list.size(); ++i) { _swap_row_block->get_row(i, &helper_row); if (helper_row.copy(*row_cursor_list[i], _swap_row_block->mem_pool()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to set row for row block. [row=%ld]", i); + LOG(WARNING) << "failed to set row for row block. 
row=" << i; goto SORT_ERR_EXIT; } } @@ -468,30 +452,25 @@ bool RowBlockSorter::sort(RowBlock** row_block) { return false; } -RowBlockAllocator::RowBlockAllocator(const vector& tablet_schema, +RowBlockAllocator::RowBlockAllocator(const TabletSchema& tablet_schema, size_t memory_limitation) : _tablet_schema(tablet_schema), _memory_allocated(0), _memory_limitation(memory_limitation) { _row_len = 0; - _row_len += tablet_schema.size(); - for (vector::const_iterator it = tablet_schema.begin(); - it != tablet_schema.end(); ++it) { - _row_len += (*it).length; - } + _row_len = tablet_schema.row_size(); VLOG(3) << "RowBlockAllocator(). row_len=" << _row_len; } RowBlockAllocator::~RowBlockAllocator() { if (_memory_allocated != 0) { - OLAP_LOG_WARNING("memory lost in RowBlockAllocator. [memory_size=%ld]", _memory_allocated); + LOG(WARNING) << "memory lost in RowBlockAllocator. memory_size=" << _memory_allocated; } } OLAPStatus RowBlockAllocator::allocate(RowBlock** row_block, size_t num_rows, - DataFileType data_file_type, bool null_supported) { size_t row_block_size = _row_len * num_rows; @@ -499,25 +478,24 @@ OLAPStatus RowBlockAllocator::allocate(RowBlock** row_block, && _memory_allocated + row_block_size > _memory_limitation) { VLOG(3) << "RowBlockAllocator::alocate() memory exceeded. " << "m_memory_allocated=" << _memory_allocated; - *row_block = NULL; + *row_block = nullptr; return OLAP_SUCCESS; } // TODO(lijiao) : 为什么舍弃原有的m_row_block_buffer - *row_block = new(nothrow) RowBlock(_tablet_schema); + *row_block = new(nothrow) RowBlock(&_tablet_schema); - if (*row_block == NULL) { - OLAP_LOG_WARNING("failed to malloc RowBlock. [size=%ld]", sizeof(RowBlock)); + if (*row_block == nullptr) { + LOG(WARNING) << "failed to malloc RowBlock. size=" << sizeof(RowBlock); return OLAP_ERR_MALLOC_ERROR; } RowBlockInfo row_block_info(0U, num_rows); - row_block_info.data_file_type = data_file_type; row_block_info.null_supported = null_supported; OLAPStatus res = OLAP_SUCCESS; if ((res = (*row_block)->init(row_block_info)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to init row block."); + LOG(WARNING) << "failed to init row block."; SAFE_DELETE(*row_block); return res; } @@ -531,8 +509,8 @@ OLAPStatus RowBlockAllocator::allocate(RowBlock** row_block, } void RowBlockAllocator::release(RowBlock* row_block) { - if (row_block == NULL) { - OLAP_LOG_WARNING("null row block released."); + if (row_block == nullptr) { + LOG(INFO) << "null row block released."; return; } @@ -545,18 +523,18 @@ void RowBlockAllocator::release(RowBlock* row_block) { delete row_block; } -RowBlockMerger::RowBlockMerger(OLAPTablePtr olap_table) : _olap_table(olap_table) {} +RowBlockMerger::RowBlockMerger(TabletSharedPtr tablet) : _tablet(tablet) {} RowBlockMerger::~RowBlockMerger() {} bool RowBlockMerger::merge( const vector& row_block_arr, - ColumnDataWriter* writer, + RowsetWriterSharedPtr rowset_writer, uint64_t* merged_rows) { uint64_t tmp_merged_rows = 0; RowCursor row_cursor; - if (row_cursor.init(_olap_table->tablet_schema()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to init row cursor."); + if (row_cursor.init(_tablet->tablet_schema()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init row cursor."; goto MERGE_ERR; } @@ -568,11 +546,7 @@ bool RowBlockMerger::merge( // That's not very memory-efficient! 
while (_heap.size() > 0) { - if (writer->attached_by(&row_cursor) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("writer error."); - goto MERGE_ERR; - } - row_cursor.allocate_memory_for_string_type(_olap_table->tablet_schema(), writer->mem_pool()); + row_cursor.allocate_memory_for_string_type(_tablet->tablet_schema(), rowset_writer->mem_pool()); row_cursor.agg_init(*(_heap.top().row_cursor)); @@ -580,8 +554,8 @@ bool RowBlockMerger::merge( goto MERGE_ERR; } - if (KeysType::DUP_KEYS == _olap_table->keys_type()) { - writer->next(row_cursor); + if (KeysType::DUP_KEYS == _tablet->keys_type()) { + rowset_writer->add_row(&row_cursor); continue; } @@ -593,10 +567,10 @@ bool RowBlockMerger::merge( } } row_cursor.finalize_one_merge(); - writer->next(row_cursor); + rowset_writer->add_row(&row_cursor); } - if (writer->finalize() != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to finalizing writer."); + if (rowset_writer->flush() != OLAP_SUCCESS) { + LOG(WARNING) << "failed to finalizing writer."; goto MERGE_ERR; } @@ -621,13 +595,13 @@ bool RowBlockMerger::_make_heap(const vector& row_block_arr) { element.row_block_index = 0; element.row_cursor = new(nothrow) RowCursor(); - if (element.row_cursor == NULL) { + if (element.row_cursor == nullptr) { LOG(FATAL) << "failed to malloc RowCursor. size=" << sizeof(RowCursor); return false; } if (element.row_cursor->init(element.row_block->tablet_schema()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to init row cursor."); + LOG(WARNING) << "failed to init row cursor."; SAFE_DELETE(element.row_cursor); return false; } @@ -655,21 +629,31 @@ bool RowBlockMerger::_pop_heap() { return true; } -LinkedSchemaChange::LinkedSchemaChange( - OLAPTablePtr base_olap_table, OLAPTablePtr new_olap_table, - const RowBlockChanger& row_block_changer) : - _base_olap_table(base_olap_table), - _new_olap_table(new_olap_table), - _row_block_changer(row_block_changer) {} +bool LinkedSchemaChange::process( + RowsetReaderSharedPtr rowset_reader, + RowsetWriterSharedPtr new_rowset_writer, + TabletSharedPtr new_tablet, + TabletSharedPtr base_tablet) { + OLAPStatus status = new_rowset_writer->add_rowset_for_linked_schema_change( + rowset_reader->rowset(), _row_block_changer.get_schema_mapping()); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "fail to convert rowset." 
+ << ", new_tablet=" << new_tablet->full_name() + << ", base_tablet=" << base_tablet->full_name() + << ", version=" << new_rowset_writer->version().first + << "-" << new_rowset_writer->version().second; + return false; + } + + return true; +} SchemaChangeDirectly::SchemaChangeDirectly( - OLAPTablePtr olap_table, const RowBlockChanger& row_block_changer) : - _olap_table(olap_table), _row_block_changer(row_block_changer), - _row_block_allocator(NULL), - _src_cursor(NULL), - _dst_cursor(NULL) {} + _row_block_allocator(nullptr), + _src_cursor(nullptr), + _dst_cursor(nullptr) { } SchemaChangeDirectly::~SchemaChangeDirectly() { VLOG(3) << "~SchemaChangeDirectly()"; @@ -678,163 +662,103 @@ SchemaChangeDirectly::~SchemaChangeDirectly() { SAFE_DELETE(_dst_cursor); } -bool SchemaChangeDirectly::_write_row_block(ColumnDataWriter* writer, RowBlock* row_block) { +bool SchemaChangeDirectly::_write_row_block(RowsetWriterSharedPtr rowset_writer, RowBlock* row_block) { for (uint32_t i = 0; i < row_block->row_block_info().row_num; i++) { - if (OLAP_SUCCESS != writer->attached_by(_dst_cursor)) { - OLAP_LOG_WARNING("fail to attach writer"); - return false; - } - row_block->get_row(i, _src_cursor); - - _dst_cursor->copy(*_src_cursor, writer->mem_pool()); - writer->next(*_dst_cursor); - } - - return true; -} - -bool LinkedSchemaChange::process(ColumnData* olap_data, SegmentGroup* new_segment_group) { - for (size_t i = 0; i < olap_data->segment_group()->num_segments(); ++i) { - string index_path = new_segment_group->construct_index_file_path(new_segment_group->segment_group_id(), i); - string base_table_index_path = olap_data->segment_group()->construct_index_file_path(olap_data->segment_group()->segment_group_id(), i); - if (link(base_table_index_path.c_str(), index_path.c_str()) == 0) { - VLOG(3) << "success to create hard link. from_path=" << base_table_index_path - << ", to_path=" << index_path; - } else { - LOG(WARNING) << "fail to create hard link. [from_path=" << base_table_index_path.c_str() - << " to_path=" << index_path.c_str() - << " errno=" << Errno::no() << " errno_str=" << Errno::str() << "]"; - return false; - } - - string data_path = new_segment_group->construct_data_file_path(new_segment_group->segment_group_id(), i); - string base_table_data_path = olap_data->segment_group()->construct_data_file_path(olap_data->segment_group()->segment_group_id(), i); - if (link(base_table_data_path.c_str(), data_path.c_str()) == 0) { - VLOG(3) << "success to create hard link. from_path=" << base_table_data_path - << ", to_path=" << data_path; - } else { - LOG(WARNING) << "fail to create hard link. [from_path=" << base_table_data_path.c_str() - << " to_path=" << data_path.c_str() - << " errno=" << Errno::no() << " errno_str=" << Errno::str() << "]"; + _dst_cursor->copy(*_src_cursor, rowset_writer->mem_pool()); + if (OLAP_SUCCESS != rowset_writer->add_row(_dst_cursor)) { + LOG(WARNING) << "fail to attach writer"; return false; } } - new_segment_group->set_empty(olap_data->empty()); - new_segment_group->set_num_segments(olap_data->segment_group()->num_segments()); - new_segment_group->add_column_statistics_for_linked_schema_change(olap_data->segment_group()->get_column_statistics(), - _row_block_changer.get__schema_mapping() ); - - if (OLAP_SUCCESS != new_segment_group->load()) { - OLAP_LOG_WARNING("fail to reload index. 
[table='%s' version='%d-%d']", - _new_olap_table->full_name().c_str(), - new_segment_group->version().first, - new_segment_group->version().second); - return false; - } - return true; } -bool SchemaChangeDirectly::process(ColumnData* olap_data, SegmentGroup* new_segment_group) { - DataFileType data_file_type = new_segment_group->table()->data_file_type(); - bool null_supported = true; - - if (NULL == _row_block_allocator) { - if (NULL == (_row_block_allocator = - new(nothrow) RowBlockAllocator(_olap_table->tablet_schema(), 0))) { +bool SchemaChangeDirectly::process(RowsetReaderSharedPtr rowset_reader, RowsetWriterSharedPtr rowset_writer, + TabletSharedPtr new_tablet, + TabletSharedPtr base_tablet) { + if (_row_block_allocator == nullptr) { + _row_block_allocator = new RowBlockAllocator(new_tablet->tablet_schema(), 0); + if (_row_block_allocator == nullptr) { LOG(FATAL) << "failed to malloc RowBlockAllocator. size=" << sizeof(RowBlockAllocator); return false; } } - if (NULL == _src_cursor) { + if (nullptr == _src_cursor) { _src_cursor = new(nothrow) RowCursor(); - if (NULL == _src_cursor) { - OLAP_LOG_WARNING("fail to allocate row cursor."); + if (nullptr == _src_cursor) { + LOG(WARNING) << "fail to allocate row cursor."; return false; } - if (OLAP_SUCCESS != _src_cursor->init(_olap_table->tablet_schema())) { - OLAP_LOG_WARNING("fail to init row cursor."); + if (OLAP_SUCCESS != _src_cursor->init(new_tablet->tablet_schema())) { + LOG(WARNING) << "fail to init row cursor."; return false; } } - if (NULL == _dst_cursor) { + if (nullptr == _dst_cursor) { _dst_cursor = new(nothrow) RowCursor(); - if (NULL == _dst_cursor) { - OLAP_LOG_WARNING("fail to allocate row cursor."); + if (nullptr == _dst_cursor) { + LOG(WARNING) << "fail to allocate row cursor."; return false; } - if (OLAP_SUCCESS != _dst_cursor->init(_olap_table->tablet_schema())) { - OLAP_LOG_WARNING("fail to init row cursor."); + if (OLAP_SUCCESS != _dst_cursor->init(new_tablet->tablet_schema())) { + LOG(WARNING) << "fail to init row cursor."; return false; } } - RowBlock* ref_row_block = NULL; bool need_create_empty_version = false; OLAPStatus res = OLAP_SUCCESS; - if (!olap_data->empty()) { - res = olap_data->get_first_row_block(&ref_row_block); - if (res != OLAP_SUCCESS) { - if (olap_data->eof()) { - need_create_empty_version = true; - } else { - LOG(WARNING) << "failed to get first row block."; - return false; - } + if (!rowset_reader->rowset()->empty()) { + int num_rows = rowset_reader->rowset()->num_rows(); + if (num_rows == 0) { + // actually, the rowset is empty + need_create_empty_version = true; } } else { need_create_empty_version = true; } if (need_create_empty_version) { - res = create_init_version(new_segment_group->table()->tablet_id(), - new_segment_group->table()->schema_hash(), - new_segment_group->version(), - new_segment_group->version_hash(), - new_segment_group); + res = rowset_writer->flush(); if (res != OLAP_SUCCESS) { LOG(WARNING) << "create empty version for schema change failed." - << "version=" << new_segment_group->version().first << "-" << new_segment_group->version().second; + << "version=" << rowset_writer->version().first << "-" << rowset_writer->version().second; return false; } return true; } - VLOG(3) << "init writer. table=" << _olap_table->full_name() - << "block_row_size=" << _olap_table->num_rows_per_row_block(); + VLOG(3) << "init writer. 
new_tablet=" << new_tablet->full_name() + << "block_row_number=" << new_tablet->num_rows_per_row_block(); bool result = true; - RowBlock* new_row_block = NULL; - ColumnDataWriter* writer = ColumnDataWriter::create(_olap_table, new_segment_group, false); - if (NULL == writer) { - OLAP_LOG_WARNING("failed to create writer."); - result = false; - goto DIRECTLY_PROCESS_ERR; - } + RowBlock* new_row_block = nullptr; - // Reset filted_rows and merged_rows statistic + // Reset filtered_rows and merged_rows statistic reset_merged_rows(); - reset_filted_rows(); + reset_filtered_rows(); - while (NULL != ref_row_block) { + RowBlock* ref_row_block = nullptr; + rowset_reader->next_block(&ref_row_block); + while (ref_row_block != nullptr && ref_row_block->has_remaining()) { // 注意这里强制分配和旧块等大的块(小了可能会存不下) - if (NULL == new_row_block + if (new_row_block == nullptr || new_row_block->capacity() < ref_row_block->row_block_info().row_num) { - if (NULL != new_row_block) { + if (new_row_block != nullptr) { _row_block_allocator->release(new_row_block); - new_row_block = NULL; + new_row_block = nullptr; } if (OLAP_SUCCESS != _row_block_allocator->allocate( &new_row_block, ref_row_block->row_block_info().row_num, - data_file_type, null_supported)) { - OLAP_LOG_WARNING("failed to allocate RowBlock."); + true)) { + LOG(WARNING) << "failed to allocate RowBlock."; result = false; goto DIRECTLY_PROCESS_ERR; } @@ -843,75 +767,69 @@ bool SchemaChangeDirectly::process(ColumnData* olap_data, SegmentGroup* new_segm } // 将ref改为new。这一步按道理来说确实需要等大的块,但理论上和writer无关。 - uint64_t filted_rows = 0; - if (!_row_block_changer.change_row_block(olap_data->data_file_type(), - *ref_row_block, - olap_data->version().second, + uint64_t filtered_rows = 0; + if (!_row_block_changer.change_row_block(ref_row_block, + rowset_reader->version().second, new_row_block, - &filted_rows)) { - OLAP_LOG_WARNING("failed to change data in row block."); + &filtered_rows)) { + LOG(WARNING) << "failed to change data in row block."; result = false; goto DIRECTLY_PROCESS_ERR; } - add_filted_rows(filted_rows); + add_filtered_rows(filtered_rows); - if (!_write_row_block(writer, new_row_block)) { - OLAP_LOG_WARNING("failed to write row block."); + if (!_write_row_block(rowset_writer, new_row_block)) { + LOG(WARNING) << "failed to write row block."; result = false; goto DIRECTLY_PROCESS_ERR; } - olap_data->get_next_row_block(&ref_row_block); - - } - - if (OLAP_SUCCESS != writer->finalize()) { - result = false; - goto DIRECTLY_PROCESS_ERR; + ref_row_block->clear(); + rowset_reader->next_block(&ref_row_block); } - if (OLAP_SUCCESS != new_segment_group->load()) { - OLAP_LOG_WARNING("fail to reload index. [table='%s' version='%d-%d']", - _olap_table->full_name().c_str(), - new_segment_group->version().first, - new_segment_group->version().second); + if (OLAP_SUCCESS != rowset_writer->flush()) { result = false; goto DIRECTLY_PROCESS_ERR; } - add_filted_rows(olap_data->get_filted_rows()); + add_filtered_rows(rowset_reader->filtered_rows()); // Check row num changes if (config::row_nums_check) { - if (olap_data->segment_group()->num_rows() - != new_segment_group->num_rows() + merged_rows() + filted_rows()) { - LOG(FATAL) << "fail to check row num! " - << "source_rows=" << olap_data->segment_group()->num_rows() + if (rowset_reader->rowset()->num_rows() + != rowset_writer->num_rows() + merged_rows() + filtered_rows()) { + LOG(WARNING) << "fail to check row num! 
" + << "source_rows=" << rowset_reader->rowset()->num_rows() << ", merged_rows=" << merged_rows() - << ", filted_rows=" << filted_rows() - << ", new_index_rows=" << new_segment_group->num_rows(); + << ", filtered_rows=" << filtered_rows() + << ", new_index_rows=" << rowset_writer->num_rows(); result = false; } + LOG(INFO) << "all row nums. source_rows=" << rowset_reader->rowset()->num_rows() + << ", merged_rows=" << merged_rows() + << ", filtered_rows=" << filtered_rows() + << ", new_index_rows=" << rowset_writer->num_rows(); } else { - LOG(INFO) << "all row nums. source_rows=" << olap_data->segment_group()->num_rows() + LOG(INFO) << "all row nums. source_rows=" << rowset_reader->rowset()->num_rows() << ", merged_rows=" << merged_rows() - << ", filted_rows=" << filted_rows() - << ", new_index_rows=" << new_segment_group->num_rows(); + << ", filtered_rows=" << filtered_rows() + << ", new_index_rows=" << rowset_writer->num_rows(); } DIRECTLY_PROCESS_ERR: - SAFE_DELETE(writer); - _row_block_allocator->release(new_row_block); + if (new_row_block) { + _row_block_allocator->release(new_row_block); + new_row_block = nullptr; + } return result; } -SchemaChangeWithSorting::SchemaChangeWithSorting(OLAPTablePtr olap_table, - const RowBlockChanger& row_block_changer, +SchemaChangeWithSorting::SchemaChangeWithSorting(const RowBlockChanger& row_block_changer, size_t memory_limitation) : - _olap_table(olap_table), _row_block_changer(row_block_changer), _memory_limitation(memory_limitation), - _row_block_allocator(NULL) { + _row_block_allocator(nullptr) { // 每次SchemaChange做外排的时候,会写一些临时版本(比如999,1000,1001),为避免Cache冲突,临时 // 版本进行2个处理: // 1. 随机值作为VersionHash @@ -926,44 +844,37 @@ SchemaChangeWithSorting::~SchemaChangeWithSorting() { SAFE_DELETE(_row_block_allocator); } -bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_segment_group) { - if (NULL == _row_block_allocator) { - if (NULL == (_row_block_allocator = new(nothrow) RowBlockAllocator( - _olap_table->tablet_schema(), _memory_limitation))) { +bool SchemaChangeWithSorting::process( + RowsetReaderSharedPtr rowset_reader, + RowsetWriterSharedPtr new_rowset_writer, + TabletSharedPtr new_tablet, + TabletSharedPtr base_tablet) { + if (_row_block_allocator == nullptr) { + _row_block_allocator = new (nothrow) RowBlockAllocator(new_tablet->tablet_schema(), _memory_limitation); + if (_row_block_allocator == nullptr) { LOG(FATAL) << "failed to malloc RowBlockAllocator. 
size=" << sizeof(RowBlockAllocator); return false; } } - DataFileType data_file_type = new_segment_group->table()->data_file_type(); - bool null_supported = true; - - RowBlock* ref_row_block = NULL; bool need_create_empty_version = false; OLAPStatus res = OLAP_SUCCESS; - if (!olap_data->empty()) { - res = olap_data->get_first_row_block(&ref_row_block); - if (res != OLAP_SUCCESS) { - if (olap_data->eof()) { - need_create_empty_version = true; - } else { - LOG(WARNING) << "failed to get first row block."; - return false; - } + RowsetSharedPtr rowset = rowset_reader->rowset(); + if (!rowset->empty()) { + int num_rows = rowset_reader->rowset()->num_rows(); + if (num_rows == 0) { + need_create_empty_version = true; } } else { need_create_empty_version = true; } if (need_create_empty_version) { - res = create_init_version(new_segment_group->table()->tablet_id(), - new_segment_group->table()->schema_hash(), - new_segment_group->version(), - new_segment_group->version_hash(), - new_segment_group); + res = new_rowset_writer->flush(); if (res != OLAP_SUCCESS) { LOG(WARNING) << "create empty version for schema change failed." - << "version=" << new_segment_group->version().first << "-" << new_segment_group->version().second; + << " version=" << new_rowset_writer->version().first + << "-" << new_rowset_writer->version().second; return false; } return true; @@ -974,48 +885,50 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_s RowBlockSorter row_block_sorter(_row_block_allocator); // for internal sorting - RowBlock* new_row_block = NULL; + RowBlock* new_row_block = nullptr; vector row_block_arr; // for external sorting - vector olap_segment_groups; + // src_rowsets to store the rowset generated by internal sorting + vector src_rowsets; _temp_delta_versions.first = _temp_delta_versions.second; - // Reset filted_rows and merged_rows statistic + // Reset filtered_rows and merged_rows statistic reset_merged_rows(); - reset_filted_rows(); + reset_filtered_rows(); - while (NULL != ref_row_block) { + RowBlock* ref_row_block = nullptr; + rowset_reader->next_block(&ref_row_block); + while (ref_row_block != nullptr && ref_row_block->has_remaining()) { if (OLAP_SUCCESS != _row_block_allocator->allocate( - &new_row_block, ref_row_block->row_block_info().row_num, - data_file_type, null_supported)) { - OLAP_LOG_WARNING("failed to allocate RowBlock."); + &new_row_block, ref_row_block->row_block_info().row_num, true)) { + LOG(WARNING) << "failed to allocate RowBlock."; result = false; goto SORTING_PROCESS_ERR; } - if (NULL == new_row_block) { + if (new_row_block == nullptr) { if (row_block_arr.size() < 1) { - OLAP_LOG_WARNING("Memory limitation is too small for Schema Change. " - "[memory_limitation=%ld]", - _memory_limitation); + LOG(WARNING) << "Memory limitation is too small for Schema Change." + << "memory_limitation=" << _memory_limitation; return false; } // enter here while memory limitation is reached. 
- SegmentGroup* segment_group = NULL; - + RowsetSharedPtr rowset; if (!_internal_sorting(row_block_arr, Version(_temp_delta_versions.second, _temp_delta_versions.second), - &segment_group)) { - OLAP_LOG_WARNING("failed to sorting internally."); + rowset_reader->version_hash(), + new_tablet, + &rowset)) { + LOG(WARNING) << "failed to sorting internally."; result = false; goto SORTING_PROCESS_ERR; } - olap_segment_groups.push_back(segment_group); + src_rowsets.push_back(rowset); for (vector::iterator it = row_block_arr.begin(); it != row_block_arr.end(); ++it) { @@ -1029,22 +942,19 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_s continue; } - uint64_t filted_rows = 0; - if (!_row_block_changer.change_row_block( - olap_data->data_file_type(), - *ref_row_block, - olap_data->version().second, - new_row_block, - &filted_rows)) { - OLAP_LOG_WARNING("failed to change data in row block."); + uint64_t filtered_rows = 0; + if (!_row_block_changer.change_row_block(ref_row_block, + rowset_reader->version().second, + new_row_block, &filtered_rows)) { + LOG(WARNING) << "failed to change data in row block."; result = false; goto SORTING_PROCESS_ERR; } - add_filted_rows(filted_rows); + add_filtered_rows(filtered_rows); if (new_row_block->row_block_info().row_num > 0) { if (!row_block_sorter.sort(&new_row_block)) { - OLAP_LOG_WARNING("failed to sort row block."); + LOG(WARNING) << "failed to sort row block."; result = false; OLAP_GOTO(SORTING_PROCESS_ERR); } @@ -1052,25 +962,28 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_s row_block_arr.push_back(new_row_block); } else { _row_block_allocator->release(new_row_block); - new_row_block = NULL; + new_row_block = nullptr; } - olap_data->get_next_row_block(&ref_row_block); + ref_row_block->clear(); + rowset_reader->next_block(&ref_row_block); } if (!row_block_arr.empty()) { // enter here while memory limitation is reached. - SegmentGroup* segment_group = NULL; + RowsetSharedPtr rowset = nullptr; if (!_internal_sorting(row_block_arr, Version(_temp_delta_versions.second, _temp_delta_versions.second), - &segment_group)) { - OLAP_LOG_WARNING("failed to sorting internally."); + rowset_reader->version_hash(), + new_tablet, + &rowset)) { + LOG(WARNING) << "failed to sorting internally."; result = false; goto SORTING_PROCESS_ERR; } - olap_segment_groups.push_back(segment_group); + src_rowsets.push_back(rowset); for (vector::iterator it = row_block_arr.begin(); it != row_block_arr.end(); ++it) { @@ -1084,36 +997,42 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_s } // TODO(zyh): 如果_temp_delta_versions只有一个,不需要再外排 - if (!_external_sorting(olap_segment_groups, new_segment_group)) { - OLAP_LOG_WARNING("failed to sorting externally."); + if (!_external_sorting(src_rowsets, new_rowset_writer, new_tablet)) { + LOG(WARNING) << "failed to sorting externally."; result = false; goto SORTING_PROCESS_ERR; } - add_filted_rows(olap_data->get_filted_rows()); + add_filtered_rows(rowset_reader->filtered_rows()); // Check row num changes if (config::row_nums_check) { - if (olap_data->segment_group()->num_rows() - != new_segment_group->num_rows() + merged_rows() + filted_rows()) { - OLAP_LOG_WARNING("fail to check row num! 
" - "[source_rows=%lu merged_rows=%lu filted_rows=%lu new_index_rows=%lu]", - olap_data->segment_group()->num_rows(), - merged_rows(), filted_rows(), new_segment_group->num_rows()); + if (rowset_reader->rowset()->num_rows() + != new_rowset_writer->num_rows() + merged_rows() + filtered_rows()) { + LOG(WARNING) << "fail to check row num!" + << " source_rows=" << rowset_reader->rowset()->num_rows() + << ", merged_rows=" << merged_rows() + << ", filtered_rows=" << filtered_rows() + << ", new_index_rows=" << new_rowset_writer->num_rows(); result = false; } + LOG(INFO) << "all row nums. source_rows=" << rowset_reader->rowset()->num_rows() + << ", merged_rows=" << merged_rows() + << ", filtered_rows=" << filtered_rows() + << ", new_index_rows=" << new_rowset_writer->num_rows(); } else { - LOG(INFO) << "all row nums. source_rows=" << olap_data->segment_group()->num_rows() + LOG(INFO) << "all row nums. source_rows=" << rowset_reader->rowset()->num_rows() << ", merged_rows=" << merged_rows() - << ", filted_rows=" << filted_rows() - << ", new_index_rows=" << new_segment_group->num_rows(); + << ", filtered_rows=" << filtered_rows() + << ", new_index_rows=" << new_rowset_writer->num_rows(); } SORTING_PROCESS_ERR: - for (vector::iterator it = olap_segment_groups.begin(); - it != olap_segment_groups.end(); ++it) { - (*it)->delete_all_files(); - SAFE_DELETE(*it); + + // remove the intermediate rowsets generated by internal sorting + for (vector::iterator it = src_rowsets.begin(); + it != src_rowsets.end(); ++it) { + (*it)->remove(); } for (vector::iterator it = row_block_arr.begin(); @@ -1126,1141 +1045,759 @@ bool SchemaChangeWithSorting::process(ColumnData* olap_data, SegmentGroup* new_s } bool SchemaChangeWithSorting::_internal_sorting(const vector& row_block_arr, - const Version& temp_delta_versions, - SegmentGroup** temp_segment_group) { - ColumnDataWriter* writer = NULL; + const Version& version, + VersionHash version_hash, + TabletSharedPtr new_tablet, + RowsetSharedPtr* rowset) { uint64_t merged_rows = 0; - RowBlockMerger merger(_olap_table); + RowBlockMerger merger(new_tablet); - (*temp_segment_group) = new(nothrow) SegmentGroup(_olap_table.get(), - temp_delta_versions, - rand(), - false, - 0, 0); - if (NULL == (*temp_segment_group)) { - OLAP_LOG_WARNING("failed to malloc SegmentGroup. [size=%ld]", sizeof(SegmentGroup)); - goto INTERNAL_SORTING_ERR; + RowsetWriterSharedPtr rowset_writer(new AlphaRowsetWriter()); + if (rowset_writer == nullptr) { + LOG(WARNING) << "new rowset builder failed"; + return false; } - - VLOG(3) << "init writer. 
tablet=" << _olap_table->full_name() - << ", block_row_size=" << _olap_table->num_rows_per_row_block(); - writer = ColumnDataWriter::create(_olap_table, *temp_segment_group, false); - if (NULL == writer) { - OLAP_LOG_WARNING("failed to create writer."); - goto INTERNAL_SORTING_ERR; + RowsetId rowset_id = 0; + OLAPStatus status = new_tablet->next_rowset_id(&rowset_id); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "get next rowset id failed"; + return false; } - - if (!merger.merge(row_block_arr, writer, &merged_rows)) { - OLAP_LOG_WARNING("failed to merge row blocks."); - goto INTERNAL_SORTING_ERR; + RowsetWriterContext context; + context.rowset_id = rowset_id; + context.tablet_uid = new_tablet->tablet_uid(); + context.tablet_id = new_tablet->tablet_id(); + context.partition_id = new_tablet->partition_id(); + context.tablet_schema_hash = new_tablet->schema_hash(); + context.rowset_type = ALPHA_ROWSET; + context.rowset_path_prefix = new_tablet->tablet_path(); + context.tablet_schema = &(new_tablet->tablet_schema()); + context.rowset_state = VISIBLE; + context.data_dir = new_tablet->data_dir(); + context.version = version; + context.version_hash = version_hash; + VLOG(3) << "init rowset builder. tablet=" << new_tablet->full_name() + << ", block_row_size=" << new_tablet->num_rows_per_row_block(); + rowset_writer->init(context); + if (!merger.merge(row_block_arr, rowset_writer, &merged_rows)) { + LOG(WARNING) << "failed to merge row blocks."; + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(rowset_writer->rowset_id())); + return false; } + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(rowset_writer->rowset_id())); add_merged_rows(merged_rows); - - if (OLAP_SUCCESS != (*temp_segment_group)->load()) { - OLAP_LOG_WARNING("failed to reload olap index."); - goto INTERNAL_SORTING_ERR; - } - - SAFE_DELETE(writer); + *rowset = rowset_writer->build(); return true; - -INTERNAL_SORTING_ERR: - SAFE_DELETE(writer); - - (*temp_segment_group)->delete_all_files(); - SAFE_DELETE(*temp_segment_group); - return false; } bool SchemaChangeWithSorting::_external_sorting( - vector& src_segment_groups, - SegmentGroup* dest_segment_group) { - Merger merger(_olap_table, dest_segment_group, READER_ALTER_TABLE); + vector& src_rowsets, + RowsetWriterSharedPtr rowset_writer, + TabletSharedPtr new_tablet) { + Merger merger(new_tablet, rowset_writer, READER_ALTER_TABLE); uint64_t merged_rows = 0; - uint64_t filted_rows = 0; - vector olap_data_arr; - - for (vector::iterator it = src_segment_groups.begin(); - it != src_segment_groups.end(); ++it) { - ColumnData* olap_data = ColumnData::create(*it); - if (NULL == olap_data) { - OLAP_LOG_WARNING("fail to create ColumnData."); - goto EXTERNAL_SORTING_ERR; - } - - olap_data_arr.push_back(olap_data); - - if (OLAP_SUCCESS != olap_data->init()) { - OLAP_LOG_WARNING("fail to initial olap data. [version='%d-%d' table='%s']", - (*it)->version().first, - (*it)->version().second, - (*it)->table()->full_name().c_str()); - goto EXTERNAL_SORTING_ERR; + uint64_t filtered_rows = 0; + vector rs_readers; + for (vector::iterator it = src_rowsets.begin(); + it != src_rowsets.end(); ++it) { + RowsetReaderSharedPtr rs_reader = (*it)->create_reader(); + if (rs_reader == nullptr) { + LOG(WARNING) << "fail to create rowset reader."; + return false; } + rs_readers.push_back(rs_reader); } - if (OLAP_SUCCESS != merger.merge(olap_data_arr, &merged_rows, &filted_rows)) { - OLAP_LOG_WARNING("fail to merge deltas. 
[table='%s' version='%d-%d']", - _olap_table->full_name().c_str(), - dest_segment_group->version().first, - dest_segment_group->version().second); - goto EXTERNAL_SORTING_ERR; + if (OLAP_SUCCESS != merger.merge(rs_readers, &merged_rows, &filtered_rows)) { + LOG(WARNING) << "fail to merge rowsets. tablet=" << new_tablet->full_name() + << ", version=" << rowset_writer->version().first + << "-" << rowset_writer->version().second; + return false; } add_merged_rows(merged_rows); - add_filted_rows(filted_rows); - - if (OLAP_SUCCESS != dest_segment_group->load()) { - OLAP_LOG_WARNING("fail to reload index. [table='%s' version='%d-%d']", - _olap_table->full_name().c_str(), - dest_segment_group->version().first, - dest_segment_group->version().second); - goto EXTERNAL_SORTING_ERR; - } - - for (vector::iterator it = olap_data_arr.begin(); - it != olap_data_arr.end(); ++it) { - SAFE_DELETE(*it); - } + add_filtered_rows(filtered_rows); return true; - -EXTERNAL_SORTING_ERR: - for (vector::iterator it = olap_data_arr.begin(); - it != olap_data_arr.end(); ++it) { - SAFE_DELETE(*it); - } - - dest_segment_group->delete_all_files(); - return false; } -OLAPStatus SchemaChangeHandler::clear_schema_change_single_info( - TTabletId tablet_id, - SchemaHash schema_hash, - AlterTabletType* alter_table_type, - bool only_one, - bool check_only) { - OLAPTablePtr olap_table = OLAPEngine::get_instance()->get_table(tablet_id, schema_hash); - return clear_schema_change_single_info(olap_table, alter_table_type, only_one, check_only); -} - -OLAPStatus SchemaChangeHandler::clear_schema_change_single_info( - OLAPTablePtr olap_table, - AlterTabletType* type, - bool only_one, - bool check_only) { - OLAPStatus res = OLAP_SUCCESS; - - if (NULL == olap_table.get()) { - return res; - } - - vector versions_to_be_changed; - if (olap_table->get_schema_change_request(NULL, - NULL, - &versions_to_be_changed, - NULL)) { - if (versions_to_be_changed.size() != 0) { - OLAP_LOG_WARNING("schema change is not allowed now, " - "until previous schema change is done. [table='%s']", - olap_table->full_name().c_str()); - return OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED; - } - } - - if (!check_only) { - VLOG(3) << "broke old schema change chain"; - olap_table->clear_schema_change_request(); - } - - return res; -} - -OLAPStatus SchemaChangeHandler::_check_and_clear_schema_change_info( - OLAPTablePtr olap_table, - const TAlterTabletReq& request) { - // check for schema change chain ( A->B) - // broken old relation if a chain was found and there is no version to be changed - // so, there is no relation between A & B any more - // including: alter_table, split_table, rollup_table - OLAPStatus res = OLAP_SUCCESS; - TTabletId tablet_id; - TSchemaHash schema_hash; - vector versions_to_be_changed; - AlterTabletType type; - - // checkes schema change & rollup - olap_table->obtain_header_rdlock(); - bool ret = olap_table->get_schema_change_request( - &tablet_id, &schema_hash, &versions_to_be_changed, &type); - olap_table->release_header_lock(); - if (!ret) { - return res; - } - - if (versions_to_be_changed.size() != 0) { - OLAP_LOG_WARNING("schema change is not allowed now, " - "until previous schema change is done"); - return OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED; - } - - if (tablet_id == request.new_tablet_req.tablet_id - && schema_hash == request.new_tablet_req.tablet_schema.schema_hash) { - LOG(INFO) << "schema change task for specified tablet has already finished. 
" - << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; - return res; - } - - // clear schema change info of current tablet - { - WriteLock wrlock(olap_table->get_header_lock_ptr()); - res = clear_schema_change_single_info( - olap_table->tablet_id(), olap_table->schema_hash(), &type, true, false); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to clear schema change info. [res=%d full_name='%s']", - res, olap_table->full_name().c_str()); - return res; - } - - res = olap_table->save_header(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save tablet header. [res=%d, full_name='%s']", - res, olap_table->full_name().c_str()); - return res; - } - } - - // clear schema change info of related tablet - OLAPTablePtr tablet = OLAPEngine::get_instance()->get_table( - tablet_id, schema_hash); - if (tablet.get() == NULL) { - OLAP_LOG_WARNING("get null tablet! [tablet_id=%ld schema_hash=%d]", - tablet_id, schema_hash); - return OLAP_ERR_TABLE_NOT_FOUND; - } - - { - WriteLock wrlock(tablet->get_header_lock_ptr()); - res = clear_schema_change_single_info( - tablet_id, schema_hash, &type, true, false); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to clear schema change info. [res=%d full_name='%s']", - res, tablet->full_name().c_str()); - return res; - } - - res = tablet->save_header(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save tablet header. [res=%d, full_name='%s']", - res, tablet->full_name().c_str()); - return res; - } - } - - return res; -} - -OLAPStatus SchemaChangeHandler::process_alter_table( - AlterTabletType type, - const TAlterTabletReq& request) { +OLAPStatus SchemaChangeHandler::process_alter_tablet(AlterTabletType type, + const TAlterTabletReq& request) { + LOG(INFO) << "begin to validate alter tablet request. base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; OLAPStatus res = OLAP_SUCCESS; - LOG(INFO) << "begin to validate alter tablet request."; - // 1. Lock schema_change_lock util schema change info is stored in table header - if (!OLAPEngine::get_instance()->try_schema_change_lock(request.base_tablet_id)) { - OLAP_LOG_WARNING("failed to obtain schema change lock. [res=%d table=%ld]", - res, request.base_tablet_id); + // Lock schema_change_lock util schema change info is stored in tablet header + if (!StorageEngine::instance()->tablet_manager()->try_schema_change_lock(request.base_tablet_id)) { + LOG(WARNING) << "failed to obtain schema change lock. " + << "base_tablet=" << request.base_tablet_id; return OLAP_ERR_TRY_LOCK_FAILED; } - // 2. Get base table - OLAPTablePtr ref_olap_table = OLAPEngine::get_instance()->get_table( + // Get base tablet + TabletSharedPtr base_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( request.base_tablet_id, request.base_schema_hash); - if (ref_olap_table.get() == NULL) { - OLAP_LOG_WARNING("fail to find base table. [base_table=%ld base_schema_hash=%d]", - request.base_tablet_id, request.base_schema_hash); - OLAPEngine::get_instance()->release_schema_change_lock(request.base_tablet_id); + if (base_tablet == nullptr) { + LOG(WARNING) << "fail to find base tablet. base_tablet=" << request.base_tablet_id + << ", base_schema_hash=" << request.base_schema_hash; + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); return OLAP_ERR_TABLE_NOT_FOUND; } - // 3. 
Check if history schema change information exist, - // if exist, it will be cleaned only when all delta versions converted - res = _check_and_clear_schema_change_info(ref_olap_table, request); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to check and clear schema change info. [table='%s']", - ref_olap_table->full_name().c_str()); - OLAPEngine::get_instance()->release_schema_change_lock(request.base_tablet_id); - return res; - } - - // 4. return failed if new table already exist in OLAPEngine. - OLAPTablePtr new_tablet = OLAPEngine::get_instance()->get_table( + // 4. Returning success if new tablet already exist in StorageEngine. + // It means that the current request was already handled. + TabletSharedPtr new_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( request.new_tablet_req.tablet_id, request.new_tablet_req.tablet_schema.schema_hash); - if (new_tablet.get() != NULL) { - res = OLAP_SUCCESS; - } else { - OLAPStatus lock_status = ref_olap_table->try_migration_rdlock(); - if (lock_status != OLAP_SUCCESS) { - res = lock_status; + if (new_tablet != nullptr) { + LOG(INFO) << "find alter new tablet exists " << new_tablet->full_name() + << ", check if it is valid"; + // check if new tablet's alter task is finished + AlterTabletTaskSharedPtr new_tablet_alter_task = new_tablet->alter_task(); + AlterTabletTaskSharedPtr base_alter_task = base_tablet->alter_task(); + if ((new_tablet_alter_task != nullptr && new_tablet_alter_task->alter_state() != ALTER_FINISHED) + || (base_alter_task != nullptr && base_alter_task->alter_state() != ALTER_FINISHED) ) { + LOG(INFO) << "find invalid new tablet " + << ", base_tablet=" << base_tablet->full_name() + << ", new_tablet=" << new_tablet->full_name() + << ", base alter task state is " << (base_alter_task == nullptr ? -1 : base_alter_task->alter_state()) + << ", new alter task state is " << (new_tablet_alter_task == nullptr ? -1 : new_tablet_alter_task->alter_state()) + << " should drop new tablet generated by previous task"; + res = StorageEngine::instance()->tablet_manager()->drop_tablet(request.new_tablet_req.tablet_id, + request.new_tablet_req.tablet_schema.schema_hash); + if (res != OLAP_SUCCESS) { + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + LOG(WARNING) << "Alter task has been failed. Should drop invalid tablet. but failed. res=" << res + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + return res; + } } else { - res = _do_alter_table(type, ref_olap_table, request); - ref_olap_table->release_migration_lock(); + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + LOG(INFO) << "find valid new tablet " + << ", base_tablet=" << base_tablet->full_name() + << ", new_tablet=" << new_tablet->full_name() + << ", base alter task state is " << (base_alter_task == nullptr ? -1 : base_alter_task->alter_state()) + << ", new alter task state is " << (new_tablet_alter_task == nullptr ? -1 : new_tablet_alter_task->alter_state()) + << " return success"; + return OLAP_SUCCESS; } } - OLAPEngine::get_instance()->release_schema_change_lock(request.base_tablet_id); - - return res; -} + LOG(INFO) << "finish to validate alter tablet request. 
base_tablet=" << base_tablet->full_name(); -OLAPStatus SchemaChangeHandler::_do_alter_table( - AlterTabletType type, - OLAPTablePtr ref_olap_table, - const TAlterTabletReq& request) { - OLAPStatus res = OLAP_SUCCESS; - OLAPTablePtr new_olap_table; - string base_root_path = ref_olap_table->storage_root_path_name(); - - LOG(INFO) << "begin to do alter tablet job. new_table_id=" << request.new_tablet_req.tablet_id; - // 1. Create new table and register into OLAPEngine - res = _create_new_olap_table(ref_olap_table, - request.new_tablet_req, - &base_root_path, - &new_olap_table); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create new olap table. [table=%ld]", - request.new_tablet_req.tablet_id); - return res; + // 4. Create new tablet and register into StorageEngine + new_tablet = StorageEngine::instance()->create_tablet(type, request.new_tablet_req, true, base_tablet); + if (new_tablet == nullptr) { + LOG(WARNING) << "fail to create new tablet. new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_tablet_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + return OLAP_ERR_TABLE_CREATE_META_ERROR; } - // set schema change status temporarily, - // after waiting transactions to finish, will calculate versions again - vector tmp_versions_to_be_changed; - tmp_versions_to_be_changed.push_back(Version(-1, -1)); - ref_olap_table->obtain_push_lock(); - ref_olap_table->obtain_header_wrlock(); - new_olap_table->obtain_header_wrlock(); - res = _save_schema_change_info(type, ref_olap_table, new_olap_table, tmp_versions_to_be_changed); - new_olap_table->release_header_lock(); - ref_olap_table->release_header_lock(); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save schema change info before waiting transactions. 
" - "[base=%s new=%s res=%d]", ref_olap_table->full_name().c_str(), - new_olap_table->full_name().c_str(), res); - ref_olap_table->release_push_lock(); - OLAPEngine::get_instance()->drop_table( - new_olap_table->tablet_id(), new_olap_table->schema_hash()); - return res; + ReadLock base_migration_rlock(base_tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!base_migration_rlock.own_lock()) { + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + return OLAP_ERR_RWLOCK_ERROR; + } + ReadLock new_migration_rlock(new_tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!new_migration_rlock.own_lock()) { + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + return OLAP_ERR_RWLOCK_ERROR; } + base_tablet->obtain_push_lock(); + base_tablet->obtain_header_wrlock(); + new_tablet->obtain_header_wrlock(); + // store schema change information into tablet header + vector empty_version_list; + res = _add_alter_task(type, base_tablet, new_tablet, empty_version_list); + new_tablet->release_header_lock(); + base_tablet->release_header_lock(); + // get current transactions int64_t partition_id; std::set transaction_ids; - OLAPEngine::get_instance()-> - get_transactions_by_tablet(ref_olap_table, &partition_id, &transaction_ids); - ref_olap_table->release_push_lock(); + StorageEngine::instance()->txn_manager()->get_tablet_related_txns(base_tablet->tablet_id(), + base_tablet->schema_hash(), base_tablet->tablet_uid(), &partition_id, &transaction_ids); + base_tablet->release_push_lock(); // wait transactions to publish version int num = 0; while (!transaction_ids.empty()) { - VLOG(3) << "wait transactions when schema change. tablet=" << ref_olap_table->full_name() + VLOG(3) << "wait transactions when schema change. tablet=" << base_tablet->full_name() << ", transaction_size=" << transaction_ids.size(); num++; if (num % 100 == 0) { for (int64_t transaction_id : transaction_ids) { - LOG(INFO) << "transaction_id is waiting by schema_change: " << transaction_id; + LOG(INFO) << "transaction_id is waiting by schema_change." + << " base_tablet=" << base_tablet->full_name() + << " new_tablet=" << new_tablet->full_name() + << " transactionid=" << transaction_id; } } sleep(1); // erase finished transaction vector finished_transactions; for (int64_t transaction_id : transaction_ids) { - if (!OLAPEngine::get_instance()->has_transaction( + if (!StorageEngine::instance()->txn_manager()->has_txn( partition_id, transaction_id, - ref_olap_table->tablet_id(), ref_olap_table->schema_hash())) { + base_tablet->tablet_id(), base_tablet->schema_hash(), base_tablet->tablet_uid())) { finished_transactions.push_back(transaction_id); } } for (int64_t transaction_id : finished_transactions) { transaction_ids.erase(transaction_id); VLOG(3) << "transaction finished when schema change is waiting. " - << "tablet=" << ref_olap_table->full_name() + << "tablet=" << base_tablet->full_name() << ", transaction_id=" << transaction_id << ", transaction_size=" << transaction_ids.size(); } } - // 2. Get version_to_be_changed and store into table header - ref_olap_table->obtain_push_lock(); - ref_olap_table->obtain_header_wrlock(); - new_olap_table->obtain_header_wrlock(); - - // before calculating version_to_be_changed, - // remove all data from new tablet, prevent to rewrite data(those double pushed when wait) - VLOG(3) << "begin to remove all data from new tablet to prevent rewrite. 
" - << "new_tablet=" << new_olap_table->full_name(); - // only remove the version <= base_tablet's latest version - const PDelta* lastest_file_version = ref_olap_table->lastest_version(); - if (lastest_file_version != NULL) { - VLOG(3) << "find the latest version of base tablet when remove all data from new. " - << "base_tablet=" << ref_olap_table->full_name() - << ", version=" << lastest_file_version->start_version() - << "-" << lastest_file_version->end_version(); - vector new_tablet_versions; - new_olap_table->list_versions(&new_tablet_versions); - for (vector::const_iterator it = new_tablet_versions.begin(); - it != new_tablet_versions.end(); ++it) { - if (it->second <= lastest_file_version->end_version()) { - std::vector segment_groups; - res = new_olap_table->unregister_data_source(*it, &segment_groups); - if (res != OLAP_SUCCESS) { - break; - } - for (SegmentGroup* segment_group : segment_groups) { - segment_group->delete_all_files(); - delete segment_group; + // 2. Get version_to_be_changed and store into tablet header + base_tablet->obtain_push_lock(); + base_tablet->obtain_header_wrlock(); + new_tablet->obtain_header_wrlock(); + + vector versions_to_be_changed; + vector rs_readers; + // delete handlers for new tablet + DeleteHandler delete_handler; + do { + // before calculating version_to_be_changed, + // remove all data from new tablet, prevent to rewrite data(those double pushed when wait) + LOG(INFO) << "begin to remove all data from new tablet to prevent rewrite." + << " new_tablet=" << new_tablet->full_name(); + // only remove the version <= base_tablet's max version + RowsetSharedPtr max_rowset = base_tablet->rowset_with_max_version(); + if (max_rowset != nullptr) { + vector new_tablet_versions; + new_tablet->list_versions(&new_tablet_versions); + std::vector rowsets; + for (auto& version : new_tablet_versions) { + if (version.second <= max_rowset->end_version()) { + RowsetSharedPtr rowset = new_tablet->get_rowset_by_version(version); + rowsets.push_back(rowset); } - VLOG(3) << "unregister data source from new tablet when schema change. " - << "new_tablet=" << new_olap_table->full_name() - << ", version=" << it->first << "-" << it->second - << ", res=" << res; } - } - // save header - if (res == OLAP_SUCCESS) { - res = new_olap_table->save_header(); + new_tablet->modify_rowsets(std::vector(), rowsets); + // save tablet meta + res = new_tablet->save_meta(); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save header after unregister data source " - "when schema change. [new_tablet=%s res=%d]", - new_olap_table->full_name().c_str(), res); + LOG(FATAL) << "fail to save tablet meta after remove rowset from new tablet" + << new_tablet->full_name(); } + for (auto& rowset : rowsets) { + rowset->remove(); + } + } else { + res = OLAP_ERR_VERSION_NOT_EXIST; + break; } - // if failed, return - if (res != OLAP_SUCCESS) { - new_olap_table->release_header_lock(); - ref_olap_table->release_header_lock(); - ref_olap_table->release_push_lock(); - OLAPEngine::get_instance()->drop_table( - new_olap_table->tablet_id(), new_olap_table->schema_hash()); - OLAP_LOG_WARNING("fail to remove data from new tablet when schema_change. 
" - "[new_tablet=%s]", new_olap_table->full_name().c_str()); - return res; - } - } - vector versions_to_be_changed; - vector olap_data_arr; - // delete handlers for new olap table - DeleteHandler delete_handler; - do { - // inherit cumulative_layer_point from ref_olap_table - new_olap_table->set_cumulative_layer_point(ref_olap_table->cumulative_layer_point()); + // inherit cumulative_layer_point from base_tablet + new_tablet->set_cumulative_layer_point(base_tablet->cumulative_layer_point()); // get history versions to be changed - res = _get_versions_to_be_changed(ref_olap_table, versions_to_be_changed); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to get version to be changed. [res=%d]", res); - break; - } - - // store schema change information into table header - res = _save_schema_change_info(type, - ref_olap_table, - new_olap_table, - versions_to_be_changed); + res = _get_versions_to_be_changed(base_tablet, versions_to_be_changed); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to save schema change info. [res=%d]", res); - break; - } - - // acquire data sources correspond to history versions - ref_olap_table->acquire_data_sources_by_versions( - versions_to_be_changed, &olap_data_arr); - if (olap_data_arr.size() < 1) { - OLAP_LOG_WARNING("fail to acquire all data sources." - "[version_num=%d data_source_num=%d]", - versions_to_be_changed.size(), - olap_data_arr.size()); - res = OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS; + LOG(WARNING) << "fail to get version to be changed. res=" << res; break; } // init one delete handler int32_t end_version = -1; - for (size_t i = 0; i < olap_data_arr.size(); ++i) { - if (olap_data_arr[i]->version().second > end_version) { - end_version = olap_data_arr[i]->version().second; + for (auto& version : versions_to_be_changed) { + if (version.second > end_version) { + end_version = version.second; } } - res = delete_handler.init(ref_olap_table, end_version); + res = delete_handler.init(base_tablet->tablet_schema(), base_tablet->delete_predicates(), end_version); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("init delete handler failed. [table=%s; end_version=%d]", - ref_olap_table->full_name().c_str(), end_version); + LOG(WARNING) << "init delete handler failed. base_tablet=" << base_tablet->full_name() + << ", end_version=" << end_version; // release delete handlers which have been inited successfully. delete_handler.finalize(); break; } - } while (0); - - new_olap_table->release_header_lock(); - ref_olap_table->release_header_lock(); - ref_olap_table->release_push_lock(); - - if (res == OLAP_SUCCESS) { - // 3. Generate alter job - SchemaChangeParams sc_params; - sc_params.alter_table_type = type; - sc_params.ref_olap_table = ref_olap_table; - sc_params.new_olap_table = new_olap_table; - sc_params.ref_olap_data_arr = olap_data_arr; - sc_params.delete_handler = delete_handler; - - - // 4. Update schema change status of ref_olap_table and new_olap_tables - new_olap_table->set_schema_change_status(ALTER_TABLE_RUNNING, - ref_olap_table->schema_hash(), - versions_to_be_changed.back().second); - ref_olap_table->set_schema_change_status(ALTER_TABLE_RUNNING, - new_olap_table->schema_hash(), - versions_to_be_changed.back().second); - - // add tid to cgroup - CgroupsMgr::apply_system_cgroup(); - - // process the job : special for query table split key - VLOG(10) << "starts to alter table. 
" - << "old_tablet=" << sc_params.ref_olap_table->full_name() - << ", new_tablet=" << sc_params.new_olap_table->full_name(); - - if ((res = _alter_table(&sc_params)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to alter table. [request='%s']", - sc_params.debug_message.c_str()); - } - - VLOG(10) << "schema change thread completed the job. " - << "request=" << sc_params.debug_message; - } else { - // Delete olap table when submit alter table failed. - OLAPEngine::get_instance()->drop_table( - new_olap_table->tablet_id(), new_olap_table->schema_hash()); - } - - OLAP_LOG_WARNING("finish to generate alter tablet job. [res=%d]", res); - return res; -} - -OLAPStatus SchemaChangeHandler::_create_new_olap_table( - const OLAPTablePtr ref_olap_table, - const TCreateTabletReq& request, - const string* ref_root_path, - OLAPTablePtr* out_new_olap_table) { - OLAPStatus res = OLAP_SUCCESS; - OLAPTable* new_olap_table = NULL; - bool is_table_added = false; - - // 1. Lock to ensure that all _create_new_olap_table operation execute in serial - static Mutex create_table_lock; - create_table_lock.lock(); - do { - // 2. Create table with only header, no deltas - OLAPTablePtr new_olap_table = OLAPEngine::get_instance()->create_table( - request, ref_root_path, true, ref_olap_table); - if (new_olap_table == NULL) { - OLAP_LOG_WARNING("failed to create table. [table=%ld xml_path=%d]", - request.tablet_id, - request.tablet_schema.schema_hash); - res = OLAP_ERR_INPUT_PARAMETER_ERROR; + // acquire data sources correspond to history versions + base_tablet->capture_rs_readers(versions_to_be_changed, &rs_readers); + if (rs_readers.size() < 1) { + LOG(WARNING) << "fail to acquire all data sources. " + << "version_num=" << versions_to_be_changed.size() + << ", data_source_num=" << rs_readers.size(); + res = OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS; break; } - // 有可能出现以下2种特殊情况: - // 1. 因为操作系统时间跳变,导致新生成的表的creation_time小于旧表的creation_time时间 - // 2. 因为olap engine代码中统一以秒为单位,所以如果2个操作(比如create一个表, - // 然后立即alter该表)之间的时间间隔小于1s,则alter得到的新表和旧表的creation_time会相同 - // - // 当出现以上2种情况时,为了能够区分alter得到的新表和旧表,这里把新表的creation_time设置为 - // 旧表的creation_time加1 - if (new_olap_table->creation_time() <= ref_olap_table->creation_time()) { - OLAP_LOG_WARNING("new table's creation time is less than or equal to old table" - "[new_table_creation_time=%ld; old_table_creation_time=%ld]", - new_olap_table->creation_time(), - ref_olap_table->creation_time()); - int64_t new_creation_time = ref_olap_table->creation_time() + 1; - new_olap_table->set_creation_time(new_creation_time); - } + _reader_context.reader_type = READER_ALTER_TABLE; + _reader_context.tablet_schema= &base_tablet->tablet_schema(); + _reader_context.preaggregation = true; + _reader_context.delete_handler = &delete_handler; + _reader_context.is_using_cache = false; + _reader_context.lru_cache = StorageEngine::instance()->index_stream_lru_cache(); - // 3. Add table to OlapEngine will make it visiable to user - res = OLAPEngine::get_instance()->add_table( - request.tablet_id, - request.tablet_schema.schema_hash, - new_olap_table); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to add table to OLAPEngine. [res=%d table='%s']", - res, new_olap_table->full_name().c_str()); - break; + for (auto& rs_reader : rs_readers) { + rs_reader->init(&_reader_context); } - is_table_added = true; - // 4. Register table into OLAPRootPath, so that we can manage table from - // the perspective of root path. - // Example: unregister all tables when a bad disk found. 
- res = OLAPEngine::get_instance()->register_table_into_root_path( - new_olap_table.get()); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to register table into root path. " - "[root_path='%s' table='%s']", - new_olap_table->storage_root_path_name().c_str(), - new_olap_table->full_name().c_str()); - break; - } + } while (0); - OLAPTablePtr olap_table; - olap_table = OLAPEngine::get_instance()->get_table( - request.tablet_id, request.tablet_schema.schema_hash); - if (olap_table.get() == NULL) { - OLAP_LOG_WARNING("failed to get table from OLAPEngine. [table=%ld schema_hash=%d]", - request.tablet_id, - request.tablet_schema.schema_hash); - res = OLAP_ERR_OTHER_ERROR; - break; - } + new_tablet->release_header_lock(); + base_tablet->release_header_lock(); + base_tablet->release_push_lock(); - if (out_new_olap_table != NULL) { - *out_new_olap_table = olap_table; - } - } while (0); + if (res != OLAP_SUCCESS) { + _save_alter_state(ALTER_FAILED, base_tablet, new_tablet); + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + // Delete tablet when submit alter tablet failed. + StorageEngine::instance()->tablet_manager()->drop_tablet(new_tablet->tablet_id(), new_tablet->schema_hash()); + return res; + } + // 3. Generate alter job + SchemaChangeParams sc_params; + sc_params.alter_tablet_type = type; + sc_params.base_tablet = base_tablet; + sc_params.new_tablet = new_tablet; + sc_params.ref_rowset_readers = rs_readers; + sc_params.delete_handler = delete_handler; + + res = _convert_historical_rowsets(sc_params); if (res != OLAP_SUCCESS) { - if (is_table_added) { - res = OLAPEngine::get_instance()->drop_table( - request.tablet_id, request.tablet_schema.schema_hash); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to drop table when create table failed. res=" << res - << ", tablet=" << request.tablet_id - << ":" << request.tablet_schema.schema_hash; - } - } else if (NULL != new_olap_table) { - new_olap_table->delete_all_files(); - } + LOG(WARNING) << "failed to alter tablet. base_tablet=" << base_tablet->full_name() + << ", new_tablet=" << new_tablet->full_name(); + _save_alter_state(ALTER_FAILED, base_tablet, new_tablet); + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + StorageEngine::instance()->tablet_manager()->drop_tablet(new_tablet->tablet_id(), new_tablet->schema_hash()); + return res; } - create_table_lock.unlock(); + OLAPStatus save_st = _save_alter_state(ALTER_FINISHED, base_tablet, new_tablet); + if (save_st != OLAP_SUCCESS) { + res = save_st; + } + StorageEngine::instance()->tablet_manager()->release_schema_change_lock(request.base_tablet_id); + return res; } OLAPStatus SchemaChangeHandler::schema_version_convert( - OLAPTablePtr src_olap_table, - OLAPTablePtr dest_olap_table, - vector* ref_segment_groups, - vector* new_segment_groups) { - if (NULL == new_segment_groups) { - OLAP_LOG_WARNING("new_olap_index is NULL."); - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - + TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet, + RowsetSharedPtr* base_rowset, + RowsetSharedPtr* new_rowset) { OLAPStatus res = OLAP_SUCCESS; LOG(INFO) << "begin to convert delta version for schema changing. " - << "old_tablet=" << src_olap_table->full_name() - << ", dest_tablet=" << dest_olap_table->full_name(); + << "base_tablet=" << base_tablet->full_name() + << ", new_tablet=" << new_tablet->full_name(); // a. 
解析Alter请求,转换成内部的表示形式 // 不使用DELETE_DATA命令指定的删除条件 - RowBlockChanger rb_changer(dest_olap_table->tablet_schema(), src_olap_table); + RowBlockChanger rb_changer(new_tablet->tablet_schema(), base_tablet); bool sc_sorting = false; bool sc_directly = false; - if (OLAP_SUCCESS != (res = _parse_request(src_olap_table, - dest_olap_table, + if (OLAP_SUCCESS != (res = _parse_request(base_tablet, + new_tablet, &rb_changer, &sc_sorting, &sc_directly))) { - OLAP_LOG_WARNING("failed to parse the request. [res=%d]", res); + LOG(WARNING) << "failed to parse the request. res=" << res; return res; } // NOTE split_table如果使用row_block,会导致原block变小 // 但由于历史数据在后续base/cumulative后还是会变成正常,故用directly也可以 // b. 生成历史数据转换器 - SchemaChange* sc_procedure = NULL; - if (true == sc_sorting) { + SchemaChange* sc_procedure = nullptr; + if (sc_sorting) { size_t memory_limitation = config::memory_limitation_per_thread_for_schema_change; LOG(INFO) << "doing schema change with sorting."; sc_procedure = new(nothrow) SchemaChangeWithSorting( - dest_olap_table, rb_changer, memory_limitation * 1024 * 1024 * 1024); - } else if (true == sc_directly) { + } else if (sc_directly) { LOG(INFO) << "doing schema change directly."; - sc_procedure = new(nothrow) SchemaChangeDirectly( - dest_olap_table, rb_changer); + sc_procedure = new(nothrow) SchemaChangeDirectly(rb_changer); } else { LOG(INFO) << "doing linked schema change."; - sc_procedure = new(nothrow) LinkedSchemaChange( - src_olap_table, - dest_olap_table, - rb_changer); + sc_procedure = new(nothrow) LinkedSchemaChange(rb_changer); } - if (NULL == sc_procedure) { + if (sc_procedure == nullptr) { LOG(FATAL) << "failed to malloc SchemaChange. size=" << sizeof(SchemaChangeWithSorting); return OLAP_ERR_MALLOC_ERROR; } // c. 转换数据 - ColumnData* olap_data = NULL; - for (vector::iterator it = ref_segment_groups->begin(); - it != ref_segment_groups->end(); ++it) { - ColumnData* olap_data = ColumnData::create(*it); - if (NULL == olap_data) { - OLAP_LOG_WARNING("fail to create ColumnData."); - res = OLAP_ERR_MALLOC_ERROR; - goto SCHEMA_VERSION_CONVERT_ERR; - } - - olap_data->init(); - - SegmentGroup* new_segment_group = nullptr; - if ((*it)->transaction_id() == 0) { - new_segment_group = new SegmentGroup(dest_olap_table.get(), - olap_data->version(), - olap_data->version_hash(), - olap_data->delete_flag(), - (*it)->segment_group_id(), 0); + DeleteHandler delete_handler; + _reader_context.reader_type = READER_ALTER_TABLE; + _reader_context.tablet_schema = &base_tablet->tablet_schema(); + _reader_context.preaggregation = true; + _reader_context.delete_handler = &delete_handler; + _reader_context.is_using_cache = false; + _reader_context.lru_cache = StorageEngine::instance()->index_stream_lru_cache(); + + RowsetReaderSharedPtr rowset_reader = (*base_rowset)->create_reader(); + rowset_reader->init(&_reader_context); + + RowsetId rowset_id = 0; + RETURN_NOT_OK(new_tablet->next_rowset_id(&rowset_id)); + RowsetWriterContext writer_context; + writer_context.rowset_id = rowset_id; + writer_context.tablet_uid = new_tablet->tablet_uid(); + writer_context.tablet_id = new_tablet->tablet_id(); + writer_context.partition_id = (*base_rowset)->partition_id(); + writer_context.tablet_schema_hash = new_tablet->schema_hash(); + writer_context.rowset_type = ALPHA_ROWSET; + writer_context.rowset_path_prefix = new_tablet->tablet_path(); + writer_context.tablet_schema = &(new_tablet->tablet_schema()); + writer_context.rowset_state = PREPARED; + writer_context.txn_id = (*base_rowset)->txn_id(); + 
writer_context.load_id.set_hi((*base_rowset)->load_id().hi()); + writer_context.load_id.set_lo((*base_rowset)->load_id().lo()); + RowsetWriterSharedPtr rowset_writer(new AlphaRowsetWriter()); + rowset_writer->init(writer_context); + + if (!sc_procedure->process(rowset_reader, rowset_writer, new_tablet, base_tablet)) { + if ((*base_rowset)->is_pending()) { + LOG(WARNING) << "failed to process the transaction when schema change. " + << "tablet=" << new_tablet->full_name() << "'" + << ", transaction="<< (*base_rowset)->txn_id(); } else { - new_segment_group = new SegmentGroup(dest_olap_table.get(), - olap_data->delete_flag(), - (*it)->segment_group_id(), 0, - (*it)->is_pending(), - (*it)->partition_id(), - (*it)->transaction_id()); - } - - if (NULL == new_segment_group) { - LOG(FATAL) << "failed to malloc SegmentGroup. size=" << sizeof(SegmentGroup); - res = OLAP_ERR_MALLOC_ERROR; - goto SCHEMA_VERSION_CONVERT_ERR; - } - - new_segment_groups->push_back(new_segment_group); - - if (!sc_procedure->process(olap_data, new_segment_group)) { - if ((*it)->is_pending()) { - OLAP_LOG_WARNING("failed to process the transaction when schema change. " - "[table='%s' transaction=%ld]", - (*it)->table()->full_name().c_str(), - (*it)->transaction_id()); - } else { - OLAP_LOG_WARNING("failed to process the version. [version='%d-%d']", - (*it)->version().first, - (*it)->version().second); - } - res = OLAP_ERR_INPUT_PARAMETER_ERROR; - goto SCHEMA_VERSION_CONVERT_ERR; - } - - SAFE_DELETE(olap_data); + LOG(WARNING) << "failed to process the version. " + << "version=" << (*base_rowset)->version().first + << "-" << (*base_rowset)->version().second; + } + res = OLAP_ERR_INPUT_PARAMETER_ERROR; + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(rowset_writer->rowset_id())); + goto SCHEMA_VERSION_CONVERT_ERR; + } + *new_rowset = rowset_writer->build(); + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(rowset_writer->rowset_id())); + if (*new_rowset == nullptr) { + LOG(WARNING) << "build rowset failed."; + res = OLAP_ERR_MALLOC_ERROR; + goto SCHEMA_VERSION_CONVERT_ERR; } SAFE_DELETE(sc_procedure); - SAFE_DELETE(olap_data); - + LOG(INFO) << "successfully convert rowsets. " + << " base_tablet=" << base_tablet->full_name() + << ", new_tablet=" << new_tablet->full_name(); return res; SCHEMA_VERSION_CONVERT_ERR: - while (!new_segment_groups->empty()) { - SegmentGroup* segment_group = new_segment_groups->back(); - segment_group->delete_all_files(); - SAFE_DELETE(segment_group); - new_segment_groups->pop_back(); + if (*new_rowset != nullptr) { + (*new_rowset)->remove(); } SAFE_DELETE(sc_procedure); - SAFE_DELETE(olap_data); + LOG(WARNING) << "failed to convert rowsets. " + << " base_tablet=" << base_tablet->full_name() + << ", new_tablet=" << new_tablet->full_name() + << " res = " << res; return res; } OLAPStatus SchemaChangeHandler::_get_versions_to_be_changed( - OLAPTablePtr ref_olap_table, + TabletSharedPtr base_tablet, vector& versions_to_be_changed) { - int32_t request_version = 0; - const PDelta* lastest_version = ref_olap_table->lastest_version(); - if (lastest_version != NULL) { - request_version = lastest_version->end_version() - 1; - } else { - OLAP_LOG_WARNING("Table has no version. [path='%s']", - ref_olap_table->full_name().c_str()); + RowsetSharedPtr rowset = base_tablet->rowset_with_max_version(); + if (rowset == nullptr) { + LOG(WARNING) << "Tablet has no version. 
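// Illustrative sketch, not part of the patch: the two flavours of RowsetWriterContext used in
// this file. schema_version_convert() converts a pending (not yet published) rowset, so the
// output stays PREPARED and keeps the transaction identity; the temporary rowsets written by
// _internal_sorting() are VISIBLE with an explicit version. converting_pending_rowset is a
// hypothetical flag, for illustration only.
RowsetWriterContext context;
context.rowset_type = ALPHA_ROWSET;
context.rowset_path_prefix = new_tablet->tablet_path();
context.tablet_schema = &(new_tablet->tablet_schema());
if (converting_pending_rowset) {
    context.rowset_state = PREPARED;
    context.txn_id = (*base_rowset)->txn_id();
    context.load_id.set_hi((*base_rowset)->load_id().hi());
    context.load_id.set_lo((*base_rowset)->load_id().lo());
} else {
    context.rowset_state = VISIBLE;
    context.version = version;            // e.g. the temporary Version(v, v)
    context.version_hash = version_hash;
}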
base_tablet=" << base_tablet->full_name(); return OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS; } - // 最新版本的delta可以被重导覆盖,因此计算获取的路径中, - // 必须包含最新版本的delta - if (request_version >= 0) { - vector span_versions; - ref_olap_table->select_versions_to_span(Version(0, request_version), &span_versions); - - // get all version list - vector all_versions; - ref_olap_table->list_version_entities(&all_versions); - if (0 == all_versions.size()) { - OLAP_LOG_WARNING("there'is no any version in the table. [table='%s']", - ref_olap_table->full_name().c_str()); - return OLAP_ERR_VERSION_NOT_EXIST; - } - - for (uint32_t i = 0; i < span_versions.size(); i++) { - versions_to_be_changed.push_back(span_versions[i]); - } + vector span_versions; + base_tablet->capture_consistent_versions(Version(0, rowset->version().second), &span_versions); + for (uint32_t i = 0; i < span_versions.size(); i++) { + versions_to_be_changed.push_back(span_versions[i]); } - versions_to_be_changed.push_back( - Version(lastest_version->start_version(), lastest_version->end_version())); return OLAP_SUCCESS; } -// 增加A->(B|C|...) 的schema_change信息 -OLAPStatus SchemaChangeHandler::_save_schema_change_info( - AlterTabletType alter_table_type, - OLAPTablePtr ref_olap_table, - OLAPTablePtr new_olap_table, +OLAPStatus SchemaChangeHandler::_add_alter_task( + AlterTabletType alter_tablet_type, + TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet, const vector& versions_to_be_changed) { - // check new table exists, + // check new tablet exists, // prevent to set base's status after new's dropping (clear base's status) - if (OLAPEngine::get_instance()->get_table( - new_olap_table->tablet_id(), new_olap_table->schema_hash()).get() == NULL) { - OLAP_LOG_WARNING("fail to find table before saving status. [table='%s']", - new_olap_table->full_name().c_str()); + if (StorageEngine::instance()->tablet_manager()->get_tablet( + new_tablet->tablet_id(), new_tablet->schema_hash()) == nullptr) { + LOG(WARNING) << "new_tablet does not exist. tablet=" << new_tablet->full_name(); return OLAP_ERR_TABLE_NOT_FOUND; } - OLAPStatus res = OLAP_SUCCESS; - // 1. 在新表和旧表中添加schema change标志 - ref_olap_table->clear_schema_change_request(); - ref_olap_table->set_schema_change_request(new_olap_table->tablet_id(), - new_olap_table->schema_hash(), - versions_to_be_changed, - alter_table_type); - new_olap_table->set_schema_change_request(ref_olap_table->tablet_id(), - ref_olap_table->schema_hash(), - vector(), // empty versions - alter_table_type); - - // save new olap table header :只有一个父ref table - res = new_olap_table->save_header(); + base_tablet->delete_alter_task(); + base_tablet->add_alter_task(new_tablet->tablet_id(), + new_tablet->schema_hash(), + versions_to_be_changed, + alter_tablet_type); + OLAPStatus res = base_tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save base tablet meta. res=" << res + << ", tablet=" << base_tablet->full_name(); + return res; + } + + new_tablet->add_alter_task(base_tablet->tablet_id(), + base_tablet->schema_hash(), + vector(), // empty versions + alter_tablet_type); + res = new_tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save new tablet meta. 
res=" << res + << ", tablet=" << new_tablet->full_name(); + return res; + } + LOG(INFO) << "successfully add alter task to both base and new"; + return res; +} + +OLAPStatus SchemaChangeHandler::_save_alter_state( + AlterTabletState state, + TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet) { + WriteLock base_wlock(base_tablet->get_header_lock_ptr()); + WriteLock new_wlock(new_tablet->get_header_lock_ptr()); + AlterTabletTaskSharedPtr base_alter_task = base_tablet->alter_task(); + if (base_alter_task == nullptr) { + LOG(INFO) << "could not find alter task info from base tablet " << base_tablet->full_name(); + return OLAP_ERR_ALTER_STATUS_ERR; + } + OLAPStatus res = base_tablet->set_alter_state(state); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to set alter state to " << state + << " tablet=" << base_tablet->full_name() + << " res=" << res; + return res; + } + res = base_tablet->save_meta(); if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save new table header. res=" << res - << ", tablet=" << new_olap_table->full_name(); + LOG(FATAL) << "fail to save base tablet meta. res=" << res + << ", base_tablet=" << base_tablet->full_name(); return res; } - res = ref_olap_table->save_header(); + AlterTabletTaskSharedPtr new_alter_task = new_tablet->alter_task(); + if (new_alter_task == nullptr) { + LOG(INFO) << "could not find alter task info from new tablet " << new_tablet->full_name(); + return OLAP_ERR_ALTER_STATUS_ERR; + } + res = new_tablet->set_alter_state(state); if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save ref table header. res=" << res - << ", tablet=" << ref_olap_table->full_name().c_str(); + LOG(WARNING) << "failed to set alter state to " << state + << " tablet " << new_tablet->full_name() + << " res" << res; + return res; + } + res = new_tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save new tablet meta. res=" << res + << ", new_tablet=" << base_tablet->full_name(); return res; } return res; } -// @static -OLAPStatus SchemaChangeHandler::_alter_table(SchemaChangeParams* sc_params) { - OLAPStatus res = OLAP_SUCCESS; - LOG(INFO) << "begin to process alter table job. " - << "old_olap_table=" << sc_params->ref_olap_table->full_name() - << ", new_olap_table=" << sc_params->new_olap_table->full_name(); +OLAPStatus SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangeParams& sc_params) { + LOG(INFO) << "begin to convert rowsets for new_tablet from base_tablet." + << " base_tablet=" << sc_params.base_tablet->full_name() + << ", new_tablet=" << sc_params.new_tablet->full_name(); // find end version int32_t end_version = -1; - for (size_t i = 0; i < sc_params->ref_olap_data_arr.size(); ++i) { - if (sc_params->ref_olap_data_arr[i]->version().second > end_version) { - end_version = sc_params->ref_olap_data_arr[i]->version().second; + for (size_t i = 0; i < sc_params.ref_rowset_readers.size(); ++i) { + if (sc_params.ref_rowset_readers[i]->version().second > end_version) { + end_version = sc_params.ref_rowset_readers[i]->version().second; } } // change中增加了filter信息,在_parse_request中会设置filter的column信息 // 并在每次row block的change时,过滤一些数据 - RowBlockChanger rb_changer(sc_params->new_olap_table->tablet_schema(), - sc_params->ref_olap_table, - sc_params->delete_handler); + RowBlockChanger rb_changer(sc_params.new_tablet->tablet_schema(), + sc_params.base_tablet, sc_params.delete_handler); bool sc_sorting = false; bool sc_directly = false; - SchemaChange* sc_procedure = NULL; + SchemaChange* sc_procedure = nullptr; // a. 
解析Alter请求,转换成内部的表示形式 - res = _parse_request(sc_params->ref_olap_table, - sc_params->new_olap_table, - &rb_changer, - &sc_sorting, - &sc_directly); + OLAPStatus res = _parse_request(sc_params.base_tablet, sc_params.new_tablet, + &rb_changer, &sc_sorting, &sc_directly); if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("failed to parse the request. [res=%d]", res); + LOG(WARNING) << "failed to parse the request. res=" << res; goto PROCESS_ALTER_EXIT; } // b. 生成历史数据转换器 - if (true == sc_sorting) { + if (sc_sorting) { size_t memory_limitation = config::memory_limitation_per_thread_for_schema_change; LOG(INFO) << "doing schema change with sorting."; - sc_procedure = new(nothrow) SchemaChangeWithSorting( - sc_params->new_olap_table, - rb_changer, - memory_limitation * 1024 * 1024 * 1024); - } else if (true == sc_directly) { + sc_procedure = new(nothrow) SchemaChangeWithSorting(rb_changer, + memory_limitation * 1024 * 1024 * 1024); + } else if (sc_directly) { LOG(INFO) << "doing schema change directly."; - sc_procedure = new(nothrow) SchemaChangeDirectly( - sc_params->new_olap_table, rb_changer); + sc_procedure = new(nothrow) SchemaChangeDirectly(rb_changer); } else { LOG(INFO) << "doing linked schema change."; - sc_procedure = new(nothrow) LinkedSchemaChange( - sc_params->ref_olap_table, - sc_params->new_olap_table, - rb_changer); + sc_procedure = new(nothrow) LinkedSchemaChange(rb_changer); } - if (NULL == sc_procedure) { - OLAP_LOG_WARNING("failed to malloc SchemaChange. [size=%ld]", - sizeof(SchemaChangeWithSorting)); + if (sc_procedure == nullptr) { + LOG(WARNING) << "failed to malloc SchemaChange. " + << "malloc_size=" << sizeof(SchemaChangeWithSorting); res = OLAP_ERR_MALLOC_ERROR; goto PROCESS_ALTER_EXIT; } // c. 转换历史数据 - for (vector::iterator it = sc_params->ref_olap_data_arr.end() - 1; - it >= sc_params->ref_olap_data_arr.begin(); --it) { - VLOG(10) << "begin to convert a history delta. " - << "version=" << (*it)->version().first << "-" << (*it)->version().second; + for (auto& rs_reader : sc_params.ref_rowset_readers) { + VLOG(10) << "begin to convert a history rowset. version=" + << rs_reader->version().first << "-" << rs_reader->version().second; // set status for monitor // 只要有一个new_table为running,ref table就设置为running // NOTE 如果第一个sub_table先fail,这里会继续按正常走 - sc_params->ref_olap_table->set_schema_change_status( - ALTER_TABLE_RUNNING, - sc_params->new_olap_table->schema_hash(), - -1); - sc_params->new_olap_table->set_schema_change_status( - ALTER_TABLE_RUNNING, - sc_params->ref_olap_table->schema_hash(), - (*it)->version().second); - - // we create a new delta with the same version as the ColumnData processing currently. - SegmentGroup* new_segment_group = new(nothrow) SegmentGroup( - sc_params->new_olap_table.get(), - (*it)->version(), - (*it)->version_hash(), - (*it)->delete_flag(), - (*it)->segment_group()->segment_group_id(), 0); - - if (new_segment_group == NULL) { - OLAP_LOG_WARNING("failed to malloc SegmentGroup. 
[size=%ld]", sizeof(SegmentGroup)); - res = OLAP_ERR_MALLOC_ERROR; + + RowsetId rowset_id = 0; + TabletSharedPtr new_tablet = sc_params.new_tablet; + res = sc_params.new_tablet->next_rowset_id(&rowset_id); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "generate next id failed"; goto PROCESS_ALTER_EXIT; } - (*it)->set_delete_handler(sc_params->delete_handler); - int del_ret = (*it)->delete_pruning_filter(); - if (DEL_SATISFIED == del_ret) { - VLOG(3) << "filter delta in schema change:" - << (*it)->version().first << "-" << (*it)->version().second; - res = sc_procedure->create_init_version(new_segment_group->table()->tablet_id(), - new_segment_group->table()->schema_hash(), - new_segment_group->version(), - new_segment_group->version_hash(), - new_segment_group); - sc_procedure->add_filted_rows((*it)->num_rows()); - if (res != OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create init version. [res=%d]", res); - res = OLAP_ERR_INPUT_PARAMETER_ERROR; - OLAP_GOTO(PROCESS_ALTER_EXIT); - } - } else if (DEL_PARTIAL_SATISFIED == del_ret) { - VLOG(3) << "filter delta partially in schema change:" - << (*it)->version().first << "-" << (*it)->version().second; - (*it)->set_delete_status(DEL_PARTIAL_SATISFIED); - } else { - VLOG(3) << "not filter delta in schema change:" - << (*it)->version().first << "-" << (*it)->version().second; - (*it)->set_delete_status(DEL_NOT_SATISFIED); + RowsetWriterContext writer_context; + writer_context.rowset_id = rowset_id; + writer_context.tablet_uid = new_tablet->tablet_uid(); + writer_context.tablet_id = new_tablet->tablet_id(); + writer_context.partition_id = new_tablet->partition_id(); + writer_context.tablet_schema_hash = new_tablet->schema_hash(); + writer_context.rowset_type = ALPHA_ROWSET; + writer_context.rowset_path_prefix = new_tablet->tablet_path(); + writer_context.tablet_schema = &(new_tablet->tablet_schema()); + writer_context.rowset_state = VISIBLE; + writer_context.version = rs_reader->version(); + writer_context.version_hash = rs_reader->version_hash(); + RowsetWriterSharedPtr rowset_writer(new AlphaRowsetWriter()); + OLAPStatus status = rowset_writer->init(writer_context); + if (status != OLAP_SUCCESS) { + res = OLAP_ERR_ROWSET_BUILDER_INIT; + goto PROCESS_ALTER_EXIT; } - if (DEL_SATISFIED != del_ret && !sc_procedure->process(*it, new_segment_group)) { - //if del_ret is DEL_SATISFIED, the new delta version has already been created in new_olap_table - OLAP_LOG_WARNING("failed to process the version. [version='%d-%d']", - (*it)->version().first, (*it)->version().second); - new_segment_group->delete_all_files(); - SAFE_DELETE(new_segment_group); - + if (!sc_procedure->process(rs_reader, rowset_writer, sc_params.new_tablet, sc_params.base_tablet)) { + LOG(WARNING) << "failed to process the version." 
+ << " version=" << rs_reader->version().first + << "-" << rs_reader->version().second; res = OLAP_ERR_INPUT_PARAMETER_ERROR; + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(rowset_writer->rowset_id())); goto PROCESS_ALTER_EXIT; } - + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + std::to_string(rowset_writer->rowset_id())); // 将新版本的数据加入header // 为了防止死锁的出现,一定要先锁住旧表,再锁住新表 - sc_params->new_olap_table->obtain_push_lock(); - sc_params->ref_olap_table->obtain_header_wrlock(); - sc_params->new_olap_table->obtain_header_wrlock(); - - if (!sc_params->new_olap_table->has_segment_group((*it)->version(), new_segment_group)) { - // register version - std::vector segment_group_vec; - segment_group_vec.push_back(new_segment_group); - res = sc_params->new_olap_table->register_data_source(segment_group_vec); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("failed to register new version. [table='%s' version='%d-%d']", - sc_params->new_olap_table->full_name().c_str(), - (*it)->version().first, - (*it)->version().second); - new_segment_group->delete_all_files(); - SAFE_DELETE(new_segment_group); - - sc_params->new_olap_table->release_header_lock(); - sc_params->ref_olap_table->release_header_lock(); - sc_params->new_olap_table->release_push_lock(); - - goto PROCESS_ALTER_EXIT; - } - - VLOG(3) << "register new version. tablet=" << sc_params->new_olap_table->full_name() - << ", version=" << (*it)->version().first << "-" << (*it)->version().second; - } else { - OLAP_LOG_WARNING("version already exist, version revert occured. " - "[table='%s' version='%d-%d']", - sc_params->new_olap_table->full_name().c_str(), - (*it)->version().first, (*it)->version().second); - new_segment_group->delete_all_files(); - SAFE_DELETE(new_segment_group); - } - - // 保存header - if (OLAP_SUCCESS != sc_params->new_olap_table->save_header()) { - LOG(FATAL) << "fail to save header. res=" << res - << ", tablet=" << sc_params->new_olap_table->full_name(); - } - - // XXX: 此处需要验证ref_olap_data_arr中最后一个版本是否与new_olap_table的header中记录的最 - // 后一个版本相同。然后还要注意一致性问题。 - if (!sc_params->ref_olap_table->remove_last_schema_change_version( - sc_params->new_olap_table)) { - OLAP_LOG_WARNING("failed to remove the last version did schema change."); - - sc_params->new_olap_table->release_header_lock(); - sc_params->ref_olap_table->release_header_lock(); - sc_params->new_olap_table->release_push_lock(); - - res = OLAP_ERR_INPUT_PARAMETER_ERROR; + sc_params.new_tablet->obtain_push_lock(); + RowsetSharedPtr new_rowset = rowset_writer->build(); + if (new_rowset == nullptr) { + LOG(WARNING) << "failed to build rowset, exit alter process"; + sc_params.new_tablet->release_push_lock(); goto PROCESS_ALTER_EXIT; } - - // 保存header - if (OLAP_SUCCESS != sc_params->ref_olap_table->save_header()) { - LOG(FATAL) << "failed to save header. tablet=" << sc_params->new_olap_table->full_name(); + res = sc_params.new_tablet->add_rowset(new_rowset); + if (res == OLAP_ERR_PUSH_VERSION_ALREADY_EXIST) { + LOG(WARNING) << "version already exist, version revert occured. " + << "tablet=" << sc_params.new_tablet->full_name() + << ", version='" << rs_reader->version().first + << "-" << rs_reader->version().second; + new_rowset->remove(); + res = OLAP_SUCCESS; + } else if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to register new version. 
" + << " tablet=" << sc_params.new_tablet->full_name() + << ", version=" << rs_reader->version().first + << "-" << rs_reader->version().second; + new_rowset->remove(); + sc_params.new_tablet->release_push_lock(); + goto PROCESS_ALTER_EXIT; + } else { + VLOG(3) << "register new version. tablet=" << sc_params.new_tablet->full_name() + << ", version=" << rs_reader->version().first + << "-" << rs_reader->version().second; } - - sc_params->new_olap_table->release_header_lock(); - sc_params->ref_olap_table->release_header_lock(); - sc_params->new_olap_table->release_push_lock(); + sc_params.new_tablet->release_push_lock(); VLOG(10) << "succeed to convert a history version." - << ", version=" << (*it)->version().first << "-" << (*it)->version().second; + << " version=" << rs_reader->version().first + << "-" << rs_reader->version().second; - // 释放ColumnData - vector olap_data_to_be_released(it, it + 1); - sc_params->ref_olap_table->release_data_sources(&olap_data_to_be_released); - - it = sc_params->ref_olap_data_arr.erase(it); // after erasing, it will point to end() + // 释放RowsetReader + rs_reader->close(); } - // XXX: 此时应该不取消SchemaChange状态,因为新Delta还要转换成新旧Schema的版本 - PROCESS_ALTER_EXIT: if (res == OLAP_SUCCESS) { Version test_version(0, end_version); - res = sc_params->new_olap_table->test_version(test_version); + res = sc_params.new_tablet->check_version_integrity(test_version); } - if (res == OLAP_SUCCESS) { - // ref的状态只有2个new table都完成后,才能设置为done - sc_params->ref_olap_table->obtain_header_rdlock(); - res = clear_schema_change_single_info(sc_params->ref_olap_table, NULL, false, true); - sc_params->ref_olap_table->release_header_lock(); - - if (OLAP_SUCCESS == res) { - sc_params->ref_olap_table->set_schema_change_status( - ALTER_TABLE_FINISHED, - sc_params->new_olap_table->schema_hash(), - -1); - } else { - res = OLAP_SUCCESS; - } - - sc_params->new_olap_table->set_schema_change_status( - ALTER_TABLE_FINISHED, - sc_params->ref_olap_table->schema_hash(), - -1); - VLOG(3) << "set alter table job status. " - << "status=" << sc_params->ref_olap_table->schema_change_status().status; - } else { - sc_params->ref_olap_table->set_schema_change_status( - ALTER_TABLE_FAILED, - sc_params->new_olap_table->schema_hash(), - -1); - - sc_params->new_olap_table->set_schema_change_status( - ALTER_TABLE_FAILED, - sc_params->ref_olap_table->schema_hash(), - -1); - VLOG(3) << "set alter table job status. " - << "status=" << sc_params->ref_olap_table->schema_change_status().status; + for (auto& rs_reader : sc_params.ref_rowset_readers) { + rs_reader->close(); } - - sc_params->ref_olap_table->release_data_sources(&(sc_params->ref_olap_data_arr)); SAFE_DELETE(sc_procedure); - LOG(INFO) << "finish to process alter table job. res=" << res; + LOG(INFO) << "finish converting rowsets for new_tablet from base_tablet. 
" + << "base_tablet=" << sc_params.base_tablet->full_name() + << ", new_tablet=" << sc_params.new_tablet->full_name(); return res; } // @static // 分析column的mapping以及filter key的mapping -OLAPStatus SchemaChangeHandler::_parse_request(OLAPTablePtr ref_olap_table, - OLAPTablePtr new_olap_table, +OLAPStatus SchemaChangeHandler::_parse_request(TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet, RowBlockChanger* rb_changer, bool* sc_sorting, bool* sc_directly) { OLAPStatus res = OLAP_SUCCESS; // set column mapping - for (int i = 0, new_schema_size = new_olap_table->tablet_schema().size(); + for (int i = 0, new_schema_size = new_tablet->tablet_schema().num_columns(); i < new_schema_size; ++i) { - const FieldInfo& new_column_schema = new_olap_table->tablet_schema()[i]; - const string& column_name = new_column_schema.name; + const TabletColumn& new_column = new_tablet->tablet_schema().column(i); + const string& column_name = new_column.name(); ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i); - if (new_column_schema.has_referenced_column) { - int32_t column_index = ref_olap_table->get_field_index( - new_column_schema.referenced_column); + if (new_column.has_reference_column()) { + int32_t column_index = base_tablet->field_index(new_column.referenced_column()); if (column_index < 0) { - OLAP_LOG_WARNING("referenced column was missing. " - "[column='%s' referenced_column='%s']", - column_name.c_str(), - new_column_schema.referenced_column.c_str()); + LOG(WARNING) << "referenced column was missing. " + << "[column=" << column_name + << " referenced_column=" << column_index << "]"; return OLAP_ERR_CE_CMD_PARAMS_ERROR; } column_mapping->ref_column = column_index; VLOG(3) << "A column refered to existed column will be added after schema changing." - << "column=" << column_name << ", ref_column=" << new_column_schema.referenced_column; + << "column=" << column_name << ", ref_column=" << column_index; continue; } - int32_t column_index = ref_olap_table->get_field_index(column_name); + int32_t column_index = base_tablet->field_index(column_name); if (column_index >= 0) { column_mapping->ref_column = column_index; continue; @@ -2271,20 +1808,20 @@ OLAPStatus SchemaChangeHandler::_parse_request(OLAPTablePtr ref_olap_table, { column_mapping->ref_column = -1; - if (i < ref_olap_table->num_short_key_fields()) { + if (i < base_tablet->num_short_key_columns()) { *sc_directly = true; } if (OLAP_SUCCESS != (res = _init_column_mapping( column_mapping, - new_column_schema, - new_column_schema.default_value))) { + new_column, + new_column.default_value()))) { return res; } VLOG(10) << "A column with default value will be added after schema chaning. 
" << "column=" << column_name - << ", default_value=" << new_column_schema.default_value; + << ", default_value=" << new_column.default_value(); continue; } @@ -2294,7 +1831,7 @@ OLAPStatus SchemaChangeHandler::_parse_request(OLAPTablePtr ref_olap_table, if (OLAP_SUCCESS != (res = _init_column_mapping( column_mapping, - new_column_schema, + new_column, ""))) { return res; } @@ -2309,7 +1846,7 @@ OLAPStatus SchemaChangeHandler::_parse_request(OLAPTablePtr ref_olap_table, // 若Key列的引用序列出现乱序,则需要重排序 int num_default_value = 0; - for (int i = 0, new_schema_size = new_olap_table->num_key_fields(); + for (int i = 0, new_schema_size = new_tablet->num_key_columns(); i < new_schema_size; ++i) { ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i); @@ -2324,59 +1861,55 @@ OLAPStatus SchemaChangeHandler::_parse_request(OLAPTablePtr ref_olap_table, } } - if (ref_olap_table->num_short_key_fields() != new_olap_table->num_short_key_fields()) { + if (base_tablet->num_short_key_columns() != new_tablet->num_short_key_columns()) { // the number of short_keys changed, can't do linked schema change *sc_directly = true; return OLAP_SUCCESS; } - const RowFields& ref_table_schema = ref_olap_table->tablet_schema(); - const RowFields& new_table_schema = new_olap_table->tablet_schema(); - for (size_t i = 0; i < new_olap_table->num_fields(); ++i) { + const TabletSchema& ref_tablet_schema = base_tablet->tablet_schema(); + const TabletSchema& new_tablet_schema = new_tablet->tablet_schema(); + for (size_t i = 0; i < new_tablet->num_columns(); ++i) { ColumnMapping* column_mapping = rb_changer->get_mutable_column_mapping(i); if (column_mapping->ref_column < 0) { continue; } else { - if (new_table_schema[i].type != ref_table_schema[column_mapping->ref_column].type) { + if (new_tablet_schema.column(i).type() != ref_tablet_schema.column(column_mapping->ref_column).type()) { *sc_directly = true; return OLAP_SUCCESS; } else if ( - (new_table_schema[i].type == ref_table_schema[column_mapping->ref_column].type) - && (new_table_schema[i].length - != ref_table_schema[column_mapping->ref_column].length)) { + (new_tablet_schema.column(i).type() == ref_tablet_schema.column(column_mapping->ref_column).type()) + && (new_tablet_schema.column(i).length() + != ref_tablet_schema.column(column_mapping->ref_column).length())) { *sc_directly = true; return OLAP_SUCCESS; - } else if (new_table_schema[i].is_bf_column != ref_table_schema[i].is_bf_column) { + } else if (new_tablet_schema.column(i).is_bf_column() + != ref_tablet_schema.column(column_mapping->ref_column).is_bf_column()) { *sc_directly = true; return OLAP_SUCCESS; } } } - if (ref_olap_table->delete_data_conditions_size() != 0){ + if (base_tablet->delete_predicates().size() != 0){ //there exists delete condtion in header, can't do linked schema change *sc_directly = true; } - if (ref_olap_table->data_file_type() != new_olap_table->data_file_type()) { - //if change the table from row-oriented to column-oriented, or versus - *sc_directly = true; - } - return OLAP_SUCCESS; } OLAPStatus SchemaChangeHandler::_init_column_mapping(ColumnMapping* column_mapping, - const FieldInfo& column_schema, + const TabletColumn& column_schema, const std::string& value) { column_mapping->default_value = WrapperField::create(column_schema); - if (column_mapping->default_value == NULL) { + if (column_mapping->default_value == nullptr) { return OLAP_ERR_MALLOC_ERROR; } - if (column_schema.is_allow_null && !column_schema.has_default_value) { + if (true == column_schema.is_nullable() && 
value.length() == 0) { column_mapping->default_value->set_null(); } else { column_mapping->default_value->from_string(value); @@ -2385,61 +1918,4 @@ OLAPStatus SchemaChangeHandler::_init_column_mapping(ColumnMapping* column_mappi return OLAP_SUCCESS; } -OLAPStatus SchemaChange::create_init_version( - TTabletId tablet_id, - SchemaHash schema_hash, - Version version, - VersionHash version_hash, - SegmentGroup* segment_group) { - VLOG(3) << "begin to create init version. " - << "begin=" << version.first << ", end=" << version.second; - - OLAPTablePtr table; - ColumnDataWriter* writer = NULL; - OLAPStatus res = OLAP_SUCCESS; - - do { - if (version.first > version.second) { - OLAP_LOG_WARNING("begin should not larger than end. [begin=%d end=%d]", - version.first, version.second); - res = OLAP_ERR_INPUT_PARAMETER_ERROR; - break; - } - - // Get olap table and generate new index - table = OLAPEngine::get_instance()->get_table(tablet_id, schema_hash); - if (table.get() == NULL) { - OLAP_LOG_WARNING("fail to find table. [table=%ld]", tablet_id); - res = OLAP_ERR_TABLE_NOT_FOUND; - break; - } - - // Create writer, which write nothing to table, to generate empty data file - writer = ColumnDataWriter::create(table, segment_group, false); - if (writer == NULL) { - LOG(WARNING) << "fail to create writer. [table=" << table->full_name() << "]"; - res = OLAP_ERR_MALLOC_ERROR; - break; - } - - res = writer->finalize(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to finalize writer. [table=" << table->full_name() << "]"; - break; - } - - // Load new index and add to table - res = segment_group->load(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load new index. [table=" << table->full_name() << "]"; - break; - } - } while (0); - - VLOG(3) << "create init version end. 
res=" << res; - SAFE_DELETE(writer); - return res; -} - } // namespace doris - diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index 87b15fb70fa313..2ae94b4380961d 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -24,48 +24,44 @@ #include "gen_cpp/AgentService_types.h" #include "olap/delete_handler.h" -#include "olap/column_data.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_writer.h" +#include "olap/tablet.h" +#include "olap/column_mapping.h" namespace doris { // defined in 'field.h' class Field; class FieldInfo; -// defined in 'olap_data.h' -class ColumnData; -// defined in 'olap_table.h' -class OLAPTable; +// defined in 'tablet.h' +class Tablet; // defined in 'row_block.h' class RowBlock; // defined in 'row_cursor.h' class RowCursor; -// defined in 'writer.h' -class ColumnDataWriter; class RowBlockChanger { public: - typedef std::vector SchemaMapping; - - RowBlockChanger(const std::vector& tablet_schema, - const OLAPTablePtr& ref_olap_table, + RowBlockChanger(const TabletSchema& tablet_schema, + const TabletSharedPtr& base_tablet, const DeleteHandler& delete_handler); - RowBlockChanger(const std::vector& tablet_schema, - const OLAPTablePtr& ref_olap_table); - + RowBlockChanger(const TabletSchema& tablet_schema, + const TabletSharedPtr& base_tablet); + virtual ~RowBlockChanger(); ColumnMapping* get_mutable_column_mapping(size_t column_index); - SchemaMapping get__schema_mapping() const { + SchemaMapping get_schema_mapping() const { return _schema_mapping; } - + bool change_row_block( - const DataFileType df_type, - const RowBlock& origin_block, + const RowBlock* ref_block, int32_t data_version, RowBlock* mutable_block, - uint64_t* filted_rows) const; + uint64_t* filtered_rows) const; private: // @brief column-mapping specification of new schema @@ -96,15 +92,15 @@ class RowBlockSorter { class RowBlockAllocator { public: - RowBlockAllocator(const std::vector& tablet_schema, size_t memory_limitation); + RowBlockAllocator(const TabletSchema& tablet_schema, size_t memory_limitation); virtual ~RowBlockAllocator(); - OLAPStatus allocate(RowBlock** row_block, size_t num_rows, - DataFileType data_file_type, bool null_supported); + OLAPStatus allocate(RowBlock** row_block, size_t num_rows, + bool null_supported); void release(RowBlock* row_block); private: - const std::vector& _tablet_schema; + const TabletSchema& _tablet_schema; size_t _memory_allocated; size_t _row_len; size_t _memory_limitation; @@ -112,12 +108,12 @@ class RowBlockAllocator { class RowBlockMerger { public: - explicit RowBlockMerger(OLAPTablePtr olap_table); + explicit RowBlockMerger(TabletSharedPtr tablet); virtual ~RowBlockMerger(); bool merge( const std::vector& row_block_arr, - ColumnDataWriter* writer, + RowsetWriterSharedPtr rowset_writer, uint64_t* merged_rows); private: @@ -125,7 +121,7 @@ class RowBlockMerger { bool operator<(const MergeElement& other) const { return row_cursor->full_key_cmp(*(other.row_cursor)) > 0; } - + const RowBlock* row_block; RowCursor* row_cursor; uint32_t row_block_index; @@ -134,65 +130,60 @@ class RowBlockMerger { bool _make_heap(const std::vector& row_block_arr); bool _pop_heap(); - OLAPTablePtr _olap_table; + TabletSharedPtr _tablet; std::priority_queue _heap; }; class SchemaChange { public: - SchemaChange() : _filted_rows(0), _merged_rows(0) {} + SchemaChange() : _filtered_rows(0), _merged_rows(0) {} virtual ~SchemaChange() {} - virtual bool process(ColumnData* olap_data, SegmentGroup* new_segment_group) = 0; + virtual 
bool process(RowsetReaderSharedPtr rowset_reader, + RowsetWriterSharedPtr new_rowset_builder, + TabletSharedPtr tablet, + TabletSharedPtr base_tablet) = 0; - void add_filted_rows(uint64_t filted_rows) { - _filted_rows += filted_rows; + void add_filtered_rows(uint64_t filtered_rows) { + _filtered_rows += filtered_rows; } void add_merged_rows(uint64_t merged_rows) { _merged_rows += merged_rows; } - uint64_t filted_rows() const { - return _filted_rows; + uint64_t filtered_rows() const { + return _filtered_rows; } uint64_t merged_rows() const { return _merged_rows; } - void reset_filted_rows() { - _filted_rows = 0; + void reset_filtered_rows() { + _filtered_rows = 0; } void reset_merged_rows() { _merged_rows = 0; } - OLAPStatus create_init_version( - TTabletId tablet_id, - TSchemaHash schema_hash, - Version version, - VersionHash version_hash, - SegmentGroup* segment_group); - private: - uint64_t _filted_rows; + uint64_t _filtered_rows; uint64_t _merged_rows; }; class LinkedSchemaChange : public SchemaChange { public: - explicit LinkedSchemaChange( - OLAPTablePtr base_olap_table, - OLAPTablePtr new_olap_table, - const RowBlockChanger& row_block_changer); + explicit LinkedSchemaChange(const RowBlockChanger& row_block_changer) + : _row_block_changer(row_block_changer) { } ~LinkedSchemaChange() {} - bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); + bool process(RowsetReaderSharedPtr rowset_reader, + RowsetWriterSharedPtr new_rowset_writer, + TabletSharedPtr new_tablet, + TabletSharedPtr base_tablet); private: - OLAPTablePtr _base_olap_table; - OLAPTablePtr _new_olap_table; const RowBlockChanger& _row_block_changer; DISALLOW_COPY_AND_ASSIGN(LinkedSchemaChange); }; @@ -200,23 +191,24 @@ class LinkedSchemaChange : public SchemaChange { // @brief schema change without sorting. class SchemaChangeDirectly : public SchemaChange { public: - // @params olap_table the instance of table which has new schema. + // @params tablet the instance of tablet which has new schema. 
// @params row_block_changer changer to modifiy the data of RowBlock explicit SchemaChangeDirectly( - OLAPTablePtr olap_table, const RowBlockChanger& row_block_changer); virtual ~SchemaChangeDirectly(); - virtual bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); + virtual bool process(RowsetReaderSharedPtr rowset_reader, + RowsetWriterSharedPtr new_rowset_writer, + TabletSharedPtr new_tablet, + TabletSharedPtr base_tablet); private: - OLAPTablePtr _olap_table; const RowBlockChanger& _row_block_changer; RowBlockAllocator* _row_block_allocator; RowCursor* _src_cursor; RowCursor* _dst_cursor; - bool _write_row_block(ColumnDataWriter* writer, RowBlock* row_block); + bool _write_row_block(RowsetWriterSharedPtr rowset_builder, RowBlock* row_block); DISALLOW_COPY_AND_ASSIGN(SchemaChangeDirectly); }; @@ -225,24 +217,28 @@ class SchemaChangeDirectly : public SchemaChange { class SchemaChangeWithSorting : public SchemaChange { public: explicit SchemaChangeWithSorting( - OLAPTablePtr olap_table, const RowBlockChanger& row_block_changer, size_t memory_limitation); virtual ~SchemaChangeWithSorting(); - virtual bool process(ColumnData* olap_data, SegmentGroup* new_segment_group); + virtual bool process(RowsetReaderSharedPtr rowset_reader, + RowsetWriterSharedPtr new_rowset_builder, + TabletSharedPtr new_tablet, + TabletSharedPtr base_tablet); private: bool _internal_sorting( const std::vector& row_block_arr, const Version& temp_delta_versions, - SegmentGroup** temp_segment_group); + const VersionHash version_hash, + TabletSharedPtr new_tablet, + RowsetSharedPtr* rowset); bool _external_sorting( - std::vector& src_segment_group_arr, - SegmentGroup* segment_group); + std::vector& src_rowsets, + RowsetWriterSharedPtr rowset_writer, + TabletSharedPtr new_tablet); - OLAPTablePtr _olap_table; const RowBlockChanger& _row_block_changer; size_t _memory_limitation; Version _temp_delta_versions; @@ -256,39 +252,13 @@ class SchemaChangeHandler { SchemaChangeHandler() {} virtual ~SchemaChangeHandler() {} - OLAPStatus process_alter_table(AlterTabletType alter_table_type, + OLAPStatus process_alter_tablet(AlterTabletType alter_tablet_type, const TAlterTabletReq& request); - OLAPStatus schema_version_convert(OLAPTablePtr ref_olap_table, - OLAPTablePtr new_olap_table, - std::vector* ref_segment_groups, - std::vector* new_segment_groups); - - // 清空一个table下的schema_change信息:包括split_talbe以及其他schema_change信息 - // 这里只清理自身的out链,不考虑related的table - // NOTE 需要外部lock header - // Params: - // alter_table_type - // 为NULL时,同时检查table_split和其他普通schema_change - // 否则只检查指定type的信息 - // only_one: - // 为true时:如果其out链只有一个,且可删除,才可能进行clear - // 为false时:如果发现有大于1个out链,不管是否可删除,都不进行删除 - // check_only: - // 检查通过也不删除schema - // Returns: - // 成功:有的都可以清理(没有就直接跳过) - // 失败:如果有信息但不能清理(有version没完成),或不符合only_one条件 - static OLAPStatus clear_schema_change_single_info(TTabletId tablet_id, - SchemaHash schema_hash, - AlterTabletType* alter_table_type, - bool only_one, - bool check_only); - - static OLAPStatus clear_schema_change_single_info(OLAPTablePtr olap_table, - AlterTabletType* alter_table_type, - bool only_one, - bool check_only); + OLAPStatus schema_version_convert(TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet, + RowsetSharedPtr* base_rowset, + RowsetSharedPtr* new_rowset); private: @@ -297,55 +267,44 @@ class SchemaChangeHandler { // Returns: // 成功:如果存在历史信息,没有问题的就清空;或者没有历史信息 // 失败:否则如果有历史信息且无法清空的(有version还没有完成) - OLAPStatus _check_and_clear_schema_change_info(OLAPTablePtr olap_table, + OLAPStatus 
_check_and_clear_schema_change_info(TabletSharedPtr tablet, const TAlterTabletReq& request); - OLAPStatus _get_versions_to_be_changed(OLAPTablePtr ref_olap_table, + OLAPStatus _get_versions_to_be_changed(TabletSharedPtr base_tablet, std::vector& versions_to_be_changed); - OLAPStatus _do_alter_table(AlterTabletType type, - OLAPTablePtr ref_olap_table, - const TAlterTabletReq& request); - struct SchemaChangeParams { - // 为了让calc_split_key也可使用普通schema_change的线程,才设置了此type - AlterTabletType alter_table_type; - OLAPTablePtr ref_olap_table; - OLAPTablePtr new_olap_table; - std::vector ref_olap_data_arr; - std::string debug_message; + AlterTabletType alter_tablet_type; + TabletSharedPtr base_tablet; + TabletSharedPtr new_tablet; + std::vector ref_rowset_readers; DeleteHandler delete_handler; - // TODO(zc): fuck me please, I don't add mutable here, but no where - mutable std::string user; - mutable std::string group; }; - // 根据给定的table_desc,创建OLAPTable,并挂接到OLAPEngine中 - OLAPStatus _create_new_olap_table(const OLAPTablePtr ref_olap_table, - const TCreateTabletReq& create_tablet_req, - const std::string* ref_root_path, - OLAPTablePtr* out_new_olap_table); + // add alter task to base_tablet and new_tablet. + // add A->(B|C|...) relation chain to all of them. + OLAPStatus _add_alter_task(AlterTabletType alter_tablet_type, + TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet, + const std::vector& versions_to_be_changed); + OLAPStatus _save_alter_state(AlterTabletState state, + TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet); - // 增加A->(B|C|...) 的schema_change信息 - // 在split table时,增加split-table status相关的信息 - // 其他的都增加在schema-change status中 - OLAPStatus _save_schema_change_info(AlterTabletType alter_table_type, - OLAPTablePtr ref_olap_table, - OLAPTablePtr new_olap_table, - const std::vector& versions_to_be_changed); + static OLAPStatus _convert_historical_rowsets(const SchemaChangeParams& sc_params); - static OLAPStatus _alter_table(SchemaChangeParams* sc_params); - - static OLAPStatus _parse_request(OLAPTablePtr ref_olap_table, - OLAPTablePtr new_olap_table, + static OLAPStatus _parse_request(TabletSharedPtr base_tablet, + TabletSharedPtr new_tablet, RowBlockChanger* rb_changer, - bool* sc_sorting, + bool* sc_sorting, bool* sc_directly); // 需要新建default_value时的初始化设置 static OLAPStatus _init_column_mapping(ColumnMapping* column_mapping, - const FieldInfo& column_schema, + const TabletColumn& column_schema, const std::string& value); +private: + RowsetReaderContext _reader_context; DISALLOW_COPY_AND_ASSIGN(SchemaChangeHandler); }; diff --git a/be/src/olap/segment_group.cpp b/be/src/olap/segment_group.cpp deleted file mode 100644 index 3fdd4d27a4152c..00000000000000 --- a/be/src/olap/segment_group.cpp +++ /dev/null @@ -1,674 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/segment_group.h" - -#include -#include -#include -#include - -#include "olap/column_data.h" -#include "olap/olap_table.h" -#include "olap/row_block.h" -#include "olap/row_cursor.h" -#include "olap/utils.h" -#include "olap/column_mapping.h" - -using std::ifstream; -using std::string; -using std::vector; - -namespace doris { - -#define TABLE_PARAM_VALIDATE() \ - do { \ - if (!_index_loaded) { \ - OLAP_LOG_WARNING("fail to find, index is not loaded. [table=%ld schema_hash=%d]", \ - _table->tablet_id(), \ - _table->schema_hash()); \ - return OLAP_ERR_NOT_INITED; \ - } \ - } while (0); - -#define POS_PARAM_VALIDATE(pos) \ - do { \ - if (NULL == pos) { \ - OLAP_LOG_WARNING("fail to find, NULL position parameter."); \ - return OLAP_ERR_INPUT_PARAMETER_ERROR; \ - } \ - } while (0); - -#define SLICE_PARAM_VALIDATE(slice) \ - do { \ - if (NULL == slice) { \ - OLAP_LOG_WARNING("fail to find, NULL slice parameter."); \ - return OLAP_ERR_INPUT_PARAMETER_ERROR; \ - } \ - } while (0); - -SegmentGroup::SegmentGroup(OLAPTable* table, Version version, VersionHash version_hash, - bool delete_flag, int32_t segment_group_id, int32_t num_segments) - : _table(table), - _version(version), - _version_hash(version_hash), - _delete_flag(delete_flag), - _segment_group_id(segment_group_id), - _num_segments(num_segments) { - _index_loaded = false; - _ref_count = 0; - _is_pending = false; - _partition_id = 0; - _transaction_id = 0; - _short_key_length = 0; - _new_short_key_length = 0; - _short_key_buf = nullptr; - _file_created = false; - _new_segment_created = false; - _empty = false; - - const RowFields& tablet_schema = _table->tablet_schema(); - for (size_t i = 0; i < _table->num_short_key_fields(); ++i) { - _short_key_info_list.push_back(tablet_schema[i]); - _short_key_length += tablet_schema[i].index_length + 1;// 1 for null byte - if (tablet_schema[i].type == OLAP_FIELD_TYPE_CHAR || - tablet_schema[i].type == OLAP_FIELD_TYPE_VARCHAR) { - _new_short_key_length += sizeof(Slice) + 1; - } else { - _new_short_key_length += tablet_schema[i].index_length + 1; - } - } -} - -SegmentGroup::SegmentGroup(OLAPTable* table, bool delete_flag, - int32_t segment_group_id, int32_t num_segments, bool is_pending, - TPartitionId partition_id, TTransactionId transaction_id) - : _table(table), _delete_flag(delete_flag), - _segment_group_id(segment_group_id), _num_segments(num_segments), - _is_pending(is_pending), _partition_id(partition_id), - _transaction_id(transaction_id) -{ - _version = {-1, -1}; - _version_hash = 0; - _load_id.set_hi(0); - _load_id.set_lo(0); - _index_loaded = false; - _ref_count = 0; - _short_key_length = 0; - _new_short_key_length = 0; - _short_key_buf = NULL; - _file_created = false; - _new_segment_created = false; - _empty = false; - - const RowFields& tablet_schema = _table->tablet_schema(); - for (size_t i = 0; i < _table->num_short_key_fields(); ++i) { - _short_key_info_list.push_back(tablet_schema[i]); - _short_key_length += tablet_schema[i].index_length + 1;// 1 for null byte - if (tablet_schema[i].type == OLAP_FIELD_TYPE_CHAR || - tablet_schema[i].type == OLAP_FIELD_TYPE_VARCHAR) { - _new_short_key_length += sizeof(Slice) + 1; - } else { - _new_short_key_length += tablet_schema[i].index_length + 1; - } - } -} - -SegmentGroup::~SegmentGroup() { - delete [] _short_key_buf; - _current_file_handler.close(); - - for (size_t i = 0; i < _column_statistics.size(); ++i) { - 
SAFE_DELETE(_column_statistics[i].first); - SAFE_DELETE(_column_statistics[i].second); - } - _seg_pb_map.clear(); -} - -string SegmentGroup::construct_index_file_path(int32_t segment_group_id, int32_t segment) const { - if (_is_pending) { - return _table->construct_pending_index_file_path(_transaction_id, _segment_group_id, segment); - } else { - return _table->construct_index_file_path(_version, _version_hash, _segment_group_id, segment); - } -} - -string SegmentGroup::construct_data_file_path(int32_t segment_group_id, int32_t segment) const { - if (_is_pending) { - return _table->construct_pending_data_file_path(_transaction_id, segment_group_id, segment); - } else { - return _table->construct_data_file_path(_version, _version_hash, segment_group_id, segment); - } -} - -void SegmentGroup::publish_version(Version version, VersionHash version_hash) { - _version = version; - _version_hash = version_hash; -} - -void SegmentGroup::acquire() { - atomic_inc(&_ref_count); -} - -int64_t SegmentGroup::ref_count() { - return _ref_count; -} - -void SegmentGroup::release() { - atomic_dec(&_ref_count); -} - -bool SegmentGroup::is_in_use() { - return _ref_count > 0; -} - -// you can not use SegmentGroup after delete_all_files(), or else unknown behavior occurs. -void SegmentGroup::delete_all_files() { - if (!_file_created) { return; } - for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { - // get full path for one segment - string index_path = construct_index_file_path(_segment_group_id, seg_id); - string data_path = construct_data_file_path(_segment_group_id, seg_id); - - if (remove(index_path.c_str()) != 0) { - char errmsg[64]; - LOG(WARNING) << "fail to delete index file. [err='" << strerror_r(errno, errmsg, 64) - << "' path='" << index_path << "']"; - } - - if (remove(data_path.c_str()) != 0) { - char errmsg[64]; - LOG(WARNING) << "fail to delete data file. 
[err='" << strerror_r(errno, errmsg, 64) - << "' path='" << data_path << "']"; - } - } -} - - -OLAPStatus SegmentGroup::add_column_statistics_for_linked_schema_change( - const std::vector>& column_statistic_fields, - const SchemaMapping& schema_mapping) { - //When add rollup table, the base table index maybe empty - if (column_statistic_fields.size() == 0) { - return OLAP_SUCCESS; - } - - //1 for LinkedSchemaChange, the rollup table keys order is the same as base table - //2 when user add a new key column to base table, _table->num_key_fields() size will - // greater than _column_statistics size - int num_new_keys = 0; - for (size_t i = 0; i < _table->num_key_fields(); ++i) { - const FieldInfo& column_schema = _table->tablet_schema()[i]; - - WrapperField* first = WrapperField::create(column_schema); - DCHECK(first != NULL) << "failed to allocate memory for field: " << i; - - WrapperField* second = WrapperField::create(column_schema); - DCHECK(second != NULL) << "failed to allocate memory for field: " << i; - - //for new key column, use default value to fill into column_statistics - if (schema_mapping[i].ref_column == -1) { - num_new_keys++; - - first->copy(schema_mapping[i].default_value); - second->copy(schema_mapping[i].default_value); - } else { - first->copy(column_statistic_fields[i - num_new_keys].first); - second->copy(column_statistic_fields[i - num_new_keys].second); - } - - _column_statistics.push_back(std::make_pair(first, second)); - } - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::add_column_statistics( - const std::vector>& column_statistic_fields) { - DCHECK(column_statistic_fields.size() == _table->num_key_fields()); - for (size_t i = 0; i < column_statistic_fields.size(); ++i) { - WrapperField* first = WrapperField::create(_table->tablet_schema()[i]); - DCHECK(first != NULL) << "failed to allocate memory for field: " << i; - first->copy(column_statistic_fields[i].first); - - WrapperField* second = WrapperField::create(_table->tablet_schema()[i]); - DCHECK(second != NULL) << "failed to allocate memory for field: " << i; - second->copy(column_statistic_fields[i].second); - - _column_statistics.push_back(std::make_pair(first, second)); - } - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::add_column_statistics( - std::vector > &column_statistic_strings, - std::vector &null_vec) { - DCHECK(column_statistic_strings.size() == _table->num_key_fields()); - for (size_t i = 0; i < column_statistic_strings.size(); ++i) { - WrapperField* first = WrapperField::create(_table->tablet_schema()[i]); - DCHECK(first != NULL) << "failed to allocate memory for field: " << i ; - RETURN_NOT_OK(first->from_string(column_statistic_strings[i].first)); - if (null_vec[i]) { - //[min, max] -> [NULL, max] - first->set_null(); - } - WrapperField* second = WrapperField::create(_table->tablet_schema()[i]); - DCHECK(first != NULL) << "failed to allocate memory for field: " << i ; - RETURN_NOT_OK(second->from_string(column_statistic_strings[i].second)); - _column_statistics.push_back(std::make_pair(first, second)); - } - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::load() { - if (_empty) { - return OLAP_SUCCESS; - } - OLAPStatus res = OLAP_ERR_INDEX_LOAD_ERROR; - boost::lock_guard guard(_index_load_lock); - - if (_index_loaded) { - return OLAP_SUCCESS; - } - - if (_num_segments == 0) { - OLAP_LOG_WARNING("fail to load index, segments number is 0."); - return res; - } - - if (_index.init(_short_key_length, _new_short_key_length, - _table->num_short_key_fields(), &_short_key_info_list) != 
OLAP_SUCCESS) { - OLAP_LOG_WARNING("fail to create MemIndex. [num_segment=%d]", _num_segments); - return res; - } - - // for each segment - for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { - if (COLUMN_ORIENTED_FILE == _table->data_file_type()) { - string seg_path = construct_data_file_path(_segment_group_id, seg_id); - if (OLAP_SUCCESS != (res = load_pb(seg_path.c_str(), seg_id))) { - LOG(WARNING) << "failed to load pb structures. [seg_path='" << seg_path << "']"; - _check_io_error(res); - return res; - } - } - - // get full path for one segment - string path = construct_index_file_path(_segment_group_id, seg_id); - if ((res = _index.load_segment(path.c_str(), &_current_num_rows_per_row_block)) - != OLAP_SUCCESS) { - LOG(WARNING) << "fail to load segment. [path='" << path << "']"; - _check_io_error(res); - return res; - } - } - - _delete_flag = _index.delete_flag(); - _index_loaded = true; - _file_created = true; - - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::load_pb(const char* file, uint32_t seg_id) { - OLAPStatus res = OLAP_SUCCESS; - - FileHeader seg_file_header; - FileHandler seg_file_handler; - res = seg_file_handler.open(file, O_RDONLY); - if (OLAP_SUCCESS != res) { - OLAP_LOG_WARNING("failed to open segment file. [err=%d, file=%s]", res, file); - return res; - } - - res = seg_file_header.unserialize(&seg_file_handler); - if (OLAP_SUCCESS != res) { - seg_file_handler.close(); - OLAP_LOG_WARNING("fail to unserialize header. [err=%d, path='%s']", res, file); - return res; - } - - _seg_pb_map[seg_id] = seg_file_header; - seg_file_handler.close(); - return OLAP_SUCCESS; -} - -bool SegmentGroup::index_loaded() { - return _index_loaded; -} - -OLAPStatus SegmentGroup::validate() { - if (_empty) { - return OLAP_SUCCESS; - } - - OLAPStatus res = OLAP_SUCCESS; - for (uint32_t seg_id = 0; seg_id < _num_segments; ++seg_id) { - FileHeader index_file_header; - FileHeader data_file_header; - - // get full path for one segment - string index_path = construct_index_file_path(_segment_group_id, seg_id); - string data_path = construct_data_file_path(_segment_group_id, seg_id); - - // 检查index文件头 - if ((res = index_file_header.validate(index_path)) != OLAP_SUCCESS) { - LOG(WARNING) << "validate index file error. [file='" << index_path << "']"; - _check_io_error(res); - return res; - } - - // 检查data文件头 - if ((res = data_file_header.validate(data_path)) != OLAP_SUCCESS) { - LOG(WARNING) << "validate data file error. [file='" << data_path << "']"; - _check_io_error(res); - return res; - } - } - - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::find_row_block(const RowCursor& key, - RowCursor* helper_cursor, - bool find_last, - RowBlockPosition* pos) const { - TABLE_PARAM_VALIDATE(); - POS_PARAM_VALIDATE(pos); - - // 将这部分逻辑从memindex移出来,这样可以复用find。 - OLAPIndexOffset offset = _index.find(key, helper_cursor, find_last); - if (offset.offset > 0) { - offset.offset = offset.offset - 1; - } else { - offset.offset = 0; - } - - if (find_last) { - OLAPIndexOffset next_offset = _index.next(offset); - if (!(next_offset == _index.end())) { - offset = next_offset; - } - } - - return _index.get_row_block_position(offset, pos); -} - -OLAPStatus SegmentGroup::find_short_key(const RowCursor& key, - RowCursor* helper_cursor, - bool find_last, - RowBlockPosition* pos) const { - TABLE_PARAM_VALIDATE(); - POS_PARAM_VALIDATE(pos); - - // 由于find会从前一个segment找起,如果前一个segment中恰好没有该key, - // 就用前移后移来移动segment的位置. 
- OLAPIndexOffset offset = _index.find(key, helper_cursor, find_last); - if (offset.offset > 0) { - offset.offset = offset.offset - 1; - - OLAPIndexOffset next_offset = _index.next(offset); - if (!(next_offset == _index.end())) { - offset = next_offset; - } - } - - VLOG(3) << "seg=" << offset.segment << ", offset=" << offset.offset; - return _index.get_row_block_position(offset, pos); -} - -OLAPStatus SegmentGroup::get_row_block_entry(const RowBlockPosition& pos, EntrySlice* entry) const { - TABLE_PARAM_VALIDATE(); - SLICE_PARAM_VALIDATE(entry); - - return _index.get_entry(_index.get_offset(pos), entry); -} - -OLAPStatus SegmentGroup::find_first_row_block(RowBlockPosition* position) const { - TABLE_PARAM_VALIDATE(); - POS_PARAM_VALIDATE(position); - - return _index.get_row_block_position(_index.find_first(), position); -} - -OLAPStatus SegmentGroup::find_last_row_block(RowBlockPosition* position) const { - TABLE_PARAM_VALIDATE(); - POS_PARAM_VALIDATE(position); - - return _index.get_row_block_position(_index.find_last(), position); -} - -OLAPStatus SegmentGroup::find_next_row_block(RowBlockPosition* pos, bool* eof) const { - TABLE_PARAM_VALIDATE(); - POS_PARAM_VALIDATE(pos); - POS_PARAM_VALIDATE(eof); - - OLAPIndexOffset current = _index.get_offset(*pos); - *eof = false; - - OLAPIndexOffset next = _index.next(current); - if (next == _index.end()) { - *eof = true; - return OLAP_ERR_INDEX_EOF; - } - - return _index.get_row_block_position(next, pos); -} - -OLAPStatus SegmentGroup::find_mid_point(const RowBlockPosition& low, - const RowBlockPosition& high, - RowBlockPosition* output, - uint32_t* dis) const { - *dis = compute_distance(low, high); - if (*dis >= _index.count()) { - return OLAP_ERR_INDEX_EOF; - } else { - *output = low; - if (advance_row_block(*dis / 2, output) != OLAP_SUCCESS) { - return OLAP_ERR_INDEX_EOF; - } - - return OLAP_SUCCESS; - } -} - -OLAPStatus SegmentGroup::find_prev_point( - const RowBlockPosition& current, RowBlockPosition* prev) const { - OLAPIndexOffset current_offset = _index.get_offset(current); - OLAPIndexOffset prev_offset = _index.prev(current_offset); - - return _index.get_row_block_position(prev_offset, prev); -} - -OLAPStatus SegmentGroup::advance_row_block(int64_t num_row_blocks, RowBlockPosition* position) const { - TABLE_PARAM_VALIDATE(); - POS_PARAM_VALIDATE(position); - - OLAPIndexOffset off = _index.get_offset(*position); - iterator_offset_t absolute_offset = _index.get_absolute_offset(off) + num_row_blocks; - if (absolute_offset >= _index.count()) { - return OLAP_ERR_INDEX_EOF; - } - - return _index.get_row_block_position(_index.get_relative_offset(absolute_offset), position); -} - -// PRECONDITION position1 < position2 -uint32_t SegmentGroup::compute_distance(const RowBlockPosition& position1, - const RowBlockPosition& position2) const { - iterator_offset_t offset1 = _index.get_absolute_offset(_index.get_offset(position1)); - iterator_offset_t offset2 = _index.get_absolute_offset(_index.get_offset(position2)); - - return offset2 > offset1 ? 
offset2 - offset1 : 0; -} - -OLAPStatus SegmentGroup::add_segment() { - // 打开文件 - ++_num_segments; - - OLAPIndexHeaderMessage* index_header = NULL; - // 构造Proto格式的Header - index_header = _file_header.mutable_message(); - index_header->set_start_version(_version.first); - index_header->set_end_version(_version.second); - index_header->set_cumulative_version_hash(_version_hash); - index_header->set_segment(_num_segments - 1); - index_header->set_num_rows_per_block(_table->num_rows_per_row_block()); - index_header->set_delete_flag(_delete_flag); - index_header->set_null_supported(true); - - // 分配一段存储short key的内存, 初始化index_row - if (_short_key_buf == NULL) { - _short_key_buf = new(std::nothrow) char[_short_key_length]; - if (_short_key_buf == NULL) { - OLAP_LOG_WARNING("malloc short_key_buf error."); - return OLAP_ERR_MALLOC_ERROR; - } - - if (_current_index_row.init(_table->tablet_schema()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("init _current_index_row fail."); - return OLAP_ERR_INIT_FAILED; - } - } - - // 初始化checksum - _checksum = ADLER32_INIT; - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::add_row_block(const RowBlock& row_block, const uint32_t data_offset) { - // get first row of the row_block to distill index item. - row_block.get_row(0, &_current_index_row); - return add_short_key(_current_index_row, data_offset); -} - -OLAPStatus SegmentGroup::add_short_key(const RowCursor& short_key, const uint32_t data_offset) { - OLAPStatus res = OLAP_SUCCESS; - if (!_new_segment_created) { - string file_path = construct_index_file_path(_segment_group_id, _num_segments - 1); - res = _current_file_handler.open_with_mode( - file_path.c_str(), O_CREAT | O_EXCL | O_WRONLY, S_IRUSR | S_IWUSR); - if (res != OLAP_SUCCESS) { - char errmsg[64]; - LOG(WARNING) << "can not create file. [file_path='" << file_path - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; - _check_io_error(res); - return res; - } - _file_created = true; - _new_segment_created = true; - - // 准备FileHeader - if ((res = _file_header.prepare(&_current_file_handler)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("write file header error. [err=%m]"); - _check_io_error(res); - return res; - } - - // 跳过FileHeader - if (_current_file_handler.seek(_file_header.size(), SEEK_SET) == -1) { - OLAP_LOG_WARNING("lseek header file error. [err=%m]"); - res = OLAP_ERR_IO_ERROR; - _check_io_error(res); - return res; - } - } - - // 将short key的内容写入_short_key_buf - size_t offset = 0; - - //short_key.write_null_array(_short_key_buf); - //offset += short_key.get_num_null_byte(); - for (size_t i = 0; i < _short_key_info_list.size(); i++) { - short_key.write_index_by_index(i, _short_key_buf + offset); - offset += short_key.get_index_size(i) + 1; - } - - // 写入Short Key对应的数据 - if ((res = _current_file_handler.write(_short_key_buf, _short_key_length)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("write short key failed. [err=%m]"); - _check_io_error(res); - return res; - } - - // 写入对应的数据文件偏移量 - if ((res = _current_file_handler.write(&data_offset, sizeof(data_offset))) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("write data_offset failed. 
[err=%m]"); - _check_io_error(res); - return res; - } - - _checksum = olap_adler32(_checksum, _short_key_buf, _short_key_length); - _checksum = olap_adler32(_checksum, - reinterpret_cast(&data_offset), - sizeof(data_offset)); - return OLAP_SUCCESS; -} - -OLAPStatus SegmentGroup::finalize_segment(uint32_t data_segment_size, int64_t num_rows) { - // 准备FileHeader - OLAPStatus res = OLAP_SUCCESS; - - int file_length = _current_file_handler.tell(); - if (file_length == -1) { - OLAP_LOG_WARNING("get file_length error. [err=%m]"); - _check_io_error(res); - return OLAP_ERR_IO_ERROR; - } - - _file_header.set_file_length(file_length); - _file_header.set_checksum(_checksum); - _file_header.mutable_extra()->data_length = data_segment_size; - _file_header.mutable_extra()->num_rows = num_rows; - - // 写入更新之后的FileHeader - if ((res = _file_header.serialize(&_current_file_handler)) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("write file header error. [err=%m]"); - _check_io_error(res); - return res; - } - - VLOG(3) << "finalize_segment. file_name=" << _current_file_handler.file_name() - << ", file_length=" << file_length; - - if ((res = _current_file_handler.close()) != OLAP_SUCCESS) { - OLAP_LOG_WARNING("close file error. [err=%m]"); - _check_io_error(res); - return res; - } - - _new_segment_created = false; - return OLAP_SUCCESS; -} - -void SegmentGroup::sync() { - if (_current_file_handler.sync() == -1) { - OLAP_LOG_WARNING("fail to sync file.[err=%m]"); - _table->set_io_error(); - } -} - -void SegmentGroup::_check_io_error(OLAPStatus res) { - if (is_io_error(res)) { - _table->set_io_error(); - } -} - -uint64_t SegmentGroup::num_index_entries() const { - return _index.count(); -} - -} diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp new file mode 100755 index 00000000000000..27ee1818be4ad5 --- /dev/null +++ b/be/src/olap/snapshot_manager.cpp @@ -0,0 +1,618 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/snapshot_manager.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "olap/olap_snapshot_converter.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_id_generator.h" +#include "olap/rowset/rowset_writer.h" + +using boost::filesystem::canonical; +using boost::filesystem::copy_file; +using boost::filesystem::copy_option; +using boost::filesystem::path; +using std::map; +using std::nothrow; +using std::set; +using std::string; +using std::stringstream; +using std::vector; +using std::list; + +namespace doris { + +SnapshotManager* SnapshotManager::_s_instance = nullptr; +std::mutex SnapshotManager::_mlock; + +SnapshotManager* SnapshotManager::instance() { + if (_s_instance == nullptr) { + std::lock_guard lock(_mlock); + if (_s_instance == nullptr) { + _s_instance = new SnapshotManager(); + } + } + return _s_instance; +} + +OLAPStatus SnapshotManager::make_snapshot( + const TSnapshotRequest& request, + string* snapshot_path) { + OLAPStatus res = OLAP_SUCCESS; + if (snapshot_path == nullptr) { + LOG(WARNING) << "output parameter cannot be NULL"; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + TabletSharedPtr ref_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(request.tablet_id, request.schema_hash); + if (ref_tablet == nullptr) { + LOG(WARNING) << "failed to get tablet. tablet=" << request.tablet_id + << " schema_hash=" << request.schema_hash; + return OLAP_ERR_TABLE_NOT_FOUND; + } + + res = _create_snapshot_files(ref_tablet, request, snapshot_path, request.preferred_snapshot_version); + // if all nodes has been upgraded, it can be removed + if (request.__isset.missing_version && res == OLAP_SUCCESS) { + (const_cast(request)).__set_allow_incremental_clone(true); + } + + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to make snapshot. res=" << res + << " tablet=" << request.tablet_id + << " schema_hash=" << request.schema_hash; + return res; + } + + LOG(INFO) << "success to make snapshot. path=['" << snapshot_path << "']"; + return res; +} + +OLAPStatus SnapshotManager::release_snapshot(const string& snapshot_path) { + // 如果请求的snapshot_path位于root/snapshot文件夹下,则认为是合法的,可以删除 + // 否则认为是非法请求,返回错误结果 + auto stores = StorageEngine::instance()->get_stores(); + for (auto store : stores) { + path boost_root_path(store->path()); + string abs_path = canonical(boost_root_path).string(); + + if (snapshot_path.compare(0, abs_path.size(), abs_path) == 0 + && snapshot_path.compare(abs_path.size(), + SNAPSHOT_PREFIX.size(), SNAPSHOT_PREFIX) == 0) { + remove_all_dir(snapshot_path); + VLOG(3) << "success to release snapshot path. [path='" << snapshot_path << "']"; + + return OLAP_SUCCESS; + } + } + + LOG(WARNING) << "released snapshot path illegal. [path='" << snapshot_path << "']"; + return OLAP_ERR_CE_CMD_PARAMS_ERROR; +} + + +OLAPStatus SnapshotManager::convert_rowset_ids(DataDir& data_dir, const string& clone_dir, int64_t tablet_id, + const int32_t& schema_hash, TabletSharedPtr tablet) { + OLAPStatus res = OLAP_SUCCESS; + // check clone dir existed + if (!check_dir_existed(clone_dir)) { + res = OLAP_ERR_DIR_NOT_EXIST; + LOG(WARNING) << "clone dir not existed when convert rowsetids. 
clone_dir=" + << clone_dir; + return res; + } + + // load original tablet meta + string cloned_meta_file = clone_dir + "/" + std::to_string(tablet_id) + ".hdr"; + TabletMeta cloned_tablet_meta; + if ((res = cloned_tablet_meta.create_from_file(cloned_meta_file)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load original tablet meta after clone. " + << ", cloned_meta_file=" << cloned_meta_file; + return res; + } + TabletMetaPB cloned_tablet_meta_pb; + res = cloned_tablet_meta.to_meta_pb(&cloned_tablet_meta_pb); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to serialize tablet meta to pb object. " + << " , cloned_meta_file=" << cloned_meta_file; + return res; + } + + TabletMetaPB new_tablet_meta_pb; + new_tablet_meta_pb = cloned_tablet_meta_pb; + new_tablet_meta_pb.clear_rs_metas(); + new_tablet_meta_pb.clear_inc_rs_metas(); + // should modify tablet id and schema hash because in restore process the tablet id is not + // equal to tablet id in meta + new_tablet_meta_pb.set_tablet_id(tablet_id); + new_tablet_meta_pb.set_schema_hash(schema_hash); + TabletSchema tablet_schema; + RETURN_NOT_OK(tablet_schema.init_from_pb(new_tablet_meta_pb.schema())); + + RowsetId max_rowset_id = 0; + for (auto& visible_rowset : cloned_tablet_meta_pb.rs_metas()) { + if (visible_rowset.rowset_id() > max_rowset_id) { + max_rowset_id = visible_rowset.rowset_id(); + } + } + + for (auto& inc_rowset : cloned_tablet_meta_pb.inc_rs_metas()) { + if (inc_rowset.rowset_id() > max_rowset_id) { + max_rowset_id = inc_rowset.rowset_id(); + } + } + RowsetId next_rowset_id = 0; + if (tablet == nullptr) { + next_rowset_id = 10000; + } else { + RETURN_NOT_OK(tablet->next_rowset_id(&next_rowset_id)); + } + if (next_rowset_id <= max_rowset_id) { + next_rowset_id = max_rowset_id + 1; + if (tablet != nullptr) { + RETURN_NOT_OK(tablet->set_next_rowset_id(next_rowset_id)); + } + } + + std::unordered_map _rs_version_map; + for (auto& visible_rowset : cloned_tablet_meta_pb.rs_metas()) { + RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_rs_metas(); + RowsetId rowset_id = 0; + if (tablet != nullptr) { + RETURN_NOT_OK(tablet->next_rowset_id(&rowset_id)); + } else { + rowset_id = ++next_rowset_id; + } + RETURN_NOT_OK(_rename_rowset_id(visible_rowset, clone_dir, data_dir, tablet_schema, rowset_id, rowset_meta)); + rowset_meta->set_tablet_id(tablet_id); + rowset_meta->set_tablet_schema_hash(schema_hash); + Version rowset_version = {visible_rowset.start_version(), visible_rowset.end_version()}; + _rs_version_map[rowset_version] = rowset_meta; + } + + for (auto& inc_rowset : cloned_tablet_meta_pb.inc_rs_metas()) { + Version rowset_version = {inc_rowset.start_version(), inc_rowset.end_version()}; + auto exist_rs = _rs_version_map.find(rowset_version); + if (exist_rs != _rs_version_map.end()) { + RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_inc_rs_metas(); + *rowset_meta = *(exist_rs->second); + continue; + } + RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_inc_rs_metas(); + RowsetId rowset_id = 0; + if (tablet != nullptr) { + RETURN_NOT_OK(tablet->next_rowset_id(&rowset_id)); + } else { + rowset_id = ++next_rowset_id; + } + RETURN_NOT_OK(_rename_rowset_id(inc_rowset, clone_dir, data_dir, tablet_schema, rowset_id, rowset_meta)); + rowset_meta->set_tablet_id(tablet_id); + rowset_meta->set_tablet_schema_hash(schema_hash); + } + RowsetId new_next_rowset_id = 0; + if (tablet != nullptr) { + RETURN_NOT_OK(tablet->next_rowset_id(&new_next_rowset_id)); + } else { + new_next_rowset_id = next_rowset_id + 1; + } + 
new_tablet_meta_pb.set_end_rowset_id(new_next_rowset_id); + + res = TabletMeta::save(cloned_meta_file, new_tablet_meta_pb); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to save converted tablet meta to dir='" << clone_dir; + return res; + } + + return OLAP_SUCCESS; +} + +OLAPStatus SnapshotManager::_rename_rowset_id(const RowsetMetaPB& rs_meta_pb, const string& new_path, + DataDir& data_dir, TabletSchema& tablet_schema, RowsetId& rowset_id, RowsetMetaPB* new_rs_meta_pb) { + OLAPStatus res = OLAP_SUCCESS; + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(rs_meta_pb); + RowsetSharedPtr org_rowset(new AlphaRowset(&tablet_schema, new_path, &data_dir, alpha_rowset_meta)); + RETURN_NOT_OK(org_rowset->init()); + RETURN_NOT_OK(org_rowset->load()); + RowsetMetaSharedPtr org_rowset_meta = org_rowset->rowset_meta(); + RowsetWriterContext context; + context.rowset_id = rowset_id; + context.tablet_id = org_rowset_meta->tablet_id(); + context.partition_id = org_rowset_meta->partition_id(); + context.tablet_schema_hash = org_rowset_meta->tablet_schema_hash(); + context.rowset_type = org_rowset_meta->rowset_type(); + context.rowset_path_prefix = new_path; + context.tablet_schema = &tablet_schema; + context.rowset_state = org_rowset_meta->rowset_state(); + context.data_dir = &data_dir; + context.version = org_rowset_meta->version(); + context.version_hash = org_rowset_meta->version_hash(); + RowsetWriterSharedPtr rs_writer(new AlphaRowsetWriter()); + if (rs_writer == nullptr) { + LOG(WARNING) << "fail to new rowset."; + return OLAP_ERR_MALLOC_ERROR; + } + rs_writer->init(context); + res = rs_writer->add_rowset(org_rowset); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to add rowset " + << " id = " << org_rowset->rowset_id() + << " to rowset " << rowset_id; + return res; + } + RowsetSharedPtr new_rowset = rs_writer->build(); + if (new_rowset == nullptr) { + LOG(WARNING) << "failed to build rowset when rename rowset id"; + return OLAP_ERR_MALLOC_ERROR; + } + RETURN_NOT_OK(new_rowset->init()); + RETURN_NOT_OK(new_rowset->load()); + new_rowset->rowset_meta()->to_rowset_pb(new_rs_meta_pb); + org_rowset->remove(); + return OLAP_SUCCESS; +} + +OLAPStatus SnapshotManager::_calc_snapshot_id_path( + const TabletSharedPtr& tablet, + string* out_path) { + OLAPStatus res = OLAP_SUCCESS; + if (out_path == nullptr) { + LOG(WARNING) << "output parameter cannot be NULL"; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + // get current timestamp string + string time_str; + if ((res = gen_timestamp_string(&time_str)) != OLAP_SUCCESS) { + LOG(WARNING) << "failed to generate time_string when move file to trash." + << "err code=" << res; + return res; + } + + stringstream snapshot_id_path_stream; + MutexLock auto_lock(&_snapshot_mutex); // will automatically unlock when function return. + snapshot_id_path_stream << tablet->data_dir()->path() << SNAPSHOT_PREFIX + << "/" << time_str << "." 
<< _snapshot_base_id++; + *out_path = snapshot_id_path_stream.str(); + return res; +} + +string SnapshotManager::get_schema_hash_full_path( + const TabletSharedPtr& ref_tablet, + const string& location) const { + stringstream schema_full_path_stream; + schema_full_path_stream << location + << "/" << ref_tablet->tablet_id() + << "/" << ref_tablet->schema_hash(); + string schema_full_path = schema_full_path_stream.str(); + + return schema_full_path; +} + +string SnapshotManager::_get_header_full_path( + const TabletSharedPtr& ref_tablet, + const std::string& schema_hash_path) const { + stringstream header_name_stream; + header_name_stream << schema_hash_path << "/" << ref_tablet->tablet_id() << ".hdr"; + return header_name_stream.str(); +} + +OLAPStatus SnapshotManager::_link_index_and_data_files( + const string& schema_hash_path, + const TabletSharedPtr& ref_tablet, + const std::vector& consistent_rowsets) { + OLAPStatus res = OLAP_SUCCESS; + for (auto& rs : consistent_rowsets) { + std::vector success_files; + RETURN_NOT_OK(rs->make_snapshot(schema_hash_path, &success_files)); + } + return res; +} + +OLAPStatus SnapshotManager::_create_snapshot_files( + const TabletSharedPtr& ref_tablet, + const TSnapshotRequest& request, + string* snapshot_path, + int32_t snapshot_version) { + + LOG(INFO) << "receive a make snapshot request," + << " request detail is " << apache::thrift::ThriftDebugString(request) + << " snapshot_path is " << *snapshot_path + << " snapshot_version is " << snapshot_version; + OLAPStatus res = OLAP_SUCCESS; + if (snapshot_path == nullptr) { + LOG(WARNING) << "output parameter cannot be NULL"; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + string snapshot_id_path; + res = _calc_snapshot_id_path(ref_tablet, &snapshot_id_path); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to calc snapshot_id_path, ref tablet=" + << ref_tablet->data_dir()->path(); + return res; + } + + string schema_full_path = get_schema_hash_full_path( + ref_tablet, snapshot_id_path); + string header_path = _get_header_full_path(ref_tablet, schema_full_path); + if (check_dir_existed(schema_full_path)) { + VLOG(10) << "remove the old schema_full_path."; + remove_all_dir(schema_full_path); + } + create_dirs(schema_full_path); + path boost_path(snapshot_id_path); + string snapshot_id = canonical(boost_path).string(); + do { + DataDir* data_dir = ref_tablet->data_dir(); + TabletMetaSharedPtr new_tablet_meta(new (nothrow) TabletMeta()); + if (new_tablet_meta == nullptr) { + LOG(WARNING) << "fail to malloc TabletMeta."; + res = OLAP_ERR_MALLOC_ERROR; + break; + } + vector consistent_rowsets; + if (request.__isset.missing_version) { + ReadLock rdlock(ref_tablet->get_header_lock_ptr()); + for (int64_t missed_version : request.missing_version) { + Version version = { missed_version, missed_version }; + const RowsetSharedPtr rowset = ref_tablet->get_rowset_by_version(version); + if (rowset != nullptr) { + consistent_rowsets.push_back(rowset); + } else { + LOG(WARNING) << "failed to find missed version when snapshot. " + << " tablet=" << request.tablet_id + << " schema_hash=" << request.schema_hash + << " version=" << version.first << "-" << version.second; + res = OLAP_ERR_VERSION_NOT_EXIST; + break; + } + } + if (res != OLAP_SUCCESS) { + break; + } + res = TabletMetaManager::get_meta(data_dir, ref_tablet->tablet_id(), + ref_tablet->schema_hash(), new_tablet_meta); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load header. 
res=" << res + << " tablet_id=" << ref_tablet->tablet_id() + << " schema_hash=" << ref_tablet->schema_hash(); + break; + } + } else { + ReadLock rdlock(ref_tablet->get_header_lock_ptr()); + // get latest version + const RowsetSharedPtr lastest_version = ref_tablet->rowset_with_max_version(); + if (lastest_version == nullptr) { + LOG(WARNING) << "tablet has not any version. path=" + << ref_tablet->full_name().c_str(); + res = OLAP_ERR_VERSION_NOT_EXIST; + break; + } + // get snapshot version, use request.version if specified + int32_t version = lastest_version->end_version(); + if (request.__isset.version) { + if (lastest_version->end_version() < request.version + || (lastest_version->start_version() == lastest_version->end_version() + && lastest_version->end_version() == request.version + && lastest_version->version_hash() != request.version_hash)) { + LOG(WARNING) << "invalid make snapshot request. " + << " version=" << lastest_version->end_version() + << " version_hash=" << lastest_version->version_hash() + << " req_version=" << request.version + << " req_version_hash=" << request.version_hash; + res = OLAP_ERR_INPUT_PARAMETER_ERROR; + break; + } + version = request.version; + } + // get shortest version path + // it very important!!!! + // it means 0-version has to be a readable version graph + res = ref_tablet->capture_consistent_rowsets(Version(0, version), &consistent_rowsets); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to select versions to span. res=" << res; + break; + } + + res = TabletMetaManager::get_meta(data_dir, ref_tablet->tablet_id(), + ref_tablet->schema_hash(), new_tablet_meta); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load header. res=" << res + << " tablet_id=" << ref_tablet->tablet_id() + << " schema_hash=" << ref_tablet->schema_hash(); + break; + } + } + + vector rs_metas; + for (auto& rs : consistent_rowsets) { + std::vector success_files; + res = rs->make_snapshot(schema_full_path, &success_files); + if (res != OLAP_SUCCESS) { break; } + rs_metas.push_back(rs->rowset_meta()); + VLOG(3) << "add rowset meta to clone list. " + << " start version " << rs->rowset_meta()->start_version() + << " end version " << rs->rowset_meta()->end_version() + << " empty " << rs->rowset_meta()->empty(); + } + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create hard link. [path=" << snapshot_id_path << "]"; + break; + } + + // clear alter task info in snapshot files + new_tablet_meta->delete_alter_task(); + + if (request.__isset.missing_version) { + new_tablet_meta->revise_inc_rs_metas(rs_metas); + vector empty_rowsets; + new_tablet_meta->revise_rs_metas(empty_rowsets); + } else { + // If this is a full clone, then should clear inc rowset metas because + // related files is not created + vector empty_rowsets; + new_tablet_meta->revise_inc_rs_metas(empty_rowsets); + new_tablet_meta->revise_rs_metas(rs_metas); + } + if (snapshot_version < PREFERRED_SNAPSHOT_VERSION) { + set exist_old_files; + if ((res = dir_walk(schema_full_path, nullptr, &exist_old_files)) != OLAP_SUCCESS) { + LOG(WARNING) << "failed to dir walk when convert old files. 
dir=" + << schema_full_path; + break; + } + OlapSnapshotConverter converter; + TabletMetaPB tablet_meta_pb; + OLAPHeaderMessage olap_header_msg; + new_tablet_meta->to_meta_pb(&tablet_meta_pb); + res = converter.to_old_snapshot(tablet_meta_pb, schema_full_path, schema_full_path, &olap_header_msg); + if (res != OLAP_SUCCESS) { + break; + } + // convert new version files to old version files successuflly, then should remove the old files + vector files_to_delete; + for (auto file_name : exist_old_files) { + string full_file_path = schema_full_path + "/" + file_name; + files_to_delete.push_back(full_file_path); + } + // remove all files + res = remove_files(files_to_delete); + if (res != OLAP_SUCCESS) { + break; + } + // save new header to snapshot header path + res = converter.save(header_path, olap_header_msg); + LOG(INFO) << "finished convert new snapshot to old snapshot, res=" << res; + } else { + res = new_tablet_meta->save(header_path); + } + if (res != OLAP_SUCCESS) { + break; + } + + // append a single delta if request.version is end_version of cumulative delta + if (request.__isset.version) { + for (auto& rs : consistent_rowsets) { + if (rs->end_version() == request.version) { + if (rs->start_version() != request.version) { + // visible version in fe is 900 + // A need to clone 900 from B, but B's last version is 901, and 901 is not a visible version + // and 901 will be reverted + // since 900 is not the last version in B, 900 maybe compacted with other versions + // if A only get 900, then A's last version will be a comulative delta + // many codes in be assumes that the last version is a single delta + // both clone and backup restore depend on this logic + // TODO (yiguolei) fix it in the future + // res = _append_single_delta(request, data_dir); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to append single delta. res=" << res; + } + } + break; + } + } + } + } while (0); + + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to make snapshot, try to delete the snapshot path. path=" + << snapshot_id_path.c_str(); + + if (check_dir_existed(snapshot_id_path)) { + VLOG(3) << "remove snapshot path. [path=" << snapshot_id_path << "]"; + remove_all_dir(snapshot_id_path); + } + } else { + *snapshot_path = snapshot_id; + } + + return res; +} + +OLAPStatus SnapshotManager::_append_single_delta( + const TSnapshotRequest& request, DataDir* store) { + OLAPStatus res = OLAP_SUCCESS; + string root_path = store->path(); + TabletMetaSharedPtr new_tablet_meta(new (nothrow) TabletMeta()); + if (new_tablet_meta == nullptr) { + LOG(WARNING) << "fail to malloc TabletMeta."; + return OLAP_ERR_MALLOC_ERROR; + } + + res = TabletMetaManager::get_meta(store, request.tablet_id, request.schema_hash, new_tablet_meta); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create tablet from header file. " + << " tablet_id=" << request.tablet_id + << " schema_hash=" << request.schema_hash; + return res; + } + auto tablet = Tablet::create_tablet_from_meta(new_tablet_meta, store); + if (tablet == nullptr) { + LOG(WARNING) << "fail to load tablet. " + << " res=" << res + << " tablet_id=" << request.tablet_id + << " schema_hash=" << request.schema_hash; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + res = tablet->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load tablet. 
[res=" << res + << " header_path=" << store->path(); + return res; + } + + const RowsetSharedPtr lastest_version = tablet->rowset_with_max_version(); + if (lastest_version->start_version() != request.version) { + TPushReq empty_push; + empty_push.tablet_id = request.tablet_id; + empty_push.schema_hash = request.schema_hash; + empty_push.version = request.version + 1; + empty_push.version_hash = 0; + + PushHandler handler; + // res = handler.process(tablet, empty_push, PUSH_NORMAL, NULL); + // TODO (yiguolei) should create a empty version, call create new rowset meta and set version + // just return success to skip push a empty rowset into the snapshot since has alreay removed + // batch process code from push handler + res = OLAP_SUCCESS; + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to push empty version. " + << " res=" << res + << " version=" << empty_push.version; + return res; + } + } + return res; +} + +} // namespace doris diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h new file mode 100644 index 00000000000000..ef8131cda77c4f --- /dev/null +++ b/be/src/olap/snapshot_manager.h @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#ifndef DORIS_BE_SRC_OLAP_SNAPSHOT_MANAGER_H
+#define DORIS_BE_SRC_OLAP_SNAPSHOT_MANAGER_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common/status.h"
+#include "olap/field.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/column_data.h"
+#include "olap/olap_define.h"
+#include "olap/tablet.h"
+#include "olap/tablet_meta_manager.h"
+#include "olap/push_handler.h"
+#include "olap/data_dir.h"
+#include "util/file_utils.h"
+#include "util/doris_metrics.h"
+
+namespace doris {
+
+class SnapshotManager {
+
+public:
+    ~SnapshotManager() {}
+    // @brief Create a snapshot.
+    // @param tablet_id [in] id of the source tablet
+    // @param schema_hash [in] schema hash of the source tablet; together with tablet_id it uniquely identifies a tablet
+    // @param snapshot_path [out] path of the newly created snapshot
+    OLAPStatus make_snapshot(
+            const TSnapshotRequest& request,
+            std::string* snapshot_path);
+
+    std::string get_schema_hash_full_path(
+            const TabletSharedPtr& ref_tablet,
+            const std::string& location) const;
+
+    // @brief Release a snapshot.
+    // @param snapshot_path [in] path of the snapshot to release, up to the snapshot id only
+    OLAPStatus release_snapshot(const std::string& snapshot_path);
+
+    static SnapshotManager* instance();
+
+    OLAPStatus convert_rowset_ids(DataDir& data_dir, const string& clone_dir, int64_t tablet_id,
+        const int32_t& schema_hash, TabletSharedPtr tablet);
+
+private:
+    SnapshotManager()
+        : _snapshot_base_id(0) {}
+
+    OLAPStatus _calc_snapshot_id_path(
+            const TabletSharedPtr& tablet,
+            std::string* out_path);
+
+    std::string _get_header_full_path(
+            const TabletSharedPtr& ref_tablet,
+            const std::string& schema_hash_path) const;
+
+    OLAPStatus _link_index_and_data_files(
+            const std::string& header_path,
+            const TabletSharedPtr& ref_tablet,
+            const std::vector& consistent_rowsets);
+
+    OLAPStatus _create_snapshot_files(
+            const TabletSharedPtr& ref_tablet,
+            const TSnapshotRequest& request,
+            std::string* snapshot_path,
+            int32_t snapshot_version);
+
+    OLAPStatus _prepare_snapshot_dir(const TabletSharedPtr& ref_tablet,
+            std::string* snapshot_id_path);
+
+    OLAPStatus _append_single_delta(
+            const TSnapshotRequest& request,
+            DataDir* store);
+
+    OLAPStatus _rename_rowset_id(const RowsetMetaPB& rs_meta_pb, const string& new_path,
+            DataDir& data_dir, TabletSchema& tablet_schema, RowsetId& next_id, RowsetMetaPB* new_rs_meta_pb);
+
+private:
+    static SnapshotManager* _s_instance;
+    static std::mutex _mlock;
+
+
+    // snapshot
+    Mutex _snapshot_mutex;
+    uint64_t _snapshot_base_id;
+}; // SnapshotManager
+
+} // doris
+
+#endif
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
new file mode 100644
index 00000000000000..c0e74945743619
--- /dev/null
+++ b/be/src/olap/storage_engine.cpp
@@ -0,0 +1,1005 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
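The convert_rowset_ids() path declared above rewrites every cloned rowset under a fresh id that is strictly greater than any id present in the cloned meta, so the restored tablet can never hand out a colliding id later. A minimal sketch of that rule (treating RowsetId as a plain integer is an assumption of the sketch, not engine code):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    using RowsetId = int64_t;  // stand-in for the engine's rowset id type

    // Mirror of the rule in convert_rowset_ids(): bump the tablet's id counter
    // past the largest id seen in the cloned visible/incremental rowset metas.
    RowsetId pick_next_rowset_id(const std::vector<RowsetId>& cloned_ids,
                                 RowsetId tablet_next_id) {
        RowsetId max_cloned = 0;
        for (RowsetId id : cloned_ids) {
            max_cloned = std::max(max_cloned, id);
        }
        return std::max(tablet_next_id, max_cloned + 1);
    }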
+ +#include "olap/storage_engine.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "olap/base_compaction.h" +#include "olap/cumulative_compaction.h" +#include "olap/lru_cache.h" +#include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" +#include "olap/push_handler.h" +#include "olap/reader.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset_factory.h" +#include "olap/schema_change.h" +#include "olap/data_dir.h" +#include "olap/utils.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/rowset/column_data_writer.h" +#include "olap/olap_snapshot_converter.h" +#include "util/time.h" +#include "util/doris_metrics.h" +#include "util/pretty_printer.h" +#include "util/file_utils.h" +#include "agent/cgroups_mgr.h" + +using apache::thrift::ThriftDebugString; +using boost::filesystem::canonical; +using boost::filesystem::directory_iterator; +using boost::filesystem::path; +using boost::filesystem::recursive_directory_iterator; +using std::back_inserter; +using std::copy; +using std::inserter; +using std::list; +using std::map; +using std::nothrow; +using std::pair; +using std::priority_queue; +using std::set; +using std::set_difference; +using std::string; +using std::stringstream; +using std::vector; + +namespace doris { + +StorageEngine* StorageEngine::_s_instance = nullptr; + +static Status _validate_options(const EngineOptions& options) { + if (options.store_paths.empty()) { + return Status::InternalError("store paths is empty");; + } + return Status::OK(); +} + +Status StorageEngine::open(const EngineOptions& options, StorageEngine** engine_ptr) { + RETURN_IF_ERROR(_validate_options(options)); + std::unique_ptr engine(new StorageEngine(options)); + auto st = engine->open(); + if (st != OLAP_SUCCESS) { + LOG(WARNING) << "engine open failed, res=" << st; + return Status::InternalError("open engine failed"); + } + st = engine->_start_bg_worker(); + if (st != OLAP_SUCCESS) { + LOG(WARNING) << "engine start background failed, res=" << st; + return Status::InternalError("open engine failed"); + } + *engine_ptr = engine.release(); + return Status::OK(); +} + +StorageEngine::StorageEngine(const EngineOptions& options) + : _options(options), + _available_storage_medium_type_count(0), + _effective_cluster_id(-1), + _is_all_cluster_id_exist(true), + _is_drop_tables(false), + _index_stream_lru_cache(NULL), + _is_report_disk_state_already(false), + _is_report_tablet_already(false), + _tablet_manager(new TabletManager()), + _txn_manager(new TxnManager()) { + if (_s_instance == nullptr) { + _s_instance = this; + } +} + +StorageEngine::~StorageEngine() { + clear(); +} + +void StorageEngine::load_data_dirs(const std::vector& data_dirs) { + std::vector threads; + for (auto data_dir : data_dirs) { + threads.emplace_back([data_dir] { + auto res = data_dir->load(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "io error when init load tables. 
res=" << res + << ", data dir=" << data_dir->path(); + } + }); + } + for (auto& thread : threads) { + thread.join(); + } + + // check whether all data dir convert successfully + for (auto data_dir : data_dirs) { + if (!data_dir->convert_old_data_success()) { + // if any dir convert failed, exit the process + LOG(FATAL) << "dir = " << data_dir->path() << "convert failed"; + } + } + + std::vector clean_old_file_threads; + for (auto data_dir : data_dirs) { + clean_old_file_threads.emplace_back([data_dir] { + data_dir->set_convert_finished(); + auto res = data_dir->remove_old_meta_and_files(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to clean old files dir = " << data_dir->path() + << " res = " << res; + } + }); + } + + for (auto& thread : clean_old_file_threads) { + thread.detach(); + } +} + +OLAPStatus StorageEngine::open() { + // init store_map + for (auto& path : _options.store_paths) { + DataDir* store = new DataDir(path.path, path.capacity_bytes, + _tablet_manager.get(), _txn_manager.get()); + auto st = store->init(); + if (!st.ok()) { + LOG(WARNING) << "Store load failed, path=" << path.path; + return OLAP_ERR_INVALID_ROOT_PATH; + } + _store_map.emplace(path.path, store); + } + _effective_cluster_id = config::cluster_id; + auto res = check_all_root_path_cluster_id(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to check cluster info. res=" << res; + return res; + } + + _update_storage_medium_type_count(); + + auto cache = new_lru_cache(config::file_descriptor_cache_capacity); + if (cache == nullptr) { + LOG(WARNING) << "failed to init file descriptor LRUCache"; + _tablet_manager->clear(); + return OLAP_ERR_INIT_FAILED; + } + FileHandler::set_fd_cache(cache); + + // 初始化LRUCache + // cache大小可通过配置文件配置 + _index_stream_lru_cache = new_lru_cache(config::index_stream_cache_capacity); + if (_index_stream_lru_cache == NULL) { + LOG(WARNING) << "failed to init index stream LRUCache"; + _tablet_manager->clear(); + return OLAP_ERR_INIT_FAILED; + } + + // 初始化CE调度器 + int32_t cumulative_compaction_num_threads = config::cumulative_compaction_num_threads; + int32_t base_compaction_num_threads = config::base_compaction_num_threads; + uint32_t file_system_num = get_file_system_count(); + _max_cumulative_compaction_task_per_disk = (cumulative_compaction_num_threads + file_system_num - 1) / file_system_num; + _max_base_compaction_task_per_disk = (base_compaction_num_threads + file_system_num - 1) / file_system_num; + + auto dirs = get_stores(); + load_data_dirs(dirs); + // 取消未完成的SchemaChange任务 + _tablet_manager->cancel_unfinished_schema_change(); + + return OLAP_SUCCESS; +} + +void StorageEngine::_update_storage_medium_type_count() { + set available_storage_medium_types; + + std::lock_guard l(_store_lock); + for (auto& it : _store_map) { + if (it.second->is_used()) { + available_storage_medium_types.insert(it.second->storage_medium()); + } + } + + _available_storage_medium_type_count = available_storage_medium_types.size(); + if (_tablet_manager != nullptr) { + _tablet_manager->update_storage_medium_type_count(_available_storage_medium_type_count); + } +} + + +OLAPStatus StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id) { + OLAPStatus res = OLAP_SUCCESS; + + if (cluster_id == -1 && _effective_cluster_id == -1) { + // maybe this is a new cluster, cluster id will get from heartbeate + return res; + } else if (cluster_id != -1 && _effective_cluster_id == -1) { + _effective_cluster_id = cluster_id; + } else if (cluster_id == -1 && _effective_cluster_id != -1) { + // 
_effective_cluster_id is the right effective cluster id + return res; + } else { + if (cluster_id != _effective_cluster_id) { + LOG(WARNING) << "multiple cluster ids is not equal. id1=" << _effective_cluster_id + << " id2=" << cluster_id; + return OLAP_ERR_INVALID_CLUSTER_INFO; + } + } + + return res; +} + +void StorageEngine::set_store_used_flag(const string& path, bool is_used) { + std::lock_guard l(_store_lock); + auto it = _store_map.find(path); + if (it == _store_map.end()) { + LOG(WARNING) << "store not exist, path=" << path; + } + + it->second->set_is_used(is_used); + _update_storage_medium_type_count(); +} + +void StorageEngine::get_all_available_root_path(std::vector* available_paths) { + available_paths->clear(); + std::lock_guard l(_store_lock); + for (auto& it : _store_map) { + if (it.second->is_used()) { + available_paths->push_back(it.first); + } + } +} + +template +std::vector StorageEngine::get_stores() { + std::vector stores; + stores.reserve(_store_map.size()); + + std::lock_guard l(_store_lock); + if (include_unused) { + for (auto& it : _store_map) { + stores.push_back(it.second); + } + } else { + for (auto& it : _store_map) { + if (it.second->is_used()) { + stores.push_back(it.second); + } + } + } + return stores; +} + +template std::vector StorageEngine::get_stores(); +template std::vector StorageEngine::get_stores(); + +OLAPStatus StorageEngine::get_all_data_dir_info(vector* data_dir_infos) { + OLAPStatus res = OLAP_SUCCESS; + data_dir_infos->clear(); + + MonotonicStopWatch timer; + timer.start(); + int tablet_counter = 0; + + // get all root path info and construct a path map. + // path -> DataDirInfo + std::map path_map; + { + std::lock_guard l(_store_lock); + for (auto& it : _store_map) { + std::string path = it.first; + path_map.emplace(path, it.second->get_dir_info()); + // if this path is not used, init it's info + if (!path_map[path].is_used) { + path_map[path].capacity = 1; + path_map[path].data_used_capacity = 0; + path_map[path].available = 0; + path_map[path].storage_medium = TStorageMedium::HDD; + } else { + path_map[path].storage_medium = it.second->storage_medium(); + } + } + } + + // for each tablet, get it's data size, and accumulate the path 'data_used_capacity' + // which the tablet belongs to. + _tablet_manager->update_root_path_info(&path_map, &tablet_counter); + + // add path info to data_dir_infos + for (auto& entry : path_map) { + data_dir_infos->emplace_back(entry.second); + } + + // get available capacity of each path + for (auto& info: *data_dir_infos) { + if (info.is_used) { + _get_path_available_capacity(info.path, &info.available); + } + } + timer.stop(); + LOG(INFO) << "get root path info cost: " << timer.elapsed_time() / 1000000 + << " ms. 
tablet counter: " << tablet_counter; + + return res; +} + +void StorageEngine::start_disk_stat_monitor() { + for (auto& it : _store_map) { + it.second->health_check(); + } + _update_storage_medium_type_count(); + _delete_tablets_on_unused_root_path(); + + // if drop tables + // notify disk_state_worker_thread and tablet_worker_thread until they received + if (_is_drop_tables) { + report_notify(true); + + bool is_report_disk_state_expected = true; + bool is_report_tablet_expected = true; + bool is_report_disk_state_exchanged = + _is_report_disk_state_already.compare_exchange_strong(is_report_disk_state_expected, false); + bool is_report_tablet_exchanged = + _is_report_tablet_already.compare_exchange_strong(is_report_tablet_expected, false); + if (is_report_disk_state_exchanged && is_report_tablet_exchanged) { + _is_drop_tables = false; + } + } +} + +bool StorageEngine::_used_disk_not_enough(uint32_t unused_num, uint32_t total_num) { + return ((total_num == 0) || (unused_num * 100 / total_num > _min_percentage_of_error_disk)); +} + +OLAPStatus StorageEngine::check_all_root_path_cluster_id() { + int32_t cluster_id = -1; + for (auto& it : _store_map) { + int32_t tmp_cluster_id = it.second->cluster_id(); + if (tmp_cluster_id == -1) { + _is_all_cluster_id_exist = false; + } else if (tmp_cluster_id == cluster_id) { + // both hava right cluster id, do nothing + } else if (cluster_id == -1) { + cluster_id = tmp_cluster_id; + } else { + LOG(WARNING) << "multiple cluster ids is not equal. one=" << cluster_id + << ", other=" << tmp_cluster_id; + return OLAP_ERR_INVALID_CLUSTER_INFO; + } + } + + // judge and get effective cluster id + OLAPStatus res = OLAP_SUCCESS; + res = _judge_and_update_effective_cluster_id(cluster_id); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to judge and update effective cluster id. 
res=" << res; + return res; + } + + // write cluster id into cluster_id_path if get effective cluster id success + if (_effective_cluster_id != -1 && !_is_all_cluster_id_exist) { + set_cluster_id(_effective_cluster_id); + } + + return res; +} + +Status StorageEngine::set_cluster_id(int32_t cluster_id) { + std::lock_guard l(_store_lock); + for (auto& it : _store_map) { + RETURN_IF_ERROR(it.second->set_cluster_id(cluster_id)); + } + _effective_cluster_id = cluster_id; + _is_all_cluster_id_exist = true; + return Status::OK(); +} + +std::vector StorageEngine::get_stores_for_create_tablet( + TStorageMedium::type storage_medium) { + std::vector stores; + { + std::lock_guard l(_store_lock); + for (auto& it : _store_map) { + if (it.second->is_used()) { + if (_available_storage_medium_type_count == 1 + || it.second->storage_medium() == storage_medium) { + stores.push_back(it.second); + } + } + } + } + std::random_device rd; + srand(rd()); + std::random_shuffle(stores.begin(), stores.end()); + return stores; +} + +DataDir* StorageEngine::get_store(const std::string& path) { + std::lock_guard l(_store_lock); + auto it = _store_map.find(path); + if (it == std::end(_store_map)) { + return nullptr; + } + return it->second; +} + +void StorageEngine::_delete_tablets_on_unused_root_path() { + vector tablet_info_vec; + uint32_t unused_root_path_num = 0; + uint32_t total_root_path_num = 0; + + std::lock_guard l(_store_lock); + if (_store_map.size() == 0) { + return; + } + + for (auto& it : _store_map) { + total_root_path_num++; + if (it.second->is_used()) { + continue; + } + it.second->clear_tablets(&tablet_info_vec); + ++unused_root_path_num; + } + + if (_used_disk_not_enough(unused_root_path_num, total_root_path_num)) { + LOG(FATAL) << "engine stop running, because more than " << _min_percentage_of_error_disk + << " disks error. total_disks=" << total_root_path_num + << ", error_disks=" << unused_root_path_num; + exit(0); + } + + if (!tablet_info_vec.empty()) { + _is_drop_tables = true; + } + + _tablet_manager->drop_tablets_on_error_root_path(tablet_info_vec); +} + +OLAPStatus StorageEngine::_get_path_available_capacity( + const string& root_path, + int64_t* disk_available) { + OLAPStatus res = OLAP_SUCCESS; + + try { + boost::filesystem::path path_name(root_path); + boost::filesystem::space_info path_info = boost::filesystem::space(path_name); + *disk_available = path_info.available; + } catch (boost::filesystem::filesystem_error& e) { + LOG(WARNING) << "get space info failed. path: " << root_path << " erro:" << e.what(); + return OLAP_ERR_STL_ERROR; + } + + return res; +} + +OLAPStatus StorageEngine::clear() { + // 删除lru中所有内容,其实进程退出这么做本身意义不大,但对单测和更容易发现问题还是有很大意义的 + delete FileHandler::get_fd_cache(); + FileHandler::set_fd_cache(nullptr); + SAFE_DELETE(_index_stream_lru_cache); + std::lock_guard l(_store_lock); + for (auto& store_pair : _store_map) { + delete store_pair.second; + store_pair.second = nullptr; + } + _store_map.clear(); + return OLAP_SUCCESS; +} + +void StorageEngine::clear_transaction_task(const TTransactionId transaction_id, + const vector partition_ids) { + LOG(INFO) << "begin to clear transaction task. 
transaction_id=" << transaction_id; + + for (const TPartitionId& partition_id : partition_ids) { + std::map tablet_infos; + StorageEngine::instance()->txn_manager()->get_txn_related_tablets(transaction_id, partition_id, &tablet_infos); + + // each tablet + for (auto& tablet_info : tablet_infos) { + // should use tablet uid to ensure clean txn correctly + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_info.first.tablet_id, + tablet_info.first.schema_hash, tablet_info.first.tablet_uid); + OlapMeta* meta = nullptr; + if (tablet != nullptr) { + meta = tablet->data_dir()->get_meta(); + } + StorageEngine::instance()->txn_manager()->delete_txn(meta, partition_id, transaction_id, + tablet_info.first.tablet_id, tablet_info.first.schema_hash, + tablet_info.first.tablet_uid); + } + } + LOG(INFO) << "finish to clear transaction task. transaction_id=" << transaction_id; +} + +TabletSharedPtr StorageEngine::create_tablet(const AlterTabletType alter_type, + const TCreateTabletReq& request, + const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet) { + // Get all available stores, use data_dir of ref_tablet when doing schema change + std::vector stores; + if (!is_schema_change_tablet) { + stores = get_stores_for_create_tablet(request.storage_medium); + if (stores.empty()) { + LOG(WARNING) << "there is no available disk that can be used to create tablet."; + return nullptr; + } + } else { + stores.push_back(ref_tablet->data_dir()); + } + + return _tablet_manager->create_tablet(alter_type, request, is_schema_change_tablet, ref_tablet, stores); +} + +void StorageEngine::start_clean_fd_cache() { + VLOG(10) << "start clean file descritpor cache"; + FileHandler::get_fd_cache()->prune(); + VLOG(10) << "end clean file descritpor cache"; +} + +void StorageEngine::perform_cumulative_compaction(DataDir* data_dir) { + TabletSharedPtr best_tablet = _tablet_manager->find_best_tablet_to_compaction(CompactionType::CUMULATIVE_COMPACTION, data_dir); + if (best_tablet == nullptr) { return; } + + DorisMetrics::cumulative_compaction_request_total.increment(1); + CumulativeCompaction cumulative_compaction; + OLAPStatus res = cumulative_compaction.init(best_tablet); + if (res != OLAP_SUCCESS) { + if (res != OLAP_ERR_CUMULATIVE_REPEAT_INIT && res != OLAP_ERR_CE_TRY_CE_LOCK_ERROR) { + best_tablet->set_last_compaction_failure_time(UnixMillis()); + if (res != OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS) { + LOG(WARNING) << "failed to init cumulative compaction" + << ", table=" << best_tablet->full_name() + << ", res=" << res; + DorisMetrics::cumulative_compaction_request_failed.increment(1); + } + } + return; + } + + res = cumulative_compaction.run(); + if (res != OLAP_SUCCESS) { + DorisMetrics::cumulative_compaction_request_failed.increment(1); + best_tablet->set_last_compaction_failure_time(UnixMillis()); + LOG(WARNING) << "failed to do cumulative compaction" + << ", table=" << best_tablet->full_name() + << ", res=" << res; + return; + } + best_tablet->set_last_compaction_failure_time(0); +} + +void StorageEngine::perform_base_compaction(DataDir* data_dir) { + TabletSharedPtr best_tablet = _tablet_manager->find_best_tablet_to_compaction(CompactionType::BASE_COMPACTION, data_dir); + if (best_tablet == nullptr) { return; } + + DorisMetrics::base_compaction_request_total.increment(1); + BaseCompaction base_compaction; + OLAPStatus res = base_compaction.init(best_tablet); + if (res != OLAP_SUCCESS) { + if (res != OLAP_ERR_BE_TRY_BE_LOCK_ERROR && res != OLAP_ERR_BE_NO_SUITABLE_VERSION) { + 
DorisMetrics::base_compaction_request_failed.increment(1);
+            best_tablet->set_last_compaction_failure_time(UnixMillis());
+            LOG(WARNING) << "failed to init base compaction"
+                         << ", table=" << best_tablet->full_name()
+                         << ", res=" << res;
+        }
+        return;
+    }
+
+    res = base_compaction.run();
+    if (res != OLAP_SUCCESS) {
+        DorisMetrics::base_compaction_request_failed.increment(1);
+        best_tablet->set_last_compaction_failure_time(UnixMillis());
+        LOG(WARNING) << "failed to do base compaction"
+                     << ", table=" << best_tablet->full_name()
+                     << ", res=" << res;
+        return;
+    }
+    best_tablet->set_last_compaction_failure_time(0);
+}
+
+void StorageEngine::get_cache_status(rapidjson::Document* document) const {
+    return _index_stream_lru_cache->get_cache_status(document);
+}
+
+OLAPStatus StorageEngine::start_trash_sweep(double* usage) {
+    OLAPStatus res = OLAP_SUCCESS;
+    LOG(INFO) << "start trash and snapshot sweep.";
+
+    const uint32_t snapshot_expire = config::snapshot_expire_time_sec;
+    const uint32_t trash_expire = config::trash_file_expire_time_sec;
+    const double guard_space = config::disk_capacity_insufficient_percentage / 100.0;
+    std::vector data_dir_infos;
+    res = get_all_data_dir_info(&data_dir_infos);
+    if (res != OLAP_SUCCESS) {
+        LOG(WARNING) << "failed to get root path stat info when sweep trash.";
+        return res;
+    }
+
+    time_t now = time(nullptr); // get the current UTC time
+    tm local_tm_now;
+    if (localtime_r(&now, &local_tm_now) == nullptr) {
+        LOG(WARNING) << "fail to localtime_r time. time=" << now;
+        return OLAP_ERR_OS_ERROR;
+    }
+    const time_t local_now = mktime(&local_tm_now); // convert to local calendar time
+
+    for (DataDirInfo& info : data_dir_infos) {
+        if (!info.is_used) {
+            continue;
+        }
+
+        double curr_usage = (info.capacity - info.available)
+                / (double) info.capacity;
+        *usage = *usage > curr_usage ? *usage : curr_usage;
+
+        OLAPStatus curr_res = OLAP_SUCCESS;
+        string snapshot_path = info.path + SNAPSHOT_PREFIX;
+        curr_res = _do_sweep(snapshot_path, local_now, snapshot_expire);
+        if (curr_res != OLAP_SUCCESS) {
+            LOG(WARNING) << "failed to sweep snapshot. path=" << snapshot_path
+                         << ", err_code=" << curr_res;
+            res = curr_res;
+        }
+
+        string trash_path = info.path + TRASH_PREFIX;
+        curr_res = _do_sweep(trash_path, local_now,
+                curr_usage > guard_space ? 0 : trash_expire);
+        if (curr_res != OLAP_SUCCESS) {
+            LOG(WARNING) << "failed to sweep trash. 
[path=%s" << trash_path + << ", err_code=" << curr_res; + res = curr_res; + } + } + + // clear expire incremental rowset, move deleted tablet to trash + _tablet_manager->start_trash_sweep(); + + // clean rubbish transactions + _clean_unused_txns(); + + return res; +} + +void StorageEngine::_clean_unused_txns() { + std::set tablet_infos; + _txn_manager->get_all_related_tablets(&tablet_infos); + for (auto& tablet_info : tablet_infos) { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.schema_hash, tablet_info.tablet_uid, true); + if (tablet == nullptr) { + // TODO(ygl) : should check if tablet still in meta, it's a improvement + // case 1: tablet still in meta, just remove from memory + // case 2: tablet not in meta store, remove rowset from meta + // currently just remove them from memory + // nullptr to indicate not remove them from meta store + _txn_manager->force_rollback_tablet_related_txns(nullptr, tablet_info.tablet_id, tablet_info.schema_hash, + tablet_info.tablet_uid); + } + } +} + +OLAPStatus StorageEngine::_do_sweep( + const string& scan_root, const time_t& local_now, const uint32_t expire) { + OLAPStatus res = OLAP_SUCCESS; + if (!check_dir_existed(scan_root)) { + // dir not existed. no need to sweep trash. + return res; + } + + try { + path boost_scan_root(scan_root); + directory_iterator item(boost_scan_root); + directory_iterator item_end; + for (; item != item_end; ++item) { + string path_name = item->path().string(); + string dir_name = item->path().filename().string(); + string str_time = dir_name.substr(0, dir_name.find('.')); + tm local_tm_create; + if (strptime(str_time.c_str(), "%Y%m%d%H%M%S", &local_tm_create) == nullptr) { + LOG(WARNING) << "fail to strptime time. [time=" << str_time << "]"; + res = OLAP_ERR_OS_ERROR; + continue; + } + if (difftime(local_now, mktime(&local_tm_create)) >= expire) { + if (remove_all_dir(path_name) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to remove file or directory. path=" << path_name; + res = OLAP_ERR_OS_ERROR; + continue; + } + } + } + } catch (...) { + LOG(WARNING) << "Exception occur when scan directory. path=" << scan_root; + res = OLAP_ERR_IO_ERROR; + } + + return res; +} + +void StorageEngine::start_delete_unused_rowset() { + _gc_mutex.lock(); + for (auto it = _unused_rowsets.begin(); it != _unused_rowsets.end();) { + if (it->second.use_count() != 1) { + ++it; + } else if (it->second->need_delete_file()){ + LOG(INFO) << "start to remove rowset:" << it->second->rowset_id() + << ", version:" << it->second->version().first << "-" << it->second->version().second; + OLAPStatus status = it->second->remove(); + LOG(INFO) << "remove rowset:" << it->second->rowset_id() << " finished. 
status:" << status; + it = _unused_rowsets.erase(it); + } + } + _gc_mutex.unlock(); +} + +void StorageEngine::add_unused_rowset(RowsetSharedPtr rowset) { + if (rowset == nullptr) { return; } + _gc_mutex.lock(); + LOG(INFO) << "add unused rowset, rowset id:" << rowset->rowset_id() + << ", version:" << rowset->version().first + << "-" << rowset->version().second + << ", unique id:" << rowset->unique_id(); + auto it = _unused_rowsets.find(rowset->unique_id()); + if (it == _unused_rowsets.end()) { + rowset->set_need_delete_file(true); + _unused_rowsets[rowset->unique_id()] = rowset; + } + _gc_mutex.unlock(); +} + +// TODO(zc): refactor this funciton +OLAPStatus StorageEngine::create_tablet(const TCreateTabletReq& request) { + // Get all available stores, use ref_root_path if the caller specified + std::vector stores; + stores = get_stores_for_create_tablet(request.storage_medium); + if (stores.empty()) { + LOG(WARNING) << "there is no available disk that can be used to create tablet."; + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } + return _tablet_manager->create_tablet(request, stores); +} + +OLAPStatus StorageEngine::recover_tablet_until_specfic_version( + const TRecoverTabletReq& recover_tablet_req) { + TabletSharedPtr tablet = _tablet_manager->get_tablet(recover_tablet_req.tablet_id, + recover_tablet_req.schema_hash); + if (tablet == nullptr) { return OLAP_ERR_TABLE_NOT_FOUND; } + RETURN_NOT_OK(tablet->recover_tablet_until_specfic_version(recover_tablet_req.version, + recover_tablet_req.version_hash)); + return OLAP_SUCCESS; +} + +OLAPStatus StorageEngine::obtain_shard_path( + TStorageMedium::type storage_medium, std::string* shard_path, DataDir** store) { + LOG(INFO) << "begin to process obtain root path. storage_medium=" << storage_medium; + OLAPStatus res = OLAP_SUCCESS; + + if (shard_path == NULL) { + LOG(WARNING) << "invalid output parameter which is null pointer."; + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } + + auto stores = get_stores_for_create_tablet(storage_medium); + if (stores.empty()) { + LOG(WARNING) << "no available disk can be used to create tablet."; + return OLAP_ERR_NO_AVAILABLE_ROOT_PATH; + } + + uint64_t shard = 0; + res = stores[0]->get_shard(&shard); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to get root path shard. res=" << res; + return res; + } + + stringstream root_path_stream; + root_path_stream << stores[0]->path() << DATA_PREFIX << "/" << shard; + *shard_path = root_path_stream.str(); + *store = stores[0]; + + LOG(INFO) << "success to process obtain root path. path=" << shard_path; + return res; +} + +OLAPStatus StorageEngine::load_header( + const string& shard_path, + const TCloneReq& request) { + LOG(INFO) << "begin to process load headers." + << "tablet_id=" << request.tablet_id + << ", schema_hash=" << request.schema_hash; + OLAPStatus res = OLAP_SUCCESS; + + DataDir* store = nullptr; + { + // TODO(zc) + try { + auto store_path = + boost::filesystem::path(shard_path).parent_path().parent_path().string(); + store = get_store(store_path); + if (store == nullptr) { + LOG(WARNING) << "invalid shard path, path=" << shard_path; + return OLAP_ERR_INVALID_ROOT_PATH; + } + } catch (...) 
{ + LOG(WARNING) << "invalid shard path, path=" << shard_path; + return OLAP_ERR_INVALID_ROOT_PATH; + } + } + + stringstream schema_hash_path_stream; + schema_hash_path_stream << shard_path + << "/" << request.tablet_id + << "/" << request.schema_hash; + // not surely, reload and restore tablet action call this api + // reset tablet uid here + + string header_path = TabletMeta::construct_header_file_path(schema_hash_path_stream.str(), request.tablet_id); + res = TabletMeta::reset_tablet_uid(header_path); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail reset tablet uid file path = " << header_path + << " res=" << res; + return res; + } + res = _tablet_manager->load_tablet_from_dir( + store, + request.tablet_id, request.schema_hash, + schema_hash_path_stream.str(), false); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to process load headers. res=" << res; + return res; + } + + LOG(INFO) << "success to process load headers."; + return res; +} + +OLAPStatus StorageEngine::execute_task(EngineTask* task) { + // 1. add wlock to related tablets + // 2. do prepare work + // 3. release wlock + { + vector tablet_infos; + task->get_related_tablets(&tablet_infos); + sort(tablet_infos.begin(), tablet_infos.end()); + vector related_tablets; + for (TabletInfo& tablet_info : tablet_infos) { + TabletSharedPtr tablet = _tablet_manager->get_tablet( + tablet_info.tablet_id, tablet_info.schema_hash); + if (tablet != nullptr) { + related_tablets.push_back(tablet); + tablet->obtain_header_wrlock(); + } else { + LOG(WARNING) << "could not get tablet before prepare tabletid: " + << tablet_info.tablet_id; + } + } + // add write lock to all related tablets + OLAPStatus prepare_status = task->prepare(); + for (TabletSharedPtr& tablet : related_tablets) { + tablet->release_header_lock(); + } + if (prepare_status != OLAP_SUCCESS) { + return prepare_status; + } + } + + // do execute work without lock + OLAPStatus exec_status = task->execute(); + if (exec_status != OLAP_SUCCESS) { + return exec_status; + } + + // 1. add wlock to related tablets + // 2. do finish work + // 3. 
release wlock + { + vector tablet_infos; + // related tablets may be changed after execute task, so that get them here again + task->get_related_tablets(&tablet_infos); + sort(tablet_infos.begin(), tablet_infos.end()); + vector related_tablets; + for (TabletInfo& tablet_info : tablet_infos) { + TabletSharedPtr tablet = _tablet_manager->get_tablet( + tablet_info.tablet_id, tablet_info.schema_hash); + if (tablet != nullptr) { + related_tablets.push_back(tablet); + tablet->obtain_header_wrlock(); + } else { + LOG(WARNING) << "could not get tablet before finish tabletid: " + << tablet_info.tablet_id; + } + } + // add write lock to all related tablets + OLAPStatus fin_status = task->finish(); + for (TabletSharedPtr& tablet : related_tablets) { + tablet->release_header_lock(); + } + return fin_status; + } +} + +// check whether any unused rowsets's id equal to rowset_id +bool StorageEngine::check_rowset_id_in_unused_rowsets(RowsetId rowset_id) { + _gc_mutex.lock(); + for (auto& _unused_rowset_pair : _unused_rowsets) { + if (_unused_rowset_pair.second->rowset_id() == rowset_id) { + _gc_mutex.unlock(); + return true; + } + } + _gc_mutex.unlock(); + return false; +} + +void* StorageEngine::_path_gc_thread_callback(void* arg) { +#ifdef GOOGLE_PROFILER + ProfilerRegisterThread(); +#endif + + LOG(INFO) << "try to start path gc thread!"; + uint32_t interval = config::path_gc_check_interval_second; + if (interval <= 0) { + LOG(WARNING) << "path gc thread check interval config is illegal:" << interval + << "will be forced set to half hour"; + interval = 1800; // 0.5 hour + } + + while (true) { + LOG(INFO) << "try to perform path gc!"; + // perform path gc by rowset id + ((DataDir*)arg)->perform_path_gc_by_rowsetid(); + usleep(interval * 1000000); + } + + return nullptr; +} + +void* StorageEngine::_path_scan_thread_callback(void* arg) { +#ifdef GOOGLE_PROFILER + ProfilerRegisterThread(); +#endif + + LOG(INFO) << "try to start path scan thread!"; + uint32_t interval = config::path_scan_interval_second; + if (interval <= 0) { + LOG(WARNING) << "path gc thread check interval config is illegal:" << interval + << "will be forced set to one day"; + interval = 24 * 3600; // one day + } + + while (true) { + LOG(INFO) << "try to perform path scan!"; + ((DataDir*)arg)->perform_path_scan(); + usleep(interval * 1000000); + } + + return nullptr; +} + +} // namespace doris diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h new file mode 100644 index 00000000000000..99cd6aa307c94a --- /dev/null +++ b/be/src/olap/storage_engine.h @@ -0,0 +1,345 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
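The sweep loop above decides what to delete purely from the directory name: each entry under trash/ and snapshot/ begins with a "%Y%m%d%H%M%S" timestamp, and anything older than the expiry (forced to 0 when the disk is over the capacity guard) is removed. A simplified sketch of that predicate, not engine code, with no filesystem access (strptime is POSIX):

    #include <time.h>
    #include <string>

    bool is_expired(const std::string& dir_name, time_t local_now, uint32_t expire_sec) {
        // e.g. "20190101123000.17" -> "20190101123000"
        std::string str_time = dir_name.substr(0, dir_name.find('.'));
        tm create_tm = {};
        if (strptime(str_time.c_str(), "%Y%m%d%H%M%S", &create_tm) == nullptr) {
            return false;  // unparsable names are skipped, as in _do_sweep()
        }
        return difftime(local_now, mktime(&create_tm)) >= expire_sec;
    }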
+ +#ifndef DORIS_BE_SRC_OLAP_STORAGE_ENGINE_H +#define DORIS_BE_SRC_OLAP_STORAGE_ENGINE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "agent/status.h" +#include "common/status.h" +#include "gen_cpp/AgentService_types.h" +#include "gen_cpp/BackendService_types.h" +#include "gen_cpp/MasterService_types.h" +#include "olap/atomic.h" +#include "olap/lru_cache.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/tablet.h" +#include "olap/olap_meta.h" +#include "olap/options.h" +#include "olap/rowset/segment_group.h" +#include "olap/tablet_manager.h" +#include "olap/txn_manager.h" +#include "olap/task/engine_task.h" + +namespace doris { + +class Tablet; +class DataDir; +class EngineTask; + +// StorageEngine singleton to manage all Table pointers. +// Providing add/drop/get operations. +// StorageEngine instance doesn't own the Table resources, just hold the pointer, +// allocation/deallocation must be done outside. +class StorageEngine { +public: + StorageEngine() { } + StorageEngine(const EngineOptions& options); + ~StorageEngine(); + + static Status open(const EngineOptions& options, StorageEngine** engine_ptr); + + static void set_instance(StorageEngine* engine) { + _s_instance = engine; + } + + static StorageEngine* instance() { + return _s_instance; + } + + OLAPStatus create_tablet(const TCreateTabletReq& request); + + // Create new tablet for StorageEngine + // + // Return Tablet * succeeded; Otherwise, return NULL if failed + TabletSharedPtr create_tablet(const AlterTabletType alter_type, + const TCreateTabletReq& request, + const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet); + + void clear_transaction_task(const TTransactionId transaction_id, + const std::vector partition_ids); + + // Instance should be inited from create_instance + // MUST NOT be called in other circumstances. + OLAPStatus open(); + + // Clear status(tables, ...) + OLAPStatus clear(); + + void start_clean_fd_cache(); + void perform_cumulative_compaction(DataDir* data_dir); + void perform_base_compaction(DataDir* data_dir); + + // 获取cache的使用情况信息 + void get_cache_status(rapidjson::Document* document) const; + + // Note: 这里只能reload原先已经存在的root path,即re-load启动时就登记的root path + // 是允许的,但re-load全新的path是不允许的,因为此处没有彻底更新ce调度器信息 + void load_data_dirs(const std::vector& stores); + + Cache* index_stream_lru_cache() { + return _index_stream_lru_cache; + } + + // 清理trash和snapshot文件,返回清理后的磁盘使用量 + OLAPStatus start_trash_sweep(double *usage); + + template + std::vector get_stores(); + Status set_cluster_id(int32_t cluster_id); + + // @brief 设置root_path是否可用 + void set_store_used_flag(const std::string& root_path, bool is_used); + + // @brief 获取所有root_path信息 + OLAPStatus get_all_data_dir_info(std::vector* data_dir_infos); + + void get_all_available_root_path(std::vector* available_paths); + + // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位, + // 当检测到有unused标识时,从内存中删除对应表信息,磁盘数据不动。 + // 当磁盘状态为不可用,但未检测到unused标识时,需要从root_path上 + // 重新加载数据。 + void start_disk_stat_monitor(); + + // get root path for creating tablet. The returned vector of root path should be random, + // for avoiding that all the tablet would be deployed one disk. 
+ std::vector get_stores_for_create_tablet( + TStorageMedium::type storage_medium); + DataDir* get_store(const std::string& path); + DataDir* get_store(int64_t path_hash); + + uint32_t available_storage_medium_type_count() { + return _available_storage_medium_type_count; + } + + int32_t effective_cluster_id() const { + return _effective_cluster_id; + } + + uint32_t get_file_system_count() { + return _store_map.size(); + } + + void start_delete_unused_rowset(); + + void add_unused_rowset(RowsetSharedPtr rowset); + + OLAPStatus recover_tablet_until_specfic_version( + const TRecoverTabletReq& recover_tablet_req); + + // Obtain shard path for new tablet. + // + // @param [out] shard_path choose an available root_path to clone new tablet + // @return error code + OLAPStatus obtain_shard_path( + TStorageMedium::type storage_medium, + std::string* shared_path, + DataDir** store); + + // Load new tablet to make it effective. + // + // @param [in] root_path specify root path of new tablet + // @param [in] request specify new tablet info + // @return OLAP_SUCCESS if load tablet success + OLAPStatus load_header( + const std::string& shard_path, const TCloneReq& request); + + // call this if you want to trigger a disk and tablet report + void report_notify(bool is_all) { + is_all ? _report_cv.notify_all() : _report_cv.notify_one(); + } + + // call this to wait a report notification until timeout + void wait_for_report_notify(int64_t timeout_sec, bool is_tablet_report) { + std::unique_lock lk(_report_mtx); + auto cv_status = _report_cv.wait_for(lk, std::chrono::seconds(timeout_sec)); + if (cv_status == std::cv_status::no_timeout) { + is_tablet_report ? _is_report_tablet_already = true : + _is_report_disk_state_already = true; + } + } + + OLAPStatus execute_task(EngineTask* task); + + TabletManager* tablet_manager() { return _tablet_manager.get(); } + TxnManager* txn_manager() { return _txn_manager.get(); } + + bool check_rowset_id_in_unused_rowsets(RowsetId rowset_id); + +private: + OLAPStatus check_all_root_path_cluster_id(); + + bool _used_disk_not_enough(uint32_t unused_num, uint32_t total_num); + + OLAPStatus _get_path_available_capacity( + const std::string& root_path, + int64_t* disk_available); + + OLAPStatus _config_root_path_unused_flag_file( + const std::string& root_path, + std::string* unused_flag_file); + + void _delete_tablets_on_unused_root_path(); + + void _update_storage_medium_type_count(); + + OLAPStatus _judge_and_update_effective_cluster_id(int32_t cluster_id); + + OLAPStatus _start_bg_worker(); + + void _clean_unused_txns(); + + OLAPStatus _do_sweep( + const std::string& scan_root, const time_t& local_tm_now, const uint32_t expire); + + // Thread functions + // unused rowset monitor thread + void* _unused_rowset_monitor_thread_callback(void* arg); + + // base compaction thread process function + void* _base_compaction_thread_callback(void* arg, DataDir* data_dir); + + // garbage sweep thread process function. 
clear snapshot and trash folder + void* _garbage_sweeper_thread_callback(void* arg); + + // delete tablet with io error process function + void* _disk_stat_monitor_thread_callback(void* arg); + + // cumulative process function + void* _cumulative_compaction_thread_callback(void* arg, DataDir* data_dir); + + // clean file descriptors cache + void* _fd_cache_clean_callback(void* arg); + + // path gc process function + void* _path_gc_thread_callback(void* arg); + + void* _path_scan_thread_callback(void* arg); + +private: + + struct CompactionCandidate { + CompactionCandidate(uint32_t nicumulative_compaction_, int64_t tablet_id_, uint32_t index_) : + nice(nicumulative_compaction_), tablet_id(tablet_id_), disk_index(index_) {} + uint32_t nice; // 优先度 + int64_t tablet_id; + uint32_t disk_index = -1; + }; + + struct CompactionCandidateComparator { + bool operator()(const CompactionCandidate& a, const CompactionCandidate& b) { + return a.nice > b.nice; + } + }; + + struct CompactionDiskStat { + CompactionDiskStat(std::string path, uint32_t index, bool used) : + storage_path(path), + disk_index(index), + task_running(0), + task_remaining(0), + is_used(used){} + const std::string storage_path; + const uint32_t disk_index; + uint32_t task_running; + uint32_t task_remaining; + bool is_used; + }; + + typedef std::map file_system_task_count_t; + + EngineOptions _options; + std::mutex _store_lock; + std::map _store_map; + uint32_t _available_storage_medium_type_count; + + int32_t _effective_cluster_id; + bool _is_all_cluster_id_exist; + bool _is_drop_tables; + + // 错误磁盘所在百分比,超过设定的值,则engine需要退出运行 + uint32_t _min_percentage_of_error_disk; + Cache* _file_descriptor_lru_cache; + Cache* _index_stream_lru_cache; + uint32_t _max_base_compaction_task_per_disk; + uint32_t _max_cumulative_compaction_task_per_disk; + + Mutex _fs_task_mutex; + file_system_task_count_t _fs_base_compaction_task_num_map; + std::vector _cumulative_compaction_candidate; + + static StorageEngine* _s_instance; + + std::unordered_map> _gc_files; + std::unordered_map _unused_rowsets; + Mutex _gc_mutex; + + std::thread _unused_rowset_monitor_thread; + + // thread to monitor snapshot expiry + std::thread _garbage_sweeper_thread; + + // thread to monitor disk stat + std::thread _disk_stat_monitor_thread; + + // thread to run base compaction + std::vector _base_compaction_threads; + + // thread to check cumulative + std::vector _cumulative_compaction_threads; + + std::thread _fd_cache_clean_thread; + + std::vector _path_gc_threads; + + // thread to scan disk paths + std::vector _path_scan_threads; + + static atomic_t _s_request_number; + + // for tablet and disk report + std::mutex _report_mtx; + std::condition_variable _report_cv; + std::atomic_bool _is_report_disk_state_already; + std::atomic_bool _is_report_tablet_already; + + Mutex _engine_task_mutex; + + std::unique_ptr _tablet_manager; + std::unique_ptr _txn_manager; + + DISALLOW_COPY_AND_ASSIGN(StorageEngine); +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_STORAGE_ENGINE_H diff --git a/be/src/olap/store.cpp b/be/src/olap/store.cpp deleted file mode 100755 index 674c0c18d16cd7..00000000000000 --- a/be/src/olap/store.cpp +++ /dev/null @@ -1,635 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/store.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "olap/file_helper.h" -#include "olap/olap_define.h" -#include "olap/utils.h" // for check_dir_existed -#include "service/backend_options.h" -#include "util/file_utils.h" -#include "util/string_util.h" -#include "olap/olap_header_manager.h" - -namespace doris { - -static const char* const kMtabPath = "/etc/mtab"; -static const char* const kTestFilePath = "/.testfile"; - -OlapStore::OlapStore(const std::string& path, int64_t capacity_bytes) - : _path(path), - _cluster_id(-1), - _capacity_bytes(capacity_bytes), - _available_bytes(0), - _used_bytes(0), - _current_shard(0), - _is_used(false), - _to_be_deleted(false), - _test_file_read_buf(nullptr), - _test_file_write_buf(nullptr), - _meta((nullptr)) { -} - -OlapStore::~OlapStore() { - free(_test_file_read_buf); - free(_test_file_write_buf); - if (_meta != nullptr) { - delete _meta; - } -} - -Status OlapStore::load() { - _rand_seed = static_cast(time(NULL)); - if (posix_memalign((void**)&_test_file_write_buf, - DIRECT_IO_ALIGNMENT, - TEST_FILE_BUF_SIZE) != 0) { - LOG(WARNING) << "fail to allocate memory. size=" << TEST_FILE_BUF_SIZE; - return Status::InternalError("No memory"); - } - if (posix_memalign((void**)&_test_file_read_buf, - DIRECT_IO_ALIGNMENT, - TEST_FILE_BUF_SIZE) != 0) { - LOG(WARNING) << "fail to allocate memory. 
size=" << TEST_FILE_BUF_SIZE; - return Status::InternalError("No memory"); - } - RETURN_IF_ERROR(_check_path_exist()); - std::string align_tag_path = _path + ALIGN_TAG_PREFIX; - if (access(align_tag_path.c_str(), F_OK) == 0) { - LOG(WARNING) << "align tag was found, path=" << _path; - return Status::InternalError("invalid root path: "); - } - - RETURN_IF_ERROR(_init_cluster_id()); - RETURN_IF_ERROR(_init_extension_and_capacity()); - RETURN_IF_ERROR(_init_file_system()); - RETURN_IF_ERROR(_init_meta()); - - _is_used = true; - return Status::OK(); -} - -Status OlapStore::_check_path_exist() { - DIR* dirp = opendir(_path.c_str()); - if (dirp == nullptr) { - char buf[64]; - LOG(WARNING) << "opendir failed, path=" << _path - << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, 64); - return Status::InternalError("opendir failed"); - } - struct dirent dirent; - struct dirent* result = nullptr; - if (readdir_r(dirp, &dirent, &result) != 0) { - char buf[64]; - LOG(WARNING) << "readdir failed, path=" << _path - << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, 64); - closedir(dirp); - return Status::InternalError("readdir failed"); - } - closedir(dirp); - return Status::OK(); -} - -Status OlapStore::_init_cluster_id() { - std::string cluster_id_path = _path + CLUSTER_ID_PREFIX; - if (access(cluster_id_path.c_str(), F_OK) != 0) { - int fd = open(cluster_id_path.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); - if (fd < 0 || close(fd) < 0) { - char errmsg[64]; - LOG(WARNING) << "fail to create file. [path='" << cluster_id_path - << "' err='" << strerror_r(errno, errmsg, 64) << "']"; - return Status::InternalError("invalid store path: create cluster id failed"); - } - } - - // obtain lock of all cluster id paths - FILE* fp = NULL; - fp = fopen(cluster_id_path.c_str(), "r+b"); - if (fp == NULL) { - LOG(WARNING) << "fail to open cluster id path. path=" << cluster_id_path; - return Status::InternalError("invalid store path: open cluster id failed"); - } - - int lock_res = flock(fp->_fileno, LOCK_EX | LOCK_NB); - if (lock_res < 0) { - LOG(WARNING) << "fail to lock file descriptor. path=" << cluster_id_path; - fclose(fp); - fp = NULL; - return Status::InternalError("invalid store path: flock cluster id failed"); - } - - // obtain cluster id of all root paths - auto st = _read_cluster_id(cluster_id_path, &_cluster_id); - fclose(fp); - return st; -} - -Status OlapStore::_read_cluster_id(const std::string& path, int32_t* cluster_id) { - int32_t tmp_cluster_id = -1; - - std::fstream fs(path.c_str(), std::fstream::in); - if (!fs.is_open()) { - LOG(WARNING) << "fail to open cluster id path. [path='" << path << "']"; - return Status::InternalError("open file failed"); - } - - fs >> tmp_cluster_id; - fs.close(); - - if (tmp_cluster_id == -1 && (fs.rdstate() & std::fstream::eofbit) != 0) { - *cluster_id = -1; - } else if (tmp_cluster_id >= 0 && (fs.rdstate() & std::fstream::eofbit) != 0) { - *cluster_id = tmp_cluster_id; - } else { - OLAP_LOG_WARNING("fail to read cluster id from file. 
" - "[id=%d eofbit=%d failbit=%d badbit=%d]", - tmp_cluster_id, - fs.rdstate() & std::fstream::eofbit, - fs.rdstate() & std::fstream::failbit, - fs.rdstate() & std::fstream::badbit); - return Status::InternalError("cluster id file corrupt"); - } - return Status::OK(); -} - -Status OlapStore::_init_extension_and_capacity() { - boost::filesystem::path boost_path = _path; - std::string extension = boost::filesystem::canonical(boost_path).extension().string(); - if (extension != "") { - if (boost::iequals(extension, ".ssd")) { - _storage_medium = TStorageMedium::SSD; - } else if (boost::iequals(extension, ".hdd")) { - _storage_medium = TStorageMedium::HDD; - } else { - LOG(WARNING) << "store path has wrong extension. path=" << _path; - return Status::InternalError("invalid sotre path: invalid extension"); - } - } else { - _storage_medium = TStorageMedium::HDD; - } - - int64_t disk_capacity = boost::filesystem::space(boost_path).capacity; - if (_capacity_bytes == -1) { - _capacity_bytes = disk_capacity; - } else if (_capacity_bytes > disk_capacity) { - LOG(WARNING) << "root path capacity should not larger than disk capacity. " - << "path=" << _path - << ", capacity_bytes=" << _capacity_bytes - << ", disk_capacity=" << disk_capacity; - return Status::InternalError("invalid store path: invalid capacity"); - } - - std::string data_path = _path + DATA_PREFIX; - if (!check_dir_existed(data_path) && create_dir(data_path) != OLAP_SUCCESS) { - LOG(WARNING) << "failed to create data root path. path=" << data_path; - return Status::InternalError("invalid store path: failed to create data directory"); - } - - return Status::OK(); -} - -Status OlapStore::_init_file_system() { - struct stat s; - if (stat(_path.c_str(), &s) != 0) { - char errmsg[64]; - LOG(WARNING) << "stat failed, path=" << _path - << ", errno=" << errno << ", errmsg=" << strerror_r(errno, errmsg, 64); - return Status::InternalError("invalid store path: stat failed"); - } - - dev_t mount_device; - if ((s.st_mode & S_IFMT) == S_IFBLK) { - mount_device = s.st_rdev; - } else { - mount_device = s.st_dev; - } - - FILE* mount_table = nullptr; - if ((mount_table = setmntent(kMtabPath, "r")) == NULL) { - char errmsg[64]; - LOG(WARNING) << "setmntent failed, path=" << kMtabPath - << ", errno=" << errno << ", errmsg=" << strerror_r(errno, errmsg, 64); - return Status::InternalError("invalid store path: setmntent failed"); - } - - bool is_find = false; - struct mntent* mount_entry = NULL; - while ((mount_entry = getmntent(mount_table)) != NULL) { - if (strcmp(_path.c_str(), mount_entry->mnt_dir) == 0 - || strcmp(_path.c_str(), mount_entry->mnt_fsname) == 0) { - is_find = true; - break; - } - - if (stat(mount_entry->mnt_fsname, &s) == 0 && s.st_rdev == mount_device) { - is_find = true; - break; - } - - if (stat(mount_entry->mnt_dir, &s) == 0 && s.st_dev == mount_device) { - is_find = true; - break; - } - } - - endmntent(mount_table); - - if (!is_find) { - LOG(WARNING) << "fail to find file system, path=" << _path; - return Status::InternalError("invalid store path: find file system failed"); - } - - _file_system = mount_entry->mnt_fsname; - - return Status::OK(); -} - -Status OlapStore::_init_meta() { - // init path hash - _path_hash = hash_of_path(BackendOptions::get_localhost(), _path); - LOG(INFO) << "get hash of path: " << _path - << ": " << _path_hash; - - // init meta - _meta = new(std::nothrow) OlapMeta(_path); - if (_meta == nullptr) { - LOG(WARNING) << "new olap meta failed"; - return Status::InternalError("new olap meta failed"); - } - 
OLAPStatus res = _meta->init(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "init meta failed"; - return Status::InternalError("init meta failed"); - } - return Status::OK(); -} - -Status OlapStore::set_cluster_id(int32_t cluster_id) { - if (_cluster_id != -1) { - if (_cluster_id == cluster_id) { - return Status::OK(); - } - LOG(ERROR) << "going to set cluster id to already assigned store, cluster_id=" - << _cluster_id << ", new_cluster_id=" << cluster_id; - return Status::InternalError("going to set cluster id to already assigned store"); - } - return _write_cluster_id_to_path(_cluster_id_path(), cluster_id); -} - -Status OlapStore::_write_cluster_id_to_path(const std::string& path, int32_t cluster_id) { - std::fstream fs(path.c_str(), std::fstream::out); - if (!fs.is_open()) { - LOG(WARNING) << "fail to open cluster id path. path=" << path; - return Status::InternalError("IO Error"); - } - fs << cluster_id; - fs.close(); - return Status::OK(); -} - -void OlapStore::health_check() { - // check disk - if (_is_used) { - OLAPStatus res = OLAP_SUCCESS; - if ((res = _read_and_write_test_file()) != OLAP_SUCCESS) { - LOG(WARNING) << "store read/write test file occur IO Error. path=" << _path; - if (is_io_error(res)) { - _is_used = false; - } - } - } -} - -OLAPStatus OlapStore::_read_and_write_test_file() { - std::string test_file = _path + kTestFilePath; - - if (access(test_file.c_str(), F_OK) == 0) { - if (remove(test_file.c_str()) != 0) { - char errmsg[64]; - LOG(WARNING) << "fail to delete test file. " - << "path=" << test_file - << ", errno=" << errno << ", err=" << strerror_r(errno, errmsg, 64); - return OLAP_ERR_IO_ERROR; - } - } else { - if (errno != ENOENT) { - char errmsg[64]; - LOG(WARNING) << "fail to access test file. " - << "path=" << test_file - << ", errno=" << errno << ", err=" << strerror_r(errno, errmsg, 64); - return OLAP_ERR_IO_ERROR; - } - } - - OLAPStatus res = OLAP_SUCCESS; - FileHandler file_handler; - if ((res = file_handler.open_with_mode(test_file.c_str(), - O_RDWR | O_CREAT | O_DIRECT, - S_IRUSR | S_IWUSR)) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to create test file. path=" << test_file; - return res; - } - - for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) { - int32_t tmp_value = rand_r(&_rand_seed); - _test_file_write_buf[i] = static_cast(tmp_value); - } - - if ((res = file_handler.pwrite(_test_file_write_buf, TEST_FILE_BUF_SIZE, SEEK_SET)) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to write test file. [file_name=" << test_file << "]"; - return res; - } - - if ((res = file_handler.pread(_test_file_read_buf, TEST_FILE_BUF_SIZE, SEEK_SET)) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to read test file. [file_name=" << test_file << "]"; - return res; - } - - if (memcmp(_test_file_write_buf, _test_file_read_buf, TEST_FILE_BUF_SIZE) != 0) { - OLAP_LOG_WARNING("the test file write_buf and read_buf not equal."); - return OLAP_ERR_TEST_FILE_ERROR; - } - - if ((res = file_handler.close()) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to close test file. [file_name=" << test_file << "]"; - return res; - } - - if (remove(test_file.c_str()) != 0) { - char errmsg[64]; - VLOG(3) << "fail to delete test file. 
[err='" << strerror_r(errno, errmsg, 64) - << "' path='" << test_file << "']"; - return OLAP_ERR_IO_ERROR; - } - - return res; -} - -OLAPStatus OlapStore::get_shard(uint64_t* shard) { - OLAPStatus res = OLAP_SUCCESS; - std::lock_guard l(_mutex); - - std::stringstream shard_path_stream; - uint32_t next_shard = _current_shard; - _current_shard = (_current_shard + 1) % MAX_SHARD_NUM; - shard_path_stream << _path << DATA_PREFIX << "/" << next_shard; - std::string shard_path = shard_path_stream.str(); - if (!check_dir_existed(shard_path)) { - res = create_dir(shard_path); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to create path. [path='" << shard_path << "']"; - return res; - } - } - - *shard = next_shard; - return OLAP_SUCCESS; -} - -OlapMeta* OlapStore::get_meta() { - return _meta; -} - -OLAPStatus OlapStore::register_table(OLAPTable* table) { - std::lock_guard l(_mutex); - - TabletInfo tablet_info(table->tablet_id(), table->schema_hash()); - _tablet_set.insert(tablet_info); - return OLAP_SUCCESS; -} - -OLAPStatus OlapStore::deregister_table(OLAPTable* table) { - std::lock_guard l(_mutex); - - TabletInfo tablet_info(table->tablet_id(), table->schema_hash()); - _tablet_set.erase(tablet_info); - return OLAP_SUCCESS; -} - -std::string OlapStore::get_shard_path_from_header(const std::string& shard_string) { - return _path + DATA_PREFIX + "/" + shard_string; -} - -std::string OlapStore::get_tablet_schema_hash_path_from_header(OLAPHeader* header) { - return _path + DATA_PREFIX + "/" + std::to_string(header->shard()) - + "/" + std::to_string(header->tablet_id()) + "/" + std::to_string(header->schema_hash()); -} - -std::string OlapStore::get_tablet_path_from_header(OLAPHeader* header) { - return _path + DATA_PREFIX + "/" + std::to_string(header->shard()) - + "/" + std::to_string(header->tablet_id()); -} - -void OlapStore::find_tablet_in_trash(int64_t tablet_id, std::vector* paths) { - // path: /root_path/trash/time_label/tablet_id/schema_hash - std::string trash_path = _path + TRASH_PREFIX; - std::vector sub_dirs; - FileUtils::scan_dir(trash_path, &sub_dirs); - for (auto& sub_dir : sub_dirs) { - // sub dir is time_label - std::string sub_path = trash_path + "/" + sub_dir; - if (!FileUtils::is_dir(sub_path)) { - continue; - } - std::string tablet_path = sub_path + "/" + std::to_string(tablet_id); - bool exist = FileUtils::check_exist(tablet_path); - if (exist) { - paths->emplace_back(std::move(tablet_path)); - } - } -} - -std::string OlapStore::get_root_path_from_schema_hash_path_in_trash( - const std::string& schema_hash_dir_in_trash) { - boost::filesystem::path schema_hash_path_in_trash(schema_hash_dir_in_trash); - return schema_hash_path_in_trash.parent_path().parent_path().parent_path().parent_path().string(); -} - -void OlapStore::_deal_with_header_error(TTabletId tablet_id, TSchemaHash schema_hash, int shard) { - // path: store_path/shard/tablet_id/schema_hash - std::string schema_hash_path = path() + "/" + std::to_string(shard) - + "/" + std::to_string(tablet_id) + "/" + std::to_string(schema_hash); - std::string header_path = schema_hash_path + "/" + std::to_string(tablet_id) + ".hdr"; - OLAPStatus res = OlapHeaderManager::dump_header(this, tablet_id, schema_hash, header_path); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "dump header failed. tablet_id:" << tablet_id - << "schema_hash:" << schema_hash - << "store path:" << path(); - } else { - LOG(INFO) << "dump header successfully. 
move path:" << schema_hash_path << " to trash."; - if (move_to_trash(schema_hash_path, schema_hash_path) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to delete table. [table_path=" << schema_hash_path << "]"; - } - } - res = OlapHeaderManager::remove(this, tablet_id, schema_hash); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "remove header failed. tablet_id:" << tablet_id - << "schema_hash:" << schema_hash - << "store path:" << path(); - } else { - LOG(INFO) << "remove tablet header successfully. tablet:" << tablet_id << "_" << schema_hash; - } -} - -OLAPStatus OlapStore::_load_table_from_header(OLAPEngine* engine, TTabletId tablet_id, - TSchemaHash schema_hash, const std::string& header) { - std::unique_ptr olap_header(new OLAPHeader()); - OLAPStatus res = OLAP_SUCCESS; - bool parsed = olap_header->ParseFromString(header); - if (!parsed) { - // here we can not get shard id - // so just remove invalid header from meta - // the related tablet path should be removed by gc - LOG(WARNING) << "parse header string failed for tablet_id:" << tablet_id - << " schema_hash:" << schema_hash; - res = OlapHeaderManager::remove(this, tablet_id, schema_hash); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "remove header failed. tablet_id:" << tablet_id - << " schema_hash:" << schema_hash - << " store path:" << path(); - } else { - LOG(INFO) << "remove tablet header successfully. tablet:" << tablet_id << "_" << schema_hash; - } - return OLAP_ERR_HEADER_PB_PARSE_FAILED; - } - if (olap_header->file_version_size() != 0) { - olap_header->change_file_version_to_delta(); - res = OlapHeaderManager::save(this, tablet_id, schema_hash, olap_header.get()); - if (res != OLAP_SUCCESS) { - LOG(FATAL) << "fail to save header, tablet_id:" << tablet_id - << ", schema_hash:" << schema_hash << " to path:" << path(); - return OLAP_ERR_HEADER_PUT; - } - } - - // init must be called - res = olap_header->init(); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to init header, tablet_id:" << tablet_id - << ", schema_hash:" << schema_hash; - _deal_with_header_error(tablet_id, schema_hash, olap_header->shard()); - return OLAP_ERR_HEADER_INIT_FAILED; - } - OLAPTablePtr olap_table = - OLAPTable::create_from_header(olap_header.release(), this); - if (olap_table == nullptr) { - LOG(WARNING) << "fail to new table. tablet_id=" << tablet_id << ", schema_hash:" << schema_hash; - _deal_with_header_error(tablet_id, schema_hash, olap_header->shard()); - return OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR; - } - - if (olap_table->lastest_version() == nullptr && !olap_table->is_schema_changing()) { - LOG(WARNING) << "tablet not in schema change state without delta is invalid." - << "tablet=" << olap_table->full_name(); - // tablet state is invalid, drop tablet - olap_table->mark_dropped(); - return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; - } - - res = engine->add_table(tablet_id, schema_hash, olap_table); - if (res != OLAP_SUCCESS) { - // insert existed tablet return OLAP_SUCCESS - if (res == OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE) { - LOG(WARNING) << "add duplicate table. table=" << olap_table->full_name(); - } - - LOG(WARNING) << "failed to add table. table=" << olap_table->full_name(); - return res; - } - res = engine->register_table_into_root_path(olap_table.get()); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "fail to register table into root path. root_path=" << olap_table->storage_root_path_name(); - - if (engine->drop_table(tablet_id, schema_hash) != OLAP_SUCCESS) { - LOG(WARNING) << "fail to drop table when create table failed. 
" - <<"tablet=" << tablet_id << " schema_hash=" << schema_hash; - } - - return res; - } - // load pending data (for realtime push), will add transaction relationship into engine - olap_table->load_pending_data(); - - return OLAP_SUCCESS; -} - -OLAPStatus OlapStore::load_tables(OLAPEngine* engine) { - auto load_table_func = [this, engine](long tablet_id, - long schema_hash, const std::string& value) -> bool { - OLAPStatus status = _load_table_from_header(engine, tablet_id, schema_hash, value); - if (status != OLAP_SUCCESS) { - LOG(WARNING) << "load table from header failed. status:" << status - << "tablet=" << tablet_id << "." << schema_hash; - }; - return true; - }; - OLAPStatus status = OlapHeaderManager::traverse_headers(_meta, load_table_func); - return status; -} - -OLAPStatus OlapStore::check_none_row_oriented_table_in_store(OLAPEngine* engine) { - auto load_table_func = [this, engine](long tablet_id, - long schema_hash, const std::string& value) -> bool { - OLAPStatus status = _check_none_row_oriented_table_in_store(engine, tablet_id, schema_hash, value); - if (status != OLAP_SUCCESS) { - LOG(WARNING) << "load table from header failed. status:" << status - << "tablet=" << tablet_id << "." << schema_hash; - }; - return true; - }; - OLAPStatus status = OlapHeaderManager::traverse_headers(_meta, load_table_func); - return status; -} - -OLAPStatus OlapStore::_check_none_row_oriented_table_in_store( - OLAPEngine* engine, TTabletId tablet_id, - TSchemaHash schema_hash, const std::string& header) { - std::unique_ptr olap_header(new OLAPHeader()); - bool parsed = olap_header->ParseFromString(header); - if (!parsed) { - LOG(WARNING) << "parse header string failed for tablet_id:" << tablet_id << " schema_hash:" << schema_hash; - return OLAP_ERR_HEADER_PB_PARSE_FAILED; - } - // init must be called - RETURN_NOT_OK(olap_header->init()); - if (olap_header->data_file_type() == OLAP_DATA_FILE) { - LOG(FATAL) << "Not support row-oriented table any more. Please convert it to column-oriented table." - << "tablet=" << tablet_id << "." << schema_hash; - } - - return OLAP_SUCCESS; -} - -} // namespace doris diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp new file mode 100644 index 00000000000000..06097ce5bf494b --- /dev/null +++ b/be/src/olap/tablet.cpp @@ -0,0 +1,936 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/tablet.h" + +#include +#include +#include + +#include +#include +#include + +#include + +#include "olap/data_dir.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/storage_engine.h" +#include "olap/reader.h" +#include "olap/row_cursor.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/tablet_meta_manager.h" +#include "olap/utils.h" +#include "util/time.h" + +namespace doris { + +using std::pair; +using std::nothrow; +using std::sort; +using std::string; +using std::vector; + +TabletSharedPtr Tablet::create_tablet_from_meta_file( + const string& file_path, DataDir* data_dir) { + TabletMetaSharedPtr tablet_meta(new(nothrow) TabletMeta()); + if (tablet_meta == nullptr) { + LOG(WARNING) << "fail to malloc TabletMeta."; + return NULL; + } + + if (tablet_meta->create_from_file(file_path) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load tablet_meta. file_path=" << file_path; + return nullptr; + } + + // add new fields + boost::filesystem::path file_path_path(file_path); + string shard_path = file_path_path.parent_path().parent_path().parent_path().string(); + string shard_str = shard_path.substr(shard_path.find_last_of('/') + 1); + uint64_t shard = stol(shard_str); + tablet_meta->set_shard_id(shard); + + // save tablet_meta info to kv db + // tablet_meta key format: tablet_id + "_" + schema_hash + OLAPStatus res = TabletMetaManager::save(data_dir, tablet_meta->tablet_id(), + tablet_meta->schema_hash(), tablet_meta); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to save tablet_meta to db. file_path=" << file_path; + return nullptr; + } + return create_tablet_from_meta(tablet_meta, data_dir); +} + +TabletSharedPtr Tablet::create_tablet_from_meta( + TabletMetaSharedPtr tablet_meta, + DataDir* data_dir) { + TabletSharedPtr tablet = std::make_shared(tablet_meta, data_dir); + if (tablet == nullptr) { + LOG(WARNING) << "fail to malloc a table."; + return nullptr; + } + + return tablet; +} + +Tablet::Tablet(TabletMetaSharedPtr tablet_meta, DataDir* data_dir) + : _state(tablet_meta->tablet_state()), + _tablet_meta(tablet_meta), + _schema(tablet_meta->tablet_schema()), + _data_dir(data_dir), + _is_bad(false), + _last_compaction_failure_time(UnixMillis()) { + _tablet_path.append(_data_dir->path()); + _tablet_path.append(DATA_PREFIX); + _tablet_path.append("/"); + _tablet_path.append(std::to_string(_tablet_meta->shard_id())); + _tablet_path.append("/"); + _tablet_path.append(std::to_string(_tablet_meta->tablet_id())); + _tablet_path.append("/"); + _tablet_path.append(std::to_string(_tablet_meta->schema_hash())); + + _rs_graph.construct_rowset_graph(_tablet_meta->all_rs_metas()); +} + +Tablet::~Tablet() { + WriteLock wrlock(&_meta_lock); + _rs_version_map.clear(); + _inc_rs_version_map.clear(); +} + +OLAPStatus Tablet::init_once() { + OLAPStatus res = OLAP_SUCCESS; + VLOG(3) << "begin to load tablet. tablet=" << full_name() + << ", version_size=" << _tablet_meta->version_count(); + for (auto& rs_meta : _tablet_meta->all_rs_metas()) { + Version version = { rs_meta->start_version(), rs_meta->end_version() }; + RowsetSharedPtr rowset(new(std::nothrow) AlphaRowset(&_schema, _tablet_path, _data_dir, rs_meta)); + res = rowset->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init rowset. 
tablet_id:" << tablet_id() + << ", schema_hash:" << schema_hash() + << ", version=" << version.first << "-" << version.second + << ", res:" << res; + return res; + } + _rs_version_map[version] = rowset; + } + + // init incremental rowset + for (auto& inc_rs_meta : _tablet_meta->all_inc_rs_metas()) { + Version version = { inc_rs_meta->start_version(), inc_rs_meta->end_version() }; + RowsetSharedPtr rowset = get_rowset_by_version(version); + if (rowset == nullptr) { + rowset.reset(new(std::nothrow) AlphaRowset(&_schema, _tablet_path, _data_dir, inc_rs_meta)); + res = rowset->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init incremental rowset. tablet_id:" << tablet_id() + << ", schema_hash:" << schema_hash() + << ", version=" << version.first << "-" << version.second + << ", res:" << res; + return res; + } + } + _inc_rs_version_map[version] = rowset; + } + + return res; +} + +OLAPStatus Tablet::init() { + return _init_once.init([this] { return init_once(); }); +} + +bool Tablet::is_used() { + return !_is_bad && _data_dir->is_used(); +} + +TabletUid Tablet::tablet_uid() { + return _tablet_meta->tablet_uid(); +} + +string Tablet::tablet_path() const { + return _tablet_path; +} + +OLAPStatus Tablet::save_meta() { + OLAPStatus res = _tablet_meta->save_meta(_data_dir); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save tablet_meta. res=" << res + << ", root=" << _data_dir->path(); + } + _schema = _tablet_meta->tablet_schema(); + + return res; +} + +OLAPStatus Tablet::revise_tablet_meta( + const vector& rowsets_to_clone, + const vector& versions_to_delete) { + LOG(INFO) << "begin to clone data to tablet. tablet=" << full_name() + << ", rowsets_to_clone=" << rowsets_to_clone.size() + << ", versions_to_delete_size=" << versions_to_delete.size(); + OLAPStatus res = OLAP_SUCCESS; + do { + // load new local tablet_meta to operate on + TabletMetaSharedPtr new_tablet_meta(new (nothrow) TabletMeta()); + RETURN_NOT_OK(TabletMetaManager::get_meta(_data_dir, tablet_id(), schema_hash(), new_tablet_meta)); + + // delete versions from new local tablet_meta + for (const Version& version : versions_to_delete) { + res = new_tablet_meta->delete_rs_meta_by_version(version, nullptr); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to delete version from new local tablet meta. tablet=" << full_name() + << ", version=" << version.first << "-" << version.second; + break; + } + if (new_tablet_meta->version_for_delete_predicate(version)) { + new_tablet_meta->remove_delete_predicate_by_version(version); + } + LOG(INFO) << "delete version from new local tablet_meta when clone. [table='" << full_name() + << "', version=" << version.first << "-" << version.second << "]"; + } + + if (res != OLAP_SUCCESS) { + break; + } + + for (auto& rs_meta : rowsets_to_clone) { + new_tablet_meta->add_rs_meta(rs_meta); + } + + if (res != OLAP_SUCCESS) { + break; + } + + VLOG(3) << "load rowsets successfully when clone. tablet=" << full_name() + << ", added rowset size=" << rowsets_to_clone.size(); + // save and reload tablet_meta + res = new_tablet_meta->save_meta(_data_dir); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to save new local tablet_meta when clone. 
res:" << res; + break; + } + _tablet_meta = new_tablet_meta; + } while (0); + + for (auto& version : versions_to_delete) { + auto it = _rs_version_map.find(version); + StorageEngine::instance()->add_unused_rowset(it->second); + _rs_version_map.erase(it); + } + for (auto& it : _inc_rs_version_map) { + StorageEngine::instance()->add_unused_rowset(it.second); + } + _inc_rs_version_map.clear(); + + for (auto& rs_meta : rowsets_to_clone) { + Version version = { rs_meta->start_version(), rs_meta->end_version() }; + RowsetSharedPtr rowset(new AlphaRowset(&_schema, _tablet_path, _data_dir, rs_meta)); + res = rowset->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init rowset. version=" << version.first << "-" << version.second; + return res; + } + _rs_version_map[version] = rowset; + } + + _rs_graph.reconstruct_rowset_graph(_tablet_meta->all_rs_metas()); + + LOG(INFO) << "finish to clone data to tablet. res=" << res << ", " + << "table=" << full_name() << ", " + << "rowsets_to_clone=" << rowsets_to_clone.size(); + return res; +} + +OLAPStatus Tablet::register_tablet_into_dir() { + return _data_dir->register_tablet(this); +} + +OLAPStatus Tablet::deregister_tablet_from_dir() { + return _data_dir->deregister_tablet(this); +} + +OLAPStatus Tablet::add_rowset(RowsetSharedPtr rowset) { + WriteLock wrlock(&_meta_lock); + RETURN_NOT_OK(_check_added_rowset(rowset)); + RETURN_NOT_OK(_tablet_meta->add_rs_meta(rowset->rowset_meta())); + _rs_version_map[rowset->version()] = rowset; + RETURN_NOT_OK(_rs_graph.add_version_to_graph(rowset->version())); + RETURN_NOT_OK(save_meta()); + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::modify_rowsets(const vector& to_add, + const vector& to_delete) { + vector rs_metas_to_add; + for (auto& rs : to_add) { + rs_metas_to_add.push_back(rs->rowset_meta()); + } + + vector rs_metas_to_delete; + for (auto& rs : to_delete) { + rs_metas_to_delete.push_back(rs->rowset_meta()); + } + + _tablet_meta->modify_rs_metas(rs_metas_to_add, rs_metas_to_delete); + for (auto& rs : to_delete) { + auto it = _rs_version_map.find(rs->version()); + _rs_version_map.erase(it); + } + + for (auto& rs : to_add) { + _rs_version_map[rs->version()] = rs;; + } + + _rs_graph.reconstruct_rowset_graph(_tablet_meta->all_rs_metas()); + + return OLAP_SUCCESS; +} + +// snapshot manager may call this api to check if version exists, so that +// the version maybe not exist +const RowsetSharedPtr Tablet::get_rowset_by_version(const Version& version) const { + auto iter = _rs_version_map.find(version); + if (iter == _rs_version_map.end()) { + LOG(INFO) << "no rowset for version:" << version.first << "-" << version.second; + return nullptr; + } + RowsetSharedPtr rowset = iter->second; + return rowset; +} + +size_t Tablet::get_rowset_size_by_version(const Version& version) { + DCHECK(_rs_version_map.find(version) != _rs_version_map.end()) + << "invalid version:" << version.first << "-" << version.second; + auto iter = _rs_version_map.find(version); + if (iter == _rs_version_map.end()) { + LOG(WARNING) << "no rowset for version:" << version.first << "-" << version.second; + return -1; + } + RowsetSharedPtr rowset = iter->second; + return rowset->data_disk_size(); +} + +const RowsetSharedPtr Tablet::rowset_with_max_version() const { + Version max_version = _tablet_meta->max_version(); + if (max_version.first == -1) { + return nullptr; + } + DCHECK(_rs_version_map.find(max_version) != _rs_version_map.end()) + << "invalid version:" << max_version.first << "-" << max_version.second; + auto iter = 
_rs_version_map.find(max_version); + if (iter == _rs_version_map.end()) { + LOG(WARNING) << "no rowset for version:" << max_version.first << "-" << max_version.second; + return nullptr; + } + RowsetSharedPtr rowset = iter->second; + return rowset; +} + +RowsetSharedPtr Tablet::rowset_with_largest_size() { + RowsetSharedPtr largest_rowset = nullptr; + for (auto& it : _rs_version_map) { + // use segment_group of base file as target segment_group when base is not empty, + // or try to find the biggest segment_group. + if (it.second->empty() || it.second->zero_num_rows()) { + continue; + } + if (largest_rowset == nullptr || it.second->rowset_meta()->index_disk_size() + > largest_rowset->rowset_meta()->index_disk_size()) { + largest_rowset = it.second; + } + } + + return largest_rowset; +} + +OLAPStatus Tablet::add_inc_rowset(const RowsetSharedPtr& rowset) { + WriteLock wrlock(&_meta_lock); + // check if the rowset id is valid + RETURN_NOT_OK(_check_added_rowset(rowset)); + RETURN_NOT_OK(_tablet_meta->add_rs_meta(rowset->rowset_meta())); + _rs_version_map[rowset->version()] = rowset; + _inc_rs_version_map[rowset->version()] = rowset; + RETURN_NOT_OK(_rs_graph.add_version_to_graph(rowset->version())); + RETURN_NOT_OK(_tablet_meta->add_inc_rs_meta(rowset->rowset_meta())); + RETURN_NOT_OK(_tablet_meta->save_meta(_data_dir)); + return OLAP_SUCCESS; +} + +bool Tablet::has_expired_inc_rowset() { + bool exist = false; + time_t now = time(NULL); + ReadLock rdlock(&_meta_lock); + for (auto& rs_meta : _tablet_meta->all_inc_rs_metas()) { + double diff = difftime(now, rs_meta->creation_time()); + if (diff >= config::inc_rowset_expired_sec) { + exist = true; + break; + } + } + return exist; +} + +void Tablet::delete_inc_rowset_by_version(const Version& version, + const VersionHash& version_hash) { + // delete incremental rowset from map + auto it = _inc_rs_version_map.find(version); + if (it != _inc_rs_version_map.end()) { + _inc_rs_version_map.erase(it); + } + RowsetMetaSharedPtr rowset_meta = _tablet_meta->acquire_inc_rs_meta_by_version(version); + if (rowset_meta == nullptr) { return; } + + _tablet_meta->delete_inc_rs_meta_by_version(version); + VLOG(3) << "delete incremental rowset. tablet=" << full_name() << ", " + << "version=" << version.first << "-" << version.second; +} + +void Tablet::delete_expired_inc_rowsets() { + time_t now = time(nullptr); + vector> expired_versions; + WriteLock wrlock(&_meta_lock); + for (auto& rs_meta : _tablet_meta->all_inc_rs_metas()) { + double diff = difftime(now, rs_meta->creation_time()); + if (diff >= config::inc_rowset_expired_sec) { + Version version(rs_meta->start_version(), rs_meta->end_version()); + expired_versions.push_back(std::make_pair(version, rs_meta->version_hash())); + VLOG(3) << "find expire incremental rowset. tablet=" << full_name() << ", " + << "version=" << rs_meta->start_version() << "-" << rs_meta->end_version() << ", " + << "exist_sec=" << diff; + } + } + + if (expired_versions.empty()) { return; } + + for (auto& pair: expired_versions) { + delete_inc_rowset_by_version(pair.first, pair.second); + VLOG(3) << "delete expire incremental data. tablet=" << full_name() << ", " + << "version=" << pair.first.first << "-" << pair.first.second; + } + + if (save_meta() != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save tablet_meta when delete expire incremental data." 
+ << "tablet=" << full_name(); + } +} + +OLAPStatus Tablet::capture_consistent_versions( + const Version& spec_version, vector* version_path) const { + OLAPStatus status = _rs_graph.capture_consistent_versions(spec_version, version_path); + if (status != OLAP_SUCCESS) { + std::vector missed_versions; + calc_missed_versions_unlock(spec_version.second, &missed_versions); + if (missed_versions.empty()) { + LOG(WARNING) << "tablet:" << full_name() + << ", version already has been merged. " + << "spec_version: " << spec_version.first + << "-" << spec_version.second; + status = OLAP_ERR_VERSION_ALREADY_MERGED; + } else { + LOG(WARNING) << "status:" << status << ", tablet:" << full_name() + << ", missed version for version:" + << spec_version.first << "-" << spec_version.second; + _print_missed_versions(missed_versions); + } + return status; + } + return status; +} + +OLAPStatus Tablet::check_version_integrity(const Version& version) { + vector span_versions; + ReadLock rdlock(&_meta_lock); + return capture_consistent_versions(version, &span_versions); +} + +bool Tablet::check_version_exist(const Version& version) const { + return (_rs_version_map.find(version) != _rs_version_map.end()); +} + +void Tablet::list_versions(vector* versions) const { + DCHECK(versions != nullptr && versions->empty()); + + // versions vector is not sorted. + for (auto& it : _rs_version_map) { + versions->push_back(it.first); + } +} + +OLAPStatus Tablet::capture_consistent_rowsets(const Version& spec_version, + vector* rowsets) const { + vector version_path; + RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); + RETURN_NOT_OK(capture_consistent_rowsets(version_path, rowsets)); + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::capture_consistent_rowsets(const vector& version_path, + vector* rowsets) const { + DCHECK(rowsets != nullptr && rowsets->empty()); + for (auto& version : version_path) { + auto it = _rs_version_map.find(version); + if (it == _rs_version_map.end()) { + LOG(WARNING) << "fail to find Rowset for version. tablet=" << full_name() + << ", version='" << version.first << "-" << version.second; + return OLAP_ERR_CAPTURE_ROWSET_ERROR; + } + + rowsets->push_back(it->second); + } + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::capture_rs_readers(const Version& spec_version, + vector* rs_readers) const { + vector version_path; + RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); + RETURN_NOT_OK(capture_rs_readers(version_path, rs_readers)); + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::capture_rs_readers(const vector& version_path, + vector* rs_readers) const { + DCHECK(rs_readers != NULL && rs_readers->empty()); + for (auto version : version_path) { + auto it = _rs_version_map.find(version); + if (it == _rs_version_map.end()) { + LOG(WARNING) << "fail to find Rowset for version. 
tablet=" << full_name() + << ", version='" << version.first << "-" << version.second; + return OLAP_ERR_CAPTURE_ROWSET_READER_ERROR; + } + std::shared_ptr rs_reader(it->second->create_reader()); + if (rs_reader == nullptr) { + LOG(WARNING) << "failed to create reader for rowset:" << it->second->rowset_id(); + return OLAP_ERR_CAPTURE_ROWSET_READER_ERROR; + } + rs_readers->push_back(rs_reader); + } + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::add_delete_predicate(const DeletePredicatePB& delete_predicate, int64_t version) { + return _tablet_meta->add_delete_predicate(delete_predicate, version); +} + +bool Tablet::version_for_delete_predicate(const Version& version) { + return _tablet_meta->version_for_delete_predicate(version); +} + +bool Tablet::version_for_load_deletion(const Version& version) { + RowsetSharedPtr rowset = _rs_version_map.at(version); + return rowset->delete_flag(); +} + + +AlterTabletTaskSharedPtr Tablet::alter_task() { + return _tablet_meta->alter_task(); +} + +OLAPStatus Tablet::add_alter_task(int64_t related_tablet_id, + int32_t related_schema_hash, + const vector& versions_to_alter, + const AlterTabletType alter_type) { + AlterTabletTask alter_task; + alter_task.set_alter_state(ALTER_RUNNING); + alter_task.set_related_tablet_id(related_tablet_id); + alter_task.set_related_schema_hash(related_schema_hash); + alter_task.set_alter_type(alter_type); + RETURN_NOT_OK(_tablet_meta->add_alter_task(alter_task)); + LOG(INFO) << "successfully add alter task for tablet_id:" << this->tablet_id() + << ", schema_hash:" << this->schema_hash() + << ", related_tablet_id " << related_tablet_id + << ", related_schema_hash " << related_schema_hash + << ", alter_type " << alter_type; + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::delete_alter_task() { + LOG(INFO) << "delete alter task from table. tablet=" << full_name(); + return _tablet_meta->delete_alter_task(); +} + +OLAPStatus Tablet::set_alter_state(AlterTabletState state) { + return _tablet_meta->set_alter_state(state); +} + +OLAPStatus Tablet::protected_delete_alter_task() { + WriteLock wrlock(&_meta_lock); + OLAPStatus res = delete_alter_task(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to delete alter task from table. res=" << res + << ", full_name=" << full_name(); + return res; + } + + res = save_meta(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to save tablet header. 
res=" << res + << ", full_name=" << full_name(); + return res; + } + return res; +} + +void Tablet::set_io_error() { + OLAP_LOG_WARNING("io error occur.[tablet_full_name='%s', root_path_name='%s']", + full_name().c_str()); +} + +OLAPStatus Tablet::recover_tablet_until_specfic_version(const int64_t& spec_version, + const int64_t& version_hash) { + return OLAP_SUCCESS; +} + +bool Tablet::can_do_compaction() { + // 如果table正在做schema change,则通过选路判断数据是否转换完成 + // 如果选路成功,则转换完成,可以进行BE + // 如果选路失败,则转换未完成,不能进行BE + ReadLock rdlock(&_meta_lock); + const RowsetSharedPtr lastest_delta = rowset_with_max_version(); + if (lastest_delta == NULL) { + return false; + } + + Version test_version = Version(0, lastest_delta->end_version()); + vector path_versions; + if (OLAP_SUCCESS != capture_consistent_versions(test_version, &path_versions)) { + return false; + } + + return true; +} + +const uint32_t Tablet::calc_cumulative_compaction_score() const { + uint32_t score = 0; + bool base_rowset_exist = false; + const int64_t point = cumulative_layer_point(); + for (auto& rs_meta : _tablet_meta->all_rs_metas()) { + if (rs_meta->start_version() >= point) { + score++; + } + if (rs_meta->start_version() == 0) { + base_rowset_exist = true; + } + } + + // base不存在可能是tablet正在做alter table,先不选它,设score=0 + return base_rowset_exist ? score : 0; +} + +const uint32_t Tablet::calc_base_compaction_score() const { + uint32_t score = 0; + const int64_t point = cumulative_layer_point(); + bool base_rowset_exist = false; + for (auto& rs_meta : _tablet_meta->all_rs_metas()) { + if (rs_meta->start_version() < point) { + score++; + } + if (rs_meta->start_version() == 0) { + base_rowset_exist = true; + } + } + score = score < config::base_compaction_num_cumulative_deltas ? 0 : score; + + // base不存在可能是tablet正在做alter table,先不选它,设score=0 + return base_rowset_exist ? score : 0; +} + +OLAPStatus Tablet::compute_all_versions_hash(const vector& versions, + VersionHash* version_hash) const { + DCHECK(version_hash != nullptr) << "invalid parameter, version_hash is nullptr"; + int64_t v_hash = 0L; + for (auto version : versions) { + auto it = _rs_version_map.find(version); + if (it == _rs_version_map.end()) { + LOG(WARNING) << "fail to find Rowset. 
" + << "version=" << version.first << "-" << version.second; + return OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR; + } + v_hash ^= it->second->version_hash(); + } + *version_hash = v_hash; + return OLAP_SUCCESS; +} + +void Tablet::calc_missed_versions(int64_t spec_version, + vector* missed_versions) { + ReadLock rdlock(&_meta_lock); + calc_missed_versions_unlock(spec_version, missed_versions); +} + +void Tablet::calc_missed_versions_unlock(int64_t spec_version, + vector* missed_versions) const { + DCHECK(spec_version > 0) << "invalid spec_version: " << spec_version; + std::list existing_versions; + for (auto& rs : _tablet_meta->all_rs_metas()) { + existing_versions.emplace_back(rs->version()); + } + + // sort the existing versions in ascending order + existing_versions.sort([](const Version& a, const Version& b) { + // simple because 2 versions are certainly not overlapping + return a.first < b.first; + }); + + // find the missing version until spec_version + int64_t last_version = -1; + for (const Version& version : existing_versions) { + if (version.first > last_version + 1) { + for (int64_t i = last_version + 1; i < version.first; ++i) { + missed_versions->emplace_back(i, i); + } + } + last_version = version.second; + if (spec_version <= last_version) { + break; + } + } + for (int64_t i = last_version + 1; i <= spec_version; ++i) { + missed_versions->emplace_back(i, i); + } +} + +OLAPStatus Tablet::max_continuous_version_from_begining(Version* version, VersionHash* v_hash) { + ReadLock rdlock(&_meta_lock); + vector> existing_versions; + for (auto& rs : _tablet_meta->all_rs_metas()) { + existing_versions.emplace_back(rs->version() , rs->version_hash()); + } + + // sort the existing versions in ascending order + std::sort(existing_versions.begin(), existing_versions.end(), + [](const pair& left, + const pair& right) { + // simple because 2 versions are certainly not overlapping + return left.first.first < right.first.first; + }); + Version max_continuous_version = { -1, 0 }; + VersionHash max_continuous_version_hash = 0; + for (int i = 0; i < existing_versions.size(); ++i) { + if (existing_versions[i].first.first > max_continuous_version.second + 1) { + break; + } + max_continuous_version = existing_versions[i].first; + max_continuous_version_hash = existing_versions[i].second; + } + *version = max_continuous_version; + *v_hash = max_continuous_version_hash; + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::split_range( + const OlapTuple& start_key_strings, + const OlapTuple& end_key_strings, + uint64_t request_block_row_count, + vector* ranges) { + if (ranges == NULL) { + LOG(WARNING) << "parameter end_row is null."; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + RowCursor start_key; + RowCursor end_key; + + // 如果有startkey,用startkey初始化;反之则用minkey初始化 + if (start_key_strings.size() > 0) { + if (start_key.init_scan_key(_schema, start_key_strings.values()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to initial key strings with RowCursor type."; + return OLAP_ERR_INIT_FAILED; + } + + if (start_key.from_tuple(start_key_strings) != OLAP_SUCCESS) { + LOG(WARNING) << "init end key failed"; + return OLAP_ERR_INVALID_SCHEMA; + } + } else { + if (start_key.init(_schema, num_short_key_columns()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to initial key strings with RowCursor type."; + return OLAP_ERR_INIT_FAILED; + } + + start_key.allocate_memory_for_string_type(_schema); + start_key.build_min_key(); + } + + // 和startkey一样处理,没有则用maxkey初始化 + if (end_key_strings.size() > 0) { + if (OLAP_SUCCESS != 
end_key.init_scan_key(_schema, end_key_strings.values())) { + LOG(WARNING) << "fail to parse strings to key with RowCursor type."; + return OLAP_ERR_INVALID_SCHEMA; + } + + if (end_key.from_tuple(end_key_strings) != OLAP_SUCCESS) { + LOG(WARNING) << "init end key failed"; + return OLAP_ERR_INVALID_SCHEMA; + } + } else { + if (end_key.init(_schema, num_short_key_columns()) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to initial key strings with RowCursor type."; + return OLAP_ERR_INIT_FAILED; + } + + end_key.allocate_memory_for_string_type(_schema); + end_key.build_max_key(); + } + + ReadLock rdlock(get_header_lock_ptr()); + RowsetSharedPtr rowset = rowset_with_largest_size(); + + // 如果找不到合适的rowset,就直接返回startkey,endkey + if (rowset == nullptr) { + VLOG(3) << "there is no base file now, may be tablet is empty."; + // it may be right if the tablet is empty, so we return success. + ranges->emplace_back(start_key.to_tuple()); + ranges->emplace_back(end_key.to_tuple()); + return OLAP_SUCCESS; + } + AlphaRowset* alpha_rowset = reinterpret_cast(rowset.get()); + OLAPStatus status = alpha_rowset->split_range(start_key, end_key, request_block_row_count, ranges); + return status; +} + +void Tablet::delete_all_files() { + // Release resources like memory and disk space. + // we have to call list_versions first, or else error occurs when + // removing hash_map item and iterating hash_map concurrently. + ReadLock rdlock(&_meta_lock); + for (auto it = _rs_version_map.begin(); it != _rs_version_map.end(); ++it) { + it->second->remove(); + } + _rs_version_map.clear(); + for (auto it = _inc_rs_version_map.begin(); it != _inc_rs_version_map.end(); ++it) { + it->second->remove(); + } + _inc_rs_version_map.clear(); +} + +bool Tablet::check_path(const std::string& path_to_check) { + ReadLock rdlock(&_meta_lock); + if (path_to_check == _tablet_path) { + return true; + } + boost::filesystem::path tablet_schema_hash_path(_tablet_path); + boost::filesystem::path tablet_id_path = tablet_schema_hash_path.parent_path(); + std::string tablet_id_dir = tablet_id_path.string(); + if (path_to_check == tablet_id_dir) { + return true; + } + for (auto& version_rowset : _rs_version_map) { + bool ret = version_rowset.second->check_path(path_to_check); + if (ret) { + return true; + } + } + for (auto& inc_version_rowset : _inc_rs_version_map) { + bool ret = inc_version_rowset.second->check_path(path_to_check); + if (ret) { + return true; + } + } + return false; +} + +bool Tablet::check_rowset_id(RowsetId rowset_id) { + ReadLock rdlock(&_meta_lock); + for (auto& version_rowset : _rs_version_map) { + if (version_rowset.second->rowset_id() == rowset_id) { + return true; + } + } + + for (auto& inc_version_rowset : _inc_rs_version_map) { + if (inc_version_rowset.second->rowset_id() == rowset_id) { + return true; + } + } + return false; +} + +// lock here, function that call next_rowset_id should not have meta lock +OLAPStatus Tablet::next_rowset_id(RowsetId* id) { + WriteLock wrlock(&_meta_lock); + return _tablet_meta->get_next_rowset_id(id, _data_dir); +} + +// lock here, function that call set_next_rowset_id should not have meta lock +OLAPStatus Tablet::set_next_rowset_id(RowsetId new_rowset_id) { + WriteLock wrlock(&_meta_lock); + return _tablet_meta->set_next_rowset_id(new_rowset_id, _data_dir); +} + +void Tablet::_print_missed_versions(const std::vector& missed_versions) const { + std::stringstream ss; + ss << full_name() << " has "<< missed_versions.size() << " missed version:"; + // print at most 10 version + for (int i = 0; i < 
10 && i < missed_versions.size(); ++i) { + ss << missed_versions[i].first << "-" << missed_versions[i].second << ","; + } + LOG(WARNING) << ss.str(); +} + + OLAPStatus Tablet::_check_added_rowset(const RowsetSharedPtr& rowset) { + if (rowset == nullptr) { + return OLAP_ERR_ROWSET_INVALID; + } + // check if the rowset id is valid + if (rowset->rowset_id() >= _tablet_meta->get_cur_rowset_id()) { + LOG(FATAL) << "rowset id is larger than next rowsetid, it is fatal error" + << " rowset_id=" << rowset->rowset_id() + << " next_id=" << _tablet_meta->get_cur_rowset_id(); + return OLAP_ERR_ROWSET_INVALID; + } + Version version = {rowset->start_version(), rowset->end_version()}; + RowsetSharedPtr exist_rs = get_rowset_by_version(version); + // if there exist a rowset with version_hash == 0, should delete it + if (exist_rs != nullptr && exist_rs->version_hash() == 0) { + vector to_add; + vector to_delete; + to_delete.push_back(exist_rs); + RETURN_NOT_OK(modify_rowsets(to_add, to_delete)); + } + + // check if there exist a rowset contains the added rowset + for (auto& it : _rs_version_map) { + if (it.first.first <= rowset->start_version() + && it.first.second >= rowset->end_version()) { + if (it.second == nullptr) { + LOG(FATAL) << "there exist a version " + << " start_version=" << it.first.first + << " end_version=" << it.first.second + << " contains the input rs with version " + << " start_version=" << rowset->start_version() + << " end_version=" << rowset->end_version() + << " but the related rs is null"; + return OLAP_ERR_PUSH_ROWSET_NOT_FOUND; + } else { + return OLAP_ERR_PUSH_VERSION_ALREADY_EXIST; + } + } + } + + return OLAP_SUCCESS; +} + +OLAPStatus Tablet::set_partition_id(int64_t partition_id) { + return _tablet_meta->set_partition_id(partition_id); +} + +} // namespace doris diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h new file mode 100644 index 00000000000000..fb34e1c381a8f1 --- /dev/null +++ b/be/src/olap/tablet.h @@ -0,0 +1,396 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
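+
+// This header declares Tablet: the in-memory handle for a single tablet replica
+// stored on a DataDir. A Tablet owns its TabletMeta and TabletSchema, keeps the
+// visible rowsets indexed by version in _rs_version_map (and incremental rowsets
+// in _inc_rs_version_map), and guards that state with _meta_lock.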
+ +#ifndef DORIS_BE_SRC_OLAP_TABLET_H +#define DORIS_BE_SRC_OLAP_TABLET_H + +#include +#include +#include +#include +#include +#include + +#include "gen_cpp/AgentService_types.h" +#include "gen_cpp/olap_file.pb.h" +#include "olap/olap_define.h" +#include "olap/tuple.h" +#include "olap/row_cursor.h" +#include "olap/rowset_graph.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_reader.h" +#include "olap/tablet_meta.h" +#include "olap/utils.h" +#include "util/once.h" + +namespace doris { + +class DataDir; +class Tablet; +class TabletMeta; + +using TabletSharedPtr = std::shared_ptr; + +class Tablet : public std::enable_shared_from_this { +public: + static TabletSharedPtr create_tablet_from_meta_file( + const std::string& header_file, + DataDir* data_dir = nullptr); + static TabletSharedPtr create_tablet_from_meta( + TabletMetaSharedPtr tablet_meta, + DataDir* data_dir = nullptr); + + Tablet(TabletMetaSharedPtr tablet_meta, DataDir* data_dir); + ~Tablet(); + + OLAPStatus init_once(); + OLAPStatus init(); + inline bool init_succeeded(); + + bool is_used(); + inline DataDir* data_dir() const; + OLAPStatus register_tablet_into_dir(); + OLAPStatus deregister_tablet_from_dir(); + std::string tablet_path() const; + + // operation for TabletState + TabletState tablet_state() const { return _state; } + inline OLAPStatus set_tablet_state(TabletState state); + + // Property encapsulated in TabletMeta + inline const TabletMetaSharedPtr tablet_meta(); + OLAPStatus save_meta(); + OLAPStatus merge_tablet_meta(const TabletMeta& hdr, int to_version); + OLAPStatus revise_tablet_meta(const std::vector& rowsets_to_clone, + const std::vector& versions_to_delete); + + + TabletUid tablet_uid(); + inline int64_t table_id() const; + inline const std::string full_name() const; + inline int64_t partition_id() const; + inline int64_t tablet_id() const; + inline int32_t schema_hash() const; + inline int16_t shard_id(); + inline const int64_t creation_time() const; + inline void set_creation_time(int64_t creation_time); + inline const int64_t cumulative_layer_point() const; + inline void set_cumulative_layer_point(const int64_t new_point); + + inline bool equal(int64_t tablet_id, int32_t schema_hash); + inline size_t tablet_footprint(); // disk space occupied by tablet + inline size_t num_rows(); + inline int version_count() const; + inline Version max_version() const; + + // propreties encapsulated in TabletSchema + inline const TabletSchema& tablet_schema() const; + inline KeysType keys_type() const; + inline size_t num_columns() const; + inline size_t num_null_columns() const; + inline size_t num_key_columns() const ; + inline size_t num_short_key_columns() const; + inline size_t num_rows_per_row_block() const; + inline CompressKind compress_kind() const; + inline double bloom_filter_fpp() const; + inline size_t next_unique_id() const; + inline size_t row_size() const; + inline size_t field_index(const string& field_name) const; + + // operation in rowsets + OLAPStatus add_rowset(RowsetSharedPtr rowset); + OLAPStatus modify_rowsets(const vector& to_add, + const vector& to_delete); + const RowsetSharedPtr get_rowset_by_version(const Version& version) const; + size_t get_rowset_size_by_version(const Version& version); + const RowsetSharedPtr rowset_with_max_version() const; + RowsetSharedPtr rowset_with_largest_size(); + + OLAPStatus add_inc_rowset(const RowsetSharedPtr& rowset); + bool has_expired_inc_rowset(); + void delete_inc_rowset_by_version(const Version& version, + const VersionHash& version_hash); 
+ void delete_expired_inc_rowsets(); + + OLAPStatus capture_consistent_versions(const Version& spec_version, vector* version_path) const; + OLAPStatus check_version_integrity(const Version& version); + bool check_version_exist(const Version& version) const; + void list_versions(std::vector* versions) const; + + OLAPStatus capture_consistent_rowsets(const Version& spec_version, + vector* rowsets) const; + OLAPStatus capture_consistent_rowsets(const vector& version_path, + vector* rowsets) const; + OLAPStatus capture_rs_readers(const Version& spec_version, + vector* rs_readers) const; + OLAPStatus capture_rs_readers(const vector& version_path, + vector* rs_readers) const; + + DelPredicateArray delete_predicates() { return _tablet_meta->delete_predicates(); } + OLAPStatus add_delete_predicate(const DeletePredicatePB& delete_predicate, int64_t version); + bool version_for_delete_predicate(const Version& version); + bool version_for_load_deletion(const Version& version); + + // message for alter task + AlterTabletTaskSharedPtr alter_task(); + OLAPStatus add_alter_task(int64_t related_tablet_id, int32_t related_schema_hash, + const vector& versions_to_alter, + const AlterTabletType alter_type); + OLAPStatus delete_alter_task(); + OLAPStatus set_alter_state(AlterTabletState state); + OLAPStatus protected_delete_alter_task(); + + // meta lock + inline void obtain_header_rdlock() { _meta_lock.rdlock(); } + inline void obtain_header_wrlock() { _meta_lock.wrlock(); } + inline void release_header_lock() { _meta_lock.unlock(); } + inline RWMutex* get_header_lock_ptr() { return &_meta_lock; } + + // ingest lock + inline void obtain_push_lock() { _ingest_lock.lock(); } + inline void release_push_lock() { _ingest_lock.unlock(); } + inline Mutex* get_push_lock() { return &_ingest_lock; } + + // base lock + inline bool try_base_compaction_lock() { return _base_lock.trylock() == OLAP_SUCCESS; } + inline void obtain_base_compaction_lock() { _base_lock.lock(); } + inline void release_base_compaction_lock() { _base_lock.unlock(); } + + // cumulative lock + inline bool try_cumulative_lock() { return (OLAP_SUCCESS == _cumulative_lock.trylock()); } + inline void obtain_cumulative_lock() { _cumulative_lock.lock(); } + inline void release_cumulative_lock() { _cumulative_lock.unlock(); } + + inline RWMutex* get_migration_lock_ptr() { return &_migration_lock; } + + // operation for compaction + bool can_do_compaction(); + const uint32_t calc_cumulative_compaction_score() const; + const uint32_t calc_base_compaction_score() const; + OLAPStatus compute_all_versions_hash(const std::vector& versions, + VersionHash* version_hash) const; + + // operation for clone + void calc_missed_versions(int64_t spec_version, vector* missed_versions); + + void calc_missed_versions_unlock(int64_t spec_version, vector* missed_versions) const; + + // This function to find max continous version from the beginning. + // There are 1, 2, 3, 5, 6, 7 versions belongs tablet. + // Version 3 is target. 
+ OLAPStatus max_continuous_version_from_begining(Version* version, VersionHash* v_hash); + + // operation for query + OLAPStatus split_range( + const OlapTuple& start_key_strings, + const OlapTuple& end_key_strings, + uint64_t request_block_row_count, + vector* ranges); + + // operation for recover tablet + OLAPStatus recover_tablet_until_specfic_version(const int64_t& spec_version, + const int64_t& version_hash); + + // I/O Error handler + void set_io_error(); + void set_bad(bool is_bad) { _is_bad = is_bad; } + + int64_t last_compaction_failure_time() { return _last_compaction_failure_time; } + + void set_last_compaction_failure_time(int64_t time) { + _last_compaction_failure_time = time; + } + + void delete_all_files(); + + bool check_path(const std::string& check_path); + + // check rowset_id is valid + bool check_rowset_id(RowsetId rowset_id); + + OLAPStatus next_rowset_id(RowsetId* id); + OLAPStatus set_next_rowset_id(RowsetId new_rowset_id); + + OLAPStatus set_partition_id(int64_t partition_id); + + RowsetId initial_end_rowset_id() { + return _tablet_meta->initial_end_rowset_id(); + } + +private: + void _print_missed_versions(const std::vector& missed_versions) const; + OLAPStatus _check_added_rowset(const RowsetSharedPtr& rowset); + +private: + TabletState _state; + TabletMetaSharedPtr _tablet_meta; + TabletSchema _schema; + + DataDir* _data_dir; + std::string _tablet_path; + RowsetGraph _rs_graph; + + DorisInitOnce _init_once; + RWMutex _meta_lock; + Mutex _ingest_lock; + Mutex _base_lock; + Mutex _cumulative_lock; + RWMutex _migration_lock; + std::unordered_map _rs_version_map; + std::unordered_map _inc_rs_version_map; + + std::atomic _is_bad; // if this tablet is broken, set to true. default is false + std::atomic _last_compaction_failure_time; // timestamp of last compaction failure + + DISALLOW_COPY_AND_ASSIGN(Tablet); +}; + +inline bool Tablet::init_succeeded() { + return _init_once.init_succeeded(); +} + +inline DataDir* Tablet::data_dir() const { + return _data_dir; +} + +inline OLAPStatus Tablet::set_tablet_state(TabletState state) { + RETURN_NOT_OK(_tablet_meta->set_tablet_state(state)); + _state = state; + return OLAP_SUCCESS; +} + +inline const TabletMetaSharedPtr Tablet::tablet_meta() { + return _tablet_meta; +} + +inline int64_t Tablet::table_id() const { + return _tablet_meta->table_id(); +} + +inline const std::string Tablet::full_name() const { + std::stringstream ss; + ss << _tablet_meta->tablet_id() + << "." << _tablet_meta->schema_hash() + << "." 
<< _tablet_meta->tablet_uid().to_string(); + return ss.str(); +} + +inline int64_t Tablet::partition_id() const { + return _tablet_meta->partition_id(); +} + +inline int64_t Tablet::tablet_id() const { + return _tablet_meta->tablet_id(); +} + +inline int32_t Tablet::schema_hash() const { + return _tablet_meta->schema_hash(); +} + +inline int16_t Tablet::shard_id() { + return _tablet_meta->shard_id(); +} + +inline const int64_t Tablet::creation_time() const { + return _tablet_meta->creation_time(); +} // namespace doris + +inline void Tablet::set_creation_time(int64_t creation_time) { + _tablet_meta->set_creation_time(creation_time); +} + +inline const int64_t Tablet::cumulative_layer_point() const { + return _tablet_meta->cumulative_layer_point(); +} + +void inline Tablet::set_cumulative_layer_point(const int64_t new_point) { + return _tablet_meta->set_cumulative_layer_point(new_point); +} + +inline bool Tablet::equal(int64_t tablet_id, int32_t schema_hash) { + return (_tablet_meta->tablet_id() == tablet_id) && (_tablet_meta->schema_hash() == schema_hash); +} + +inline size_t Tablet::tablet_footprint() { + ReadLock rdlock(&_meta_lock); + return _tablet_meta->tablet_footprint(); +} + +inline size_t Tablet::num_rows() { + ReadLock rdlock(&_meta_lock); + return _tablet_meta->num_rows(); +} + +inline int Tablet::version_count() const { + return _tablet_meta->version_count(); +} + +inline Version Tablet::max_version() const { + return _tablet_meta->max_version(); +} + +inline const TabletSchema& Tablet::tablet_schema() const { + return _schema; +} + +inline KeysType Tablet::keys_type() const { + return _schema.keys_type(); +} + +inline size_t Tablet::num_columns() const { + return _schema.num_columns(); +} + +inline size_t Tablet::num_null_columns() const { + return _schema.num_null_columns(); +} + +inline size_t Tablet::num_key_columns() const { + return _schema.num_key_columns(); +} + +inline size_t Tablet::num_short_key_columns() const { + return _schema.num_short_key_columns(); +} + +inline size_t Tablet::num_rows_per_row_block() const { + return _schema.num_rows_per_row_block(); +} + +inline CompressKind Tablet::compress_kind() const { + return _schema.compress_kind(); +} + +inline double Tablet::bloom_filter_fpp() const { + return _schema.bloom_filter_fpp(); +} + +inline size_t Tablet::next_unique_id() const { + return _schema.next_column_unique_id(); +} + +inline size_t Tablet::field_index(const string& field_name) const { + return _schema.field_index(field_name); +} + +inline size_t Tablet::row_size() const { + return _schema.row_size(); +} + +} + +#endif // DORIS_BE_SRC_OLAP_TABLET_H diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp new file mode 100755 index 00000000000000..c828990c473334 --- /dev/null +++ b/be/src/olap/tablet_manager.cpp @@ -0,0 +1,1370 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/tablet_manager.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "olap/base_compaction.h" +#include "olap/cumulative_compaction.h" +#include "olap/lru_cache.h" +#include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" +#include "olap/push_handler.h" +#include "olap/reader.h" +#include "olap/schema_change.h" +#include "olap/data_dir.h" +#include "olap/utils.h" +#include "olap/olap_common.h" +#include "olap/rowset/column_data_writer.h" +#include "olap/rowset/rowset_id_generator.h" +#include "util/time.h" +#include "util/doris_metrics.h" +#include "util/pretty_printer.h" + +using apache::thrift::ThriftDebugString; +using boost::filesystem::canonical; +using boost::filesystem::directory_iterator; +using boost::filesystem::path; +using boost::filesystem::recursive_directory_iterator; +using std::back_inserter; +using std::copy; +using std::inserter; +using std::list; +using std::map; +using std::nothrow; +using std::pair; +using std::priority_queue; +using std::set; +using std::set_difference; +using std::string; +using std::stringstream; +using std::vector; + +namespace doris { + +bool _sort_tablet_by_creation_time(const TabletSharedPtr& a, const TabletSharedPtr& b) { + return a->creation_time() < b->creation_time(); +} + +TabletManager::TabletManager() + : _tablet_stat_cache_update_time_ms(0), + _available_storage_medium_type_count(0) { } + +OLAPStatus TabletManager::_add_tablet_unlock(TTabletId tablet_id, SchemaHash schema_hash, + const TabletSharedPtr& tablet, bool update_meta, bool force) { + OLAPStatus res = OLAP_SUCCESS; + VLOG(3) << "begin to add tablet to TabletManager. " + << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << ", force=" << force; + + TabletSharedPtr table_item = nullptr; + for (TabletSharedPtr item : _tablet_map[tablet_id].table_arr) { + if (item->equal(tablet_id, schema_hash)) { + table_item = item; + break; + } + } + + if (table_item == nullptr) { + VLOG(3) << "not find exist tablet just add it to map" + << " tablet_id = " << tablet_id + << " schema_hash = " << schema_hash; + return _add_tablet_to_map(tablet_id, schema_hash, tablet, update_meta, false, false); + } + + if (!force) { + if (table_item->tablet_path() == tablet->tablet_path()) { + LOG(WARNING) << "add the same tablet twice! tablet_id=" + << tablet_id << " schema_hash=" << schema_hash; + return OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE; + } + if (table_item->data_dir() == tablet->data_dir()) { + LOG(WARNING) << "add tablet with same data dir twice! tablet_id=" + << tablet_id << " schema_hash=" << schema_hash; + return OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE; + } + } + + table_item->obtain_header_rdlock(); + const RowsetSharedPtr old_rowset = table_item->rowset_with_max_version(); + const RowsetSharedPtr new_rowset = tablet->rowset_with_max_version(); + + // if new tablet is empty, it is a newly created schema change tablet + // the old tablet is dropped before add tablet. it should not exist old tablet + if (new_rowset == nullptr) { + table_item->release_header_lock(); + // it seems useless to call unlock and return here. + // it could prevent error when log level is changed in the future. + LOG(FATAL) << "new tablet is empty and old tablet exists. it should not happen." 
+ << " tablet_id=" << tablet_id << " schema_hash=" << schema_hash; + return OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE; + } + int64_t old_time = old_rowset == nullptr ? -1 : old_rowset->creation_time(); + int64_t new_time = new_rowset->creation_time(); + int32_t old_version = old_rowset == nullptr ? -1 : old_rowset->end_version(); + int32_t new_version = new_rowset->end_version(); + table_item->release_header_lock(); + + /* + * In restore process, we replace all origin files in tablet dir with + * the downloaded snapshot files. Than we try to reload tablet header. + * force == true means we forcibly replace the Tablet in _tablet_map + * with the new one. But if we do so, the files in the tablet dir will be + * dropped when the origin Tablet deconstruct. + * So we set keep_files == true to not delete files when the + * origin Tablet deconstruct. + */ + bool keep_files = force ? true : false; + if (force || (new_version > old_version + || (new_version == old_version && new_time > old_time))) { + // check if new tablet's meta is in store and add new tablet's meta to meta store + res = _add_tablet_to_map(tablet_id, schema_hash, tablet, update_meta, keep_files, true); + } else { + res = OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE; + } + LOG(WARNING) << "add duplicated tablet. force=" << force << ", res=" << res + << ", tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << ", old_version=" << old_version << ", new_version=" << new_version + << ", old_time=" << old_time << ", new_time=" << new_time + << ", old_tablet_path=" << table_item->tablet_path() + << ", new_tablet_path=" << tablet->tablet_path(); + + return res; +} // add_tablet + +OLAPStatus TabletManager::_add_tablet_to_map(TTabletId tablet_id, SchemaHash schema_hash, + const TabletSharedPtr& tablet, bool update_meta, + bool keep_files, bool drop_old) { + // check if new tablet's meta is in store and add new tablet's meta to meta store + OLAPStatus res = OLAP_SUCCESS; + if (update_meta) { + // call tablet save meta in order to valid the meta + res = tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to save new tablet's meta to meta store" + << " tablet_id = " << tablet_id + << " schema_hash = " << schema_hash; + return res; + } + } + if (drop_old) { + // if the new tablet is fresher than current one + // then delete current one and add new one + res = _drop_tablet_unlock(tablet_id, schema_hash, keep_files); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to drop old tablet when add new tablet" + << " tablet_id = " << tablet_id + << " schema_hash = " << schema_hash; + return res; + } + } + // Register tablet into StorageEngine, so that we can manage tablet from + // the perspective of root path. + // Example: unregister all tables when a bad disk found. + res = tablet->register_tablet_into_dir(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to register tablet into StorageEngine. 
res=" << res
+                     << ", data_dir=" << tablet->data_dir()->path();
+        return res;
+    }
+    _tablet_map[tablet_id].table_arr.push_back(tablet);
+    _tablet_map[tablet_id].table_arr.sort(_sort_tablet_by_creation_time);
+    VLOG(3) << "add tablet to map successfully"
+            << " tablet_id = " << tablet_id
+            << " schema_hash = " << schema_hash;
+    return res;
+}
+
+// This method is called when the engine restarts, so it does not take any locks.
+void TabletManager::cancel_unfinished_schema_change() {
+    // When the engine exits during a schema change, the schema change info is still
+    // kept in the tablet header. After the engine restarts, that info has to be
+    // cleared here; the upper layer will redo the schema change.
+    uint64_t canceled_num = 0;
+    LOG(INFO) << "begin to cancel unfinished schema change.";
+
+    for (const auto& tablet_instance : _tablet_map) {
+        for (TabletSharedPtr tablet : tablet_instance.second.table_arr) {
+            if (tablet == nullptr) {
+                LOG(WARNING) << "tablet does not exist. tablet_id=" << tablet_instance.first;
+                continue;
+            }
+            AlterTabletTaskSharedPtr alter_task = tablet->alter_task();
+            // if the alter task's state is already finished, there is nothing to do
+            if (alter_task == nullptr || alter_task->alter_state() == ALTER_FINISHED) {
+                continue;
+            }
+
+            OLAPStatus res = tablet->set_alter_state(ALTER_FAILED);
+            if (res != OLAP_SUCCESS) {
+                LOG(FATAL) << "fail to set alter state. res=" << res
+                           << ", base_tablet=" << tablet->full_name();
+                return;
+            }
+            res = tablet->save_meta();
+            if (res != OLAP_SUCCESS) {
+                LOG(FATAL) << "fail to save base tablet meta. res=" << res
+                           << ", base_tablet=" << tablet->full_name();
+                return;
+            }
+
+            LOG(INFO) << "cancel unfinished alter tablet task. base_tablet=" << tablet->full_name();
+            ++canceled_num;
+        }
+    }
+
+    LOG(INFO) << "finish to cancel unfinished schema change! canceled_num=" << canceled_num;
+}
+
+bool TabletManager::check_tablet_id_exist(TTabletId tablet_id) {
+    ReadLock rlock(&_tablet_map_lock);
+    return _check_tablet_id_exist_unlock(tablet_id);
+} // check_tablet_id_exist
+
+bool TabletManager::_check_tablet_id_exist_unlock(TTabletId tablet_id) {
+    bool is_exist = false;
+
+    tablet_map_t::iterator it = _tablet_map.find(tablet_id);
+    if (it != _tablet_map.end() && it->second.table_arr.size() != 0) {
+        is_exist = true;
+    }
+    return is_exist;
+} // _check_tablet_id_exist_unlock
+
+void TabletManager::clear() {
+    _tablet_map.clear();
+    _shutdown_tablets.clear();
+} // clear
+
+OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request,
+                                        std::vector<DataDir*> stores) {
+    WriteLock wrlock(&_tablet_map_lock);
+    LOG(INFO) << "begin to process create tablet. tablet=" << request.tablet_id
+              << ", schema_hash=" << request.tablet_schema.schema_hash;
+    OLAPStatus res = OLAP_SUCCESS;
+    DorisMetrics::create_tablet_requests_total.increment(1);
+    // Make sure the create_tablet operation is idempotent:
+    // return success if a tablet with the same tablet_id and schema_hash already exists,
+    // and an error if a tablet with the same tablet_id but a different schema_hash exists.
+    if (_check_tablet_id_exist_unlock(request.tablet_id)) {
+        TabletSharedPtr tablet = _get_tablet_with_no_lock(
+                request.tablet_id, request.tablet_schema.schema_hash);
+        if (tablet != nullptr) {
+            LOG(INFO) << "create tablet success for tablet already exist.";
+            return OLAP_SUCCESS;
+        } else {
+            LOG(WARNING) << "tablet with different schema hash already exists.";
+            return OLAP_ERR_CE_TABLET_ID_EXIST;
+        }
+    }
+    // set alter type to schema change.
it is useless + TabletSharedPtr tablet = _internal_create_tablet(AlterTabletType::SCHEMA_CHANGE, request, false, nullptr, stores); + if (tablet == nullptr) { + res = OLAP_ERR_CE_CMD_PARAMS_ERROR; + LOG(WARNING) << "fail to create tablet. res=" << res; + } + + LOG(INFO) << "finish to process create tablet. res=" << res; + return res; +} // create_tablet + +TabletSharedPtr TabletManager::create_tablet(const AlterTabletType alter_type, + const TCreateTabletReq& request, const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, std::vector data_dirs) { + DCHECK(is_schema_change_tablet && ref_tablet != nullptr); + WriteLock wrlock(&_tablet_map_lock); + return _internal_create_tablet(alter_type, request, is_schema_change_tablet, + ref_tablet, data_dirs); +} + +TabletSharedPtr TabletManager::_internal_create_tablet(const AlterTabletType alter_type, + const TCreateTabletReq& request, const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, std::vector data_dirs) { + DCHECK((is_schema_change_tablet && ref_tablet != nullptr) || (!is_schema_change_tablet && ref_tablet == nullptr)); + // check if the tablet with specified tablet id and schema hash already exists + TabletSharedPtr checked_tablet = _get_tablet_with_no_lock(request.tablet_id, request.tablet_schema.schema_hash); + if (checked_tablet != nullptr) { + LOG(WARNING) << "failed to create tablet because tablet already exist." + << " tablet id = " << request.tablet_id + << " schema hash = " << request.tablet_schema.schema_hash; + return nullptr; + } + bool is_tablet_added = false; + TabletSharedPtr tablet = _create_tablet_meta_and_dir(request, is_schema_change_tablet, + ref_tablet, data_dirs); + if (tablet == nullptr) { + return nullptr; + } + + OLAPStatus res = OLAP_SUCCESS; + do { + res = tablet->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "tablet init failed. tablet:" << tablet->full_name(); + break; + } + if (!is_schema_change_tablet) { + // Create init version if this is not a restore mode replica and request.version is set + // bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; + // if (!in_restore_mode && request.__isset.version) { + // create inital rowset before add it to storage engine could omit many locks + res = _create_inital_rowset(tablet, request); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create initial version for tablet. res=" << res; + break; + } + } else { + // add alter task to new tablet if it is a new tablet during schema change + tablet->add_alter_task(ref_tablet->tablet_id(), ref_tablet->schema_hash(), + vector(), alter_type); + // 有可能出现以下2种特殊情况: + // 1. 因为操作系统时间跳变,导致新生成的表的creation_time小于旧表的creation_time时间 + // 2. 
因为olap engine代码中统一以秒为单位,所以如果2个操作(比如create一个表, + // 然后立即alter该表)之间的时间间隔小于1s,则alter得到的新表和旧表的creation_time会相同 + // + // 当出现以上2种情况时,为了能够区分alter得到的新表和旧表,这里把新表的creation_time设置为 + // 旧表的creation_time加1 + if (tablet->creation_time() <= ref_tablet->creation_time()) { + LOG(WARNING) << "new tablet's creation time is less than or equal to old tablet" + << "new_tablet_creation_time=" << tablet->creation_time() + << ", ref_tablet_creation_time=" << ref_tablet->creation_time(); + int64_t new_creation_time = ref_tablet->creation_time() + 1; + tablet->set_creation_time(new_creation_time); + } + } + + // Add tablet to StorageEngine will make it visiable to user + res = _add_tablet_unlock(request.tablet_id, request.tablet_schema.schema_hash, tablet, true, false); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to add tablet to StorageEngine. res=" << res; + break; + } + is_tablet_added = true; + TabletSharedPtr tablet_ptr = _get_tablet_with_no_lock(request.tablet_id, request.tablet_schema.schema_hash); + if (tablet_ptr == nullptr) { + res = OLAP_ERR_TABLE_NOT_FOUND; + LOG(WARNING) << "fail to get tablet. res=" << res; + break; + } + } while (0); + + // should remove the pending path of tablet id no matter create tablet success or not + tablet->data_dir()->remove_pending_ids(TABLET_ID_PREFIX + std::to_string(request.tablet_id)); + + // clear environment + if (res != OLAP_SUCCESS) { + DorisMetrics::create_tablet_requests_failed.increment(1); + if (is_tablet_added) { + OLAPStatus status = _drop_tablet_unlock( + request.tablet_id, request.tablet_schema.schema_hash, false); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "fail to drop tablet when create tablet failed. res=" << res; + } + } else { + tablet->delete_all_files(); + TabletMetaManager::remove(tablet->data_dir(), request.tablet_id, request.tablet_schema.schema_hash); + } + return nullptr; + } else { + LOG(INFO) << "finish to process create tablet. res=" << res; + return tablet; + } +} // create_tablet + +TabletSharedPtr TabletManager::_create_tablet_meta_and_dir( + const TCreateTabletReq& request, const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, std::vector data_dirs) { + TabletSharedPtr tablet; + // Try to create tablet on each of all_available_root_path, util success + DataDir* last_dir = nullptr; + for (auto& data_dir : data_dirs) { + if (last_dir != nullptr) { + // if last dir != null, it means preivous create tablet retry failed + last_dir->remove_pending_ids(TABLET_ID_PREFIX + std::to_string(request.tablet_id)); + } + last_dir = data_dir; + TabletMetaSharedPtr tablet_meta; + // if create meta faild, do not need to clean dir, because it is only in memory + OLAPStatus res = _create_tablet_meta(request, data_dir, is_schema_change_tablet, ref_tablet, &tablet_meta); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to create tablet meta. res=" << res << ", root=" << data_dir->path(); + continue; + } + + + stringstream schema_hash_dir_stream; + schema_hash_dir_stream << data_dir->path() + << DATA_PREFIX + << "/" << tablet_meta->shard_id() + << "/" << request.tablet_id + << "/" << request.tablet_schema.schema_hash; + string schema_hash_dir = schema_hash_dir_stream.str(); + boost::filesystem::path schema_hash_path(schema_hash_dir); + boost::filesystem::path tablet_path = schema_hash_path.parent_path(); + std::string tablet_dir = tablet_path.string(); + // because the tablet is removed async, so that the dir may still exist + // when be receive create tablet again. 
For example redo schema change + if (check_dir_existed(schema_hash_dir)) { + LOG(WARNING) << "skip this dir because tablet path exist, path="<< schema_hash_dir; + continue; + } else { + data_dir->add_pending_ids(TABLET_ID_PREFIX + std::to_string(request.tablet_id)); + res = create_dirs(schema_hash_dir); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "create dir fail. [res=" << res << " path:" << schema_hash_dir; + continue; + } + } + + tablet = Tablet::create_tablet_from_meta(tablet_meta, data_dir); + if (tablet == nullptr) { + LOG(WARNING) << "fail to load tablet from tablet_meta. root_path:" << data_dir->path(); + res = remove_all_dir(tablet_dir); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "remove tablet dir:" << tablet_dir; + } + continue; + } + break; + } + return tablet; +} + +// Drop tablet specified, the main logical is as follows: +// 1. tablet not in schema change: +// drop specified tablet directly; +// 2. tablet in schema change: +// a. schema change not finished && dropped tablet is base : +// base tablet cannot be dropped; +// b. other cases: +// drop specified tablet and clear schema change info. +OLAPStatus TabletManager::drop_tablet( + TTabletId tablet_id, SchemaHash schema_hash, bool keep_files) { + WriteLock wlock(&_tablet_map_lock); + return _drop_tablet_unlock(tablet_id, schema_hash, keep_files); +} // drop_tablet + + +// Drop tablet specified, the main logical is as follows: +// 1. tablet not in schema change: +// drop specified tablet directly; +// 2. tablet in schema change: +// a. schema change not finished && dropped tablet is base : +// base tablet cannot be dropped; +// b. other cases: +// drop specified tablet and clear schema change info. +OLAPStatus TabletManager::_drop_tablet_unlock( + TTabletId tablet_id, SchemaHash schema_hash, bool keep_files) { + LOG(INFO) << "begin to process drop tablet." + << "tablet=" << tablet_id << ", schema_hash=" << schema_hash; + DorisMetrics::drop_tablet_requests_total.increment(1); + + OLAPStatus res = OLAP_SUCCESS; + + // Get tablet which need to be droped + TabletSharedPtr dropped_tablet = _get_tablet_with_no_lock(tablet_id, schema_hash); + if (dropped_tablet == nullptr) { + LOG(WARNING) << "tablet to drop does not exist already." + << " tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + return OLAP_SUCCESS; + } + + // Try to get schema change info + AlterTabletTaskSharedPtr alter_task = dropped_tablet->alter_task(); + + // Drop tablet directly when not in schema change + if (alter_task == nullptr) { + return _drop_tablet_directly_unlocked(tablet_id, schema_hash, keep_files); + } + + AlterTabletState alter_state = alter_task->alter_state(); + TTabletId related_tablet_id = alter_task->related_tablet_id(); + TSchemaHash related_schema_hash = alter_task->related_schema_hash();; + + // Check tablet is in schema change or not, is base tablet or not + bool is_schema_change_finished = (alter_state == ALTER_FINISHED || alter_state == ALTER_FAILED); + + bool is_drop_base_tablet = false; + TabletSharedPtr related_tablet = _get_tablet_with_no_lock( + related_tablet_id, related_schema_hash); + if (related_tablet == nullptr) { + LOG(WARNING) << "drop tablet directly when related tablet not found. 
" + << " tablet_id=" << related_tablet_id + << " schema_hash=" << related_schema_hash; + return _drop_tablet_directly_unlocked(tablet_id, schema_hash, keep_files); + } + + if (dropped_tablet->creation_time() < related_tablet->creation_time()) { + is_drop_base_tablet = true; + } + + if (is_drop_base_tablet && !is_schema_change_finished) { + LOG(WARNING) << "base tablet in schema change cannot be droped. tablet=" + << dropped_tablet->full_name(); + return OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED; + } + + // Drop specified tablet and clear schema change info + // must first break the link and then drop the tablet + // if drop tablet, then break link. the link maybe exists but the tablet + // not exist when be restarts + related_tablet->obtain_header_wrlock(); + // should check the related tablet id in alter task is current tablet to be dropped + // A related to B, BUT B related to C + // if drop A, should not clear B's alter task + AlterTabletTaskSharedPtr related_alter_task = related_tablet->alter_task(); + if (related_alter_task != nullptr && related_alter_task->related_tablet_id() == tablet_id + && related_alter_task->related_schema_hash() == schema_hash) { + related_tablet->delete_alter_task(); + res = related_tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save tablet header. res=" << res + << ", tablet=" << related_tablet->full_name(); + } + } + related_tablet->release_header_lock(); + res = _drop_tablet_directly_unlocked(tablet_id, schema_hash, keep_files); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to drop tablet which in schema change. tablet=" + << dropped_tablet->full_name(); + return res; + } + + LOG(INFO) << "finish to drop tablet. res=" << res; + return res; +} // drop_tablet_unlock + +OLAPStatus TabletManager::drop_tablets_on_error_root_path( + const vector& tablet_info_vec) { + OLAPStatus res = OLAP_SUCCESS; + WriteLock wlock(&_tablet_map_lock); + + for (const TabletInfo& tablet_info : tablet_info_vec) { + TTabletId tablet_id = tablet_info.tablet_id; + TSchemaHash schema_hash = tablet_info.schema_hash; + VLOG(3) << "drop_tablet begin. tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + TabletSharedPtr dropped_tablet = _get_tablet_with_no_lock(tablet_id, schema_hash); + if (dropped_tablet == nullptr) { + LOG(WARNING) << "dropping tablet not exist. 
" + << " tablet=" << tablet_id + << " schema_hash=" << schema_hash; + continue; + } else { + for (list::iterator it = _tablet_map[tablet_id].table_arr.begin(); + it != _tablet_map[tablet_id].table_arr.end();) { + if ((*it)->equal(tablet_id, schema_hash)) { + it = _tablet_map[tablet_id].table_arr.erase(it); + } else { + ++it; + } + } + } + } + + return res; +} // drop_tablets_on_error_root_path + +TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + bool include_deleted, std::string* err) { + ReadLock rlock(&_tablet_map_lock); + return _get_tablet(tablet_id, schema_hash, include_deleted, err); +} // get_tablet + +TabletSharedPtr TabletManager::_get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + bool include_deleted, std::string* err) { + TabletSharedPtr tablet; + tablet = _get_tablet_with_no_lock(tablet_id, schema_hash); + if (tablet == nullptr && include_deleted) { + for (auto& deleted_tablet : _shutdown_tablets) { + CHECK(deleted_tablet != nullptr) << "deleted tablet in nullptr"; + if (deleted_tablet->tablet_id() == tablet_id && deleted_tablet->schema_hash() == schema_hash) { + tablet = deleted_tablet; + break; + } + } + } + + if (tablet != nullptr) { + if (!tablet->is_used()) { + LOG(WARNING) << "tablet cannot be used. tablet=" << tablet_id; + if (err != nullptr) { *err = "tablet cannot be used"; } + tablet.reset(); + } + } else if (err != nullptr) { + *err = "tablet does not exist"; + } + + return tablet; +} // get_tablet + +TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + TabletUid tablet_uid, bool include_deleted, + std::string* err) { + ReadLock rlock(&_tablet_map_lock); + TabletSharedPtr tablet = _get_tablet(tablet_id, schema_hash, include_deleted, err); + if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) { + return tablet; + } + return nullptr; +} // get_tablet + +bool TabletManager::get_tablet_id_and_schema_hash_from_path(const std::string& path, + TTabletId* tablet_id, TSchemaHash* schema_hash) { + std::vector data_dirs = StorageEngine::instance()->get_stores(); + for (auto data_dir : data_dirs) { + const std::string& data_dir_path = data_dir->path(); + if (path.find(data_dir_path) != std::string::npos) { + std::string pattern = data_dir_path + "/data/\\d+/(\\d+)/?(\\d+)?"; + std::regex rgx (pattern.c_str()); + std::smatch sm; + bool ret = std::regex_search(path, sm, rgx); + if (ret) { + if (sm.size() == 3) { + *tablet_id = std::strtoll(sm.str(1).c_str(), nullptr, 10); + *schema_hash = std::strtoll(sm.str(2).c_str(), nullptr, 10); + return true; + } else { + LOG(WARNING) << "invalid match. 
match size:" << sm.size(); + return false; + } + } + } + } + return false; +} + +bool TabletManager::get_rowset_id_from_path(const std::string& path, RowsetId* rowset_id) { + static std::regex rgx ("/data/\\d+/\\d+/\\d+/(\\d+)_.*"); + std::smatch sm; + bool ret = std::regex_search(path, sm, rgx); + if (ret) { + if (sm.size() == 2) { + *rowset_id = std::strtoll(sm.str(1).c_str(), nullptr, 10); + return true; + } else { + return false; + } + } + return false; +} + +void TabletManager::get_tablet_stat(TTabletStatResult& result) { + VLOG(3) << "begin to get all tablet stat."; + + // get current time + int64_t current_time = UnixMillis(); + WriteLock wlock(&_tablet_map_lock); + // update cache if too old + if (current_time - _tablet_stat_cache_update_time_ms > + config::tablet_stat_cache_update_interval_second * 1000) { + VLOG(3) << "update tablet stat."; + _build_tablet_stat(); + } + + result.__set_tablets_stats(_tablet_stat_cache); +} // get_tablet_stat + +TabletSharedPtr TabletManager::find_best_tablet_to_compaction( + CompactionType compaction_type, DataDir* data_dir) { + ReadLock tablet_map_rdlock(&_tablet_map_lock); + uint32_t highest_score = 0; + TabletSharedPtr best_tablet; + int64_t now = UnixMillis(); + for (tablet_map_t::value_type& table_ins : _tablet_map){ + for (TabletSharedPtr& table_ptr : table_ins.second.table_arr) { + AlterTabletTaskSharedPtr cur_alter_task = table_ptr->alter_task(); + if (cur_alter_task != nullptr && cur_alter_task->alter_state() != ALTER_FINISHED + && cur_alter_task->alter_state() != ALTER_FAILED) { + TabletSharedPtr related_tablet = _get_tablet_with_no_lock(cur_alter_task->related_tablet_id(), + cur_alter_task->related_schema_hash()); + if (related_tablet != nullptr && table_ptr->creation_time() > related_tablet->creation_time()) { + // it means cur tablet is a new tablet during schema change or rollup, skip compaction + continue; + } + } + + if (table_ptr->data_dir()->path_hash() != data_dir->path_hash() + || !table_ptr->is_used() || !table_ptr->init_succeeded() || !table_ptr->can_do_compaction()) { + continue; + } + + if (now - table_ptr->last_compaction_failure_time() <= config::min_compaction_failure_interval_sec * 1000) { + continue; + } + + if (compaction_type == CompactionType::CUMULATIVE_COMPACTION) { + if (!table_ptr->try_cumulative_lock()) { + continue; + } else { + table_ptr->release_cumulative_lock(); + } + } + + if (compaction_type == CompactionType::BASE_COMPACTION) { + if (!table_ptr->try_base_compaction_lock()) { + continue; + } else { + table_ptr->release_base_compaction_lock(); + } + } + + ReadLock rdlock(table_ptr->get_header_lock_ptr()); + uint32_t table_score = 0; + if (compaction_type == CompactionType::BASE_COMPACTION) { + table_score = table_ptr->calc_base_compaction_score(); + } else if (compaction_type == CompactionType::CUMULATIVE_COMPACTION) { + table_score = table_ptr->calc_cumulative_compaction_score(); + } + if (table_score > highest_score) { + highest_score = table_score; + best_tablet = table_ptr; + } + } + } + + if (best_tablet != nullptr) { + LOG(INFO) << "find best tablet to do compaction." + << " type: " << (compaction_type == CompactionType::CUMULATIVE_COMPACTION ? 
"cumulative" : "base") + << ", tablet id: " << best_tablet->tablet_id() << ", score: " << highest_score; + } + return best_tablet; +} + +OLAPStatus TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id, + TSchemaHash schema_hash, const std::string& meta_binary, bool update_meta, bool force) { + WriteLock wlock(&_tablet_map_lock); + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + OLAPStatus status = tablet_meta->deserialize(meta_binary); + if (status != OLAP_SUCCESS) { + LOG(WARNING) << "parse meta_binary string failed for tablet_id:" << tablet_id << ", schema_hash:" << schema_hash; + return OLAP_ERR_HEADER_PB_PARSE_FAILED; + } + + // check if tablet meta is valid + if (tablet_meta->tablet_id() != tablet_id || tablet_meta->schema_hash() != schema_hash) { + LOG(WARNING) << "tablet meta load from meta is invalid" + << " input tablet id=" << tablet_id + << " input tablet schema_hash=" << schema_hash + << " meta tablet=" << tablet_meta->full_name(); + return OLAP_ERR_HEADER_PB_PARSE_FAILED; + } + if (tablet_meta->tablet_uid().hi == 0 && tablet_meta->tablet_uid().lo == 0) { + LOG(WARNING) << "not load this tablet because uid == 0" + << " tablet=" << tablet_meta->full_name(); + return OLAP_ERR_HEADER_PB_PARSE_FAILED; + } + + // init must be called + TabletSharedPtr tablet = Tablet::create_tablet_from_meta(tablet_meta, data_dir); + if (tablet == nullptr) { + LOG(WARNING) << "fail to new tablet. tablet_id=" << tablet_id << ", schema_hash:" << schema_hash; + return OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR; + } + + if (tablet_meta->tablet_state() == TABLET_SHUTDOWN) { + LOG(INFO) << "tablet is to be deleted, skip load it" + << " tablet id = " << tablet_meta->tablet_id() + << " schema hash = " << tablet_meta->schema_hash(); + _shutdown_tablets.push_back(tablet); + return OLAP_ERR_TABLE_ALREADY_DELETED_ERROR; + } + + if (tablet->max_version().first == -1 && tablet->alter_task() == nullptr) { + LOG(WARNING) << "tablet not in schema change state without delta is invalid." + << "tablet=" << tablet->full_name(); + // tablet state is invalid, drop tablet + return OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR; + } + + OLAPStatus res = tablet->init(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "tablet init failed. tablet:" << tablet->full_name(); + return res; + } + res = _add_tablet_unlock(tablet_id, schema_hash, tablet, update_meta, force); + if (res != OLAP_SUCCESS) { + // insert existed tablet return OLAP_SUCCESS + if (res == OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE) { + LOG(WARNING) << "add duplicate tablet. tablet=" << tablet->full_name(); + } + + LOG(WARNING) << "failed to add tablet. tablet=" << tablet->full_name(); + return res; + } + + return OLAP_SUCCESS; +} // load_tablet_from_meta + +OLAPStatus TabletManager::load_tablet_from_dir( + DataDir* store, TTabletId tablet_id, SchemaHash schema_hash, + const string& schema_hash_path, bool force) { + LOG(INFO) << "begin to load tablet from dir. 
" + << " tablet_id=" << tablet_id + << " schema_hash=" << schema_hash + << " path = " << schema_hash_path; + // not add lock here, because load_tablet_from_meta already add lock + string header_path = TabletMeta::construct_header_file_path(schema_hash_path, tablet_id); + // should change shard id before load tablet + path boost_header_path(header_path); + std::string shard_path = boost_header_path.parent_path().parent_path().parent_path().string(); + std::string shard_str = shard_path.substr(shard_path.find_last_of('/') + 1); + int32_t shard = stol(shard_str); + // load dir is called by clone, restore, storage migration + // should change tablet uid when tablet object changed + OLAPStatus res = TabletMeta::reset_tablet_uid(header_path); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to set tablet uid when copied tablet meta file" + << " header_path=" << header_path; + return res; + } + TabletMetaSharedPtr tablet_meta(new(nothrow) TabletMeta()); + do { + if (access(header_path.c_str(), F_OK) != 0) { + LOG(WARNING) << "fail to find header file. [header_path=" << header_path << "]"; + res = OLAP_ERR_FILE_NOT_EXIST; + break; + } + if (tablet_meta == nullptr) { + LOG(WARNING) << "fail to malloc TabletMeta."; + res = OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; + break; + } + + if (tablet_meta->create_from_file(header_path) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load tablet_meta. file_path=" << header_path; + res = OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; + break; + } + // has to change shard id here, because meta file maybe copyed from other source + // its shard is different from local shard + tablet_meta->set_shard_id(shard); + std::string meta_binary; + tablet_meta->serialize(&meta_binary); + res = load_tablet_from_meta(store, tablet_id, schema_hash, meta_binary, true, force); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load tablet. [header_path=" << header_path << "]"; + res = OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR; + break; + } + } while (0); + return res; +} // load_tablet_from_dir + +void TabletManager::release_schema_change_lock(TTabletId tablet_id) { + VLOG(3) << "release_schema_change_lock begin. tablet_id=" << tablet_id; + ReadLock rlock(&_tablet_map_lock); + + tablet_map_t::iterator it = _tablet_map.find(tablet_id); + if (it == _tablet_map.end()) { + LOG(WARNING) << "tablet does not exists. tablet=" << tablet_id; + } else { + it->second.schema_change_lock.unlock(); + } + VLOG(3) << "release_schema_change_lock end. tablet_id=" << tablet_id; +} // release_schema_change_lock + +OLAPStatus TabletManager::report_tablet_info(TTabletInfo* tablet_info) { + DorisMetrics::report_tablet_requests_total.increment(1); + LOG(INFO) << "begin to process report tablet info." + << "tablet_id=" << tablet_info->tablet_id + << ", schema_hash=" << tablet_info->schema_hash; + + OLAPStatus res = OLAP_SUCCESS; + + TabletSharedPtr tablet = get_tablet( + tablet_info->tablet_id, tablet_info->schema_hash); + if (tablet == nullptr) { + LOG(WARNING) << "can't find tablet. 
" + << " tablet=" << tablet_info->tablet_id + << " schema_hash=" << tablet_info->schema_hash; + return OLAP_ERR_TABLE_NOT_FOUND; + } + + _build_tablet_info(tablet, tablet_info); + LOG(INFO) << "success to process report tablet info."; + return res; +} // report_tablet_info + +OLAPStatus TabletManager::report_all_tablets_info(std::map* tablets_info) { + LOG(INFO) << "begin to process report all tablets info."; + ReadLock rlock(&_tablet_map_lock); + DorisMetrics::report_all_tablets_requests_total.increment(1); + + if (tablets_info == nullptr) { + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + for (const auto& item : _tablet_map) { + if (item.second.table_arr.size() == 0) { + continue; + } + + TTablet tablet; + for (TabletSharedPtr tablet_ptr : item.second.table_arr) { + if (tablet_ptr == nullptr) { + continue; + } + + TTabletInfo tablet_info; + _build_tablet_info(tablet_ptr, &tablet_info); + + // report expire transaction + vector transaction_ids; + // TODO(ygl): tablet manager and txn manager may be dead lock + StorageEngine::instance()->txn_manager()->get_expire_txns(tablet_ptr->tablet_id(), + tablet_ptr->schema_hash(), tablet_ptr->tablet_uid(), &transaction_ids); + tablet_info.__set_transaction_ids(transaction_ids); + + if (_available_storage_medium_type_count > 1) { + tablet_info.__set_storage_medium(tablet_ptr->data_dir()->storage_medium()); + } + + tablet_info.__set_version_count(tablet_ptr->version_count()); + tablet_info.__set_path_hash(tablet_ptr->data_dir()->path_hash()); + + tablet.tablet_infos.push_back(tablet_info); + } + + if (tablet.tablet_infos.size() != 0) { + tablets_info->insert(pair(tablet.tablet_infos[0].tablet_id, tablet)); + } + } + + LOG(INFO) << "success to process report all tablets info. tablet_num=" << tablets_info->size(); + return OLAP_SUCCESS; +} // report_all_tablets_info + +OLAPStatus TabletManager::start_trash_sweep() { + { + ReadLock rlock(&_tablet_map_lock); + std::vector tablets_to_clean; + for (auto& item : _tablet_map) { + // try to clean empty item + if (item.second.table_arr.empty()) { + // try to get schema change lock if could get schema change lock, then nobody + // own the lock could remove the item + // it will core if schema change thread may hold the lock and this thread will deconstruct lock + if (item.second.schema_change_lock.trylock() == OLAP_SUCCESS) { + item.second.schema_change_lock.unlock(); + tablets_to_clean.push_back(item.first); + } + } + for (TabletSharedPtr tablet : item.second.table_arr) { + if (tablet == nullptr) { + continue; + } + tablet->delete_expired_inc_rowsets(); + } + } + // clean empty tablet id item + for (const auto& tablet_id_to_clean : tablets_to_clean) { + if (_tablet_map[tablet_id_to_clean].table_arr.empty()) { + _tablet_map.erase(tablet_id_to_clean); + } + } + } + + int32_t clean_num = 0; + do { + sleep(1); + clean_num = 0; + ReadLock rlock(&_tablet_map_lock); + auto it = _shutdown_tablets.begin(); + for (; it != _shutdown_tablets.end();) { + // check if the meta has the tablet info and its state is shutdown + if (it->use_count() > 1) { + // it means current tablet is referenced in other thread + ++it; + continue; + } + TabletMetaSharedPtr new_tablet_meta(new(nothrow) TabletMeta()); + if (new_tablet_meta == nullptr) { + LOG(WARNING) << "fail to malloc TabletMeta."; + ++it; + continue; + } + OLAPStatus check_st = TabletMetaManager::get_meta((*it)->data_dir(), + (*it)->tablet_id(), (*it)->schema_hash(), new_tablet_meta); + if (check_st == OLAP_SUCCESS) { + if (new_tablet_meta->tablet_state() != TABLET_SHUTDOWN + || 
new_tablet_meta->tablet_uid() != (*it)->tablet_uid()) { + LOG(WARNING) << "tablet's state changed to normal, skip remove dirs" + << " tablet id = " << new_tablet_meta->tablet_id() + << " schema hash = " << new_tablet_meta->schema_hash() + << " old tablet_uid=" << (*it)->tablet_uid() + << " cur tablet_uid=" << new_tablet_meta->tablet_uid(); + // remove it from list + it = _shutdown_tablets.erase(it); + continue; + } + if (check_dir_existed((*it)->tablet_path())) { + // take snapshot of tablet meta + std::string meta_file = (*it)->tablet_path() + "/" + std::to_string((*it)->tablet_id()) + ".hdr"; + (*it)->tablet_meta()->save(meta_file); + LOG(INFO) << "start to move path to trash" + << " tablet path = " << (*it)->tablet_path(); + OLAPStatus rm_st = move_to_trash((*it)->tablet_path(), (*it)->tablet_path()); + if (rm_st != OLAP_SUCCESS) { + LOG(WARNING) << "failed to move dir to trash" + << " dir = " << (*it)->tablet_path(); + ++it; + continue; + } + } + TabletMetaManager::remove((*it)->data_dir(), (*it)->tablet_id(), (*it)->schema_hash()); + LOG(INFO) << "successfully move tablet to trash." + << " tablet id " << (*it)->tablet_id() + << " schema hash " << (*it)->schema_hash() + << " tablet path " << (*it)->tablet_path(); + it = _shutdown_tablets.erase(it); + ++ clean_num; + } else { + // if could not find tablet info in meta store, then check if dir existed + if (check_dir_existed((*it)->tablet_path())) { + LOG(WARNING) << "errors while load meta from store, skip this tablet" + << " tablet id " << (*it)->tablet_id() + << " schema hash " << (*it)->schema_hash(); + ++it; + } else { + LOG(INFO) << "could not find tablet dir, skip move to trash, remove it from gc queue." + << " tablet id " << (*it)->tablet_id() + << " schema hash " << (*it)->schema_hash() + << " tablet path " << (*it)->tablet_path(); + it = _shutdown_tablets.erase(it); + } + } + + // if clean 100 tablets, should yield + if (clean_num >= 200) { + break; + } + } + } while (clean_num >= 200); + return OLAP_SUCCESS; +} // start_trash_sweep + +bool TabletManager::try_schema_change_lock(TTabletId tablet_id) { + bool res = false; + VLOG(3) << "try_schema_change_lock begin. tablet_id=" << tablet_id; + ReadLock rlock(&_tablet_map_lock); + + tablet_map_t::iterator it = _tablet_map.find(tablet_id); + if (it == _tablet_map.end()) { + LOG(WARNING) << "tablet does not exists. tablet_id=" << tablet_id; + } else { + res = (it->second.schema_change_lock.trylock() == OLAP_SUCCESS); + } + VLOG(3) << "try_schema_change_lock end. 
tablet_id=" << tablet_id; + return res; +} // try_schema_change_lock + +void TabletManager::update_root_path_info(std::map* path_map, + int* tablet_counter) { + ReadLock rlock(&_tablet_map_lock); + for (auto& entry : _tablet_map) { + TableInstances& instance = entry.second; + for (auto& tablet : instance.table_arr) { + (*tablet_counter) ++ ; + int64_t data_size = tablet->tablet_footprint(); + auto find = path_map->find(tablet->data_dir()->path()); + if (find == path_map->end()) { + continue; + } + if (find->second.is_used) { + find->second.data_used_capacity += data_size; + } + } + } +} // update_root_path_info + +void TabletManager::update_storage_medium_type_count(uint32_t storage_medium_type_count) { + _available_storage_medium_type_count = storage_medium_type_count; +} + +void TabletManager::_build_tablet_info(TabletSharedPtr tablet, TTabletInfo* tablet_info) { + tablet_info->tablet_id = tablet->tablet_id(); + tablet_info->schema_hash = tablet->schema_hash(); + tablet_info->row_count = tablet->num_rows(); + tablet_info->data_size = tablet->tablet_footprint(); + Version version = { -1, 0 }; + VersionHash v_hash = 0; + tablet->max_continuous_version_from_begining(&version, &v_hash); + tablet_info->version = version.second; + tablet_info->version_hash = v_hash; +} + +void TabletManager::_build_tablet_stat() { + _tablet_stat_cache.clear(); + for (const auto& item : _tablet_map) { + if (item.second.table_arr.size() == 0) { + continue; + } + + TTabletStat stat; + stat.tablet_id = item.first; + for (TabletSharedPtr tablet : item.second.table_arr) { + if (tablet == nullptr) { + continue; + } + // we only get base tablet's stat + stat.__set_data_size(tablet->tablet_footprint()); + stat.__set_row_num(tablet->num_rows()); + VLOG(3) << "tablet_id=" << item.first + << ", data_size=" << tablet->tablet_footprint() + << ", row_num:" << tablet->num_rows(); + break; + } + + _tablet_stat_cache.emplace(item.first, stat); + } + + _tablet_stat_cache_update_time_ms = UnixMillis(); +} + +OLAPStatus TabletManager::_create_inital_rowset( + TabletSharedPtr tablet, const TCreateTabletReq& request) { + OLAPStatus res = OLAP_SUCCESS; + + if (request.version < 1) { + LOG(WARNING) << "init version of tablet should at least 1."; + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } else { + Version version(0, request.version); + VLOG(3) << "begin to create init version. " + << "begin=" << version.first << ", end=" << version.second; + RowsetSharedPtr new_rowset; + do { + if (version.first > version.second) { + LOG(WARNING) << "begin should not larger than end." 
+ << " begin=" << version.first + << " end=" << version.second; + res = OLAP_ERR_INPUT_PARAMETER_ERROR; + break; + } + RowsetId rowset_id = 1; + // if we know this is the first rowset in this tablet, then not call + // tablet to generate rowset id, just set it to 1 + // RETURN_NOT_OK(tablet->next_rowset_id(&rowset_id)); + RowsetWriterContext context; + context.rowset_id = rowset_id; + context.tablet_uid = tablet->tablet_uid(); + context.tablet_id = tablet->tablet_id(); + context.partition_id = tablet->partition_id(); + context.tablet_schema_hash = tablet->schema_hash(); + context.rowset_type = ALPHA_ROWSET; + context.rowset_path_prefix = tablet->tablet_path(); + context.tablet_schema = &(tablet->tablet_schema()); + context.rowset_state = VISIBLE; + context.data_dir = tablet->data_dir(); + context.version = version; + context.version_hash = request.version_hash; + RowsetWriter* builder = new (std::nothrow)AlphaRowsetWriter(); + if (builder == nullptr) { + LOG(WARNING) << "fail to new rowset."; + res = OLAP_ERR_MALLOC_ERROR; + break; + } + builder->init(context); + res = builder->flush(); + if (OLAP_SUCCESS != res) { + LOG(WARNING) << "fail to finalize writer. tablet=" << tablet->full_name(); + break; + } + + new_rowset = builder->build(); + res = tablet->add_rowset(new_rowset); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to add rowset to tablet. " + << "tablet=" << tablet->full_name(); + break; + } + } while (0); + + // Unregister index and delete files(index and data) if failed + if (res != OLAP_SUCCESS) { + StorageEngine::instance()->add_unused_rowset(new_rowset); + LOG(WARNING) << "fail to create init base version. " + << " res=" << res + << " version=" << request.version; + return res; + } + } + tablet->set_cumulative_layer_point(request.version + 1); + res = tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to save header. [tablet=" << tablet->full_name() << "]"; + } + + return res; +} + +OLAPStatus TabletManager::_create_tablet_meta( + const TCreateTabletReq& request, + DataDir* store, + const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, + TabletMetaSharedPtr* tablet_meta) { + uint64_t shard_id = 0; + OLAPStatus res = store->get_shard(&shard_id); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to get root path shard. res=" << res; + return res; + } + + uint32_t next_unique_id = 0; + uint32_t col_ordinal = 0; + std::unordered_map col_ordinal_to_unique_id; + if (!is_schema_change_tablet) { + for (TColumn column : request.tablet_schema.columns) { + col_ordinal_to_unique_id[col_ordinal] = col_ordinal; + col_ordinal++; + } + next_unique_id = col_ordinal; + } else { + next_unique_id = ref_tablet->next_unique_id(); + size_t num_columns = ref_tablet->num_columns(); + size_t field = 0; + for (TColumn column : request.tablet_schema.columns) { + /* + * for schema change, compare old_tablet and new_tablet + * 1. if column in both new_tablet and old_tablet, + * assign unique_id of old_tablet to the column of new_tablet + * 2. 
if column exists only in new_tablet, assign next_unique_id of old_tablet + * to the new column + * + */ + for (field = 0 ; field < num_columns; ++field) { + if (ref_tablet->tablet_schema().column(field).name() == column.column_name) { + uint32_t unique_id = ref_tablet->tablet_schema().column(field).unique_id(); + col_ordinal_to_unique_id[col_ordinal] = unique_id; + break; + } + } + if (field == num_columns) { + col_ordinal_to_unique_id[col_ordinal] = next_unique_id; + next_unique_id++; + } + col_ordinal++; + } + } + + LOG(INFO) << "next_unique_id:" << next_unique_id; + // it is a new tablet meta obviously, should generate a new tablet id + TabletUid tablet_uid; + res = TabletMeta::create(request.table_id, request.partition_id, + request.tablet_id, request.tablet_schema.schema_hash, + shard_id, request.tablet_schema, + next_unique_id, col_ordinal_to_unique_id, + tablet_meta, tablet_uid); + return res; +} + +OLAPStatus TabletManager::_drop_tablet_directly_unlocked( + TTabletId tablet_id, SchemaHash schema_hash, bool keep_files) { + OLAPStatus res = OLAP_SUCCESS; + + TabletSharedPtr dropped_tablet = _get_tablet_with_no_lock(tablet_id, schema_hash); + if (dropped_tablet == nullptr) { + LOG(WARNING) << "fail to drop not existed tablet. " + << " tablet_id=" << tablet_id + << " schema_hash=" << schema_hash; + return OLAP_ERR_TABLE_NOT_FOUND; + } + + for (list::iterator it = _tablet_map[tablet_id].table_arr.begin(); + it != _tablet_map[tablet_id].table_arr.end();) { + if ((*it)->equal(tablet_id, schema_hash)) { + TabletSharedPtr tablet = *it; + it = _tablet_map[tablet_id].table_arr.erase(it); + if (!keep_files) { + // drop tablet will update tablet meta, should lock + WriteLock wrlock(tablet->get_header_lock_ptr()); + LOG(INFO) << "set tablet to shutdown state and remove it from memory" + << " tablet_id=" << tablet_id + << " schema_hash=" << schema_hash + << " tablet path=" << dropped_tablet->tablet_path(); + // has to update tablet here, must not update tablet meta directly + // because other thread may hold the tablet object, they may save meta too + // if update meta directly here, other thread may override the meta + // and the tablet will be loaded at restart time. + tablet->set_tablet_state(TABLET_SHUTDOWN); + res = tablet->save_meta(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to drop tablet. " + << " tablet_id=" << tablet_id + << " schema_hash=" << schema_hash; + return res; + } + _shutdown_tablets.push_back(tablet); + } + } else { + ++it; + } + } + + res = dropped_tablet->deregister_tablet_from_dir(); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to unregister from root path. " + << " res= " << res + << " tablet=" << tablet_id; + } + + return res; +} // _drop_tablet_directly_unlocked + +TabletSharedPtr TabletManager::_get_tablet_with_no_lock(TTabletId tablet_id, SchemaHash schema_hash) { + VLOG(3) << "begin to get tablet. tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + tablet_map_t::iterator it = _tablet_map.find(tablet_id); + if (it != _tablet_map.end()) { + for (TabletSharedPtr tablet : it->second.table_arr) { + CHECK(tablet != nullptr) << "tablet is nullptr:" << tablet; + if (tablet->equal(tablet_id, schema_hash)) { + VLOG(3) << "get tablet success. tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + return tablet; + } + } + } + + VLOG(3) << "fail to get tablet. 
tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + // Return empty tablet if fail + TabletSharedPtr tablet; + return tablet; +} // _get_tablet_with_no_lock + +} // doris diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h new file mode 100644 index 00000000000000..d2589882490d2d --- /dev/null +++ b/be/src/olap/tablet_manager.h @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_TABLET_MANAGER_H +#define DORIS_BE_SRC_OLAP_TABLET_MANAGER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "agent/status.h" +#include "common/status.h" +#include "gen_cpp/AgentService_types.h" +#include "gen_cpp/BackendService_types.h" +#include "gen_cpp/MasterService_types.h" +#include "olap/atomic.h" +#include "olap/lru_cache.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/tablet.h" +#include "olap/olap_meta.h" +#include "olap/options.h" + +namespace doris { + +class Tablet; +class DataDir; + +// TabletManager provides get,add, delete tablet method for storage engine +class TabletManager { +public: + TabletManager(); + + ~TabletManager() { + _tablet_map.clear(); + } + + void cancel_unfinished_schema_change(); + + bool check_tablet_id_exist(TTabletId tablet_id); + + void clear(); + + OLAPStatus create_tablet(const TCreateTabletReq& request, + std::vector stores); + + // Create new tablet for StorageEngine + // + // Return Tablet * succeeded; Otherwise, return NULL if failed + TabletSharedPtr create_tablet(const AlterTabletType alter_type, + const TCreateTabletReq& request, + const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, + std::vector stores); + + // Drop a tablet by description + // If set keep_files == true, files will NOT be deleted when deconstruction. 
+ // Return OLAP_SUCCESS, if run ok + // OLAP_ERR_TABLE_DELETE_NOEXIST_ERROR, if tablet not exist + // OLAP_ERR_NOT_INITED, if not inited + OLAPStatus drop_tablet( + TTabletId tablet_id, SchemaHash schema_hash, bool keep_files = false); + + OLAPStatus drop_tablets_on_error_root_path(const std::vector& tablet_info_vec); + + TabletSharedPtr find_best_tablet_to_compaction(CompactionType compaction_type, DataDir* data_dir); + + // Get tablet pointer + TabletSharedPtr get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + bool include_deleted = false, std::string* err = nullptr); + + TabletSharedPtr get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + TabletUid tablet_uid, bool include_deleted = false, + std::string* err = nullptr); + + bool get_tablet_id_and_schema_hash_from_path(const std::string& path, + TTabletId* tablet_id, TSchemaHash* schema_hash); + + bool get_rowset_id_from_path(const std::string& path, + RowsetId* rowset_id); + + void get_tablet_stat(TTabletStatResult& result); + + // parse tablet header msg to generate tablet object + OLAPStatus load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id, + TSchemaHash schema_hash, const std::string& header, bool update_meta, + bool force = false); + + OLAPStatus load_tablet_from_dir(DataDir* data_dir, + TTabletId tablet_id, + SchemaHash schema_hash, + const std::string& schema_hash_path, + bool force = false); + + void release_schema_change_lock(TTabletId tablet_id); + + // 获取所有tables的名字 + // + // Return OLAP_SUCCESS, if run ok + // OLAP_ERR_INPUT_PARAMETER_ERROR, if tables is null + OLAPStatus report_tablet_info(TTabletInfo* tablet_info); + + OLAPStatus report_all_tablets_info(std::map* tablets_info); + + OLAPStatus start_trash_sweep(); + // Prevent schema change executed concurrently. 
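A minimal stand-alone sketch of the column unique-id rule described in the comment inside _create_tablet_meta() above: a column that already exists in the reference tablet keeps its old unique id, while a column that appears only in the new schema takes the reference tablet's next_unique_id. The names and values below are invented for illustration, and a map lookup stands in for the linear scan over the reference schema that the patch performs.

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Illustrative only: mirrors the unique-id assignment rule used when building
// the tablet meta for a schema-change target tablet.
int main() {
    // Reference (old) tablet schema: column name -> unique id.
    std::unordered_map<std::string, uint32_t> ref_columns = {
        {"k1", 0}, {"k2", 1}, {"v1", 2}
    };
    uint32_t next_unique_id = 3;  // plays the role of ref_tablet->next_unique_id()

    // Requested (new) schema after the schema change.
    std::vector<std::string> new_columns = {"k1", "k2", "v2"};

    std::unordered_map<uint32_t, uint32_t> col_ordinal_to_unique_id;
    for (uint32_t ordinal = 0; ordinal < new_columns.size(); ++ordinal) {
        auto it = ref_columns.find(new_columns[ordinal]);
        if (it != ref_columns.end()) {
            col_ordinal_to_unique_id[ordinal] = it->second;        // existing column keeps its id
        } else {
            col_ordinal_to_unique_id[ordinal] = next_unique_id++;  // new column gets a fresh id
        }
    }

    for (const auto& entry : col_ordinal_to_unique_id) {
        std::cout << "ordinal " << entry.first << " -> unique id " << entry.second << "\n";
    }
    // Resulting mapping: {0 -> 0, 1 -> 1, 2 -> 3}; next_unique_id ends at 4.
    return 0;
}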
+ bool try_schema_change_lock(TTabletId tablet_id); + + void update_root_path_info(std::map* path_map, int* tablet_counter); + + void update_storage_medium_type_count(uint32_t storage_medium_type_count); + +private: + // Add a tablet pointer to StorageEngine + // If force, drop the existing tablet add this new one + // + // Return OLAP_SUCCESS, if run ok + // OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR, if find duplication + // OLAP_ERR_NOT_INITED, if not inited + OLAPStatus _add_tablet_unlock(TTabletId tablet_id, SchemaHash schema_hash, + const TabletSharedPtr& tablet, bool update_meta, bool force); + + OLAPStatus _add_tablet_to_map(TTabletId tablet_id, SchemaHash schema_hash, + const TabletSharedPtr& tablet, bool update_meta, + bool keep_files, bool drop_old); + + void _build_tablet_info(TabletSharedPtr tablet, TTabletInfo* tablet_info); + + void _build_tablet_stat(); + bool _check_tablet_id_exist_unlock(TTabletId tablet_id); + OLAPStatus _create_inital_rowset(TabletSharedPtr tablet, const TCreateTabletReq& request); + + + OLAPStatus _create_tablet_meta(const TCreateTabletReq& request, + DataDir* store, + const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, + TabletMetaSharedPtr* tablet_meta); + + OLAPStatus _drop_tablet_directly_unlocked(TTabletId tablet_id, TSchemaHash schema_hash, bool keep_files = false); + + OLAPStatus _drop_tablet_unlock(TTabletId tablet_id, SchemaHash schema_hash, bool keep_files); + + TabletSharedPtr _get_tablet_with_no_lock(TTabletId tablet_id, SchemaHash schema_hash); + + TabletSharedPtr _get_tablet(TTabletId tablet_id, SchemaHash schema_hash, + bool include_deleted, std::string* err); + + TabletSharedPtr _internal_create_tablet(const AlterTabletType alter_type, const TCreateTabletReq& request, + const bool is_schema_change_tablet, const TabletSharedPtr ref_tablet, std::vector data_dirs); + + TabletSharedPtr _create_tablet_meta_and_dir(const TCreateTabletReq& request, const bool is_schema_change_tablet, + const TabletSharedPtr ref_tablet, std::vector data_dirs); + +private: + struct TableInstances { + Mutex schema_change_lock; + std::list table_arr; + }; + typedef std::map tablet_map_t; + RWMutex _tablet_map_lock; + RWMutex _create_tablet_lock; + tablet_map_t _tablet_map; + std::map _store_map; + + // cache to save tablets' statistics, such as data size and row + // TODO(cmy): for now, this is a naive implementation + std::map _tablet_stat_cache; + // last update time of tablet stat cache + int64_t _tablet_stat_cache_update_time_ms; + + uint32_t _available_storage_medium_type_count; + + std::vector _shutdown_tablets; + + DISALLOW_COPY_AND_ASSIGN(TabletManager); +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_TABLET_MANAGER_H diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp new file mode 100644 index 00000000000000..fdc79ec8963983 --- /dev/null +++ b/be/src/olap/tablet_meta.cpp @@ -0,0 +1,772 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/tablet_meta.h" + +#include + +#include "olap/file_helper.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/tablet_meta_manager.h" +#include "util/uid_util.h" +#include "util/url_coding.h" + +namespace doris { + +OLAPStatus AlterTabletTask::init_from_pb(const AlterTabletPB& alter_task) { + _alter_state = alter_task.alter_state(); + _related_tablet_id = alter_task.related_tablet_id(); + _related_schema_hash = alter_task.related_schema_hash(); + _alter_type = alter_task.alter_type(); + return OLAP_SUCCESS; +} + +OLAPStatus AlterTabletTask::to_alter_pb(AlterTabletPB* alter_task) { + alter_task->set_alter_state(_alter_state); + alter_task->set_related_tablet_id(_related_tablet_id); + alter_task->set_related_schema_hash(_related_schema_hash); + alter_task->set_alter_type(_alter_type); + return OLAP_SUCCESS; +} + +OLAPStatus AlterTabletTask::set_alter_state(AlterTabletState alter_state) { + if (_alter_state == ALTER_FAILED && alter_state != ALTER_FAILED) { + return OLAP_ERR_ALTER_STATUS_ERR; + } else if (_alter_state == ALTER_FINISHED && alter_state != ALTER_FINISHED) { + return OLAP_ERR_ALTER_STATUS_ERR; + } + _alter_state = alter_state; + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::create(int64_t table_id, int64_t partition_id, + int64_t tablet_id, int32_t schema_hash, + uint64_t shard_id, const TTabletSchema& tablet_schema, + uint32_t next_unique_id, + const std::unordered_map& col_ordinal_to_unique_id, + TabletMetaSharedPtr* tablet_meta, TabletUid& tablet_uid) { + tablet_meta->reset(new TabletMeta(table_id, partition_id, + tablet_id, schema_hash, + shard_id, tablet_schema, + next_unique_id, col_ordinal_to_unique_id, tablet_uid)); + return OLAP_SUCCESS; +} + +TabletMeta::TabletMeta() {} + +TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, + int64_t tablet_id, int32_t schema_hash, + uint64_t shard_id, const TTabletSchema& tablet_schema, + uint32_t next_unique_id, + const std::unordered_map& col_ordinal_to_unique_id, + TabletUid tablet_uid) { + TabletMetaPB tablet_meta_pb; + tablet_meta_pb.set_table_id(table_id); + tablet_meta_pb.set_partition_id(partition_id); + tablet_meta_pb.set_tablet_id(tablet_id); + tablet_meta_pb.set_schema_hash(schema_hash); + tablet_meta_pb.set_shard_id(shard_id); + tablet_meta_pb.set_creation_time(time(NULL)); + tablet_meta_pb.set_cumulative_layer_point(-1); + tablet_meta_pb.set_tablet_state(PB_RUNNING); + *(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto(); + tablet_meta_pb.set_end_rowset_id(10000); + TabletSchemaPB* schema = tablet_meta_pb.mutable_schema(); + schema->set_num_short_key_columns(tablet_schema.short_key_column_count); + schema->set_num_rows_per_row_block(config::default_num_rows_per_column_file_block); + switch(tablet_schema.keys_type) { + case TKeysType::DUP_KEYS: + schema->set_keys_type(KeysType::DUP_KEYS); + break; + case TKeysType::UNIQUE_KEYS: + schema->set_keys_type(KeysType::UNIQUE_KEYS); + break; + case TKeysType::AGG_KEYS: + schema->set_keys_type(KeysType::AGG_KEYS); + break; + default: + LOG(WARNING) << 
"unknown tablet keys type"; + break; + } + schema->set_compress_kind(COMPRESS_LZ4); + tablet_meta_pb.set_in_restore_mode(false); + + // set column information + uint32_t col_ordinal = 0; + uint32_t key_count = 0; + bool has_bf_columns = false; + for (TColumn tcolumn : tablet_schema.columns) { + ColumnPB* column = schema->add_column(); + uint32_t unique_id = col_ordinal_to_unique_id.at(col_ordinal++); + column->set_unique_id(unique_id); + column->set_name(tcolumn.column_name); + string data_type; + EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); + column->set_type(data_type); + if (tcolumn.column_type.type == TPrimitiveType::DECIMAL) { + column->set_precision(tcolumn.column_type.precision); + column->set_frac(tcolumn.column_type.scale); + } + uint32_t length = FieldInfo::get_field_length_by_type( + tcolumn.column_type.type, tcolumn.column_type.len); + column->set_length(length); + column->set_index_length(length); + if (tcolumn.column_type.type == TPrimitiveType::VARCHAR || tcolumn.column_type.type == TPrimitiveType::HLL) { + if (!tcolumn.column_type.__isset.index_len) { + column->set_index_length(10); + } else { + column->set_index_length(tcolumn.column_type.index_len); + } + } + if (!tcolumn.is_key) { + column->set_is_key(false); + string aggregation_type; + EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); + column->set_aggregation(aggregation_type); + } else { + ++key_count; + column->set_is_key(true); + column->set_aggregation("NONE"); + } + column->set_is_nullable(tcolumn.is_allow_null); + if (tcolumn.__isset.default_value) { + column->set_default_value(tcolumn.default_value); + } + if (tcolumn.__isset.is_bloom_filter_column) { + column->set_is_bf_column(tcolumn.is_bloom_filter_column); + has_bf_columns = true; + } + } + + schema->set_next_column_unique_id(next_unique_id); + if (has_bf_columns && tablet_schema.__isset.bloom_filter_fpp) { + schema->set_bf_fpp(tablet_schema.bloom_filter_fpp); + } + + init_from_pb(tablet_meta_pb); +} + +OLAPStatus TabletMeta::create_from_file(const std::string& file_path) { + FileHeader file_header; + FileHandler file_handler; + + if (file_handler.open(file_path.c_str(), O_RDONLY) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to open ordinal file. file=" << file_path; + return OLAP_ERR_IO_ERROR; + } + + // In file_header.unserialize(), it validates file length, signature, checksum of protobuf. + if (file_header.unserialize(&file_handler) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to unserialize tablet_meta. file='" << file_path; + return OLAP_ERR_PARSE_PROTOBUF_ERROR; + } + + TabletMetaPB tablet_meta_pb; + try { + tablet_meta_pb.CopyFrom(file_header.message()); + } catch (...) { + LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path; + return OLAP_ERR_PARSE_PROTOBUF_ERROR; + } + + return init_from_pb(tablet_meta_pb); +} + +OLAPStatus TabletMeta::reset_tablet_uid(const std::string& file_path) { + OLAPStatus res = OLAP_SUCCESS; + TabletMeta tmp_tablet_meta; + if ((res = tmp_tablet_meta.create_from_file(file_path)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load tablet meta from file" + << ", meta_file=" << file_path; + return res; + } + TabletMetaPB tmp_tablet_meta_pb; + res = tmp_tablet_meta.to_meta_pb(&tmp_tablet_meta_pb); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to serialize tablet meta to pb object. 
" + << " , meta_file=" << file_path; + return res; + } + *(tmp_tablet_meta_pb.mutable_tablet_uid()) = TabletUid().to_proto(); + res = save(file_path, tmp_tablet_meta_pb); + if (res != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save tablet meta pb to " + << " meta_file=" << file_path; + return res; + } + return res; +} + +const TabletUid TabletMeta::tablet_uid() { + return _tablet_uid; +} + +std::string TabletMeta::construct_header_file_path(const std::string& schema_hash_path, const int64_t tablet_id) { + std::stringstream header_name_stream; + header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr"; + return header_name_stream.str(); +} + +OLAPStatus TabletMeta::save(const string& file_path) { + TabletMetaPB tablet_meta_pb; + RETURN_NOT_OK(to_meta_pb(&tablet_meta_pb)); + return TabletMeta::save(file_path, tablet_meta_pb); +} + +OLAPStatus TabletMeta::save(const string& file_path, TabletMetaPB& tablet_meta_pb) { + DCHECK(!file_path.empty()); + + FileHeader file_header; + FileHandler file_handler; + + if (file_handler.open_with_mode(file_path.c_str(), + O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to open header file. file='" << file_path; + return OLAP_ERR_IO_ERROR; + } + + try { + file_header.mutable_message()->CopyFrom(tablet_meta_pb); + } catch (...) { + LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path; + return OLAP_ERR_OTHER_ERROR; + } + + if (file_header.prepare(&file_handler) != OLAP_SUCCESS + || file_header.serialize(&file_handler) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to serialize to file header. file='" << file_path; + return OLAP_ERR_SERIALIZE_PROTOBUF_ERROR; + } + + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::save_meta(DataDir* data_dir) { + WriteLock wrlock(&_meta_lock); + return _save_meta(data_dir); +} + +OLAPStatus TabletMeta::_save_meta(DataDir* data_dir) { + // check if rowset id all valid, should remove it later + for (auto& rs_meta : _rs_metas) { + if (rs_meta->rowset_id() >= _next_rowset_id) { + LOG(FATAL) << "meta contains invalid rowsetid " + << " tablet=" << full_name() + << " rowset_id=" << rs_meta->rowset_id() + << " next_rowset_id=" << _next_rowset_id; + } + } + for (auto& rs_meta : _inc_rs_metas) { + if (rs_meta->rowset_id() >= _next_rowset_id) { + LOG(FATAL) << "meta contains invalid rowsetid " + << " tablet=" << full_name() + << " rowset_id=" << rs_meta->rowset_id() + << " next_rowset_id=" << _next_rowset_id; + } + } + // check if _end_rowset_id > 10000 + if (_end_rowset_id < 10000) { + LOG(FATAL) << "end_rowset_id is invalid" + << " tablet=" << full_name() + << " end_rowset_id=" << _end_rowset_id; + } + // check if tablet uid is valid + if (_tablet_uid.hi == 0 && _tablet_uid.lo == 0) { + LOG(FATAL) << "tablet_uid is invalid" + << " tablet=" << full_name() + << " _tablet_uid=" << _tablet_uid.to_string(); + } + string meta_binary; + RETURN_NOT_OK(serialize(&meta_binary)); + OLAPStatus status = TabletMetaManager::save(data_dir, tablet_id(), schema_hash(), meta_binary); + if (status != OLAP_SUCCESS) { + LOG(FATAL) << "fail to save tablet_meta. 
status=" << status + << ", tablet_id=" << tablet_id() + << ", schema_hash=" << schema_hash(); + } + return status; +} + +OLAPStatus TabletMeta::serialize(string* meta_binary) { + TabletMetaPB tablet_meta_pb; + RETURN_NOT_OK(to_meta_pb(&tablet_meta_pb)); + bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary); + if (!serialize_success) { + LOG(FATAL) << "failed to serialize meta " << full_name(); + } + // deserialize the meta to check the result is correct + TabletMetaPB de_tablet_meta_pb; + bool parsed = de_tablet_meta_pb.ParseFromString(*meta_binary); + if (!parsed) { + LOG(FATAL) << "deserialize from previous serialize result failed " << full_name(); + } + return OLAP_SUCCESS; +}; + +OLAPStatus TabletMeta::deserialize(const string& meta_binary) { + TabletMetaPB tablet_meta_pb; + bool parsed = tablet_meta_pb.ParseFromString(meta_binary); + if (!parsed) { + LOG(WARNING) << "parse tablet meta failed"; + return OLAP_ERR_INIT_FAILED; + } + return init_from_pb(tablet_meta_pb); +} + +OLAPStatus TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { + _table_id = tablet_meta_pb.table_id(); + _partition_id = tablet_meta_pb.partition_id(); + _tablet_id = tablet_meta_pb.tablet_id(); + _schema_hash = tablet_meta_pb.schema_hash(); + _shard_id = tablet_meta_pb.shard_id(); + _creation_time = tablet_meta_pb.creation_time(); + _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point(); + _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid()); + _end_rowset_id = tablet_meta_pb.end_rowset_id(); + _initial_end_rowset_id = tablet_meta_pb.end_rowset_id(); + _next_rowset_id = _end_rowset_id + 1; + + // init _tablet_state + switch (tablet_meta_pb.tablet_state()) { + case PB_NOTREADY: + _tablet_state = TabletState::TABLET_NOTREADY; + break; + case PB_RUNNING: + _tablet_state = TabletState::TABLET_RUNNING; + break; + case PB_TOMBSTONED: + _tablet_state = TabletState::TABLET_TOMBSTONED; + break; + case PB_STOPPED: + _tablet_state = TabletState::TABLET_STOPPED; + break; + case PB_SHUTDOWN: + _tablet_state = TabletState::TABLET_SHUTDOWN; + break; + default: + LOG(WARNING) << "tablet has no state. 
tablet=" << tablet_id() + << ", schema_hash=" << schema_hash(); + } + + // init _schema + RETURN_NOT_OK(_schema.init_from_pb(tablet_meta_pb.schema())); + + // init _rs_metas + for (auto& it : tablet_meta_pb.rs_metas()) { + RowsetMetaSharedPtr rs_meta(new AlphaRowsetMeta()); + rs_meta->init_from_pb(it); + if (rs_meta->has_delete_predicate()) { + add_delete_predicate(rs_meta->delete_predicate(), rs_meta->version().first); + } + _rs_metas.push_back(std::move(rs_meta)); + } + for (auto& it : tablet_meta_pb.inc_rs_metas()) { + RowsetMetaSharedPtr rs_meta(new AlphaRowsetMeta()); + rs_meta->init_from_pb(it); + _inc_rs_metas.push_back(std::move(rs_meta)); + } + + // generate AlterTabletTask + if (tablet_meta_pb.has_alter_task()) { + AlterTabletTask* alter_tablet_task = new AlterTabletTask(); + RETURN_NOT_OK(alter_tablet_task->init_from_pb(tablet_meta_pb.alter_task())); + _alter_task.reset(alter_tablet_task); + } + + if (tablet_meta_pb.has_in_restore_mode()) { + _in_restore_mode = tablet_meta_pb.in_restore_mode(); + } + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { + tablet_meta_pb->set_table_id(table_id()); + tablet_meta_pb->set_partition_id(partition_id()); + tablet_meta_pb->set_tablet_id(tablet_id()); + tablet_meta_pb->set_schema_hash(schema_hash()); + tablet_meta_pb->set_shard_id(shard_id()); + tablet_meta_pb->set_creation_time(creation_time()); + tablet_meta_pb->set_cumulative_layer_point(cumulative_layer_point()); + *(tablet_meta_pb->mutable_tablet_uid()) = tablet_uid().to_proto(); + tablet_meta_pb->set_end_rowset_id(_end_rowset_id); + switch (tablet_state()) { + case TABLET_NOTREADY: + tablet_meta_pb->set_tablet_state(PB_NOTREADY); + break; + case TABLET_RUNNING: + tablet_meta_pb->set_tablet_state(PB_RUNNING); + break; + case TABLET_TOMBSTONED: + tablet_meta_pb->set_tablet_state(PB_TOMBSTONED); + break; + case TABLET_STOPPED: + tablet_meta_pb->set_tablet_state(PB_STOPPED); + break; + case TABLET_SHUTDOWN: + tablet_meta_pb->set_tablet_state(PB_SHUTDOWN); + break; + } + + for (auto& rs : _rs_metas) { + rs->to_rowset_pb(tablet_meta_pb->add_rs_metas()); + } + for (auto rs : _inc_rs_metas) { + rs->to_rowset_pb(tablet_meta_pb->add_inc_rs_metas()); + } + _schema.to_schema_pb(tablet_meta_pb->mutable_schema()); + if (_alter_task != nullptr) { + _alter_task->to_alter_pb(tablet_meta_pb->mutable_alter_task()); + } + + tablet_meta_pb->set_in_restore_mode(in_restore_mode()); + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::to_json(std::string* json_string, json2pb::Pb2JsonOptions& options) { + TabletMetaPB tablet_meta_pb; + RETURN_NOT_OK(to_meta_pb(&tablet_meta_pb)); + json2pb::ProtoMessageToJson(tablet_meta_pb, json_string, options); + return OLAP_SUCCESS; +} + +Version TabletMeta::max_version() const { + Version max_version = { -1, 0 }; + for (auto& rs_meta : _rs_metas) { + if (rs_meta->end_version() > max_version.second) { + max_version = rs_meta->version(); + } else if (rs_meta->end_version() == max_version.second + && rs_meta->start_version() == max_version.first) { + max_version = rs_meta->version(); + } + } + return max_version; +} + +OLAPStatus TabletMeta::add_rs_meta(const RowsetMetaSharedPtr& rs_meta) { + // check RowsetMeta is valid + for (auto& rs : _rs_metas) { + if (rs->start_version() == rs_meta->start_version() + && rs->end_version() == rs_meta->end_version()) { + if (rs->rowset_id() != rs_meta->rowset_id()) { + LOG(WARNING) << "version already exist. 
rowset_id=" << rs->rowset_id() + << " start_version=" << rs_meta->start_version() + << ", end_version=" << rs_meta->end_version() + << ", tablet=" << full_name(); + return OLAP_ERR_PUSH_VERSION_ALREADY_EXIST; + } else { + // rowsetid,version is equal, it is a duplicate req, skip it + return OLAP_SUCCESS; + } + } + } + + _rs_metas.push_back(std::move(rs_meta)); + if (rs_meta->has_delete_predicate()) { + add_delete_predicate(rs_meta->delete_predicate(), rs_meta->version().first); + } + + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::delete_rs_meta_by_version(const Version& version, vector* deleted_rs_metas) { + auto it = _rs_metas.begin(); + while (it != _rs_metas.end()) { + if ((*it)->version().first == version.first + && (*it)->version().second == version.second) { + if (deleted_rs_metas != nullptr) { + deleted_rs_metas->push_back(*it); + } + _rs_metas.erase(it); + } else { + ++it; + } + } + + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::modify_rs_metas(const vector& to_add, + const vector& to_delete) { + for (auto rs_to_del : to_delete) { + auto it = _rs_metas.begin(); + while (it != _rs_metas.end()) { + if (rs_to_del->version().first == (*it)->version().first + && rs_to_del->version().second == (*it)->version().second) { + if ((*it)->has_delete_predicate()) { + remove_delete_predicate_by_version((*it)->version()); + } + _rs_metas.erase(it); + } else { + it++; + } + } + } + + for (auto rs : to_add) { + _rs_metas.push_back(std::move(rs)); + } + + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::revise_rs_metas(const std::vector& rs_metas) { + WriteLock wrlock(&_meta_lock); + // delete alter task + _alter_task.reset(); + + // remove all old rs_meta and add new rs_meta + _rs_metas.clear(); + + for (auto& rs_meta : rs_metas) { + _rs_metas.push_back(rs_meta); + } + + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::revise_inc_rs_metas(const std::vector& rs_metas) { + WriteLock wrlock(&_meta_lock); + // delete alter task + _alter_task.reset(); + + // remove all old rs_meta and add new rs_meta + _inc_rs_metas.clear(); + + for (auto& rs_meta : rs_metas) { + _inc_rs_metas.push_back(rs_meta); + } + + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::add_inc_rs_meta(const RowsetMetaSharedPtr& rs_meta) { + // check RowsetMeta is valid + for (auto rs : _inc_rs_metas) { + if (rs->start_version() == rs_meta->start_version() + && rs->end_version() == rs_meta->end_version()) { + LOG(WARNING) << "rowset already exist. 
rowset_id=" << rs->rowset_id(); + return OLAP_ERR_ROWSET_ALREADY_EXIST; + } + } + + _inc_rs_metas.push_back(std::move(rs_meta)); + return OLAP_SUCCESS; +} + +RowsetMetaSharedPtr TabletMeta::acquire_rs_meta_by_version(const Version& version) const { + RowsetMetaSharedPtr rs_meta = nullptr; + for (int i = 0; i < _rs_metas.size(); ++i) { + if (_rs_metas[i]->version().first == version.first + && _rs_metas[i]->version().second == version.second) { + rs_meta = _rs_metas[i]; + break; + } + } + return rs_meta; +} + +OLAPStatus TabletMeta::delete_inc_rs_meta_by_version(const Version& version) { + auto it = _inc_rs_metas.begin(); + while (it != _inc_rs_metas.end()) { + if ((*it)->version().first == version.first + && (*it)->version().second == version.second) { + _inc_rs_metas.erase(it); + break; + } else { + it++; + } + } + + return OLAP_SUCCESS; +} + +RowsetMetaSharedPtr TabletMeta::acquire_inc_rs_meta_by_version(const Version& version) const { + RowsetMetaSharedPtr rs_meta = nullptr; + for (int i = 0; i < _inc_rs_metas.size(); ++i) { + if (_inc_rs_metas[i]->version().first == version.first + && _inc_rs_metas[i]->version().second == version.second) { + rs_meta = _inc_rs_metas[i]; + break; + } + } + return rs_meta; +} + +OLAPStatus TabletMeta::add_delete_predicate( + const DeletePredicatePB& delete_predicate, int64_t version) { + int ordinal = 0; + for (auto& del_pred : _del_pred_array) { + if (del_pred.version() == version) { + break; + } + ordinal++; + } + + if (ordinal < _del_pred_array.size()) { + // clear existed predicate + DeletePredicatePB* del_pred = &(_del_pred_array[ordinal]); + del_pred->clear_sub_predicates(); + for (const string& predicate : delete_predicate.sub_predicates()) { + del_pred->add_sub_predicates(predicate); + } + } else { + DeletePredicatePB* del_pred = _del_pred_array.Add(); + del_pred->set_version(version); + for (const string& predicate : delete_predicate.sub_predicates()) { + del_pred->add_sub_predicates(predicate); + } + } + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::remove_delete_predicate_by_version(const Version& version) { + DCHECK(version.first == version.second) + << "version=" << version.first << "-" << version.second; + int ordinal = 0; + for (; ordinal < _del_pred_array.size(); ++ordinal) { + const DeletePredicatePB& temp = _del_pred_array.Get(ordinal); + if (temp.version() == version.first) { + // log delete condtion + string del_cond_str; + const google::protobuf::RepeatedPtrField& sub_predicates = temp.sub_predicates(); + + for (int i = 0; i != sub_predicates.size(); ++i) { + del_cond_str += sub_predicates.Get(i) + ";"; + } + + LOG(INFO) << "remove one del_pred. 
version=" << temp.version() + << ", condition=" << del_cond_str; + + // remove delete condition from PB + _del_pred_array.SwapElements(ordinal, _del_pred_array.size() - 1); + _del_pred_array.RemoveLast(); + } + } + return OLAP_SUCCESS; +} + +DelPredicateArray TabletMeta::delete_predicates() const { + return _del_pred_array; +} + +bool TabletMeta::version_for_delete_predicate(const Version& version) { + if (version.first != version.second) { + return false; + } + + for (auto& del_pred : _del_pred_array) { + if (del_pred.version() == version.first) { + return true; + } + } + + return false; +} + +OLAPStatus TabletMeta::get_next_rowset_id(RowsetId* gen_rowset_id, DataDir* data_dir) { + WriteLock wrlock(&_meta_lock); + if (_next_rowset_id >= _end_rowset_id) { + ++_next_rowset_id; + _end_rowset_id = _next_rowset_id + _batch_interval; + RETURN_NOT_OK(_save_meta(data_dir)); + } + *gen_rowset_id = _next_rowset_id; + ++_next_rowset_id; + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::set_next_rowset_id(RowsetId new_rowset_id, DataDir* data_dir) { + WriteLock wrlock(&_meta_lock); + // must be < not <= + if (new_rowset_id < _next_rowset_id) { + return OLAP_SUCCESS; + } + if (new_rowset_id >= _end_rowset_id) { + _end_rowset_id = new_rowset_id + _batch_interval; + RETURN_NOT_OK(_save_meta(data_dir)); + } + _next_rowset_id = new_rowset_id + 1; + return OLAP_SUCCESS; +} + +RowsetId TabletMeta::get_cur_rowset_id() { + return _next_rowset_id; +} + + +// return value not reference +// MVCC modification for alter task, upper application get a alter task mirror +AlterTabletTaskSharedPtr TabletMeta::TabletMeta::alter_task() { + ReadLock rlock(&_meta_lock); + return _alter_task; +} + +OLAPStatus TabletMeta::add_alter_task(const AlterTabletTask& alter_task) { + WriteLock wrlock(&_meta_lock); + AlterTabletTask* new_alter_task = new AlterTabletTask(); + *new_alter_task = alter_task; + _alter_task.reset(new_alter_task); + return OLAP_SUCCESS; +} + +OLAPStatus TabletMeta::delete_alter_task() { + WriteLock wrlock(&_meta_lock); + _alter_task.reset(); + return OLAP_SUCCESS; +} + +// if alter task is nullptr, return error? +OLAPStatus TabletMeta::set_alter_state(AlterTabletState alter_state) { + WriteLock wrlock(&_meta_lock); + if (_alter_task == nullptr) { + // alter state should be set to ALTER_PREPARED when starting to + // alter tablet. In this scenario, _alter_task is null pointer. + LOG(WARNING) << "original alter task is null, could not set state"; + return OLAP_ERR_ALTER_STATUS_ERR; + } else { + AlterTabletTask* alter_tablet_task = new AlterTabletTask(); + *alter_tablet_task = *_alter_task; + OLAPStatus reset_status = alter_tablet_task->set_alter_state(alter_state); + if (reset_status != OLAP_SUCCESS) { + return reset_status; + } + _alter_task.reset(alter_tablet_task); + return OLAP_SUCCESS; + } +} + +std::string TabletMeta::full_name() const { + std::stringstream ss; + ss << _tablet_id + << "." << _schema_hash + << "." 
<< _tablet_uid.to_string(); + return ss.str(); +} + +OLAPStatus TabletMeta::set_partition_id(int64_t partition_id) { + if ((_partition_id > 0 && _partition_id != partition_id) || partition_id < 1) { + LOG(FATAL) << "cur partition id=" << _partition_id + << " new partition id=" << partition_id + << " not equal"; + } + _partition_id = partition_id; + return OLAP_SUCCESS; +} + +} // namespace doris diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h new file mode 100644 index 00000000000000..9fbd8cbeb4bbf2 --- /dev/null +++ b/be/src/olap/tablet_meta.h @@ -0,0 +1,319 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_TABLET_META_H +#define DORIS_BE_SRC_OLAP_TABLET_META_H + +#include +#include +#include + +#include "common/logging.h" +#include "gen_cpp/olap_file.pb.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/tablet_schema.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_meta.h" +#include "olap/delete_handler.h" +#include "util/uid_util.h" + +using std::string; +using std::vector; + +namespace doris { + +// Lifecycle states that a Tablet can be in. Legal state transitions for a +// Tablet object: +// +// NOTREADY -> RUNNING -> TOMBSTONED -> STOPPED -> SHUTDOWN +// | | | ^^^ +// | | +----------+|| +// | +------------------------+| +// +-------------------------------------+ + +enum TabletState { + // Tablet is under alter table, rollup, clone + TABLET_NOTREADY, + + TABLET_RUNNING, + + // Tablet integrity has been violated, such as missing versions. + // In this state, tablet will not accept any incoming request. + // Report this state to FE, scheduling BE to drop tablet. + TABLET_TOMBSTONED, + + // Tablet is shutting down, files in disk still remained. + TABLET_STOPPED, + + // Files have been removed, tablet has been shutdown completely. 
+ TABLET_SHUTDOWN +}; + +class RowsetMeta; +class Rowset; +class DataDir; +class TabletMeta; +using TabletMetaSharedPtr = std::shared_ptr; + +class AlterTabletTask { +public: + AlterTabletTask() {} + OLAPStatus init_from_pb(const AlterTabletPB& alter_task); + OLAPStatus to_alter_pb(AlterTabletPB* alter_task); + + inline const AlterTabletState& alter_state() const { return _alter_state; } + OLAPStatus set_alter_state(AlterTabletState alter_state); + + inline int64_t related_tablet_id() const { return _related_tablet_id; } + inline int32_t related_schema_hash() const { return _related_schema_hash; } + inline void set_related_tablet_id(int64_t related_tablet_id) { _related_tablet_id = related_tablet_id; } + inline void set_related_schema_hash(int32_t schema_hash) { _related_schema_hash = schema_hash; } + + inline const AlterTabletType& alter_type() const { return _alter_type; } + inline void set_alter_type(AlterTabletType alter_type) { _alter_type = alter_type; } + +private: + AlterTabletState _alter_state; + int64_t _related_tablet_id; + int32_t _related_schema_hash; + AlterTabletType _alter_type; +}; + +typedef std::shared_ptr AlterTabletTaskSharedPtr; + +// Class encapsulates meta of tablet. +// The concurrency control is handled in Tablet Class, not in this class. +class TabletMeta { +public: + static OLAPStatus create(int64_t table_id, int64_t partition_id, + int64_t tablet_id, int32_t schema_hash, + uint64_t shard_id, const TTabletSchema& tablet_schema, + uint32_t next_unique_id, + const std::unordered_map& col_ordinal_to_unique_id, + TabletMetaSharedPtr* tablet_meta, TabletUid& tablet_uid); + TabletMeta(); + TabletMeta(int64_t table_id, int64_t partition_id, + int64_t tablet_id, int32_t schema_hash, + uint64_t shard_id, const TTabletSchema& tablet_schema, + uint32_t next_unique_id, + const std::unordered_map& col_ordinal_to_unique_id, + TabletUid tablet_uid); + + // Function create_from_file is used to be compatible with previous tablet_meta. + // Previous tablet_meta is a physical file in tablet dir, which is not stored in rocksdb. 
+ OLAPStatus create_from_file(const std::string& file_path); + OLAPStatus save(const std::string& file_path); + static OLAPStatus save(const string& file_path, TabletMetaPB& tablet_meta_pb); + static OLAPStatus reset_tablet_uid(const std::string& file_path); + static std::string construct_header_file_path(const std::string& schema_hash_path, const int64_t tablet_id); + OLAPStatus save_meta(DataDir* data_dir); + + OLAPStatus serialize(string* meta_binary); + OLAPStatus deserialize(const string& meta_binary); + OLAPStatus init_from_pb(const TabletMetaPB& tablet_meta_pb); + + OLAPStatus to_meta_pb(TabletMetaPB* tablet_meta_pb); + OLAPStatus to_json(std::string* json_string, json2pb::Pb2JsonOptions& options); + + const TabletUid tablet_uid(); + inline const int64_t table_id() const; + inline const int64_t partition_id() const; + inline const int64_t tablet_id() const; + inline const int32_t schema_hash() const; + inline const int16_t shard_id() const; + inline void set_shard_id(int32_t shard_id); + inline int64_t creation_time() const; + inline void set_creation_time(int64_t creation_time); + inline int64_t cumulative_layer_point() const; + inline void set_cumulative_layer_point(int64_t new_point); + + inline const size_t num_rows() const; + // disk space occupied by tablet + inline const size_t tablet_footprint() const; + inline const size_t version_count() const; + Version max_version() const; + + inline const TabletState tablet_state() const; + inline OLAPStatus set_tablet_state(TabletState state); + + inline const bool in_restore_mode() const; + inline OLAPStatus set_in_restore_mode(bool in_restore_mode); + + inline const TabletSchema& tablet_schema() const; + + inline const vector& all_rs_metas() const; + OLAPStatus add_rs_meta(const RowsetMetaSharedPtr& rs_meta); + RowsetMetaSharedPtr acquire_rs_meta_by_version(const Version& version) const; + OLAPStatus delete_rs_meta_by_version(const Version& version, vector* deleted_rs_metas); + OLAPStatus modify_rs_metas(const vector& to_add, + const vector& to_delete); + OLAPStatus revise_rs_metas(const std::vector& rs_metas); + OLAPStatus revise_inc_rs_metas(const std::vector& rs_metas); + + inline const vector& all_inc_rs_metas() const; + OLAPStatus add_inc_rs_meta(const RowsetMetaSharedPtr& rs_meta); + OLAPStatus delete_inc_rs_meta_by_version(const Version& version); + RowsetMetaSharedPtr acquire_inc_rs_meta_by_version(const Version& version) const; + + OLAPStatus add_delete_predicate(const DeletePredicatePB& delete_predicate, int64_t version); + OLAPStatus remove_delete_predicate_by_version(const Version& version); + DelPredicateArray delete_predicates() const; + bool version_for_delete_predicate(const Version& version); + AlterTabletTaskSharedPtr alter_task(); + OLAPStatus add_alter_task(const AlterTabletTask& alter_task); + OLAPStatus delete_alter_task(); + OLAPStatus set_alter_state(AlterTabletState alter_state); + + // rowsetid is not globally unique, it is tablet level + // it saves the batch end id into meta env + OLAPStatus get_next_rowset_id(RowsetId* rowset_id, DataDir* data_dir); + + OLAPStatus set_next_rowset_id(RowsetId new_rowset_id, DataDir* data_dir); + + RowsetId get_cur_rowset_id(); + + std::string full_name() const; + + OLAPStatus set_partition_id(int64_t partition_id); + + RowsetId initial_end_rowset_id() { + return _initial_end_rowset_id; + } + +private: + OLAPStatus _save_meta(DataDir* data_dir); + +private: + int64_t _table_id; + int64_t _partition_id; + int64_t _tablet_id; + int32_t _schema_hash; + int32_t _shard_id; + 
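A minimal sketch, with invented names and a stubbed persist step, of the batched id allocation that get_next_rowset_id()/set_next_rowset_id() implement on top of the _next_rowset_id, _end_rowset_id and _batch_interval members declared in this section: ids are handed out from an in-memory range, and only the end of the reserved range is written to storage, so a restart (which init_from_pb() resumes from _end_rowset_id + 1) wastes at most the unused tail of one batch rather than ever handing out the same id twice.

#include <cstdint>
#include <iostream>

// Illustrative sketch only: batch-reserving id allocator in the spirit of
// TabletMeta::get_next_rowset_id(). Persistence is stubbed out.
class BatchedIdAllocator {
public:
    int64_t next_id() {
        if (_next_id >= _end_id) {
            // Current batch exhausted: reserve a new one and persist only its end.
            ++_next_id;
            _end_id = _next_id + kBatchInterval;
            persist_end(_end_id);
        }
        return _next_id++;  // ids inside the reserved batch need no extra I/O
    }

private:
    void persist_end(int64_t end) {
        // A real implementation would save the tablet meta here (see _save_meta()).
        std::cout << "persisted batch end = " << end << "\n";
    }

    static constexpr int64_t kBatchInterval = 10000;
    int64_t _next_id = 10001;  // persisted end (10000) + 1, as in init_from_pb()
    int64_t _end_id = 10000;
};

int main() {
    BatchedIdAllocator alloc;
    // First call reserves and persists a new batch; the second is purely in-memory.
    std::cout << alloc.next_id() << " " << alloc.next_id() << "\n";
    return 0;
}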
int64_t _creation_time; + int64_t _cumulative_layer_point; + TabletUid _tablet_uid; + RowsetId _next_rowset_id = 10000; + RowsetId _end_rowset_id; + RowsetId _initial_end_rowset_id; + RowsetId _batch_interval = 10000; + + + TabletState _tablet_state; + TabletSchema _schema; + vector _rs_metas; + vector _inc_rs_metas; + DelPredicateArray _del_pred_array; + AlterTabletTaskSharedPtr _alter_task; + bool _in_restore_mode = false; + + RWMutex _meta_lock; +}; + +inline const int64_t TabletMeta::table_id() const { + return _table_id; +} + +inline const int64_t TabletMeta::partition_id() const { + return _partition_id; +} + +inline const int64_t TabletMeta::tablet_id() const { + return _tablet_id; +} + +inline const int32_t TabletMeta::schema_hash() const { + return _schema_hash; +} + +inline const int16_t TabletMeta::shard_id() const { + return _shard_id; +} + +inline void TabletMeta::set_shard_id(int32_t shard_id) { + _shard_id = shard_id; +} + +inline int64_t TabletMeta::creation_time() const { + return _creation_time; +} + +inline void TabletMeta::set_creation_time(int64_t creation_time) { + _creation_time = creation_time; +} + +inline int64_t TabletMeta::cumulative_layer_point() const { + return _cumulative_layer_point; +} + +inline void TabletMeta::set_cumulative_layer_point(int64_t new_point) { + _cumulative_layer_point = new_point; +} + +inline const size_t TabletMeta::num_rows() const { + size_t num_rows = 0; + for (auto& rs : _rs_metas) { + num_rows += rs->num_rows(); + } + return num_rows; +} + +inline const size_t TabletMeta::tablet_footprint() const { + size_t total_size = 0; + for (auto& rs : _rs_metas) { + total_size += rs->data_disk_size(); + } + return total_size; +} + +inline const size_t TabletMeta::version_count() const { + return _rs_metas.size(); +} + +inline const TabletState TabletMeta::tablet_state() const { + return _tablet_state; +} + +inline OLAPStatus TabletMeta::set_tablet_state(TabletState state) { + _tablet_state = state; + return OLAP_SUCCESS; +} + +inline const bool TabletMeta::in_restore_mode() const { + return _in_restore_mode; +} + +inline OLAPStatus TabletMeta::set_in_restore_mode(bool in_restore_mode) { + _in_restore_mode = in_restore_mode; + return OLAP_SUCCESS; +} + +inline const TabletSchema& TabletMeta::tablet_schema() const { + return _schema; +} + +inline const vector& TabletMeta::all_rs_metas() const { + return _rs_metas; +} + +inline const vector& TabletMeta::all_inc_rs_metas() const { + return _inc_rs_metas; +} + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_OLAP_TABLET_META_H diff --git a/be/src/olap/tablet_meta_manager.cpp b/be/src/olap/tablet_meta_manager.cpp new file mode 100755 index 00000000000000..47d9773d3b47ab --- /dev/null +++ b/be/src/olap/tablet_meta_manager.cpp @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/tablet_meta_manager.h" + +#include +#include +#include +#include +#include + +#include "olap/olap_define.h" +#include "olap/storage_engine.h" +#include "olap/olap_meta.h" +#include "common/logging.h" +#include "json2pb/json_to_pb.h" +#include "json2pb/pb_to_json.h" + +using rocksdb::DB; +using rocksdb::DBOptions; +using rocksdb::ColumnFamilyDescriptor; +using rocksdb::ColumnFamilyHandle; +using rocksdb::ColumnFamilyOptions; +using rocksdb::ReadOptions; +using rocksdb::WriteOptions; +using rocksdb::Slice; +using rocksdb::Iterator; +using rocksdb::Status; +using rocksdb::kDefaultColumnFamilyName; + +namespace doris { + +OLAPStatus TabletMetaManager::get_meta( + DataDir* store, TTabletId tablet_id, + TSchemaHash schema_hash, + TabletMetaSharedPtr tablet_meta) { + OlapMeta* meta = store->get_meta(); + std::stringstream key_stream; + key_stream << HEADER_PREFIX << tablet_id << "_" << schema_hash; + std::string key = key_stream.str(); + std::string value; + OLAPStatus s = meta->get(META_COLUMN_FAMILY_INDEX, key, &value); + if (s == OLAP_ERR_META_KEY_NOT_FOUND) { + LOG(WARNING) << "tablet_id:" << tablet_id << ", schema_hash:" << schema_hash << " not found."; + return OLAP_ERR_META_KEY_NOT_FOUND; + } else if (s != OLAP_SUCCESS) { + LOG(WARNING) << "load tablet_id:" << tablet_id << ", schema_hash:" << schema_hash << " failed."; + return s; + } + return tablet_meta->deserialize(value); +} + +OLAPStatus TabletMetaManager::get_json_meta(DataDir* store, + TTabletId tablet_id, TSchemaHash schema_hash, std::string* json_meta) { + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + OLAPStatus s = get_meta(store, tablet_id, schema_hash, tablet_meta); + if (s != OLAP_SUCCESS) { + return s; + } + json2pb::Pb2JsonOptions json_options; + json_options.pretty_json = true; + tablet_meta->to_json(json_meta, json_options); + return OLAP_SUCCESS; +} + +OLAPStatus TabletMetaManager::save(DataDir* store, + TTabletId tablet_id, TSchemaHash schema_hash, + TabletMetaSharedPtr tablet_meta, const string& header_prefix) { + std::stringstream key_stream; + key_stream << header_prefix << tablet_id << "_" << schema_hash; + std::string key = key_stream.str(); + std::string value; + tablet_meta->serialize(&value); + OlapMeta* meta = store->get_meta(); + LOG(INFO) << "save tablet meta" + << ", key:" << key + << ", meta length:" << value.length(); + return meta->put(META_COLUMN_FAMILY_INDEX, key, value); +} + +OLAPStatus TabletMetaManager::save(DataDir* store, + TTabletId tablet_id, TSchemaHash schema_hash, const std::string& meta_binary, const string& header_prefix) { + std::stringstream key_stream; + key_stream << header_prefix << tablet_id << "_" << schema_hash; + std::string key = key_stream.str(); + VLOG(3) << "save tablet meta to meta store: key = " << key; + OlapMeta* meta = store->get_meta(); + + TabletMetaPB de_tablet_meta_pb; + bool parsed = de_tablet_meta_pb.ParseFromString(meta_binary); + if (!parsed) { + LOG(FATAL) << "deserialize from previous serialize result failed"; + } + + LOG(INFO) << "save tablet meta " + << ", key:" << key + << " meta_size=" << meta_binary.length(); + return meta->put(META_COLUMN_FAMILY_INDEX, key, meta_binary); +} + +OLAPStatus TabletMetaManager::remove(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, + const string& header_prefix) { + std::stringstream key_stream; + key_stream << header_prefix << tablet_id << "_" << schema_hash; + std::string key = 
key_stream.str(); + OlapMeta* meta = store->get_meta(); + LOG(INFO) << "start to remove tablet_meta, key:" << key; + OLAPStatus res = meta->remove(META_COLUMN_FAMILY_INDEX, key); + LOG(INFO) << "remove tablet_meta, key:" << key << ", res:" << res; + return res; +} + +OLAPStatus TabletMetaManager::traverse_headers(OlapMeta* meta, + std::function const& func, const string& header_prefix) { + auto traverse_header_func = [&func](const std::string& key, const std::string& value) -> bool { + std::vector parts; + // key format: "hdr_" + tablet_id + "_" + schema_hash + split_string(key, '_', &parts); + if (parts.size() != 3) { + LOG(WARNING) << "invalid tablet_meta key:" << key << ", splitted size:" << parts.size(); + return true; + } + TTabletId tablet_id = std::stol(parts[1].c_str(), nullptr, 10); + TSchemaHash schema_hash = std::stol(parts[2].c_str(), nullptr, 10); + return func(tablet_id, schema_hash, value); + }; + OLAPStatus status = meta->iterate(META_COLUMN_FAMILY_INDEX, header_prefix, traverse_header_func); + return status; +} + +OLAPStatus TabletMetaManager::load_json_meta(DataDir* store, const std::string& meta_path) { + std::ifstream infile(meta_path); + char buffer[102400]; + std::string json_meta; + while (!infile.eof()) { + infile.getline(buffer, 102400); + json_meta = json_meta + buffer; + } + boost::algorithm::trim(json_meta); + TabletMetaPB tablet_meta_pb; + bool ret = json2pb::JsonToProtoMessage(json_meta, &tablet_meta_pb); + if (!ret) { + return OLAP_ERR_HEADER_LOAD_JSON_HEADER; + } + + std::string meta_binary; + tablet_meta_pb.SerializeToString(&meta_binary); + TTabletId tablet_id = tablet_meta_pb.tablet_id(); + TSchemaHash schema_hash = tablet_meta_pb.schema_hash(); + return save(store, tablet_id, schema_hash, meta_binary); +} + +OLAPStatus TabletMetaManager::dump_header(DataDir* store, TTabletId tablet_id, + TSchemaHash schema_hash, const std::string& dump_path) { + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + OLAPStatus res = TabletMetaManager::get_meta(store, tablet_id, schema_hash, tablet_meta); + if (res != OLAP_SUCCESS) { + return res; + } + return tablet_meta->save(dump_path); +} + +} diff --git a/be/src/olap/tablet_meta_manager.h b/be/src/olap/tablet_meta_manager.h new file mode 100644 index 00000000000000..3898961914e3cd --- /dev/null +++ b/be/src/olap/tablet_meta_manager.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
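A stand-alone sketch of the meta-key convention used by TabletMetaManager above: save() builds the key as prefix + tablet_id + "_" + schema_hash, and the lambda inside traverse_headers() splits it on '_' and expects exactly three parts, which holds because both prefixes ("hdr_" and "tabletmeta_") end in a single underscore and contain no other one. The example values are invented, and std::getline stands in for the split_string utility the patch uses.

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Illustrative only: build a tablet meta key the way TabletMetaManager::save()
// does, then parse it back the way traverse_headers() does.
int main() {
    const std::string header_prefix = "tabletmeta_";  // OLD_HEADER_PREFIX would be "hdr_"
    int64_t tablet_id = 15007;                        // made-up example values
    int32_t schema_hash = 368169781;

    std::stringstream key_stream;
    key_stream << header_prefix << tablet_id << "_" << schema_hash;
    const std::string key = key_stream.str();         // "tabletmeta_15007_368169781"

    // Parse: split on '_' and expect prefix, tablet_id, schema_hash.
    std::vector<std::string> parts;
    std::stringstream ss(key);
    std::string part;
    while (std::getline(ss, part, '_')) {
        parts.push_back(part);
    }
    if (parts.size() != 3) {
        std::cerr << "invalid tablet_meta key: " << key << "\n";
        return 1;  // traverse_headers() just skips such keys and keeps iterating
    }
    int64_t parsed_tablet_id = std::stol(parts[1]);
    int32_t parsed_schema_hash = static_cast<int32_t>(std::stol(parts[2]));
    std::cout << "tablet_id=" << parsed_tablet_id
              << " schema_hash=" << parsed_schema_hash << "\n";
    return 0;
}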
+ +#ifndef DORIS_BE_SRC_OLAP_TABLET_META_MANAGER_H +#define DORIS_BE_SRC_OLAP_TABLET_META_MANAGER_H + +#include + +#include "olap/tablet_meta.h" +#include "olap/olap_define.h" +#include "olap/data_dir.h" + +namespace doris { + +const std::string OLD_HEADER_PREFIX = "hdr_"; + +const std::string HEADER_PREFIX = "tabletmeta_"; + +// Helper Class for managing tablet headers of one root path. +class TabletMetaManager { +public: + static OLAPStatus get_meta(DataDir* store, TTabletId tablet_id, + TSchemaHash schema_hash, TabletMetaSharedPtr tablet_meta); + + static OLAPStatus get_json_meta(DataDir* store, TTabletId tablet_id, + TSchemaHash schema_hash, std::string* json_meta); + + static OLAPStatus save(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, + TabletMetaSharedPtr tablet_meta, const string& header_prefix = "tabletmeta_"); + static OLAPStatus save(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, + const std::string& meta_binary, const string& header_prefix = "tabletmeta_"); + + static OLAPStatus remove(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, + const string& header_prefix = "tabletmeta_"); + + static OLAPStatus traverse_headers(OlapMeta* meta, + std::function const& func, const string& header_prefix = "tabletmeta_"); + + static OLAPStatus load_json_meta(DataDir* store, const std::string& meta_path); + + static OLAPStatus dump_header(DataDir* store, TTabletId tablet_id, + TSchemaHash schema_hash, const std::string& path); +}; + +} + +#endif // DORIS_BE_SRC_OLAP_TABLET_META_MANAGER_H diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp new file mode 100644 index 00000000000000..d27e34d132ed2d --- /dev/null +++ b/be/src/olap/tablet_schema.cpp @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "olap/tablet_schema.h" +#include "olap/field_info.h" + +namespace doris { + +TabletColumn::TabletColumn() {} + +TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) { + _aggregation = agg; + _type = type; +} + +OLAPStatus TabletColumn::init_from_pb(const ColumnPB& column) { + _unique_id = column.unique_id(); + _col_name = column.name(); + _type = FieldInfo::get_field_type_by_string(column.type()); + _is_key = column.is_key(); + _is_nullable = column.is_nullable(); + + _has_default_value = column.has_default_value(); + if (_has_default_value) { + _default_value = column.default_value(); + } + + if (column.has_precision()) { + _is_decimal = true; + _precision = column.precision(); + } else { + _is_decimal = false; + } + if (column.has_frac()) { + _frac = column.frac(); + } + _length = column.length(); + _index_length = column.index_length(); + if (column.has_is_bf_column()) { + _is_bf_column = column.is_bf_column(); + } else { + _is_bf_column = false; + } + _has_referenced_column = column.has_referenced_column_id(); + if (_has_referenced_column) { + _referenced_column_id = column.referenced_column_id(); + } + if (column.has_aggregation()) { + _aggregation = FieldInfo::get_aggregation_type_by_string(column.aggregation()); + } + return OLAP_SUCCESS; +} + +OLAPStatus TabletColumn::to_schema_pb(ColumnPB* column) { + column->set_unique_id(_unique_id); + column->set_name(_col_name); + column->set_type(FieldInfo::get_string_by_field_type(_type)); + column->set_is_key(_is_key); + column->set_is_nullable(_is_nullable); + if (_has_default_value) { + column->set_default_value(_default_value); + } + if (_is_decimal) { + column->set_precision(_precision); + column->set_frac(_frac); + } + column->set_length(_length); + column->set_index_length(_index_length); + if (_is_bf_column) { + column->set_is_bf_column(_is_bf_column); + } + column->set_aggregation(FieldInfo::get_string_by_aggregation_type(_aggregation)); + if (_has_referenced_column) { + column->set_referenced_column_id(_referenced_column_id); + } + return OLAP_SUCCESS; +} + +TabletSchema::TabletSchema() + : _num_columns(0), + _num_key_columns(0), + _num_null_columns(0), + _num_short_key_columns(0) { } + +OLAPStatus TabletSchema::init_from_pb(const TabletSchemaPB& schema) { + _keys_type = schema.keys_type(); + for (auto& column_pb : schema.column()) { + TabletColumn column; + column.init_from_pb(column_pb); + _cols.push_back(column); + _num_columns++; + if (column.is_key()) { + _num_key_columns++; + } + if (column.is_nullable()) { + _num_null_columns++; + } + } + _num_short_key_columns = schema.num_short_key_columns(); + _num_rows_per_row_block = schema.num_rows_per_row_block(); + _compress_kind = schema.compress_kind(); + _next_column_unique_id = schema.next_column_unique_id(); + if (schema.has_bf_fpp()) { + _has_bf_fpp = true; + _bf_fpp = schema.bf_fpp(); + } else { + _has_bf_fpp = false; + _bf_fpp = BLOOM_FILTER_DEFAULT_FPP; + } + return OLAP_SUCCESS; +} + +OLAPStatus TabletSchema::to_schema_pb(TabletSchemaPB* tablet_meta_pb) { + tablet_meta_pb->set_keys_type(_keys_type); + for (auto& col : _cols) { + ColumnPB* column = tablet_meta_pb->add_column(); + col.to_schema_pb(column); + } + tablet_meta_pb->set_num_short_key_columns(_num_short_key_columns); + tablet_meta_pb->set_num_rows_per_row_block(_num_rows_per_row_block); + tablet_meta_pb->set_compress_kind(_compress_kind); + if (_has_bf_fpp) { + tablet_meta_pb->set_bf_fpp(_bf_fpp); + } + tablet_meta_pb->set_next_column_unique_id(_next_column_unique_id); + + 
return OLAP_SUCCESS; +} + +size_t TabletSchema::row_size() const { + size_t size = 0; + for (auto& column : _cols) { + size += column.length(); + } + size += (_num_columns + 7) / 8; + + return size; +} + +size_t TabletSchema::field_index(const std::string& field_name) const { + bool field_exist = false; + int ordinal = -1; + for (auto& column : _cols) { + ordinal++; + if (column.name() == field_name) { + field_exist = true; + break; + } + } + return field_exist ? ordinal : -1; +} + +const std::vector& TabletSchema::columns() const { + return _cols; +} + +const TabletColumn& TabletSchema::column(size_t ordinal) const { + DCHECK(ordinal < _num_columns) + << "ordinal:" << ordinal << ", _num_columns:" << _num_columns; + return _cols[ordinal]; +} + +} // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h new file mode 100644 index 00000000000000..64722bf715aeb4 --- /dev/null +++ b/be/src/olap/tablet_schema.h @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
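+// Sizing note for TabletSchema::row_size() declared below: it adds up every column's
+// length() and then (_num_columns + 7) / 8 extra bytes, i.e. one bit per column rounded
+// up to whole bytes (presumably the per-row null indicators). For example, a 3-column
+// schema with lengths 4, 8 and 16 yields 4 + 8 + 16 + 1 = 29 bytes (illustrative numbers).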
+ +#ifndef DORIS_BE_SRC_OLAP_TABLET_SCHEMA_H +#define DORIS_BE_SRC_OLAP_TABLET_SCHEMA_H + +#include + +#include "gen_cpp/olap_file.pb.h" +#include "olap/olap_define.h" +#include "olap/types.h" + +namespace doris { + +class TabletColumn { +public: + TabletColumn(); + TabletColumn(FieldAggregationMethod agg, FieldType type); + OLAPStatus init_from_pb(const ColumnPB& column); + OLAPStatus to_schema_pb(ColumnPB* column); + + inline int32_t unique_id() const { return _unique_id; } + inline std::string name() const { return _col_name; } + inline FieldType type() const { return _type; } + inline bool is_key() const { return _is_key; } + inline bool is_nullable() const { return _is_nullable; } + inline bool is_bf_column() const { return _is_bf_column; } + bool has_default_value() const { return _has_default_value; } + std::string default_value() const { return _default_value; } + bool has_reference_column() const { return _has_referenced_column; } + int32_t referenced_column_id() const { return _referenced_column_id; } + std::string referenced_column() const { return _referenced_column; } + size_t length() const { return _length; } + size_t index_length() const { return _index_length; } + FieldAggregationMethod aggregation() const { return _aggregation; } + int precision() const { return _precision; } + int frac() const { return _frac; } +private: + int32_t _unique_id; + std::string _col_name; + FieldType _type; + bool _is_key; + FieldAggregationMethod _aggregation; + bool _is_nullable; + + bool _has_default_value; + std::string _default_value; + + bool _is_decimal; + int32_t _precision; + int32_t _frac; + + int32_t _length; + int32_t _index_length; + + bool _is_bf_column; + + bool _has_referenced_column; + int32_t _referenced_column_id; + std::string _referenced_column; +}; + +class TabletSchema { +public: + TabletSchema(); + OLAPStatus init_from_pb(const TabletSchemaPB& schema); + OLAPStatus to_schema_pb(TabletSchemaPB* tablet_meta_pb); + size_t row_size() const; + size_t field_index(const std::string& field_name) const; + const TabletColumn& column(size_t ordinal) const; + const std::vector& columns() const; + inline size_t num_columns() const { return _num_columns; } + inline size_t num_key_columns() const { return _num_key_columns; } + inline size_t num_null_columns() const { return _num_null_columns; } + inline size_t num_short_key_columns() const { return _num_short_key_columns; } + inline size_t num_rows_per_row_block() const { return _num_rows_per_row_block; } + inline KeysType keys_type() const { return _keys_type; } + inline CompressKind compress_kind() const { return _compress_kind; } + inline size_t next_column_unique_id() const { return _next_column_unique_id; } + inline double bloom_filter_fpp() const { return _bf_fpp; } +private: + KeysType _keys_type; + std::vector _cols; + size_t _num_columns; + size_t _num_key_columns; + size_t _num_null_columns; + size_t _num_short_key_columns; + size_t _num_rows_per_row_block; + CompressKind _compress_kind; + size_t _next_column_unique_id; + + bool _has_bf_fpp; + double _bf_fpp; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_TABLET_SCHEMA_H diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp new file mode 100644 index 00000000000000..6126919d59275f --- /dev/null +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -0,0 +1,382 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/task/engine_batch_load_task.h" +#include +#include +#include +#include +#include +#include +#include +#include "boost/filesystem.hpp" +#include "boost/lexical_cast.hpp" +#include "agent/cgroups_mgr.h" +#include "gen_cpp/AgentService_types.h" +#include "http/http_client.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/push_handler.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" +#include "util/doris_metrics.h" +#include "util/pretty_printer.h" + +using apache::thrift::ThriftDebugString; +using std::list; +using std::string; +using std::vector; + +namespace doris { + +EngineBatchLoadTask::EngineBatchLoadTask(TPushReq& push_req, + std::vector* tablet_infos, + int64_t signature, + AgentStatus* res_status) : + _push_req(push_req), + _tablet_infos(tablet_infos), + _signature(signature), + _res_status(res_status) { + _download_status = DORIS_SUCCESS; +} + +EngineBatchLoadTask::~EngineBatchLoadTask() { +} + +OLAPStatus EngineBatchLoadTask::execute() { + AgentStatus status = DORIS_SUCCESS; + if (_push_req.push_type == TPushType::LOAD || _push_req.push_type == TPushType::LOAD_DELETE) { + status = _init(); + if (status == DORIS_SUCCESS) { + uint32_t retry_time = 0; + while (retry_time < PUSH_MAX_RETRY) { + status = _process(); + + if (status == DORIS_PUSH_HAD_LOADED) { + OLAP_LOG_WARNING("transaction exists when realtime push, " + "but unfinished, do not report to fe, signature: %ld", + _signature); + break; // not retry any more + } + // Internal error, need retry + if (status == DORIS_ERROR) { + OLAP_LOG_WARNING("push internal error, need retry.signature: %ld", + _signature); + retry_time += 1; + } else { + break; + } + } + } + } else if (_push_req.push_type == TPushType::DELETE) { + OLAPStatus delete_data_status = _delete_data(_push_req, _tablet_infos); + if (delete_data_status != OLAPStatus::OLAP_SUCCESS) { + OLAP_LOG_WARNING("delete data failed. status: %d, signature: %ld", + delete_data_status, _signature); + status = DORIS_ERROR; + } + } else { + status = DORIS_TASK_REQUEST_ERROR; + } + *_res_status = status; + return OLAP_SUCCESS; +} + +AgentStatus EngineBatchLoadTask::_init() { + AgentStatus status = DORIS_SUCCESS; + + if (_is_init) { + VLOG(3) << "has been inited"; + return status; + } + + // Check replica exist + TabletSharedPtr tablet; + tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + _push_req.tablet_id, + _push_req.schema_hash); + if (tablet == nullptr) { + LOG(WARNING) << "get tables failed. 
" + << "tablet_id: " << _push_req.tablet_id + << ", schema_hash: " << _push_req.schema_hash; + return DORIS_PUSH_INVALID_TABLE; + } + + // Empty remote_path + if (!_push_req.__isset.http_file_path) { + _is_init = true; + return status; + } + + // Check remote path + _remote_file_path = _push_req.http_file_path; + LOG(INFO) << "start get file. remote_file_path: " << _remote_file_path; + // Set download param + string tmp_file_dir; + string root_path = tablet->data_dir()->path(); + status = _get_tmp_file_dir(root_path, &tmp_file_dir); + + if (status != DORIS_SUCCESS) { + LOG(WARNING) << "get local path failed. tmp file dir: " << tmp_file_dir; + return status; + } + string tmp_file_name; + _get_file_name_from_path(_push_req.http_file_path, &tmp_file_name); + _local_file_path = tmp_file_dir + "/" + tmp_file_name; + _is_init = true; + return status; +} + +// Get replica root path +AgentStatus EngineBatchLoadTask::_get_tmp_file_dir(const string& root_path, string* download_path) { + AgentStatus status = DORIS_SUCCESS; + *download_path = root_path + DPP_PREFIX; + + // Check path exist + boost::filesystem::path full_path(*download_path); + + if (!boost::filesystem::exists(full_path)) { + LOG(INFO) << "download dir not exist: " << *download_path; + boost::system::error_code error_code; + boost::filesystem::create_directories(*download_path, error_code); + + if (0 != error_code) { + status = DORIS_ERROR; + LOG(WARNING) << "create download dir failed.path: " + << *download_path << ", error code: " << error_code; + } + } + + return status; +} + +void EngineBatchLoadTask::_get_file_name_from_path(const string& file_path, string* file_name) { + size_t found = file_path.find_last_of("/\\"); + pthread_t tid = pthread_self(); + *file_name = file_path.substr(found + 1) + "_" + boost::lexical_cast(tid); +} + +AgentStatus EngineBatchLoadTask::_process() { + AgentStatus status = DORIS_SUCCESS; + if (!_is_init) { + LOG(WARNING) << "has not init yet. tablet_id: " + << _push_req.tablet_id; + return DORIS_ERROR; + } + // Remote file not empty, need to download + if (_push_req.__isset.http_file_path) { + // Get file length and timeout + uint64_t file_size = 0; + uint64_t estimate_time_out = DEFAULT_DOWNLOAD_TIMEOUT; + if (_push_req.__isset.http_file_size) { + file_size = _push_req.http_file_size; + estimate_time_out = file_size / config::download_low_speed_limit_kbps / 1024; + } + if (estimate_time_out < config::download_low_speed_time) { + estimate_time_out = config::download_low_speed_time; + } + bool is_timeout = false; + auto download_cb = [this, estimate_time_out, file_size, &is_timeout] (HttpClient* client) { + // Check timeout and set timeout + time_t now = time(NULL); + if (_push_req.timeout > 0 && _push_req.timeout < now) { + // return status to break this callback + VLOG(3) << "check time out. time_out:" << _push_req.timeout + << ", now:" << now; + is_timeout = true; + return Status::OK(); + } + + RETURN_IF_ERROR(client->init(_remote_file_path)); + // sent timeout + uint64_t timeout = _push_req.timeout > 0 ? 
_push_req.timeout - now : 0; + if (timeout > 0 && timeout < estimate_time_out) { + client->set_timeout_ms(timeout * 1000); + } else { + client->set_timeout_ms(estimate_time_out * 1000); + } + + // download remote file + RETURN_IF_ERROR(client->download(_local_file_path)); + + // check file size + if (_push_req.__isset.http_file_size) { + // Check file size + uint64_t local_file_size = boost::filesystem::file_size(_local_file_path); + if (file_size != local_file_size) { + LOG(WARNING) << "download_file size error. file_size=" << file_size + << ", local_file_size=" << local_file_size; + return Status::InternalError("downloaded file's size isn't right"); + } + } + // NOTE: change http_file_path is not good design + _push_req.http_file_path = _local_file_path; + return Status::OK(); + }; + + MonotonicStopWatch stopwatch; + stopwatch.start(); + auto st = HttpClient::execute_with_retry(MAX_RETRY, 1, download_cb); + auto cost = stopwatch.elapsed_time(); + if (cost <= 0) { + cost = 1; + } + if (st.ok() && !is_timeout) { + double rate = -1.0; + if (_push_req.__isset.http_file_size) { + rate = (double) _push_req.http_file_size / (cost / 1000 / 1000 / 1000) / 1024; + } + LOG(INFO) << "download file success. local_file=" << _local_file_path + << ", remote_file=" << _remote_file_path + << ", tablet_id=" << _push_req.tablet_id + << ", cost=" << cost / 1000 << "us, file_size=" << _push_req.http_file_size + << ", download rate:" << rate << "KB/s"; + } else { + LOG(WARNING) << "download file failed. remote_file=" << _remote_file_path + << ", tablet=" << _push_req.tablet_id + << ", cost=" << cost / 1000 + << "us, errmsg=" << st.get_error_msg() << ", is_timeout=" << is_timeout; + status = DORIS_ERROR; + } + } + + if (status == DORIS_SUCCESS) { + // Load delta file + time_t push_begin = time(NULL); + OLAPStatus push_status = _push(_push_req, _tablet_infos); + time_t push_finish = time(NULL); + LOG(INFO) << "Push finish, cost time: " << (push_finish - push_begin); + if (push_status == OLAPStatus::OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST) { + status = DORIS_PUSH_HAD_LOADED; + } else if (push_status != OLAPStatus::OLAP_SUCCESS) { + status = DORIS_ERROR; + } + } + + // Delete download file + if (boost::filesystem::exists(_local_file_path)) { + if (remove(_local_file_path.c_str()) == -1) { + LOG(WARNING) << "can not remove file=" << _local_file_path; + } + } + + return status; +} + +OLAPStatus EngineBatchLoadTask::_push(const TPushReq& request, + vector* tablet_info_vec) { + OLAPStatus res = OLAP_SUCCESS; + LOG(INFO) << "begin to process push. " + << " transaction_id=" << request.transaction_id + << " tablet_id=" << request.tablet_id + << ", version=" << request.version; + + if (tablet_info_vec == nullptr) { + LOG(WARNING) << "invalid output parameter which is nullptr pointer."; + DorisMetrics::push_requests_fail_total.increment(1); + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } + + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + request.tablet_id, request.schema_hash); + if (tablet == nullptr) { + LOG(WARNING) << "fail to find tablet. 
tablet=" << request.tablet_id + << ", schema_hash=" << request.schema_hash; + DorisMetrics::push_requests_fail_total.increment(1); + return OLAP_ERR_TABLE_NOT_FOUND; + } + + PushType type = PUSH_NORMAL; + if (request.push_type == TPushType::LOAD_DELETE) { + type = PUSH_FOR_LOAD_DELETE; + } + + int64_t duration_ns = 0; + PushHandler push_handler; + if (request.__isset.transaction_id) { + { + SCOPED_RAW_TIMER(&duration_ns); + res = push_handler.process_streaming_ingestion(tablet, request, type, tablet_info_vec); + } + } else { + { + SCOPED_RAW_TIMER(&duration_ns); + res = OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED; + } + } + + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to push delta, " + << "transaction_id=" << request.transaction_id + << " tablet=" << tablet->full_name() + << ", cost=" << PrettyPrinter::print(duration_ns, TUnit::TIME_NS); + DorisMetrics::push_requests_fail_total.increment(1); + } else { + LOG(INFO) << "success to push delta, " + << "transaction_id=" << request.transaction_id + << " tablet=" << tablet->full_name() + << ", cost=" << PrettyPrinter::print(duration_ns, TUnit::TIME_NS); + DorisMetrics::push_requests_success_total.increment(1); + DorisMetrics::push_request_duration_us.increment(duration_ns / 1000); + DorisMetrics::push_request_write_bytes.increment(push_handler.write_bytes()); + DorisMetrics::push_request_write_rows.increment(push_handler.write_rows()); + } + return res; +} + +OLAPStatus EngineBatchLoadTask::_delete_data( + const TPushReq& request, + vector* tablet_info_vec) { + LOG(INFO) << "begin to process delete data. request=" << ThriftDebugString(request); + DorisMetrics::delete_requests_total.increment(1); + + OLAPStatus res = OLAP_SUCCESS; + + if (tablet_info_vec == nullptr) { + LOG(WARNING) << "invalid tablet info parameter which is nullptr pointer."; + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } + + // 1. Get all tablets with same tablet_id + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(request.tablet_id, request.schema_hash); + if (tablet == nullptr) { + LOG(WARNING) << "can't find tablet. tablet=" << request.tablet_id + << ", schema_hash=" << request.schema_hash; + return OLAP_ERR_TABLE_NOT_FOUND; + } + + // 2. Process delete data by push interface + PushHandler push_handler; + if (request.__isset.transaction_id) { + res = push_handler.process_streaming_ingestion(tablet, request, PUSH_FOR_DELETE, tablet_info_vec); + } else { + res = OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED; + } + + if (res != OLAP_SUCCESS) { + OLAP_LOG_WARNING("fail to push empty version for delete data. " + "[res=%d tablet='%s']", + res, tablet->full_name().c_str()); + DorisMetrics::delete_requests_failed.increment(1); + return res; + } + + LOG(INFO) << "finish to process delete data. res=" << res; + return res; +} + +} // namespace doris diff --git a/be/src/agent/pusher.h b/be/src/olap/task/engine_batch_load_task.h similarity index 51% rename from be/src/agent/pusher.h rename to be/src/olap/task/engine_batch_load_task.h index 56b9b7d63e9e42..51a003d3b4fc6d 100644 --- a/be/src/agent/pusher.h +++ b/be/src/olap/task/engine_batch_load_task.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef DORIS_BE_SRC_AGENT_PUSHER_H -#define DORIS_BE_SRC_AGENT_PUSHER_H +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_BATCH_LOAD_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_BATCH_LOAD_TASK_H #include #include @@ -25,37 +25,59 @@ #include "gen_cpp/MasterService_types.h" #include "olap/olap_common.h" #include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +using namespace std; namespace doris { +const uint32_t PUSH_MAX_RETRY = 1; const uint32_t MAX_RETRY = 3; const uint32_t DEFAULT_DOWNLOAD_TIMEOUT = 3600; -class OLAPEngine; +class StorageEngine; -class Pusher { +class EngineBatchLoadTask : public EngineTask{ public: - explicit Pusher(OLAPEngine* engine, const TPushReq& push_req); - virtual ~Pusher(); - + EngineBatchLoadTask(TPushReq& push_req, std::vector* tablet_infos, + int64_t signature, AgentStatus* res_status); + virtual ~EngineBatchLoadTask(); + + virtual OLAPStatus execute(); + +private: // The initial function of pusher - virtual AgentStatus init(); + virtual AgentStatus _init(); // The process of push data to olap engine // // Output parameters: // * tablet_infos: The info of pushed tablet after push data - virtual AgentStatus process(std::vector* tablet_infos); + virtual AgentStatus _process(); + + // Delete data of specified tablet according to delete conditions, + // once delete_data command submit success, deleted data is not visible, + // but not actually deleted util delay_delete_time run out. + // + // @param [in] request specify tablet and delete conditions + // @param [out] tablet_info_vec return tablet lastest status, which + // include version info, row count, data size, etc + // @return OLAP_SUCCESS if submit delete_data success + virtual OLAPStatus _delete_data(const TPushReq& request, + vector* tablet_info_vec); -private: AgentStatus _get_tmp_file_dir(const std::string& root_path, std::string* local_path); + OLAPStatus _push(const TPushReq& request, + std::vector* tablet_info_vec); void _get_file_name_from_path(const std::string& file_path, std::string* file_name); - TPushReq _push_req; - OLAPEngine* _engine; + bool _is_init = false; + TPushReq& _push_req; + std::vector* _tablet_infos; + int64_t _signature; + AgentStatus _download_status; + AgentStatus* _res_status; std::string _remote_file_path; std::string _local_file_path; - - DISALLOW_COPY_AND_ASSIGN(Pusher); }; // class Pusher } // namespace doris -#endif // DORIS_BE_SRC_AGENT_SERVICE_PUSHER_H +#endif // DORIS_BE_SRC_OLAP_TASK_ENGINE_BATCH_LOAD_TASK_H diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp new file mode 100644 index 00000000000000..823d2bf2765591 --- /dev/null +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
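+// Checksum flow implemented by _compute_checksum() below:
+//   1. look up the tablet and verify the requested version/version_hash against the
+//      rowset with the max version;
+//   2. build a READER_CHECKSUM reader over versions [0, _version], skipping FLOAT and
+//      DOUBLE columns because of possible precision loss;
+//   3. fold every row into a running hash via row.hash_code(row_checksum) and return
+//      the final value through the *_checksum out-parameter.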
+ +#include "olap/task/engine_checksum_task.h" + +#include "olap/reader.h" + +namespace doris { + +EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_hash, + TVersion version, TVersionHash version_hash, uint32_t* checksum) + :_tablet_id(tablet_id), + _schema_hash(schema_hash), + _version(version), + _version_hash(version_hash), + _checksum(checksum) { + +} + +OLAPStatus EngineChecksumTask::execute() { + OLAPStatus res = _compute_checksum(); + return res; +} // execute + + +OLAPStatus EngineChecksumTask::_compute_checksum() { + LOG(INFO) << "begin to process compute checksum." + << "tablet_id=" << _tablet_id + << ", schema_hash=" << _schema_hash + << ", version=" << _version; + OLAPStatus res = OLAP_SUCCESS; + + if (_checksum == NULL) { + OLAP_LOG_WARNING("invalid output parameter which is null pointer."); + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } + + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id, _schema_hash); + if (NULL == tablet.get()) { + OLAP_LOG_WARNING("can't find tablet. [tablet_id=%ld schema_hash=%d]", + _tablet_id, _schema_hash); + return OLAP_ERR_TABLE_NOT_FOUND; + } + + + Reader reader; + ReaderParams reader_params; + reader_params.tablet = tablet; + reader_params.reader_type = READER_CHECKSUM; + reader_params.version = Version(0, _version); + + { + ReadLock rdlock(tablet->get_header_lock_ptr()); + const RowsetSharedPtr message = tablet->rowset_with_max_version(); + if (message == NULL) { + LOG(FATAL) << "fail to get latest version. tablet_id=" << _tablet_id; + return OLAP_ERR_VERSION_NOT_EXIST; + } + + if (message->end_version() == _version + && message->version_hash() != _version_hash) { + OLAP_LOG_WARNING("fail to check latest version hash. " + "[res=%d tablet_id=%ld version_hash=%ld request_version_hash=%ld]", + res, _tablet_id, message->version_hash(), _version_hash); + return OLAP_ERR_CE_CMD_PARAMS_ERROR; + } + OLAPStatus acquire_reader_st = tablet->capture_rs_readers(reader_params.version, &reader_params.rs_readers); + if (acquire_reader_st != OLAP_SUCCESS) { + LOG(WARNING) << "fail to init reader. tablet=" << tablet->full_name() + << "res=" << acquire_reader_st; + return acquire_reader_st; + } + } + + // ignore float and double type considering to precision lose + for (size_t i = 0; i < tablet->tablet_schema().num_columns(); ++i) { + FieldType type = tablet->tablet_schema().column(i).type(); + if (type == OLAP_FIELD_TYPE_FLOAT || type == OLAP_FIELD_TYPE_DOUBLE) { + continue; + } + + reader_params.return_columns.push_back(i); + } + + res = reader.init(reader_params); + if (res != OLAP_SUCCESS) { + OLAP_LOG_WARNING("initiate reader fail. [res=%d]", res); + return res; + } + + RowCursor row; + res = row.init(tablet->tablet_schema(), reader_params.return_columns); + if (res != OLAP_SUCCESS) { + OLAP_LOG_WARNING("failed to init row cursor. [res=%d]", res); + return res; + } + row.allocate_memory_for_string_type(tablet->tablet_schema()); + + bool eof = false; + uint32_t row_checksum = 0; + while (true) { + OLAPStatus res = reader.next_row_with_aggregation(&row, &eof); + if (res == OLAP_SUCCESS && eof) { + VLOG(3) << "reader reads to the end."; + break; + } else if (res != OLAP_SUCCESS) { + OLAP_LOG_WARNING("fail to read in reader. [res=%d]", res); + return res; + } + + row_checksum = row.hash_code(row_checksum); + } + + LOG(INFO) << "success to finish compute checksum. 
checksum=" << row_checksum; + *_checksum = row_checksum; + return OLAP_SUCCESS; +} + +} // doris diff --git a/be/src/olap/task/engine_checksum_task.h b/be/src/olap/task/engine_checksum_task.h new file mode 100644 index 00000000000000..9a114435efd407 --- /dev/null +++ b/be/src/olap/task/engine_checksum_task.h @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_CHECKSUM_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_CHECKSUM_TASK_H + +#include "gen_cpp/AgentService_types.h" +#include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EngineChecksumTask : public EngineTask { + +public: + virtual OLAPStatus execute(); + +public: + EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_hash, + TVersion version, + TVersionHash version_hash, + uint32_t* checksum); + + ~EngineChecksumTask() {} + +private: + OLAPStatus _compute_checksum(); + +private: + TTabletId _tablet_id; + TSchemaHash _schema_hash; + TVersion _version; + TVersionHash _version_hash; + uint32_t* _checksum; +}; // EngineTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_CHECKSUM_TASK_H \ No newline at end of file diff --git a/be/src/olap/task/engine_clear_alter_task.cpp b/be/src/olap/task/engine_clear_alter_task.cpp new file mode 100644 index 00000000000000..0469fa60bc316e --- /dev/null +++ b/be/src/olap/task/engine_clear_alter_task.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
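+// Clearing an alter task only succeeds once the schema change / rollup has left the
+// ALTER_PREPARED / ALTER_RUNNING states; otherwise
+// OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED is returned. The task removes the alter
+// state from both the target tablet and its related tablet, and a missing tablet is
+// treated as already cleared (OLAP_SUCCESS).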
+ +#include "olap/task/engine_clear_alter_task.h" + +namespace doris { + +EngineClearAlterTask::EngineClearAlterTask(const TClearAlterTaskRequest& request) + :_clear_alter_task_req(request) { } + +OLAPStatus EngineClearAlterTask::execute() { + return _clear_alter_task(_clear_alter_task_req.tablet_id, _clear_alter_task_req.schema_hash); +} + +OLAPStatus EngineClearAlterTask::_clear_alter_task(const TTabletId tablet_id, + const TSchemaHash schema_hash) { + LOG(INFO) << "begin to process clear alter task. tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + if (tablet == nullptr) { + LOG(WARNING) << "can't find tablet when process clear alter task." + << " tablet_id=" << tablet_id + << ", schema_hash=" << schema_hash; + return OLAP_SUCCESS; + } + + // get schema change info + AlterTabletTaskSharedPtr alter_task = tablet->alter_task(); + if (alter_task == nullptr) { + return OLAP_SUCCESS; + } + AlterTabletState alter_state = alter_task->alter_state(); + TTabletId related_tablet_id = alter_task->related_tablet_id(); + TSchemaHash related_schema_hash = alter_task->related_schema_hash(); + + if (alter_state == ALTER_PREPARED || alter_state == ALTER_RUNNING) { + LOG(WARNING) << "Alter task is not finished when processing clear alter task. " + << "tablet=" << tablet->full_name(); + return OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED; + } + + // clear schema change info + OLAPStatus res = tablet->protected_delete_alter_task(); + + // clear related tablet's schema change info + TabletSharedPtr related_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(related_tablet_id, related_schema_hash); + if (related_tablet == nullptr) { + LOG(WARNING) << "related tablet not found when process clear alter task." + << " tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << ", related_tablet_id=" << related_tablet_id + << ", related_schema_hash=" << related_schema_hash; + } else { + res = related_tablet->protected_delete_alter_task(); + } + + LOG(INFO) << "finish to process clear alter task." + << "tablet_id=" << related_tablet_id + << ", schema_hash=" << related_schema_hash; + return res; +} + +} // doris diff --git a/be/src/olap/task/engine_clear_alter_task.h b/be/src/olap/task/engine_clear_alter_task.h new file mode 100644 index 00000000000000..bb45b4bff2f3e4 --- /dev/null +++ b/be/src/olap/task/engine_clear_alter_task.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
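+// Typical call site (a sketch; the agent worker that owns `request` and checks the
+// returned status is assumed):
+//
+//     EngineClearAlterTask task(request);   // request: TClearAlterTaskRequest
+//     OLAPStatus st = task.execute();
+//
+// The task only keeps a reference to the request, so the request must outlive the task.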
+ +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_CLEAR_ALTER_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_CLEAR_ALTER_TASK_H + +#include "gen_cpp/AgentService_types.h" +#include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EngineClearAlterTask : public EngineTask { + +public: + virtual OLAPStatus execute(); + +public: + EngineClearAlterTask(const TClearAlterTaskRequest& request); + ~EngineClearAlterTask() {} + +private: + OLAPStatus _clear_alter_task(const TTabletId tablet_id, + const TSchemaHash schema_hash); + +private: + const TClearAlterTaskRequest& _clear_alter_task_req; + +}; // EngineTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_CLEAR_ALTER_TASK_H \ No newline at end of file diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp new file mode 100644 index 00000000000000..3fa14b5fdf925b --- /dev/null +++ b/be/src/olap/task/engine_clone_task.cpp @@ -0,0 +1,849 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
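+// Clone flow implemented below, in outline:
+//   * tablet already exists locally: compute the missed versions and try an incremental
+//     clone first, falling back to a full clone when the source cannot serve the
+//     missing deltas;
+//   * tablet does not exist locally: pick a shard on a local DataDir, download a full
+//     snapshot, reset the tablet uid and load the tablet from the downloaded directory.
+// _clone_copy() does the per-backend work: ask a source BE for a snapshot, list the
+// snapshot files over HTTP, download them with retries, convert old snapshot formats
+// and rowset ids, and finally release the remote snapshot.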
+ +#include "olap/task/engine_clone_task.h" + +#include + +#include "http/http_client.h" +#include "olap/olap_snapshot_converter.h" +#include "olap/snapshot_manager.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_id_generator.h" +#include "olap/rowset/rowset_writer.h" + +using std::set; +using std::stringstream; + +namespace doris { + +const std::string HTTP_REQUEST_PREFIX = "/api/_tablet/_download?"; +const std::string HTTP_REQUEST_TOKEN_PARAM = "token="; +const std::string HTTP_REQUEST_FILE_PARAM = "&file="; +const uint32_t DOWNLOAD_FILE_MAX_RETRY = 3; +const uint32_t LIST_REMOTE_FILE_TIMEOUT = 15; +const uint32_t GET_LENGTH_TIMEOUT = 10; + +EngineCloneTask::EngineCloneTask(const TCloneReq& clone_req, + const TMasterInfo& master_info, + int64_t signature, + vector* error_msgs, + vector* tablet_infos, + AgentStatus* res_status) : + _clone_req(clone_req), + _error_msgs(error_msgs), + _tablet_infos(tablet_infos), + _res_status(res_status), + _signature(signature), + _master_info(master_info) {} + +OLAPStatus EngineCloneTask::execute() { + AgentStatus status = DORIS_SUCCESS; + string src_file_path; + TBackend src_host; + // Check local tablet exist or not + TabletSharedPtr tablet = + StorageEngine::instance()->tablet_manager()->get_tablet( + _clone_req.tablet_id, _clone_req.schema_hash); + bool is_new_tablet = tablet == nullptr; + // try to repair a tablet with missing version + if (tablet != nullptr) { + ReadLock migration_rlock(tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + LOG(INFO) << "clone tablet exist yet, begin to incremental clone. " + << "signature:" << _signature + << ", tablet_id:" << _clone_req.tablet_id + << ", schema_hash:" << _clone_req.schema_hash + << ", committed_version:" << _clone_req.committed_version; + + // get download path + string local_data_path = tablet->tablet_path() + CLONE_PREFIX; + bool allow_incremental_clone = false; + // check if current tablet has version == 2 and version hash == 0 + // version 2 may be an invalid rowset + Version clone_version = {_clone_req.committed_version, _clone_req.committed_version}; + RowsetSharedPtr clone_rowset = tablet->get_rowset_by_version(clone_version); + if (clone_rowset == nullptr || clone_rowset->version_hash() == _clone_req.committed_version_hash) { + // try to incremental clone + vector missed_versions; + tablet->calc_missed_versions(_clone_req.committed_version, &missed_versions); + LOG(INFO) << "finish to calculate missed versions when clone. " + << "tablet=" << tablet->full_name() + << ", committed_version=" << _clone_req.committed_version + << ", missed_versions_size=" << missed_versions.size(); + // if missed version size is 0, then it is useless to clone from remote be, it means local data is + // completed. Or remote be will just return header not the rowset files. clone will failed. 
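+            // Worked example (illustrative): if the local tablet holds versions [0-1, 2-5]
+            // and committed_version is 8, calc_missed_versions() is expected to report the
+            // singleton deltas 6, 7 and 8; their first components are what later gets packed
+            // into TSnapshotRequest.missing_version inside _clone_copy().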
+ if (missed_versions.size() == 0) { + LOG(INFO) << "missed version size = 0, skip clone and return success"; + _set_tablet_info(DORIS_SUCCESS, is_new_tablet); + return OLAP_SUCCESS; + } + status = _clone_copy(*(tablet->data_dir()), _clone_req, _signature, local_data_path, + &src_host, &src_file_path, _error_msgs, + &missed_versions, + &allow_incremental_clone, + tablet); + } else { + LOG(INFO) << "current tablet has invalid rowset that's version == commit_version but version hash not equal" + << " clone req commit_version=" << _clone_req.committed_version + << " clone req commit_version_hash=" << _clone_req.committed_version_hash + << " cur rowset version=" << clone_rowset->version_hash() + << " tablet info = " << tablet->full_name(); + } + if (status == DORIS_SUCCESS && allow_incremental_clone) { + OLAPStatus olap_status = _finish_clone(tablet, local_data_path, _clone_req.committed_version, allow_incremental_clone); + if (olap_status != OLAP_SUCCESS) { + LOG(WARNING) << "failed to finish incremental clone. [table=" << tablet->full_name() + << " res=" << olap_status << "]"; + _error_msgs->push_back("incremental clone error."); + status = DORIS_ERROR; + } + } else { + // begin to full clone if incremental failed + LOG(INFO) << "begin to full clone. [table=" << tablet->full_name(); + status = _clone_copy(*(tablet->data_dir()), _clone_req, _signature, local_data_path, + &src_host, &src_file_path, _error_msgs, + NULL, NULL, tablet); + if (status == DORIS_SUCCESS) { + LOG(INFO) << "download successfully when full clone. [table=" << tablet->full_name() + << " src_host=" << src_host.host << " src_file_path=" << src_file_path + << " local_data_path=" << local_data_path << "]"; + + OLAPStatus olap_status = _finish_clone(tablet, local_data_path, _clone_req.committed_version, false); + + if (olap_status != OLAP_SUCCESS) { + LOG(WARNING) << "fail to finish full clone. [table=" << tablet->full_name() + << " res=" << olap_status << "]"; + _error_msgs->push_back("full clone error."); + status = DORIS_ERROR; + } + } + } + } else { + LOG(INFO) << "clone tablet not exist, begin clone a new tablet from remote be. " + << "signature:" << _signature + << ", tablet_id:" << _clone_req.tablet_id + << ", schema_hash:" << _clone_req.schema_hash + << ", committed_version:" << _clone_req.committed_version; + // create a new tablet in this be + // Get local disk from olap + string local_shard_root_path; + DataDir* store = nullptr; + OLAPStatus olap_status = StorageEngine::instance()->obtain_shard_path( + _clone_req.storage_medium, &local_shard_root_path, &store); + if (olap_status != OLAP_SUCCESS) { + LOG(WARNING) << "clone get local root path failed. signature: " << _signature; + _error_msgs->push_back("clone get local root path failed."); + status = DORIS_ERROR; + } + stringstream tablet_dir_stream; + tablet_dir_stream << local_shard_root_path + << "/" << _clone_req.tablet_id + << "/" << _clone_req.schema_hash; + + if (status == DORIS_SUCCESS) { + status = _clone_copy(*store, + _clone_req, + _signature, + tablet_dir_stream.str(), + &src_host, + &src_file_path, + _error_msgs, + nullptr, nullptr, nullptr); + } + + if (status == DORIS_SUCCESS) { + LOG(INFO) << "clone copy done. 
src_host: " << src_host.host + << " src_file_path: " << src_file_path; + stringstream schema_hash_path_stream; + schema_hash_path_stream << local_shard_root_path + << "/" << _clone_req.tablet_id + << "/" << _clone_req.schema_hash; + string header_path = TabletMeta::construct_header_file_path(schema_hash_path_stream.str(), + _clone_req.tablet_id); + OLAPStatus reset_id_status = TabletMeta::reset_tablet_uid(header_path); + if (reset_id_status != OLAP_SUCCESS) { + LOG(WARNING) << "errors while set tablet uid: '" << header_path; + _error_msgs->push_back("errors while set tablet uid."); + status = DORIS_ERROR; + } else { + OLAPStatus load_header_status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( + store, _clone_req.tablet_id, _clone_req.schema_hash, schema_hash_path_stream.str(), false); + if (load_header_status != OLAP_SUCCESS) { + LOG(WARNING) << "load header failed. local_shard_root_path: '" << local_shard_root_path + << "' schema_hash: " << _clone_req.schema_hash << ". status: " << load_header_status + << ". signature: " << _signature; + _error_msgs->push_back("load header failed."); + status = DORIS_ERROR; + } + } + // clone success, delete .hdr file because tablet meta is stored in rocksdb + string cloned_meta_file = tablet_dir_stream.str() + "/" + std::to_string(_clone_req.tablet_id) + ".hdr"; + remove_dir(cloned_meta_file); + } + // Clean useless dir, if failed, ignore it. + if (status != DORIS_SUCCESS && status != DORIS_CREATE_TABLE_EXIST) { + stringstream local_data_path_stream; + local_data_path_stream << local_shard_root_path + << "/" << _clone_req.tablet_id; + string local_data_path = local_data_path_stream.str(); + LOG(INFO) << "clone failed. want to delete local dir: " << local_data_path + << ". signature: " << _signature; + try { + boost::filesystem::path local_path(local_data_path); + if (boost::filesystem::exists(local_path)) { + boost::filesystem::remove_all(local_path); + } + } catch (boost::filesystem::filesystem_error e) { + // Ignore the error, OLAP will delete it + LOG(WARNING) << "clone delete useless dir failed. " + << " error: " << e.what() + << " local dir: " << local_data_path.c_str() + << " signature: " << _signature; + } + } + } + _set_tablet_info(status, is_new_tablet); + return OLAP_SUCCESS; +} + +void EngineCloneTask::_set_tablet_info(AgentStatus status, bool is_new_tablet) { + // Get clone tablet info + if (status == DORIS_SUCCESS || status == DORIS_CREATE_TABLE_EXIST) { + TTabletInfo tablet_info; + tablet_info.__set_tablet_id(_clone_req.tablet_id); + tablet_info.__set_schema_hash(_clone_req.schema_hash); + OLAPStatus get_tablet_info_status = StorageEngine::instance()->tablet_manager()->report_tablet_info(&tablet_info); + if (get_tablet_info_status != OLAP_SUCCESS) { + LOG(WARNING) << "clone success, but get tablet info failed." + << " tablet id: " << _clone_req.tablet_id + << " schema hash: " << _clone_req.schema_hash + << " signature: " << _signature; + _error_msgs->push_back("clone success, but get tablet info failed."); + status = DORIS_ERROR; + } else if ( + (_clone_req.__isset.committed_version + && _clone_req.__isset.committed_version_hash) + && (tablet_info.version < _clone_req.committed_version || + (tablet_info.version == _clone_req.committed_version + && tablet_info.version_hash != _clone_req.committed_version_hash))) { + LOG(WARNING) << "failed to clone tablet. 
tablet_id:" << _clone_req.tablet_id + << ", schema_hash:" << _clone_req.schema_hash + << ", signature:" << _signature + << ", version:" << tablet_info.version + << ", version_hash:" << tablet_info.version_hash + << ", expected_version: " << _clone_req.committed_version + << ", version_hash:" << _clone_req.committed_version_hash; + // if it is a new tablet and clone failed, then remove the tablet + // if it is incremental clone, then must not drop the tablet + if (is_new_tablet) { + // we need to check if this cloned table's version is what we expect. + // if not, maybe this is a stale remaining table which is waiting for drop. + // we drop it. + LOG(WARNING) << "begin to drop the stale tablet. tablet_id:" << _clone_req.tablet_id + << ", schema_hash:" << _clone_req.schema_hash + << ", signature:" << _signature + << ", version:" << tablet_info.version + << ", version_hash:" << tablet_info.version_hash + << ", expected_version: " << _clone_req.committed_version + << ", version_hash:" << _clone_req.committed_version_hash; + OLAPStatus drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet(_clone_req.tablet_id, + _clone_req.schema_hash); + if (drop_status != OLAP_SUCCESS && drop_status != OLAP_ERR_TABLE_NOT_FOUND) { + // just log + LOG(WARNING) << "drop stale cloned table failed! tabelt id: " << _clone_req.tablet_id; + } + } + status = DORIS_ERROR; + } else { + LOG(INFO) << "clone get tablet info success. tablet_id:" << _clone_req.tablet_id + << ", schema_hash:" << _clone_req.schema_hash + << ", signature:" << _signature + << ", version:" << tablet_info.version + << ", version_hash:" << tablet_info.version_hash; + _tablet_infos->push_back(tablet_info); + } + } + *_res_status = status; +} + +AgentStatus EngineCloneTask::_clone_copy( + DataDir& data_dir, + const TCloneReq& clone_req, + int64_t signature, + const string& local_data_path, + TBackend* src_host, + string* src_file_path, + vector* error_msgs, + const vector* missed_versions, + bool* allow_incremental_clone, + TabletSharedPtr tablet) { + AgentStatus status = DORIS_SUCCESS; + std::string token = _master_info.token; + for (auto src_backend : clone_req.src_backends) { + stringstream http_host_stream; + http_host_stream << "http://" << src_backend.host << ":" << src_backend.http_port; + string http_host = http_host_stream.str(); + // Make snapshot in remote olap engine + *src_host = src_backend; + AgentServerClient agent_client(*src_host); + TAgentResult make_snapshot_result; + status = DORIS_SUCCESS; + + LOG(INFO) << "pre make snapshot. backend_ip: " << src_host->host; + TSnapshotRequest snapshot_request; + snapshot_request.__set_tablet_id(clone_req.tablet_id); + snapshot_request.__set_schema_hash(clone_req.schema_hash); + // This is a new version be, should set preferred version to 2 + snapshot_request.__set_preferred_snapshot_version(PREFERRED_SNAPSHOT_VERSION); + if (missed_versions != NULL) { + // TODO: missing version composed of singleton delta. + // if not, this place should be rewrote. + vector snapshot_versions; + for (Version version : *missed_versions) { + snapshot_versions.push_back(version.first); + } + snapshot_request.__set_missing_version(snapshot_versions); + } + agent_client.make_snapshot( + snapshot_request, + &make_snapshot_result); + if (make_snapshot_result.__isset.allow_incremental_clone) { + // During upgrading, some BE nodes still be installed an old previous old. + // which incremental clone is not ready in those nodes. + // should add a symbol to indicate it. 
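+            // NOTE: the full-clone call sites pass allow_incremental_clone == nullptr, so this
+            // assignment relies on the source BE only setting the field for requests that carry
+            // missing_version; an explicit nullptr check here would make that assumption safe.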
+ *allow_incremental_clone = make_snapshot_result.allow_incremental_clone; + } + if (make_snapshot_result.status.status_code == TStatusCode::OK) { + if (make_snapshot_result.__isset.snapshot_path) { + *src_file_path = make_snapshot_result.snapshot_path; + if (src_file_path->at(src_file_path->length() - 1) != '/') { + src_file_path->append("/"); + } + LOG(INFO) << "make snapshot success. backend_ip: " << src_host->host << ". src_file_path: " + << *src_file_path << ". signature: " << signature; + } else { + LOG(WARNING) << "clone make snapshot success, " + "but get src file path failed. signature: " << signature; + status = DORIS_ERROR; + continue; + } + } else { + LOG(WARNING) << "make snapshot failed. tablet_id: " << clone_req.tablet_id + << ". schema_hash: " << clone_req.schema_hash << ". backend_ip: " << src_host->host + << ". backend_port: " << src_host->be_port << ". signature: " << signature; + error_msgs->push_back("make snapshot failed. backend_ip: " + src_host->host); + status = DORIS_ERROR; + continue; + } + + // Get remote and local full path + stringstream src_file_full_path_stream; + stringstream local_file_full_path_stream; + + if (status == DORIS_SUCCESS) { + src_file_full_path_stream << *src_file_path + << "/" << clone_req.tablet_id + << "/" << clone_req.schema_hash << "/"; + local_file_full_path_stream << local_data_path << "/"; + } + string src_file_full_path = src_file_full_path_stream.str(); + string local_file_full_path = local_file_full_path_stream.str(); + // Check whether the local path exists; if it does, remove it, then create the dir + // local_file_full_path = tabletid/clone; for a specific tablet there should be only one such folder + // if this folder exists, it should be removed + // For example, BE cloned file 1 with version (2,2) from BE 1, but the clone from BE 1 failed; + // it will then try to clone from BE 2 and find that file 1 already exists, but a file with the same + // name may contain different versions. + if (status == DORIS_SUCCESS) { + boost::filesystem::path local_file_full_dir(local_file_full_path); + if (boost::filesystem::exists(local_file_full_dir)) { + boost::filesystem::remove_all(local_file_full_dir); + } + boost::filesystem::create_directories(local_file_full_dir); + } + + // Get remote dir file list + HttpClient client; + std::string remote_file_path = http_host + HTTP_REQUEST_PREFIX + + HTTP_REQUEST_TOKEN_PARAM + token + + HTTP_REQUEST_FILE_PARAM + src_file_full_path; + + string file_list_str; + auto list_files_cb = [&remote_file_path, &file_list_str] (HttpClient* client) { + RETURN_IF_ERROR(client->init(remote_file_path)); + client->set_timeout_ms(LIST_REMOTE_FILE_TIMEOUT * 1000); + RETURN_IF_ERROR(client->execute(&file_list_str)); + return Status::OK(); + }; + + Status download_status = HttpClient::execute_with_retry( + DOWNLOAD_FILE_MAX_RETRY, 1, list_files_cb); + + vector<string> file_name_list; + if (!download_status.ok()) { + LOG(WARNING) << "clone get remote file list failed over max time. " + << " backend_ip: " << src_host->host + << " src_file_path: " << remote_file_path + << " signature: " << signature; + status = DORIS_ERROR; + } else { + size_t start_position = 0; + size_t end_position = file_list_str.find("\n"); + + // Split file name from file_list_str + while (end_position != string::npos) { + string file_name = file_list_str.substr( + start_position, end_position - start_position); + // If the header file does not exist, the tablet can't be loaded by the olap engine. + // To avoid incomplete data, we copy the header file last. 
+ // The header file's name is end of .hdr. + if (file_name.size() > 4 && file_name.substr(file_name.size() - 4, 4) == ".hdr") { + file_name_list.push_back(file_name); + } else { + file_name_list.insert(file_name_list.begin(), file_name); + } + + start_position = end_position + 1; + end_position = file_list_str.find("\n", start_position); + } + if (start_position != file_list_str.size()) { + string file_name = file_list_str.substr( + start_position, file_list_str.size() - start_position); + if (file_name.size() > 4 && file_name.substr(file_name.size() - 4, 4) == ".hdr") { + file_name_list.push_back(file_name); + } else { + file_name_list.insert(file_name_list.begin(), file_name); + } + } + } + + // Get copy from remote + uint64_t total_file_size = 0; + MonotonicStopWatch watch; + watch.start(); + for (auto& file_name : file_name_list) { + remote_file_path = http_host + HTTP_REQUEST_PREFIX + + HTTP_REQUEST_TOKEN_PARAM + token + + HTTP_REQUEST_FILE_PARAM + src_file_full_path + file_name; + + // get file length + uint64_t file_size = 0; + auto get_file_size_cb = [&remote_file_path, &file_size] (HttpClient* client) { + RETURN_IF_ERROR(client->init(remote_file_path)); + client->set_timeout_ms(GET_LENGTH_TIMEOUT * 1000); + RETURN_IF_ERROR(client->head()); + file_size = client->get_content_length(); + return Status::OK(); + }; + download_status = HttpClient::execute_with_retry( + DOWNLOAD_FILE_MAX_RETRY, 1, get_file_size_cb); + if (!download_status.ok()) { + LOG(WARNING) << "clone copy get file length failed over max time. remote_path=" + << remote_file_path + << ", signature=" << signature; + status = DORIS_ERROR; + break; + } + + total_file_size += file_size; + uint64_t estimate_timeout = file_size / config::download_low_speed_limit_kbps / 1024; + if (estimate_timeout < config::download_low_speed_time) { + estimate_timeout = config::download_low_speed_time; + } + + std::string local_file_path = local_file_full_path + file_name; + + auto download_cb = [&remote_file_path, + estimate_timeout, + &local_file_path, + file_size] (HttpClient* client) { + RETURN_IF_ERROR(client->init(remote_file_path)); + client->set_timeout_ms(estimate_timeout * 1000); + RETURN_IF_ERROR(client->download(local_file_path)); + + // Check file length + uint64_t local_file_size = boost::filesystem::file_size(local_file_path); + if (local_file_size != file_size) { + LOG(WARNING) << "download file length error" + << ", remote_path=" << remote_file_path + << ", file_size=" << file_size + << ", local_file_size=" << local_file_size; + return Status::InternalError("downloaded file size is not equal"); + } + chmod(local_file_path.c_str(), S_IRUSR | S_IWUSR); + return Status::OK(); + }; + download_status = HttpClient::execute_with_retry( + DOWNLOAD_FILE_MAX_RETRY, 1, download_cb); + if (!download_status.ok()) { + LOG(WARNING) << "download file failed over max retry." + << ", remote_path=" << remote_file_path + << ", signature=" << signature + << ", errormsg=" << download_status.get_error_msg(); + status = DORIS_ERROR; + break; + } + } // Clone files from remote backend + + uint64_t total_time_ms = watch.elapsed_time() / 1000 / 1000; + total_time_ms = total_time_ms > 0 ? 
total_time_ms : 0; + double copy_rate = 0.0; + if (total_time_ms > 0) { + copy_rate = total_file_size / ((double) total_time_ms) / 1000; + } + _copy_size = (int64_t) total_file_size; + _copy_time_ms = (int64_t) total_time_ms; + LOG(INFO) << "succeed to copy tablet " << signature + << ", total file size: " << total_file_size << " B" + << ", cost: " << total_time_ms << " ms" + << ", rate: " << copy_rate << " B/s"; + if (make_snapshot_result.snapshot_version < PREFERRED_SNAPSHOT_VERSION) { + OLAPStatus convert_status = _convert_to_new_snapshot(data_dir, local_data_path, clone_req.tablet_id); + if (convert_status != OLAP_SUCCESS) { + status = DORIS_ERROR; + } + } + // change all rowset ids because they maybe its id same with local rowset + OLAPStatus convert_status = SnapshotManager::instance()->convert_rowset_ids(data_dir, + local_data_path, clone_req.tablet_id, clone_req.schema_hash, tablet); + if (convert_status != OLAP_SUCCESS) { + status = DORIS_ERROR; + } + + + // Release snapshot, if failed, ignore it. OLAP engine will drop useless snapshot + TAgentResult release_snapshot_result; + agent_client.release_snapshot( + make_snapshot_result.snapshot_path, + &release_snapshot_result); + if (release_snapshot_result.status.status_code != TStatusCode::OK) { + LOG(WARNING) << "release snapshot failed. src_file_path: " << *src_file_path + << ". signature: " << signature; + } + + if (status == DORIS_SUCCESS) { + break; + } + } // clone copy from one backend + return status; +} + +OLAPStatus EngineCloneTask::_convert_to_new_snapshot(DataDir& data_dir, const string& clone_dir, int64_t tablet_id) { + OLAPStatus res = OLAP_SUCCESS; + // check clone dir existed + if (!check_dir_existed(clone_dir)) { + res = OLAP_ERR_DIR_NOT_EXIST; + LOG(WARNING) << "clone dir not existed when clone. clone_dir=" << clone_dir.c_str(); + return res; + } + + // load src header + string cloned_meta_file = clone_dir + "/" + std::to_string(tablet_id) + ".hdr"; + FileHeader file_header; + FileHandler file_handler; + OLAPHeaderMessage olap_header_msg; + if (file_handler.open(cloned_meta_file.c_str(), O_RDONLY) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to open ordinal file. file=" << cloned_meta_file; + return OLAP_ERR_IO_ERROR; + } + + // In file_header.unserialize(), it validates file length, signature, checksum of protobuf. + if (file_header.unserialize(&file_handler) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to unserialize tablet_meta. file='" << cloned_meta_file; + return OLAP_ERR_PARSE_PROTOBUF_ERROR; + } + + set clone_files; + if ((res = dir_walk(clone_dir, NULL, &clone_files)) != OLAP_SUCCESS) { + LOG(WARNING) << "failed to dir walk when clone. [clone_dir=" << clone_dir << "]"; + return res; + } + + try { + olap_header_msg.CopyFrom(file_header.message()); + } catch (...) { + LOG(WARNING) << "fail to copy protocol buffer object. file='" << cloned_meta_file; + return OLAP_ERR_PARSE_PROTOBUF_ERROR; + } + OlapSnapshotConverter converter; + TabletMetaPB tablet_meta_pb; + vector pending_rowsets; + res = converter.to_new_snapshot(olap_header_msg, clone_dir, clone_dir, data_dir, &tablet_meta_pb, + &pending_rowsets, false); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to convert snapshot to new format. 
dir='" << clone_dir; + return res; + } + vector files_to_delete; + for (auto file_name : clone_files) { + string full_file_path = clone_dir + "/" + file_name; + files_to_delete.push_back(full_file_path); + } + // remove all files + RETURN_NOT_OK(remove_files(files_to_delete)); + + res = TabletMeta::save(cloned_meta_file, tablet_meta_pb); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to save converted tablet meta to dir='" << clone_dir; + return res; + } + + return OLAP_SUCCESS; +} + +// only incremental clone use this method +OLAPStatus EngineCloneTask::_finish_clone(TabletSharedPtr tablet, const string& clone_dir, + int64_t committed_version, bool is_incremental_clone) { + OLAPStatus res = OLAP_SUCCESS; + vector linked_success_files; + + // clone and compaction operation should be performed sequentially + tablet->obtain_base_compaction_lock(); + tablet->obtain_cumulative_lock(); + + tablet->obtain_push_lock(); + tablet->obtain_header_wrlock(); + do { + // check clone dir existed + if (!check_dir_existed(clone_dir)) { + res = OLAP_ERR_DIR_NOT_EXIST; + LOG(WARNING) << "clone dir not existed when clone. clone_dir=" << clone_dir.c_str(); + break; + } + + // load src header + string cloned_tablet_meta_file = clone_dir + "/" + std::to_string(tablet->tablet_id()) + ".hdr"; + TabletMeta cloned_tablet_meta; + if ((res = cloned_tablet_meta.create_from_file(cloned_tablet_meta_file)) != OLAP_SUCCESS) { + LOG(WARNING) << "fail to load src header when clone. " + << ", cloned_tablet_meta_file=" << cloned_tablet_meta_file; + break; + } + // remove the cloned meta file + remove_dir(cloned_tablet_meta_file); + + // TODO(ygl): convert old format file into rowset + // check all files in /clone and /tablet + set clone_files; + if ((res = dir_walk(clone_dir, NULL, &clone_files)) != OLAP_SUCCESS) { + LOG(WARNING) << "failed to dir walk when clone. [clone_dir=" << clone_dir << "]"; + break; + } + + set local_files; + string tablet_dir = tablet->tablet_path(); + if ((res = dir_walk(tablet_dir, NULL, &local_files)) != OLAP_SUCCESS) { + LOG(WARNING) << "failed to dir walk when clone. [tablet_dir=" << tablet_dir << "]"; + break; + } + + // link files from clone dir, if file exists, skip it + for (const string& clone_file : clone_files) { + if (local_files.find(clone_file) != local_files.end()) { + VLOG(3) << "find same file when clone, skip it. " + << "tablet=" << tablet->full_name() + << ", clone_file=" << clone_file; + continue; + } + + string from = clone_dir + "/" + clone_file; + string to = tablet_dir + "/" + clone_file; + LOG(INFO) << "src file:" << from << " dest file:" << to; + if (link(from.c_str(), to.c_str()) != 0) { + LOG(WARNING) << "fail to create hard link when clone. 
" + << " from=" << from.c_str() + << " to=" << to.c_str(); + res = OLAP_ERR_OS_ERROR; + break; + } + linked_success_files.emplace_back(std::move(to)); + } + + if (res != OLAP_SUCCESS) { + break; + } + + if (is_incremental_clone) { + res = _clone_incremental_data(tablet, cloned_tablet_meta, committed_version); + } else { + res = _clone_full_data(tablet, const_cast(&cloned_tablet_meta)); + } + + // if full clone success, need to update cumulative layer point + if (!is_incremental_clone && res == OLAP_SUCCESS) { + tablet->set_cumulative_layer_point(cloned_tablet_meta.cumulative_layer_point()); + } + + } while (0); + + // clear linked files if errors happen + if (res != OLAP_SUCCESS) { + remove_files(linked_success_files); + } + tablet->release_header_lock(); + tablet->release_push_lock(); + + tablet->release_cumulative_lock(); + tablet->release_base_compaction_lock(); + + // clear clone dir + boost::filesystem::path clone_dir_path(clone_dir); + boost::filesystem::remove_all(clone_dir_path); + LOG(INFO) << "finish to clone data, clear downloaded data. res=" << res + << ", tablet=" << tablet->full_name() + << ", clone_dir=" << clone_dir; + return res; +} + +OLAPStatus EngineCloneTask::_clone_incremental_data(TabletSharedPtr tablet, const TabletMeta& cloned_tablet_meta, + int64_t committed_version) { + LOG(INFO) << "begin to incremental clone. tablet=" << tablet->full_name() + << ", committed_version=" << committed_version; + + vector missed_versions; + tablet->calc_missed_versions_unlock(committed_version, &missed_versions); + + vector versions_to_delete; + vector rowsets_to_clone; + + VLOG(3) << "get missed versions again when finish incremental clone. " + << "tablet=" << tablet->full_name() + << ", committed_version=" << committed_version + << ", missed_versions_size=" << missed_versions.size(); + + // check missing versions exist in clone src + for (Version version : missed_versions) { + RowsetMetaSharedPtr inc_rs_meta = cloned_tablet_meta.acquire_inc_rs_meta_by_version(version); + if (inc_rs_meta == nullptr) { + LOG(WARNING) << "missed version is not found in cloned tablet meta." + << ", missed_version=" << version.first << "-" << version.second; + return OLAP_ERR_VERSION_NOT_EXIST; + } + + rowsets_to_clone.push_back(inc_rs_meta); + } + + // clone_data to tablet + OLAPStatus clone_res = tablet->revise_tablet_meta(rowsets_to_clone, versions_to_delete); + LOG(INFO) << "finish to incremental clone. [tablet=" << tablet->full_name() << " res=" << clone_res << "]"; + return clone_res; +} + +OLAPStatus EngineCloneTask::_clone_full_data(TabletSharedPtr tablet, TabletMeta* cloned_tablet_meta) { + Version cloned_max_version = cloned_tablet_meta->max_version(); + LOG(INFO) << "begin to full clone. tablet=" << tablet->full_name() + << ", cloned_max_version=" << cloned_max_version.first + << "-" << cloned_max_version.second; + vector versions_to_delete; + vector rs_metas_found_in_src; + // check local versions + for (auto& rs_meta : tablet->tablet_meta()->all_rs_metas()) { + Version local_version(rs_meta->start_version(), rs_meta->end_version()); + VersionHash local_version_hash = rs_meta->version_hash(); + LOG(INFO) << "check local delta when full clone." + << "tablet=" << tablet->full_name() + << ", local_version=" << local_version.first << "-" << local_version.second; + + // if local version cross src latest, clone failed + // if local version is : 0-0, 1-1, 2-10, 12-14, 15-15,16-16 + // cloned max version is 13-13, this clone is failed, because could not + // fill local data by using cloned data. 
+ // This should not happen: if there were such a hole, the following deltas would not be compacted. + if (local_version.first <= cloned_max_version.second + && local_version.second > cloned_max_version.second) { + LOG(WARNING) << "stop full clone, local version crosses src latest version." + << " tablet=" << tablet->full_name() + << ", local_version=" << local_version.first << "-" << local_version.second; + return OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR; + + } else if (local_version.second <= cloned_max_version.second) { + // local version is not newer than src; if it already exists in src, there is no need to clone it + bool existed_in_src = false; + + // if the delta labeled with local_version matches a version in the clone header, + // there is no need to clone it. + for (auto& rs_meta : cloned_tablet_meta->all_rs_metas()) { + if (rs_meta->version().first == local_version.first + && rs_meta->version().second == local_version.second + && rs_meta->version_hash() == local_version_hash) { + existed_in_src = true; + break; + } + } + + if (existed_in_src) { + OLAPStatus delete_res = cloned_tablet_meta->delete_rs_meta_by_version(local_version, + &rs_metas_found_in_src); + if (delete_res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to delete existing version from clone src when full clone." + << " version=" << local_version.first << "-" << local_version.second; + return delete_res; + } else { + LOG(INFO) << "Delta already exists in local header, no need to clone." + << " tablet=" << tablet->full_name() + << ", version=" << local_version.first << "-" << local_version.second + << ", version_hash=" << local_version_hash; + } + } else { + // The delta labeled with local_version does not exist in the clone header; + // some overlapping delta will be cloned to replace it, + // so the specified delta should be deleted from the local header. + versions_to_delete.push_back(local_version); + LOG(INFO) << "Delta is not included in the clone header, delete it from local header." + << " tablet=" << tablet->full_name() + << ", version=" << local_version.first << "-" << local_version.second + << ", version_hash=" << local_version_hash; + } + } + } + vector<RowsetMetaSharedPtr> rowsets_to_clone; + for (auto& rs_meta : cloned_tablet_meta->all_rs_metas()) { + rowsets_to_clone.push_back(rs_meta); + LOG(INFO) << "Delta to clone." + << " tablet=" << tablet->full_name() + << ", version=" << rs_meta->version().first << "-" + << rs_meta->version().second + << ", version_hash=" << rs_meta->version_hash(); + } + + // clone_data to tablet + // only replace rowset info; must not modify other info such as alter task info. For example: + // 1. local tablet finished alter task + // 2. local tablet has error in push + // 3. local tablet cloned rowset from other nodes + // 4. if the alter task info were cleared, push would not write to the new tablet and the reported info would be wrong + OLAPStatus clone_res = tablet->revise_tablet_meta(rowsets_to_clone, versions_to_delete); + LOG(INFO) << "finish to full clone. 
tablet=" << tablet->full_name() << ", res=" << clone_res; + // in previous step, copy all files from CLONE_DIR to tablet dir + // but some rowset is useless, so that remove them here + for (auto& rs_meta_ptr : rs_metas_found_in_src) { + RowsetSharedPtr org_rowset(new AlphaRowset(&(cloned_tablet_meta->tablet_schema()), + tablet->tablet_path(), tablet->data_dir(), rs_meta_ptr)); + if (org_rowset->init() == OLAP_SUCCESS && org_rowset->load() == OLAP_SUCCESS) { + org_rowset->remove(); + } + } + return clone_res; +} + +} // doris diff --git a/be/src/olap/task/engine_clone_task.h b/be/src/olap/task/engine_clone_task.h new file mode 100644 index 00000000000000..211da3ccdc02f2 --- /dev/null +++ b/be/src/olap/task/engine_clone_task.h @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_CLONE_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_CLONE_TASK_H + +#include "agent/utils.h" +#include "gen_cpp/AgentService_types.h" +#include "gen_cpp/HeartbeatService.h" +#include "gen_cpp/MasterService_types.h" +#include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EngineCloneTask : public EngineTask { + +public: + virtual OLAPStatus execute(); + +public: + EngineCloneTask(const TCloneReq& _clone_req, + const TMasterInfo& _master_info, + int64_t _signature, + vector* error_msgs, + vector* tablet_infos, + AgentStatus* _res_status); + ~EngineCloneTask() {} + +private: + + virtual OLAPStatus _finish_clone(TabletSharedPtr tablet, const std::string& clone_dir, + int64_t committed_version, bool is_incremental_clone); + + OLAPStatus _clone_incremental_data(TabletSharedPtr tablet, const TabletMeta& cloned_tablet_meta, + int64_t committed_version); + + OLAPStatus _clone_full_data(TabletSharedPtr tablet, TabletMeta* cloned_tablet_meta); + + AgentStatus _clone_copy(DataDir& data_dir, + const TCloneReq& clone_req, + int64_t signature, + const string& local_data_path, + TBackend* src_host, + string* src_file_path, + vector* error_msgs, + const vector* missing_versions, + bool* allow_incremental_clone, + TabletSharedPtr tablet); + + OLAPStatus _convert_to_new_snapshot(DataDir& data_dir, const string& clone_dir, int64_t tablet_id); + + void _set_tablet_info(AgentStatus status, bool is_new_tablet); + +private: + const TCloneReq& _clone_req; + vector* _error_msgs; + vector* _tablet_infos; + AgentStatus* _res_status; + int64_t _signature; + const TMasterInfo& _master_info; + int64_t _copy_size; + int64_t _copy_time_ms; +}; // EngineTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_CLONE_TASK_H diff --git a/be/src/olap/task/engine_publish_version_task.cpp 
b/be/src/olap/task/engine_publish_version_task.cpp new file mode 100644 index 00000000000000..b98aca3f4c1f1f --- /dev/null +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/task/engine_publish_version_task.h" +#include "olap/data_dir.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/tablet_manager.h" +#include + +namespace doris { + +using std::map; + +EnginePublishVersionTask::EnginePublishVersionTask(TPublishVersionRequest& publish_version_req, + vector* error_tablet_ids) + : _publish_version_req(publish_version_req), + _error_tablet_ids(error_tablet_ids) {} + +OLAPStatus EnginePublishVersionTask::finish() { + LOG(INFO) << "begin to process publish version. transaction_id=" + << _publish_version_req.transaction_id; + + int64_t transaction_id = _publish_version_req.transaction_id; + OLAPStatus res = OLAP_SUCCESS; + + // each partition + for (auto& partitionVersionInfo + : _publish_version_req.partition_version_infos) { + + int64_t partition_id = partitionVersionInfo.partition_id; + map tablet_related_rs; + StorageEngine::instance()->txn_manager()->get_txn_related_tablets(transaction_id, partition_id, &tablet_related_rs); + + Version version(partitionVersionInfo.version, partitionVersionInfo.version); + VersionHash version_hash = partitionVersionInfo.version_hash; + + // each tablet + for (auto& tablet_rs : tablet_related_rs) { + OLAPStatus publish_status = OLAP_SUCCESS; + TabletInfo tablet_info = tablet_rs.first; + RowsetSharedPtr rowset = tablet_rs.second; + LOG(INFO) << "begin to publish version on tablet. " + << "tablet_id=" << tablet_info.tablet_id + << ", schema_hash=" << tablet_info.schema_hash + << ", version=" << version.first + << ", version_hash=" << version_hash + << ", transaction_id=" << transaction_id; + // if rowset is null, it means this be received write task, but failed during write + // and receive fe's publish version task + // this be must return as an error tablet + if (rowset == nullptr) { + LOG(WARNING) << "could not find related rowset for tablet " << tablet_info.tablet_id + << " txn id " << transaction_id; + _error_tablet_ids->push_back(tablet_info.tablet_id); + res = OLAP_ERR_PUSH_ROWSET_NOT_FOUND; + continue; + } + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_info.tablet_id, + tablet_info.schema_hash, tablet_info.tablet_uid); + + if (tablet == nullptr) { + LOG(WARNING) << "can't get tablet when publish version. 
tablet_id=" << tablet_info.tablet_id + << " schema_hash=" << tablet_info.schema_hash; + _error_tablet_ids->push_back(tablet_info.tablet_id); + res = OLAP_ERR_PUSH_TABLE_NOT_EXIST; + continue; + } + + publish_status = StorageEngine::instance()->txn_manager()->publish_txn(tablet->data_dir()->get_meta(), + partition_id, + transaction_id, tablet_info.tablet_id, tablet_info.schema_hash, tablet_info.tablet_uid, + version, version_hash); + + if (publish_status != OLAP_SUCCESS) { + LOG(WARNING) << "failed to publish for rowset_id:" << rowset->rowset_id() + << "tablet id: " << tablet_info.tablet_id + << "txn id:" << transaction_id; + _error_tablet_ids->push_back(tablet_info.tablet_id); + res = publish_status; + continue; + } + // add visible rowset to tablet + publish_status = tablet->add_inc_rowset(rowset); + if (publish_status != OLAP_SUCCESS && publish_status != OLAP_ERR_PUSH_VERSION_ALREADY_EXIST) { + LOG(WARNING) << "add visible rowset to tablet failed rowset_id:" << rowset->rowset_id() + << "tablet id: " << tablet_info.tablet_id + << "txn id:" << transaction_id + << "res:" << publish_status; + _error_tablet_ids->push_back(tablet_info.tablet_id); + res = publish_status; + continue; + } + if (publish_status == OLAP_SUCCESS || publish_status == OLAP_ERR_PUSH_VERSION_ALREADY_EXIST) { + LOG(INFO) << "publish version successfully on tablet. tablet=" << tablet->full_name() + << ", transaction_id=" << transaction_id << ", version=" << version.first + << ", res=" << publish_status; + // delete rowset from meta env, because add inc rowset alreay saved the rowset meta to tablet meta + RowsetMetaManager::remove(tablet->data_dir()->get_meta(), tablet->tablet_uid(), rowset->rowset_id()); + // delete txn info + } else { + LOG(WARNING) << "fail to publish version on tablet. tablet=" << tablet->full_name().c_str() + << "transaction_id=" << transaction_id + << "version=" << version.first + << " res=" << publish_status; + _error_tablet_ids->push_back(tablet->tablet_id()); + res = publish_status; + } + } + } + + LOG(INFO) << "finish to publish version on transaction." + << "transaction_id=" << transaction_id + << ", error_tablet_size=" << _error_tablet_ids->size(); + return res; +} + +} // doris diff --git a/be/src/olap/task/engine_publish_version_task.h b/be/src/olap/task/engine_publish_version_task.h new file mode 100644 index 00000000000000..acf62e4bce8363 --- /dev/null +++ b/be/src/olap/task/engine_publish_version_task.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_PUBLISH_VERSION_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_PUBLISH_VERSION_TASK_H + +#include "gen_cpp/AgentService_types.h" +#include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EnginePublishVersionTask : public EngineTask { + +public: + EnginePublishVersionTask(TPublishVersionRequest& publish_version_req, vector* error_tablet_ids); + ~EnginePublishVersionTask() {} + + virtual OLAPStatus finish(); + +private: + const TPublishVersionRequest& _publish_version_req; + vector* _error_tablet_ids; +}; // EnginePublishVersionTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_PUBLISH_VERSION_TASK_H \ No newline at end of file diff --git a/be/src/olap/task/engine_schema_change_task.cpp b/be/src/olap/task/engine_schema_change_task.cpp new file mode 100644 index 00000000000000..ec390b51b33d1c --- /dev/null +++ b/be/src/olap/task/engine_schema_change_task.cpp @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/task/engine_schema_change_task.h" + +#include "olap/schema_change.h" + +namespace doris { + +using std::to_string; + +EngineSchemaChangeTask::EngineSchemaChangeTask(const TAlterTabletReq& alter_tablet_request, + int64_t signature, const TTaskType::type task_type, vector* error_msgs, + const string& process_name): + _alter_tablet_req(alter_tablet_request), + _signature(signature), + _task_type(task_type), + _error_msgs(error_msgs), + _process_name(process_name) { } + +OLAPStatus EngineSchemaChangeTask::execute() { + OLAPStatus status = OLAP_SUCCESS; + // create different alter task according task type + switch (_task_type) { + case TTaskType::ROLLUP: + status = _create_rollup_tablet(_alter_tablet_req); + break; + case TTaskType::SCHEMA_CHANGE: + status = _schema_change(_alter_tablet_req); + break; + default: + break; + } + if (status != OLAP_SUCCESS) { + LOG(WARNING) << _process_name << " failed. " + << "signature: " << _signature << " status: " << status; + } + + return status; +} // execute + +OLAPStatus EngineSchemaChangeTask::_create_rollup_tablet(const TAlterTabletReq& request) { + LOG(INFO) << "begin to create rollup tablet. base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash=" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + + DorisMetrics::create_rollup_requests_total.increment(1); + + OLAPStatus res = OLAP_SUCCESS; + + SchemaChangeHandler handler; + res = handler.process_alter_tablet(ROLLUP, request); + + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to do rollup. 
res=" << res + << " base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash=" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + DorisMetrics::create_rollup_requests_failed.increment(1); + return res; + } + + LOG(INFO) << "success to create rollup tablet. res=" << res + << " base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + return res; +} // create_rollup_tablet + +OLAPStatus EngineSchemaChangeTask::_schema_change(const TAlterTabletReq& request) { + LOG(INFO) << "begin to alter tablet. base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash=" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + + DorisMetrics::schema_change_requests_total.increment(1); + + OLAPStatus res = OLAP_SUCCESS; + + SchemaChangeHandler handler; + res = handler.process_alter_tablet(SCHEMA_CHANGE, request); + + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to do alter tablet. res=" << res + << ", base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash=" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + DorisMetrics::schema_change_requests_failed.increment(1); + return res; + } + + LOG(INFO) << "success to do alter tablet." + << " base_tablet_id=" << request.base_tablet_id + << ", base_schema_hash" << request.base_schema_hash + << ", new_tablet_id=" << request.new_tablet_req.tablet_id + << ", new_schema_hash=" << request.new_tablet_req.tablet_schema.schema_hash; + return res; +} + +} // doris diff --git a/be/src/olap/task/engine_schema_change_task.h b/be/src/olap/task/engine_schema_change_task.h new file mode 100644 index 00000000000000..5c8653ebb3efc1 --- /dev/null +++ b/be/src/olap/task/engine_schema_change_task.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_SCHEMA_CHANGE_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_SCHEMA_CHANGE_TASK_H + +#include "gen_cpp/AgentService_types.h" +#include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EngineSchemaChangeTask : public EngineTask { + +public: + virtual OLAPStatus execute(); + +public: + EngineSchemaChangeTask(const TAlterTabletReq& alter_tablet_request, int64_t signature, + const TTaskType::type task_type, vector* error_msgs, const string& process_name); + ~EngineSchemaChangeTask() {} + +private: + // ######################### ALTER TABLE BEGIN ######################### + // The following interfaces are all about alter tablet operation, + // the main logical is that generating a new tablet with different + // schema on base tablet. + + // Create rollup tablet on base tablet, after create_rollup_tablet, + // both base tablet and new tablet is effective. + // + // @param [in] request specify base tablet, new tablet and its schema + // @return OLAP_SUCCESS if submit success + OLAPStatus _create_rollup_tablet(const TAlterTabletReq& request); + + // Do schema change on tablet, StorageEngine support + // add column, drop column, alter column type and order, + // after schema_change, base tablet is abandoned. + // Note that the two tablets has same tablet_id but different schema_hash + // + // @param [in] request specify base tablet, new tablet and its schema + // @return OLAP_SUCCESS if submit success + OLAPStatus _schema_change(const TAlterTabletReq& request); + +private: + const TAlterTabletReq& _alter_tablet_req; + int64_t _signature; + const TTaskType::type _task_type; + vector* _error_msgs; + const string& _process_name; + +}; // EngineTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_SCHEMA_CHANGE_TASK_H \ No newline at end of file diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp new file mode 100644 index 00000000000000..84ca11d472b81d --- /dev/null +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/task/engine_storage_migration_task.h" + +#include "olap/snapshot_manager.h" +#include "olap/tablet_meta_manager.h" + +namespace doris { + +using std::stringstream; + +EngineStorageMigrationTask::EngineStorageMigrationTask(TStorageMediumMigrateReq& storage_medium_migrate_req) : + _storage_medium_migrate_req(storage_medium_migrate_req) { + +} + +OLAPStatus EngineStorageMigrationTask::execute() { + OLAPStatus res = OLAP_SUCCESS; + res = _storage_medium_migrate( + _storage_medium_migrate_req.tablet_id, + _storage_medium_migrate_req.schema_hash, + _storage_medium_migrate_req.storage_medium); + return res; +} + +OLAPStatus EngineStorageMigrationTask::_storage_medium_migrate( + TTabletId tablet_id, TSchemaHash schema_hash, + TStorageMedium::type storage_medium) { + LOG(INFO) << "begin to process storage media migrate. " + << "tablet_id=" << tablet_id << ", schema_hash=" << schema_hash + << ", dest_storage_medium=" << storage_medium; + DorisMetrics::storage_migrate_requests_total.increment(1); + + OLAPStatus res = OLAP_SUCCESS; + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + if (tablet == nullptr) { + LOG(WARNING) << "can't find tablet. tablet_id= " << tablet_id + << " schema_hash=" << schema_hash; + return OLAP_ERR_TABLE_NOT_FOUND; + } + + // judge case when no need to migrate + uint32_t count = StorageEngine::instance()->available_storage_medium_type_count(); + if (count <= 1) { + LOG(INFO) << "available storage medium type count is less than 1, " + << "no need to migrate. count=" << count; + return OLAP_SUCCESS; + } + + TStorageMedium::type src_storage_medium = tablet->data_dir()->storage_medium(); + if (src_storage_medium == storage_medium) { + LOG(INFO) << "tablet is already on specified storage medium. " + << "storage_medium=" << storage_medium; + return OLAP_SUCCESS; + } + + WriteLock migration_wlock(tablet->get_migration_lock_ptr(), TRY_LOCK); + if (!migration_wlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + + int64_t partition_id; + std::set transaction_ids; + StorageEngine::instance()->txn_manager()->get_tablet_related_txns(tablet->tablet_id(), + tablet->schema_hash(), tablet->tablet_uid(), &partition_id, &transaction_ids); + if (transaction_ids.size() > 0) { + LOG(WARNING) << "could not migration because has unfinished txns, " + << " tablet=" << tablet->full_name(); + return OLAP_ERR_HEADER_HAS_PENDING_DATA; + } + + tablet->obtain_push_lock(); + + // TODO(ygl): the tablet should not under schema change or rollup or load + do { + // get all versions to be migrate + tablet->obtain_header_rdlock(); + const RowsetSharedPtr lastest_version = tablet->rowset_with_max_version(); + if (lastest_version == nullptr) { + tablet->release_header_lock(); + res = OLAP_ERR_VERSION_NOT_EXIST; + LOG(WARNING) << "tablet has not any version."; + break; + } + + int32_t end_version = lastest_version->end_version(); + vector consistent_rowsets; + res = tablet->capture_consistent_rowsets(Version(0, end_version), &consistent_rowsets); + if (consistent_rowsets.empty()) { + tablet->release_header_lock(); + res = OLAP_ERR_VERSION_NOT_EXIST; + LOG(WARNING) << "fail to capture consistent rowsets. 
tablet=" << tablet->full_name() + << ", version=" << end_version; + break; + } + tablet->release_header_lock(); + + // generate schema hash path where files will be migrated + auto stores = StorageEngine::instance()->get_stores_for_create_tablet(storage_medium); + if (stores.empty()) { + res = OLAP_ERR_INVALID_ROOT_PATH; + LOG(WARNING) << "fail to get root path for create tablet."; + break; + } + + uint64_t shard = 0; + res = stores[0]->get_shard(&shard); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to get root path shard. res=" << res; + break; + } + + stringstream root_path_stream; + root_path_stream << stores[0]->path() << DATA_PREFIX << "/" << shard; + string schema_hash_path = SnapshotManager::instance()->get_schema_hash_full_path(tablet, root_path_stream.str()); + // if dir already exist then return err, it should not happen + // should not remove the dir directly + if (check_dir_existed(schema_hash_path)) { + LOG(INFO) << "schema hash path already exist, skip this path. " + << "schema_hash_path=" << schema_hash_path; + res = OLAP_ERR_FILE_ALREADY_EXIST; + break; + } + + TabletMetaSharedPtr new_tablet_meta(new(std::nothrow) TabletMeta()); + res = TabletMetaManager::get_meta(stores[0], tablet->tablet_id(), tablet->schema_hash(), new_tablet_meta); + if (res != OLAP_ERR_META_KEY_NOT_FOUND) { + LOG(WARNING) << "tablet_meta already exists. " + << "data_dir:" << stores[0]->path() + << "tablet:" << tablet->full_name(); + res = OLAP_ERR_META_ALREADY_EXIST; + break; + } + create_dirs(schema_hash_path); + + // migrate all index and data files but header file + res = _copy_index_and_data_files(schema_hash_path, tablet, consistent_rowsets); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to copy index and data files when migrate. res=" << res; + break; + } + + res = _generate_new_header(stores[0], shard, tablet, consistent_rowsets, new_tablet_meta); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "fail to generate new header file from the old. res=" << res; + break; + } + std::string new_meta_file = schema_hash_path + "/" + std::to_string(tablet_id) + ".hdr"; + res = new_tablet_meta->save(new_meta_file); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to save met to path" << new_meta_file; + break; + } + + res = TabletMeta::reset_tablet_uid(new_meta_file); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "errors while set tablet uid: '" << new_meta_file; + break; + } + + // it will change rowset id and its create time + // rowset create time is useful when load tablet from meta to check which tablet is the tablet to load + res = SnapshotManager::instance()->convert_rowset_ids(*(stores[0]), schema_hash_path, tablet_id, schema_hash, nullptr); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to convert rowset id when do storage migration" + << " path = " << schema_hash_path; + break; + } + + res = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir(stores[0], + tablet_id, schema_hash, schema_hash_path, false); + if (res != OLAP_SUCCESS) { + LOG(WARNING) << "failed to load tablet from new path. tablet_id=" << tablet_id + << " schema_hash=" << schema_hash + << " path = " << schema_hash_path; + break; + } + + // if old tablet finished schema change, then the schema change status of the new tablet is DONE + // else the schema change status of the new tablet is FAILED + TabletSharedPtr new_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, schema_hash); + if (new_tablet == nullptr) { + LOG(WARNING) << "get null tablet. 
tablet_id=" << tablet_id + << " schema_hash=" << schema_hash; + res = OLAP_ERR_TABLE_NOT_FOUND; + break; + } + AlterTabletTaskSharedPtr alter_task = tablet->alter_task(); + if (alter_task != nullptr) { + if (alter_task->alter_state() == ALTER_FINISHED) { + new_tablet->set_alter_state(ALTER_FINISHED); + } else { + new_tablet->delete_alter_task(); + } + } + } while (0); + + tablet->release_push_lock(); + + return res; +} + +// TODO(ygl): lost some infomation here, such as cumulative layer point +OLAPStatus EngineStorageMigrationTask::_generate_new_header( + DataDir* store, const uint64_t new_shard, + const TabletSharedPtr& tablet, + const std::vector& consistent_rowsets, + TabletMetaSharedPtr new_tablet_meta) { + if (store == nullptr) { + LOG(WARNING) << "fail to generate new header for store is null"; + return OLAP_ERR_HEADER_INIT_FAILED; + } + OLAPStatus res = OLAP_SUCCESS; + res = TabletMetaManager::get_meta(tablet->data_dir(), tablet->tablet_id(), tablet->schema_hash(), new_tablet_meta); + if (res == OLAP_ERR_META_KEY_NOT_FOUND) { + LOG(WARNING) << "tablet_meta has already been dropped. " + << "data_dir:" << tablet->data_dir()->path() + << "tablet:" << tablet->full_name(); + return res; + } + + vector rs_metas; + for (auto& rs : consistent_rowsets) { + rs_metas.push_back(rs->rowset_meta()); + } + new_tablet_meta->revise_rs_metas(rs_metas); + new_tablet_meta->set_shard_id(new_shard); + // should not save new meta here, because new tablet may failed + // should not remove the old meta here, because the new header maybe not valid + // remove old meta after the new tablet is loaded successfully + return res; +} + +OLAPStatus EngineStorageMigrationTask::_copy_index_and_data_files( + const string& schema_hash_path, + const TabletSharedPtr& ref_tablet, + std::vector& consistent_rowsets) { + std::vector success_files; + OLAPStatus status = OLAP_SUCCESS; + for (auto& rs : consistent_rowsets) { + status = rs->copy_files_to_path(schema_hash_path, &success_files); + if (status != OLAP_SUCCESS) { + if (remove_all_dir(schema_hash_path) != OLAP_SUCCESS) { + LOG(FATAL) << "remove storage migration path failed. " + << "schema_hash_path:" << schema_hash_path; + } + break; + } + } + return status; +} + +} // doris diff --git a/be/src/olap/task/engine_storage_migration_task.h b/be/src/olap/task/engine_storage_migration_task.h new file mode 100644 index 00000000000000..cfcc13d420f17c --- /dev/null +++ b/be/src/olap/task/engine_storage_migration_task.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_STORAGE_MIGRATION_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_STORAGE_MIGRATION_TASK_H + +#include "gen_cpp/AgentService_types.h" +#include "olap/olap_define.h" +#include "olap/task/engine_task.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EngineStorageMigrationTask : public EngineTask { + +public: + virtual OLAPStatus execute(); + +public: + EngineStorageMigrationTask(TStorageMediumMigrateReq& storage_medium_migrate_req); + ~EngineStorageMigrationTask() {} + +private: + OLAPStatus _storage_medium_migrate( + TTabletId tablet_id, TSchemaHash schema_hash, + TStorageMedium::type storage_medium); + + OLAPStatus _generate_new_header(DataDir* store, const uint64_t new_shard, + const TabletSharedPtr& tablet, + const std::vector& consistent_rowsets, + TabletMetaSharedPtr new_tablet_meta); + + // TODO: hkp + // rewrite this function + OLAPStatus _copy_index_and_data_files( + const std::string& header_path, + const TabletSharedPtr& ref_tablet, + std::vector& consistent_rowsets); + +private: + const TStorageMediumMigrateReq& _storage_medium_migrate_req; +}; // EngineTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_STORAGE_MIGRATION_TASK_H diff --git a/be/src/olap/task/engine_task.h b/be/src/olap/task/engine_task.h new file mode 100644 index 00000000000000..3f4d38ac0bdf0a --- /dev/null +++ b/be/src/olap/task/engine_task.h @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_TASK_ENGINE_TASK_H +#define DORIS_BE_SRC_OLAP_TASK_ENGINE_TASK_H + +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/storage_engine.h" +#include "olap/tablet_manager.h" +#include "olap/txn_manager.h" +#include "util/doris_metrics.h" + +namespace doris { + +// base class for storage engine +// add "Engine" as task prefix to prevent duplicate name with agent task +class EngineTask { + +public: + // use agent_status not olap_status, because the task is very close to engine + virtual OLAPStatus prepare() { return OLAP_SUCCESS; } + virtual OLAPStatus execute() { return OLAP_SUCCESS; } + virtual OLAPStatus finish() { return OLAP_SUCCESS; } + virtual OLAPStatus cancel() { return OLAP_SUCCESS; } + virtual void get_related_tablets(vector* tablet_infos) {} +}; // EngineTask + +} // doris +#endif //DORIS_BE_SRC_OLAP_TASK_ENGINE_TASK_H diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp new file mode 100755 index 00000000000000..fef2e98e5e6ed3 --- /dev/null +++ b/be/src/olap/txn_manager.cpp @@ -0,0 +1,474 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/storage_engine.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "olap/base_compaction.h" +#include "olap/cumulative_compaction.h" +#include "olap/lru_cache.h" +#include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" +#include "olap/push_handler.h" +#include "olap/reader.h" +#include "olap/schema_change.h" +#include "olap/data_dir.h" +#include "olap/utils.h" +#include "olap/rowset/column_data_writer.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "util/time.h" +#include "util/doris_metrics.h" +#include "util/pretty_printer.h" + +using apache::thrift::ThriftDebugString; +using boost::filesystem::canonical; +using boost::filesystem::directory_iterator; +using boost::filesystem::path; +using boost::filesystem::recursive_directory_iterator; +using std::back_inserter; +using std::copy; +using std::inserter; +using std::list; +using std::map; +using std::nothrow; +using std::pair; +using std::priority_queue; +using std::set; +using std::set_difference; +using std::string; +using std::stringstream; +using std::vector; + +namespace doris { + +TxnManager::TxnManager() { + for (int i = 0; i < _txn_lock_num; ++i) { + _txn_locks[i] = std::make_shared(); + } +} + +// prepare txn should always be allowed because ingest task will be retried +// could not distinguish rollup, schema change or base table, prepare txn successfully will allow +// ingest retried +OLAPStatus TxnManager::prepare_txn( + TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id) { + + pair key(partition_id, transaction_id); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + WriteLock wrlock(_get_txn_lock(transaction_id)); + WriteLock txn_wrlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it != _txn_tablet_map.end()) { + auto load_itr = it->second.find(tablet_info); + if (load_itr != it->second.end()) { + // found load for txn,tablet + // case 1: user commit rowset, then the load id must be equal + TabletTxnInfo& load_info = load_itr->second; + // check if load id is equal + if (load_info.load_id.hi() == load_id.hi() + && load_info.load_id.lo() == load_id.lo() + && load_info.rowset != nullptr) { + LOG(WARNING) << "find transaction exists when add to engine." + << "partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string(); + return OLAP_SUCCESS; + } + } + } + // not found load id + // case 1: user start a new txn, rowset_ptr = null + // case 2: loading txn from meta env + TabletTxnInfo load_info(load_id, nullptr); + _txn_tablet_map[key][tablet_info] = load_info; + LOG(INFO) << "add transaction to engine successfully." 
+ << "partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string(); + return OLAP_SUCCESS; +} + +OLAPStatus TxnManager::commit_txn( + OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id, RowsetSharedPtr rowset_ptr, bool is_recovery) { + if (partition_id < 1 || transaction_id < 1 || tablet_id < 1) { + LOG(FATAL) << "invalid commit req " + << " partition_id=" << partition_id + << " transaction_id=" << transaction_id + << " tablet_id=" << tablet_id; + } + pair key(partition_id, transaction_id); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + if (rowset_ptr == nullptr) { + LOG(WARNING) << "could not commit txn because rowset ptr is null. " + << "partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string(); + return OLAP_ERR_ROWSET_INVALID; + } + WriteLock wrlock(_get_txn_lock(transaction_id)); + { + // get tx + ReadLock rdlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it != _txn_tablet_map.end()) { + auto load_itr = it->second.find(tablet_info); + if (load_itr != it->second.end()) { + // found load for txn,tablet + // case 1: user commit rowset, then the load id must be equal + TabletTxnInfo& load_info = load_itr->second; + // check if load id is equal + if (load_info.load_id.hi() == load_id.hi() + && load_info.load_id.lo() == load_id.lo() + && load_info.rowset != nullptr + && load_info.rowset->rowset_id() == rowset_ptr->rowset_id()) { + // find a rowset with same rowset id, then it means a duplicate call + LOG(INFO) << "find transaction exists when add to engine." + << "partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string() + << ", rowset_id: " << load_info.rowset->rowset_id(); + return OLAP_SUCCESS; + } else if (load_info.load_id.hi() == load_id.hi() + && load_info.load_id.lo() == load_id.lo() + && load_info.rowset != nullptr + && load_info.rowset->rowset_id() != rowset_ptr->rowset_id()) { + // find a rowset with different rowset id, then it should not happen, just return errors + LOG(WARNING) << "find transaction exists when add to engine." + << "partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string() + << ", exist rowset_id: " << load_info.rowset->rowset_id() + << ", new rowset_id: " << rowset_ptr->rowset_id(); + return OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST; + } + } + } + } + + // if not in recovery mode, then should persist the meta to meta env + // save meta need access disk, it maybe very slow, so that it is not in global txn lock + // it is under a single txn lock + if (!is_recovery) { + OLAPStatus save_status = RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), + rowset_ptr->rowset_meta().get()); + if (save_status != OLAP_SUCCESS) { + LOG(WARNING) << "save committed rowset failed. when commit txn rowset_id:" + << rowset_ptr->rowset_id() + << "tablet id: " << tablet_id + << "txn id:" << transaction_id; + return OLAP_ERR_ROWSET_SAVE_FAILED; + } + } + + { + WriteLock wrlock(&_txn_map_lock); + TabletTxnInfo load_info(load_id, rowset_ptr); + _txn_tablet_map[key][tablet_info] = load_info; + LOG(INFO) << "commit transaction to engine successfully." 
+ << " partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string() + << ", rowsetid: " << rowset_ptr->rowset_id(); + } + return OLAP_SUCCESS; +} + +// remove a txn from txn manager +OLAPStatus TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + Version& version, VersionHash& version_hash) { + pair key(partition_id, transaction_id); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + RowsetSharedPtr rowset_ptr = nullptr; + WriteLock wrlock(_get_txn_lock(transaction_id)); + { + ReadLock rlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it != _txn_tablet_map.end()) { + auto load_itr = it->second.find(tablet_info); + if (load_itr != it->second.end()) { + // found load for txn,tablet + // case 1: user commit rowset, then the load id must be equal + TabletTxnInfo& load_info = load_itr->second; + rowset_ptr = load_info.rowset; + } + } + } + // save meta need access disk, it maybe very slow, so that it is not in global txn lock + // it is under a single txn lock + if (rowset_ptr != nullptr) { + // TODO(ygl): rowset is already set version here, memory is changed, if save failed + // it maybe a fatal error + rowset_ptr->set_version_and_version_hash(version, version_hash); + OLAPStatus save_status = RowsetMetaManager::save(meta, tablet_uid, + rowset_ptr->rowset_id(), + rowset_ptr->rowset_meta().get()); + if (save_status != OLAP_SUCCESS) { + LOG(WARNING) << "save committed rowset failed. when publish txn rowset_id:" + << rowset_ptr->rowset_id() + << ", tablet id: " << tablet_id + << ", txn id:" << transaction_id; + return OLAP_ERR_ROWSET_SAVE_FAILED; + } + } else { + return OLAP_ERR_TRANSACTION_NOT_EXIST; + } + { + WriteLock wrlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it != _txn_tablet_map.end()) { + it->second.erase(tablet_info); + LOG(INFO) << "publish txn successfully." + << " partition_id: " << key.first + << ", txn_id: " << key.second + << ", tablet: " << tablet_info.to_string() + << ", rowsetid: " << rowset_ptr->rowset_id(); + if (it->second.empty()) { + _txn_tablet_map.erase(it); + } + } + return OLAP_SUCCESS; + } +} + +// txn could be rollbacked if it does not have related rowset +// if the txn has related rowset then could not rollback it, because it +// may be committed in another thread and our current thread meets errors when writing to data file +// BE has to wait for fe call clear txn api +OLAPStatus TxnManager::rollback_txn(TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid) { + pair key(partition_id, transaction_id); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + WriteLock wrlock(_get_txn_lock(transaction_id)); + WriteLock txn_wrlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it != _txn_tablet_map.end()) { + auto load_itr = it->second.find(tablet_info); + if (load_itr != it->second.end()) { + // found load for txn,tablet + // case 1: user commit rowset, then the load id must be equal + TabletTxnInfo& load_info = load_itr->second; + if (load_info.rowset != nullptr) { + // if rowset is not null, it means other thread may commit the rowset + // should not delete txn any more + return OLAP_ERR_TRANSACTION_ALREADY_COMMITTED; + } + } + it->second.erase(tablet_info); + LOG(INFO) << "rollback transaction from engine successfully." 
+ << " partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string(); + if (it->second.empty()) { + _txn_tablet_map.erase(it); + } + return OLAP_SUCCESS; + } + return OLAP_SUCCESS; +} + +// fe call this api to clear unused rowsets in be +// could not delete the rowset if it already has a valid version +OLAPStatus TxnManager::delete_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid) { + pair key(partition_id, transaction_id); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + WriteLock wrlock(_get_txn_lock(transaction_id)); + WriteLock txn_wrlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it == _txn_tablet_map.end()) { + return OLAP_ERR_TRANSACTION_NOT_EXIST; + } + auto load_itr = it->second.find(tablet_info); + if (load_itr != it->second.end()) { + // found load for txn,tablet + // case 1: user commit rowset, then the load id must be equal + TabletTxnInfo& load_info = load_itr->second; + if (load_info.rowset != nullptr && meta != nullptr) { + if (load_info.rowset->version().first > 0) { + LOG(WARNING) << "could not delete transaction from engine, " + << "just remove it from memory not delete from disk" + << " because related rowset already published." + << ",partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string() + << ", rowset id: " << load_info.rowset->rowset_id() + << ", version: " << load_info.rowset->version().first; + return OLAP_ERR_TRANSACTION_ALREADY_VISIBLE; + } else { + RowsetMetaManager::remove(meta, tablet_uid, load_info.rowset->rowset_id()); + #ifndef BE_TEST + StorageEngine::instance()->add_unused_rowset(load_info.rowset); + #endif + LOG(INFO) << "delete transaction from engine successfully." + << " partition_id: " << key.first + << ", transaction_id: " << key.second + << ", tablet: " << tablet_info.to_string() + << ", rowset: " << (load_info.rowset != nullptr ? load_info.rowset->rowset_id(): 0); + } + } + } + it->second.erase(tablet_info); + if (it->second.empty()) { + _txn_tablet_map.erase(it); + } + return OLAP_SUCCESS; +} + +void TxnManager::get_tablet_related_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + int64_t* partition_id, std::set* transaction_ids) { + if (partition_id == nullptr || transaction_ids == nullptr) { + LOG(WARNING) << "parameter is null when get transactions by tablet"; + return; + } + + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + ReadLock txn_rdlock(&_txn_map_lock); + for (auto& it : _txn_tablet_map) { + if (it.second.find(tablet_info) != it.second.end()) { + *partition_id = it.first.first; + transaction_ids->insert(it.first.second); + VLOG(3) << "find transaction on tablet." 
+ << "partition_id: " << it.first.first + << ", transaction_id: " << it.first.second + << ", tablet: " << tablet_info.to_string(); + } + } +} + +// force drop all txns related with the tablet +// maybe lock error, because not get txn lock before remove from meta +void TxnManager::force_rollback_tablet_related_txns(OlapMeta* meta, TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid) { + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + WriteLock txn_wrlock(&_txn_map_lock); + for (auto& it : _txn_tablet_map) { + auto load_itr = it.second.find(tablet_info); + if (load_itr != it.second.end()) { + TabletTxnInfo& load_info = load_itr->second; + if (load_info.rowset != nullptr && meta != nullptr) { + LOG(INFO) << " delete transaction from engine " + << ", tablet: " << tablet_info.to_string() + << ", rowset id: " << load_info.rowset->rowset_id(); + RowsetMetaManager::remove(meta, tablet_uid, load_info.rowset->rowset_id()); + } + LOG(INFO) << "remove tablet related txn." + << " partition_id: " << it.first.first + << ", transaction_id: " << it.first.second + << ", tablet: " << tablet_info.to_string() + << ", rowset: " << (load_info.rowset != nullptr ? load_info.rowset->rowset_id(): 0); + it.second.erase(tablet_info); + } + if (it.second.empty()) { + _txn_tablet_map.erase(it.first); + } + } +} + +void TxnManager::get_txn_related_tablets(const TTransactionId transaction_id, + TPartitionId partition_id, + std::map* tablet_infos) { + // get tablets in this transaction + pair key(partition_id, transaction_id); + ReadLock rdlock(_get_txn_lock(transaction_id)); + ReadLock txn_rdlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + if (it == _txn_tablet_map.end()) { + LOG(WARNING) << "could not find tablet for" + << " partition_id=" << partition_id + << ", transaction_id=" << transaction_id; + return; + } + std::map& load_info_map = it->second; + + // each tablet + for (auto& load_info : load_info_map) { + const TabletInfo& tablet_info = load_info.first; + // must not check rowset == null here, because if rowset == null + // publish version should failed + tablet_infos->emplace(tablet_info, load_info.second.rowset); + } +} + +void TxnManager::get_all_related_tablets(std::set* tablet_infos) { + ReadLock txn_rdlock(&_txn_map_lock); + for (auto& it : _txn_tablet_map) { + for (auto& tablet_load_it : it.second) { + tablet_infos->emplace(tablet_load_it.first); + } + } +} + +bool TxnManager::has_txn(TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid) { + pair key(partition_id, transaction_id); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + ReadLock rdlock(_get_txn_lock(transaction_id)); + ReadLock txn_rdlock(&_txn_map_lock); + auto it = _txn_tablet_map.find(key); + bool found = it != _txn_tablet_map.end() + && it->second.find(tablet_info) != it->second.end(); + + return found; +} + +bool TxnManager::get_expire_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + std::vector* transaction_ids) { + if (transaction_ids == nullptr) { + LOG(WARNING) << "parameter is null when get_expire_txns by tablet"; + return false; + } + time_t now = time(nullptr); + TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); + ReadLock txn_rdlock(&_txn_map_lock); + for (auto& it : _txn_tablet_map) { + auto txn_info = it.second.find(tablet_info); + if (txn_info != it.second.end()) { + double diff = difftime(now, txn_info->second.creation_time); + if (diff >= 
config::pending_data_expire_time_sec) { + transaction_ids->push_back(it.first.second); + LOG(INFO) << "find expire pending data. " + << " tablet_id=" << tablet_id + << " schema_hash=" << schema_hash + << " tablet_uid=" << tablet_uid.to_string() + << " transaction_id=" << it.first.second + << " exist_sec=" << diff; + } + } + } + return true; +} + +} // namespace doris diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h new file mode 100755 index 00000000000000..9928815bebadfd --- /dev/null +++ b/be/src/olap/txn_manager.h @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_TXN_MANAGER_H +#define DORIS_BE_SRC_OLAP_TXN_MANAGER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "agent/status.h" +#include "common/status.h" +#include "gen_cpp/AgentService_types.h" +#include "gen_cpp/BackendService_types.h" +#include "gen_cpp/MasterService_types.h" +#include "olap/atomic.h" +#include "olap/lru_cache.h" +#include "olap/olap_common.h" +#include "olap/olap_define.h" +#include "olap/tablet.h" +#include "olap/olap_meta.h" +#include "olap/options.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_meta.h" + +namespace doris { + +struct TabletTxnInfo { + PUniqueId load_id; + RowsetSharedPtr rowset; + int64_t creation_time; + + TabletTxnInfo( + PUniqueId load_id, + RowsetSharedPtr rowset) : + load_id(load_id), + rowset(rowset), + creation_time(time(nullptr)) {} + + TabletTxnInfo() {} +}; + +// txn manager is used to manage mapping between tablet and txns +class TxnManager { +public: + TxnManager(); + + ~TxnManager() { + _txn_tablet_map.clear(); + _txn_locks.clear(); + } + // add a txn to manager + // partition id is useful in publish version stage because version is associated with partition + OLAPStatus prepare_txn(TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id); + + OLAPStatus commit_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id, RowsetSharedPtr rowset_ptr, + bool is_recovery); + + // remove a txn from txn manager + // not persist rowset meta because + OLAPStatus publish_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + Version& version, VersionHash& version_hash); + + // delete the txn from manager if it is not committed(not have a valid rowset) + OLAPStatus rollback_txn(TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, 
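+    // --- illustrative sketch, not part of the patch -------------------------
+    // The container members declared further down in this class are sketched
+    // here with plausible template arguments restored, inferred from how
+    // txn_manager.cpp above iterates them; treat the exact parameter types as
+    // an assumption rather than this patch's literal declarations.
+    using SketchTxnKey = std::pair<int64_t, int64_t>;   // partition_id, transaction_id
+    std::map<SketchTxnKey, std::map<TabletInfo, TabletTxnInfo>> sketch_txn_tablet_map;
+    std::map<int64_t, std::shared_ptr<RWMutex>> sketch_txn_locks; // shard index -> lock, pre-populated
+    RWMutex* sketch_get_txn_lock(TTransactionId txn_id) {
+        // shard the per-txn lock by txn_id % shard_count so unrelated
+        // transactions rarely contend, while _txn_map_lock still protects
+        // the map structure itself
+        return sketch_txn_locks[txn_id % 100].get();
+    }
+    // -------------------------------------------------------------------------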
TabletUid tablet_uid); + + // remove the txn from txn manager + // delete the related rowset if it is not null + // delete rowset related data if it is not null + OLAPStatus delete_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); + + void get_tablet_related_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, int64_t* partition_id, + std::set* transaction_ids); + + void get_txn_related_tablets(const TTransactionId transaction_id, + TPartitionId partition_ids, + std::map* tablet_infos); + + void get_all_related_tablets(std::set* tablet_infos); + + // just check if the txn exists + bool has_txn(TPartitionId partition_id, TTransactionId transaction_id, + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); + + bool get_expire_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, std::vector* transaction_ids); + + void force_rollback_tablet_related_txns(OlapMeta* meta, TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); + +private: + RWMutex* _get_txn_lock(TTransactionId txn_id) { + return _txn_locks[txn_id % _txn_lock_num].get(); + } + +private: + RWMutex _txn_map_lock; + using TxnKey = std::pair; // partition_id, transaction_id; + std::map> _txn_tablet_map; + + const int32_t _txn_lock_num = 100; + std::map> _txn_locks; + + DISALLOW_COPY_AND_ASSIGN(TxnManager); +}; // TxnManager + +} +#endif // DORIS_BE_SRC_OLAP_TXN_MANAGER_H diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index 86706e065b0a8b..53636a6b4eacb7 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -79,7 +79,7 @@ TypeInfoResolver::TypeInfoResolver() { TypeInfoResolver::~TypeInfoResolver() {} TypeInfo* get_type_info(FieldType field_type) { - return TypeInfoResolver::get_instance()->get_type_info(field_type); + return TypeInfoResolver::instance()->get_type_info(field_type); } } // namespace doris diff --git a/be/src/olap/utils.cpp b/be/src/olap/utils.cpp index d992e3f21c7135..b6e3432f18bfc5 100644 --- a/be/src/olap/utils.cpp +++ b/be/src/olap/utils.cpp @@ -175,83 +175,6 @@ using std::vector; } while (0) namespace doris { -__thread char OLAPNoticeLog::_buf[BUF_SIZE]; // buffer instance -__thread int OLAPNoticeLog::_len = 0; // len instance - -void OLAPNoticeLog::push(const char *key, const char *fmt, ...) { - int size_left = BUF_SIZE - _len; - - va_list args; - va_start(args, fmt); - - int len = snprintf(_buf + _len, size_left, " %s:", key); - _len = len >= size_left ? BUF_SIZE - 1 : _len + len; - if (len < size_left) { - size_left = BUF_SIZE - _len; - len = vsnprintf(_buf + _len, size_left, fmt, args); - - //如果len>=BUF_SIZE, 说明有截断,但返回的长度是期望的长度 - _len = len >= size_left ? 
BUF_SIZE - 1 : _len + len; - } - - va_end(args); -} - -void OLAPNoticeLog::log(const char *msg) { - // do nothing -} - -__thread uint64_t OLAPNoticeInfo::_seek_count = 0; -__thread uint64_t OLAPNoticeInfo::_seek_time_us = 0; -__thread uint64_t OLAPNoticeInfo::_scan_rows = 0; -__thread uint64_t OLAPNoticeInfo::_filter_rows = 0; - -void OLAPNoticeInfo::add_seek_count() { - ++_seek_count; -} - -void OLAPNoticeInfo::add_scan_rows(uint64_t rows) { - _scan_rows += rows; -} - -void OLAPNoticeInfo::add_filter_rows(uint64_t rows) { - _filter_rows += rows; -} - -void OLAPNoticeInfo::add_seek_time_us(uint64_t time_us) { - _seek_time_us += time_us; -} - -uint64_t OLAPNoticeInfo::seek_count() { - return _seek_count; -} - -uint64_t OLAPNoticeInfo::seek_time_us() { - return _seek_time_us; -} - -uint64_t OLAPNoticeInfo::avg_seek_time_us() { - if (0 == _seek_count) { - return 0; - } - - return _seek_time_us / _seek_count; -} - -uint64_t OLAPNoticeInfo::scan_rows() { - return _scan_rows; -} - -uint64_t OLAPNoticeInfo::filter_rows() { - return _filter_rows; -} - -void OLAPNoticeInfo::clear() { - _seek_count = 0; - _seek_time_us = 0; - _scan_rows = 0; - _filter_rows = 0; -} OLAPStatus olap_compress(const char* src_buf, size_t src_len, @@ -1299,10 +1222,8 @@ bool check_dir_existed(const string& path) { try { if (boost::filesystem::exists(p)) { - VLOG(3) << "dir already existed. [path='" << path << "']"; return true; } else { - VLOG(3) << "dir does not existed. [path='" << path << "']"; return false; } } catch (...) { @@ -1412,7 +1333,8 @@ OLAPStatus copy_dir(const string &src_dir, const string &dst_dir) { return OLAP_SUCCESS; } -void remove_files(const vector& files) { +OLAPStatus remove_files(const vector& files) { + OLAPStatus res = OLAP_SUCCESS; for (const string& file : files) { boost::filesystem::path file_path(file); @@ -1422,11 +1344,13 @@ void remove_files(const vector& files) { } else { OLAP_LOG_WARNING("failed to remove file. [file=%s errno=%d]", file.c_str(), Errno::no()); + res = OLAP_ERR_IO_ERROR; } } catch (...) { // do nothing } } + return res; } // failed when there are files or dirs under thr dir @@ -1435,7 +1359,6 @@ OLAPStatus remove_dir(const string& path) { try { if (boost::filesystem::remove(p)) { - VLOG(3) << "success to del dir. [path='" << path << "']"; return OLAP_SUCCESS; } } catch (...) { @@ -1471,7 +1394,6 @@ OLAPStatus remove_all_dir(const string& path) { try { if (boost::filesystem::remove_all(p)) { - VLOG(3) << "success to del all dir. [path='" << path << "']"; return OLAP_SUCCESS; } } catch (...) { @@ -1527,17 +1449,15 @@ OLAPStatus dir_walk(const string& root, // 检查找到的目录项是文件还是目录 string tmp_ent = root + '/' + direntp->d_name; if (lstat(tmp_ent.c_str(), &stat_data) < 0) { - OLAP_LOG_WARNING("lstat error."); + LOG(WARNING) << "lstat error."; continue; } if (S_ISDIR(stat_data.st_mode)) { - VLOG(3) << "find dir. d_name=" << direntp->d_name; if (NULL != dirs) { dirs->insert(direntp->d_name); } } else { - VLOG(3) << "find file. 
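// --- illustrative sketch, not part of the patch -----------------------------
// remove_files() above now returns an OLAPStatus instead of void, so callers
// can notice partial failures. A call site might check it like this; the file
// list is a made-up example:
std::vector<std::string> expired_files = {"/data/doris/trash/10001_0_0_0_0.dat"};
OLAPStatus rm_res = remove_files(expired_files);
if (rm_res != OLAP_SUCCESS) {
    LOG(WARNING) << "some files could not be removed. res=" << rm_res;
}
// ----------------------------------------------------------------------------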
d_name=" << direntp->d_name; if (NULL != files) { files->insert(direntp->d_name); } diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index b2f0290dba6d70..002620817d41cb 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -41,6 +41,8 @@ #include "olap/olap_common.h" #include "olap/olap_define.h" +#define TRY_LOCK true + namespace doris { void write_log_info(char* buf, size_t buf_len, const char* fmt, ...); @@ -68,6 +70,10 @@ class OlapStopWatch { (now.tv_usec - _begin_time.tv_usec)); } + double get_elapse_second() { + return get_elapse_time_us() / 100000.0; + } + void reset() { gettimeofday(&_begin_time, 0); } @@ -80,45 +86,6 @@ class OlapStopWatch { struct timeval _begin_time; // 起始时间戳 }; -// 解决notice log buffer不够长的问题, 原生的notice log的buffer只有2048大小 -class OLAPNoticeLog { -public: - static void push(const char* key, const char* fmt, ...) \ - __attribute__((__format__(__printf__, 2, 3))); - - static void log(const char* msg); - -private: - static const int BUF_SIZE = 128 * 1024; // buffer大小 - static __thread char _buf[BUF_SIZE]; - static __thread int _len; -}; - -// 用于在notice log中输出索引定位次数以及平均定位时间 -// 如果还需要在notice log中输出需要聚合计算的其他信息,可以参考这个来实现。 -class OLAPNoticeInfo { -public: - static void add_seek_count(); - static void add_seek_time_us(uint64_t time_us); - static void add_scan_rows(uint64_t rows); - static void add_filter_rows(uint64_t rows); - static uint64_t seek_count(); - static uint64_t seek_time_us(); - static uint64_t avg_seek_time_us(); - static uint64_t scan_rows(); - static uint64_t filter_rows(); - static void clear(); - -private: - static __thread uint64_t _seek_count; - static __thread uint64_t _seek_time_us; - static __thread uint64_t _scan_rows; - static __thread uint64_t _filter_rows; -}; - -#define OLAP_LOG_NOTICE_SOCK(message) OLAPNoticeLog::log(message) -#define OLAP_LOG_NOTICE_PUSH(key, fmt, arg...) 
OLAPNoticeLog::push(key, fmt, ##arg) - // @brief 切分字符串 // @param base 原串 // @param separator 分隔符 @@ -280,14 +247,26 @@ class RWMutex { // class ReadLock { public: - explicit ReadLock(RWMutex* mutex) - : _mutex(mutex) { - this->_mutex->rdlock(); + explicit ReadLock(RWMutex* mutex, bool try_lock = false) + : _mutex(mutex), locked(false) { + if (try_lock) { + locked = this->_mutex->tryrdlock() == OLAP_SUCCESS; + } else { + this->_mutex->rdlock(); + locked = true; + } + } + ~ReadLock() { + if (locked) { + this->_mutex->unlock(); + } } - ~ReadLock() { this->_mutex->unlock(); } + + bool own_lock() { return locked; } private: RWMutex* _mutex; + bool locked; DISALLOW_COPY_AND_ASSIGN(ReadLock); }; @@ -298,14 +277,26 @@ class ReadLock { // class WriteLock { public: - explicit WriteLock(RWMutex* mutex) - : _mutex(mutex) { - this->_mutex->wrlock(); + explicit WriteLock(RWMutex* mutex, bool try_lock = false) + : _mutex(mutex), locked(false) { + if (try_lock) { + locked = this->_mutex->trywrlock() == OLAP_SUCCESS; + } else { + this->_mutex->wrlock(); + locked = true; + } + } + ~WriteLock() { + if (locked) { + this->_mutex->unlock(); + } } - ~WriteLock() { this->_mutex->unlock(); } + + bool own_lock() { return locked; } private: RWMutex* _mutex; + bool locked; DISALLOW_COPY_AND_ASSIGN(WriteLock); }; @@ -391,7 +382,7 @@ OLAPStatus create_dirs(const std::string& path); OLAPStatus copy_dir(const std::string &src_dir, const std::string &dst_dir); -void remove_files(const std::vector& files); +OLAPStatus remove_files(const std::vector& files); OLAPStatus remove_dir(const std::string& path); diff --git a/be/src/olap/wrapper_field.cpp b/be/src/olap/wrapper_field.cpp index 6df628dc17a637..1f1af0e627a334 100644 --- a/be/src/olap/wrapper_field.cpp +++ b/be/src/olap/wrapper_field.cpp @@ -19,30 +19,28 @@ namespace doris { -WrapperField* WrapperField::create(const FieldInfo& info, uint32_t len) { +WrapperField* WrapperField::create(const TabletColumn& column, uint32_t len) { bool is_string_type = - (info.type == OLAP_FIELD_TYPE_CHAR - || info.type == OLAP_FIELD_TYPE_VARCHAR - || info.type == OLAP_FIELD_TYPE_HLL); + (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR); if (is_string_type && len > OLAP_STRING_MAX_LENGTH) { OLAP_LOG_WARNING("length of string parameter is too long[len=%lu, max_len=%lu].", len, OLAP_STRING_MAX_LENGTH); return nullptr; } - Field* rep = Field::create(info); + Field* rep = Field::create(column); if (rep == nullptr) { return nullptr; } size_t variable_len = 0; - if (info.type == OLAP_FIELD_TYPE_CHAR) { - variable_len = std::max(len, info.length); - } else if (info.type == OLAP_FIELD_TYPE_VARCHAR || info.type == OLAP_FIELD_TYPE_HLL) { + if (column.type() == OLAP_FIELD_TYPE_CHAR) { + variable_len = std::max(len, (uint32_t)(column.length())); + } else if (column.type() == OLAP_FIELD_TYPE_VARCHAR) { variable_len = std::max(len, - static_cast(info.length - sizeof(StringLengthType))); + static_cast(column.length() - sizeof(StringLengthType))); } else { - variable_len = info.length; + variable_len = column.length(); } WrapperField* wrapper = new WrapperField(rep, variable_len, is_string_type); diff --git a/be/src/olap/wrapper_field.h b/be/src/olap/wrapper_field.h index e1d2c382a54ab7..d88459fe8509f6 100644 --- a/be/src/olap/wrapper_field.h +++ b/be/src/olap/wrapper_field.h @@ -20,13 +20,14 @@ #include "olap/field.h" #include "olap/olap_define.h" +#include "olap/tablet_schema.h" #include "util/hash_util.hpp" namespace doris { class WrapperField { public: - static 
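// --- illustrative sketch, not part of the patch -----------------------------
// The ReadLock/WriteLock guards in olap/utils.h above now accept an optional
// try-lock flag (TRY_LOCK, defined earlier in that header, is simply `true`).
// In try-lock mode acquisition may fail, so callers are expected to check
// own_lock() before touching the protected state:
void try_read_protected_state(RWMutex* lock) {
    ReadLock rdlock(lock, TRY_LOCK);
    if (!rdlock.own_lock()) {
        // lock is busy; skip this round instead of blocking the caller
        return;
    }
    // ... read the protected state here; unlock happens in ~ReadLock() ...
}
// ----------------------------------------------------------------------------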
WrapperField* create(const FieldInfo& info, uint32_t len = 0); + static WrapperField* create(const TabletColumn& column, uint32_t len = 0); static WrapperField* create_by_type(const FieldType& type); WrapperField(Field* rep, size_t variable_len, bool is_string_type); diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 9595a7d4a7c92e..86667cde9f7cd1 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -37,7 +37,7 @@ class LoadPathMgr; class LoadStreamMgr; class MemTracker; class MetricRegistry; -class OLAPEngine; +class StorageEngine; class PoolMemTrackerRegistry; class PriorityThreadPool; class PullLoadTaskMgr; @@ -117,8 +117,8 @@ class ExecEnv { const std::vector& store_paths() const { return _store_paths; } void set_store_paths(const std::vector& paths) { _store_paths = paths; } - OLAPEngine* olap_engine() { return _olap_engine; } - void set_olap_engine(OLAPEngine* olap_engine) { _olap_engine = olap_engine; } + StorageEngine* storage_engine() { return _storage_engine; } + void set_storage_engine(StorageEngine* storage_engine) { _storage_engine = storage_engine; } StreamLoadExecutor* stream_load_executor() { return _stream_load_executor; } RoutineLoadTaskExecutor* routine_load_task_executor() { return _routine_load_task_executor; } @@ -165,7 +165,7 @@ class ExecEnv { ReservationTracker* _buffer_reservation = nullptr; BufferPool* _buffer_pool = nullptr; - OLAPEngine* _olap_engine = nullptr; + StorageEngine* _storage_engine = nullptr; StreamLoadExecutor* _stream_load_executor = nullptr; RoutineLoadTaskExecutor* _routine_load_task_executor = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 000a95c83d4fa5..2fc4563d57ffd8 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -38,7 +38,7 @@ #include "util/parse_util.h" #include "util/mem_info.h" #include "util/debug_util.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "util/network_util.h" #include "util/bfd_parser.h" #include "runtime/etl_job_mgr.h" diff --git a/be/src/runtime/load_path_mgr.cpp b/be/src/runtime/load_path_mgr.cpp index d928819b11c7e6..b992c928130422 100644 --- a/be/src/runtime/load_path_mgr.cpp +++ b/be/src/runtime/load_path_mgr.cpp @@ -25,7 +25,7 @@ #include #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "util/file_utils.h" #include "gen_cpp/Types_types.h" #include "runtime/exec_env.h" diff --git a/be/src/runtime/result_buffer_mgr.cpp b/be/src/runtime/result_buffer_mgr.cpp index 239bcac0fcdaf2..c37baa5d0531ec 100644 --- a/be/src/runtime/result_buffer_mgr.cpp +++ b/be/src/runtime/result_buffer_mgr.cpp @@ -25,11 +25,11 @@ namespace doris { -std::size_t hash_value(const TUniqueId& fragment_id) { - uint32_t value = RawValue::get_hash_value(&fragment_id.lo, TypeDescriptor(TYPE_BIGINT), 0); - value = RawValue::get_hash_value(&fragment_id.hi, TypeDescriptor(TYPE_BIGINT), value); - return value; -} +//std::size_t hash_value(const TUniqueId& fragment_id) { +// uint32_t value = RawValue::get_hash_value(&fragment_id.lo, TypeDescriptor(TYPE_BIGINT), 0); +// value = RawValue::get_hash_value(&fragment_id.hi, TypeDescriptor(TYPE_BIGINT), value); +// return value; +//} ResultBufferMgr::ResultBufferMgr() : _is_stop(false) { diff --git a/be/src/runtime/result_buffer_mgr.h b/be/src/runtime/result_buffer_mgr.h index 9e6106aa0e1934..b0c974bf42b43d 100644 --- a/be/src/runtime/result_buffer_mgr.h +++ b/be/src/runtime/result_buffer_mgr.h @@ 
-27,6 +27,7 @@ #include #include "common/status.h" #include "gen_cpp/Types_types.h" +#include "util/uid_util.h" namespace doris { @@ -59,7 +60,7 @@ class ResultBufferMgr { Status cancel_at_time(time_t cancel_time, const TUniqueId& query_id); private: - typedef boost::unordered_map > BufferMap; + typedef boost::unordered_map> BufferMap; typedef std::map > TimeoutMap; boost::shared_ptr find_control_block(const TUniqueId& query_id); diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index a3d11b665e76e4..f3b48822648195 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -29,8 +29,9 @@ #include "exec/broker_reader.h" #include "exec/broker_writer.h" #include "olap/file_helper.h" -#include "olap/olap_engine.h" -#include "olap/olap_table.h" +#include "olap/snapshot_manager.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" #include "runtime/exec_env.h" #include "runtime/broker_mgr.h" #include "util/file_utils.h" @@ -488,10 +489,10 @@ Status SnapshotLoader::download( // MUST hold tablet's header lock, push lock, cumulative lock and base compaction lock Status SnapshotLoader::move( const std::string& snapshot_path, - const std::string& tablet_path, - const std::string& store_path, + TabletSharedPtr tablet, bool overwrite) { - + std::string tablet_path = tablet->tablet_path(); + std::string store_path = tablet->data_dir()->path(); LOG(INFO) << "begin to move snapshot files. from: " << snapshot_path << ", to: " << tablet_path << ", store: " << store_path << ", job: " << _job_id @@ -519,6 +520,15 @@ Status SnapshotLoader::move( return Status::InternalError(ss.str()); } + + DataDir* store = StorageEngine::instance()->get_store(store_path); + if (store == nullptr) { + std::stringstream ss; + ss << "failed to get store by path: " << store_path; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + boost::filesystem::path tablet_dir(tablet_path); boost::filesystem::path snapshot_dir(snapshot_path); if (!boost::filesystem::exists(tablet_dir)) { @@ -535,22 +545,21 @@ Status SnapshotLoader::move( return Status::InternalError(ss.str()); } + // rename the rowset ids and tabletid info in rowset meta + OLAPStatus convert_status = SnapshotManager::instance()->convert_rowset_ids(*store, + snapshot_path, tablet_id, schema_hash, tablet); + if (convert_status != OLAP_SUCCESS) { + std::stringstream ss; + ss << "failed to convert rowsetids in snapshot: " << snapshot_path + << ", tablet path: " << tablet_path; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + if (overwrite) { std::vector snapshot_files; RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path, &snapshot_files)); - // 0. check all existing tablet files, revoke file if it is in GC queue - std::vector tablet_files; - RETURN_IF_ERROR(_get_existing_files_from_local(tablet_path, &tablet_files)); - std::vector files_to_check; - for (auto& snapshot_file : snapshot_files) { - if (std::find(tablet_files.begin(), tablet_files.end(), snapshot_file) != tablet_files.end()) { - std::string file_path = tablet_path + "/" + snapshot_file; - files_to_check.emplace_back(std::move(file_path)); - } - } - OLAPEngine::get_instance()->revoke_files_from_gc(files_to_check); - // 1. 
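+// --- illustrative sketch, not part of the patch -----------------------------
+// SnapshotLoader::move() now receives the TabletSharedPtr itself instead of
+// tablet/store path strings, and this patch keeps only the overwrite path.
+// A caller updated for the new signature might look like this; `loader`,
+// `snapshot_path` and `tablet` are assumed to come from the restore job:
+Status move_snapshot_into_tablet(SnapshotLoader* loader,
+                                 const std::string& snapshot_path,
+                                 TabletSharedPtr tablet) {
+    if (tablet == nullptr) {
+        return Status::InternalError("tablet not found for snapshot move");
+    }
+    // rowset ids inside the snapshot are rewritten by convert_rowset_ids()
+    // before the snapshot directory replaces the tablet directory
+    return loader->move(snapshot_path, tablet, true /* overwrite */);
+}
+// ----------------------------------------------------------------------------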
simply delete the old dir and replace it with the snapshot dir try { // This remove seems saft enough, because we already get @@ -590,141 +599,22 @@ Status SnapshotLoader::move( } } else { - // This is not a overwrite move - // The files in tablet dir should be like this: - // - // 10001.hdr - // 10001_0_70_3286516299297662422_0.idx - // 10001_0_70_3286516299297662422_0.dat - // 10001_71_71_4684061214850851594_0.idx - // 10001_71_71_4684061214850851594_0.dat - // ... - // - // 0-70 version is supposed to be the placeholder version - // - // The files in snapshot dir should be like this: - // 10001.hdr - // 10001_0_40_4684061214850851594_0.idx - // 10001_0_40_4684061214850851594_0.dat - // 10001_41_68_1097018054900466785_0.idx - // 10001_41_68_1097018054900466785_0.dat - // 10001_69_69_8126494056407230455_0.idx - // 10001_69_69_8126494056407230455_0.dat - // 10001_70_70_6330898043876688539_0.idx - // 10001_70_70_6330898043876688539_0.dat - // 10001_71_71_0_0.idx - // 10001_71_71_0_0.dat - // - // 71-71 may be exist as the palceholder version - // - // We need to move 0-70 version files from snapshot dir to - // replace the 0-70 placeholder version in tablet dir. - // than we merge the 2 .hdr file before reloading it. - - // load header in tablet dir to get the base vesion - OLAPTablePtr tablet = OLAPEngine::get_instance()->get_table( - tablet_id, schema_hash); - if (tablet.get() == NULL) { - std::stringstream ss; - ss << "failed to get tablet: " << tablet_id << ", schema hash: " - << schema_hash; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - // get base version - tablet->obtain_header_rdlock(); - const PDelta* base_version = tablet->base_version(); - tablet->release_header_lock(); - if (base_version == nullptr) { - std::stringstream ss; - ss << "failed to get base version of tablet: " << tablet_id; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - - int32_t end_version = base_version->end_version(); - - // load snapshot tablet - std::stringstream hdr; - hdr << snapshot_path << "/" << tablet_id << ".hdr"; - std::string snapshot_header_file = hdr.str(); - - OLAPHeader snapshot_header(snapshot_header_file); - OLAPStatus ost = snapshot_header.load_and_init(); - if (ost != OLAP_SUCCESS) { - LOG(WARNING) << "failed to load snapshot header: " << snapshot_header_file; - return Status::InternalError("failed to load snapshot header: " + snapshot_header_file); - } - - LOG(INFO) << "begin to move snapshot files from version 0 to " - << end_version << ", tablet id: " << tablet_id; - - // begin to move - try { - // delete the placeholder version in tablet dir - std::string dummy; - std::string place_holder_idx; - _assemble_file_name("", tablet_path, tablet_id, - 0, end_version, - base_version->version_hash(), 0, ".idx", - &dummy, &place_holder_idx); - boost::filesystem::remove(place_holder_idx); - - std::string place_holder_dat; - _assemble_file_name("", tablet_path, tablet_id, - 0, end_version, - base_version->version_hash(), 0, ".dat", - &dummy, &place_holder_idx); - boost::filesystem::remove(place_holder_dat); - - // copy files - int version_size = snapshot_header.file_version_size(); - for (int i = 0; i < version_size; ++i) { - const FileVersionMessage& version = snapshot_header.file_version(i); - if (version.start_version() > end_version) { - continue; - } - int seg_num = version.num_segments(); - for (int j = 0; j < seg_num; i++) { - // idx - std::string idx_from; - std::string idx_to; - _assemble_file_name(snapshot_path, tablet_path, 
tablet_id, - version.start_version(), version.end_version(), - version.version_hash(), j, ".idx", - &idx_from, &idx_to); - - boost::filesystem::copy_file(idx_from, idx_to); - - // dat - std::string dat_from; - std::string dat_to; - _assemble_file_name(snapshot_path, tablet_path, tablet_id, - version.start_version(), version.end_version(), - version.version_hash(), j, ".dat", - &dat_from, &dat_to); - boost::filesystem::copy_file(dat_from, dat_to); - } - } - } catch (const boost::filesystem::filesystem_error& e) { - std::stringstream ss; - ss << "failed to move tablet path: " << tablet_path - << ". err: " << e.what(); - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } + LOG(FATAL) << "only support overwrite now"; + } - // merge 2 headers - ost = tablet->merge_header(snapshot_header, end_version); - if (ost != OLAP_SUCCESS) { - std::stringstream ss; - ss << "failed to move tablet path: " << tablet_path; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } + // snapshot loader not need to change tablet uid + // fixme: there is no header now and can not call load_one_tablet here + // reload header + OLAPStatus ost = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( + store, tablet_id, schema_hash, tablet_path, true); + if (ost != OLAP_SUCCESS) { + std::stringstream ss; + ss << "failed to reload header of tablet: " << tablet_id; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); } + LOG(INFO) << "finished to reload header of tablet: " << tablet_id; - LOG(INFO) << "finished to move tablet: " << tablet_id; return status; } @@ -957,15 +847,7 @@ Status SnapshotLoader::_replace_tablet_id( return Status::OK(); } else if (_end_with(file_name, ".idx") || _end_with(file_name, ".dat")) { - size_t pos = file_name.find_first_of("_"); - if (pos == std::string::npos) { - return Status::InternalError("invalid tablet file name: " + file_name); - } - - std::string suffix_part = file_name.substr(pos); - std::stringstream ss; - ss << tablet_id << suffix_part; - *new_file_name = ss.str(); + *new_file_name = file_name; return Status::OK(); } else { return Status::InternalError("invalid tablet file name: " + file_name); diff --git a/be/src/runtime/snapshot_loader.h b/be/src/runtime/snapshot_loader.h index e9aea53c34322f..9c046f87f85a76 100644 --- a/be/src/runtime/snapshot_loader.h +++ b/be/src/runtime/snapshot_loader.h @@ -27,6 +27,7 @@ #include "gen_cpp/Types_types.h" #include "common/status.h" +#include "olap/tablet.h" #include "runtime/client_cache.h" namespace doris { @@ -79,8 +80,7 @@ class SnapshotLoader { Status move( const std::string& snapshot_path, - const std::string& tablet_path, - const std::string& store_path, + TabletSharedPtr tablet, bool overwrite); private: diff --git a/be/src/runtime/tablet_writer_mgr.cpp b/be/src/runtime/tablet_writer_mgr.cpp index 498a61abdc21cf..2224018fc1c210 100644 --- a/be/src/runtime/tablet_writer_mgr.cpp +++ b/be/src/runtime/tablet_writer_mgr.cpp @@ -22,7 +22,7 @@ #include #include "common/object_pool.h" -#include "exec/olap_table_info.h" +#include "exec/tablet_info.h" #include "runtime/descriptors.h" #include "runtime/mem_tracker.h" #include "runtime/row_batch.h" @@ -219,7 +219,7 @@ Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& params) request.tablet_id = tablet.tablet_id(); request.schema_hash = schema_hash; request.write_type = LOAD; - request.transaction_id = _txn_id; + request.txn_id = _txn_id; request.partition_id = tablet.partition_id(); request.load_id = 
params.id(); request.need_gen_rollup = params.need_gen_rollup(); @@ -229,7 +229,7 @@ Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& params) auto st = DeltaWriter::open(&request, &writer); if (st != OLAP_SUCCESS) { LOG(WARNING) << "open delta writer failed, tablet_id=" << tablet.tablet_id() - << ", transaction_id=" << _txn_id + << ", txn_id=" << _txn_id << ", partition_id=" << tablet.partition_id() << ", status=" << st; return Status::InternalError("open tablet writer failed"); diff --git a/be/src/runtime/tmp_file_mgr.cc b/be/src/runtime/tmp_file_mgr.cc index 1c6cfc9ae3ff55..14d881edd358df 100644 --- a/be/src/runtime/tmp_file_mgr.cc +++ b/be/src/runtime/tmp_file_mgr.cc @@ -27,8 +27,9 @@ // #include // #include -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "util/uid_util.h" +#include "util/debug_util.h" #include "util/disk_info.h" #include "util/filesystem_util.h" #include "runtime/exec_env.h" diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp index 80b4e0fd1b6217..8ffa0272420e02 100644 --- a/be/src/runtime/vectorized_row_batch.cpp +++ b/be/src/runtime/vectorized_row_batch.cpp @@ -23,7 +23,7 @@ namespace doris { VectorizedRowBatch::VectorizedRowBatch( - const std::vector& schema, + const TabletSchema* schema, const std::vector& cols, int capacity) : _schema(schema), _cols(cols), _capacity(capacity), _limit(capacity) { @@ -35,7 +35,7 @@ VectorizedRowBatch::VectorizedRowBatch( _selected = reinterpret_cast(new char[sizeof(uint16_t) * _capacity]); - _col_vectors.resize(schema.size(), nullptr); + _col_vectors.resize(schema->num_columns(), nullptr); for (ColumnId column_id : cols) { _col_vectors[column_id] = new ColumnVector(); } @@ -52,14 +52,14 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) { // pointer of this field in row block char* row_field_ptr = row_block->_mem_buf + row_block->_field_offset_in_memory[column_id]; - const FieldInfo& field_info = _schema[column_id]; + const TabletColumn& column = _schema->column(column_id); size_t field_size = 0; - if (field_info.type == OLAP_FIELD_TYPE_CHAR || - field_info.type == OLAP_FIELD_TYPE_VARCHAR || - field_info.type == OLAP_FIELD_TYPE_HLL) { + if (column.type() == OLAP_FIELD_TYPE_CHAR || + column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { field_size = sizeof(Slice); } else { - field_size = field_info.length; + field_size = column.length(); } if (no_nulls) { for (int row = 0; row < _size; ++row) { @@ -97,15 +97,15 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) { char* vec_field_ptr = (char*)col_vec->col_data(); char* row_field_ptr = row_block->_mem_buf + row_block->_field_offset_in_memory[column_id]; - const FieldInfo& field_info = _schema[column_id]; + const TabletColumn& column = _schema->column(column_id); size_t field_size = 0; - if (field_info.type == OLAP_FIELD_TYPE_CHAR || - field_info.type == OLAP_FIELD_TYPE_VARCHAR || - field_info.type == OLAP_FIELD_TYPE_HLL) { + if (column.type() == OLAP_FIELD_TYPE_CHAR || + column.type() == OLAP_FIELD_TYPE_VARCHAR || + column.type() == OLAP_FIELD_TYPE_HLL) { field_size = sizeof(Slice); } else { - field_size = field_info.length; + field_size = column.length(); } if (no_nulls) { diff --git a/be/src/runtime/vectorized_row_batch.h b/be/src/runtime/vectorized_row_batch.h index cdfe664cc291bb..d4959a67353f23 100644 --- a/be/src/runtime/vectorized_row_batch.h +++ b/be/src/runtime/vectorized_row_batch.h @@ -73,7 +73,7 @@ class ColumnVector { 
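// --- illustrative sketch, not part of the patch -----------------------------
// Both dump_to_row_block() hunks above pick the in-memory field width the same
// way: string-like columns are stored as a Slice, everything else uses the
// column's fixed length. A small helper capturing that rule (assumed, not
// present in the patch):
static size_t memory_field_size(const TabletColumn& column) {
    switch (column.type()) {
    case OLAP_FIELD_TYPE_CHAR:
    case OLAP_FIELD_TYPE_VARCHAR:
    case OLAP_FIELD_TYPE_HLL:
        return sizeof(Slice);      // variable-length payload lives outside the row
    default:
        return column.length();    // fixed-length value is stored inline
    }
}
// ----------------------------------------------------------------------------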
class VectorizedRowBatch { public: VectorizedRowBatch( - const std::vector& schema, + const TabletSchema* schema, const std::vector& cols, int capacity); @@ -139,7 +139,7 @@ class VectorizedRowBatch { void dump_to_row_block(RowBlock* row_block); private: - const std::vector& _schema; + const TabletSchema* _schema; const std::vector& _cols; const uint16_t _capacity; uint16_t _size = 0; diff --git a/be/src/service/backend_service.cpp b/be/src/service/backend_service.cpp index b5d6659b48b638..d2e93428829f71 100644 --- a/be/src/service/backend_service.cpp +++ b/be/src/service/backend_service.cpp @@ -22,7 +22,7 @@ #include #include -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "service/backend_options.h" #include "util/network_util.h" #include "util/thrift_util.h" @@ -226,7 +226,7 @@ void BackendService::erase_export_task(TStatus& t_status, const TUniqueId& task_ } void BackendService::get_tablet_stat(TTabletStatResult& result) { - OLAPEngine::get_instance()->get_tablet_stat(result); + StorageEngine::instance()->tablet_manager()->get_tablet_stat(result); } void BackendService::submit_routine_load_task( diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 9ad546fd91db89..c8a22e8fad104d 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -153,17 +153,17 @@ int main(int argc, char** argv) { // options doris::EngineOptions options; options.store_paths = paths; - doris::OLAPEngine* engine = nullptr; - auto st = doris::OLAPEngine::open(options, &engine); + doris::StorageEngine* engine = nullptr; + auto st = doris::StorageEngine::open(options, &engine); if (!st.ok()) { - LOG(FATAL) << "fail to open OLAPEngine, res=" << st.get_error_msg(); + LOG(FATAL) << "fail to open StorageEngine, res=" << st.get_error_msg(); exit(-1); } // start backend service for the coordinator on be_port auto exec_env = doris::ExecEnv::GetInstance(); doris::ExecEnv::init(exec_env, paths); - exec_env->set_olap_engine(engine); + exec_env->set_storage_engine(engine); doris::FrontendHelper::setup(exec_env); doris::ThriftServer* be_server = nullptr; diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 39b51ca9abec07..d6d9476130eb03 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -15,155 +15,155 @@ // specific language governing permissions and limitations // under the License. 
-#include #include #include -#include -#include -#include +#include +#include +#include #include #include "common/status.h" -#include "olap/store.h" -#include "olap/olap_header_manager.h" +#include "gen_cpp/olap_file.pb.h" +#include "olap/data_dir.h" +#include "olap/tablet_meta_manager.h" #include "olap/olap_define.h" -#include "olap/olap_header.h" -#include "olap/olap_meta.h" +#include "olap/tablet_meta.h" #include "olap/utils.h" #include "json2pb/pb_to_json.h" -using doris::OlapStore; +using boost::filesystem::canonical; +using boost::filesystem::path; +using doris::DataDir; +using doris::OLAP_SUCCESS; using doris::OlapMeta; -using doris::OlapHeaderManager; -using doris::OLAPHeader; using doris::OLAPStatus; -using doris::OLAP_SUCCESS; using doris::Status; +using doris::TabletMeta; +using doris::TabletMetaManager; -const std::string HEADER_PREFIX = "hdr_"; +const std::string HEADER_PREFIX = "tabletmeta_"; -DEFINE_string(root_path, "./", "storage root path"); -DEFINE_string(operation, "get_header", - "valid operation: get_header, flag, load_header, delete_header, rollback, show_header"); -DEFINE_int64(tablet_id, 0, "tablet_id for header operation"); -DEFINE_int32(schema_hash, 0, "schema_hash for header operation"); -DEFINE_string(json_header_path, "", "json header file path"); -DEFINE_string(pb_header_path, "", "pb header file path"); +DEFINE_string(root_path, "", "storage root path"); +DEFINE_string(operation, "get_meta", + "valid operation: get_meta, flag, load_meta, delete_meta, show_meta"); +DEFINE_int64(tablet_id, 0, "tablet_id for tablet meta"); +DEFINE_int32(schema_hash, 0, "schema_hash for tablet meta"); +DEFINE_string(json_meta_path, "", "absolute json meta file path"); +DEFINE_string(pb_meta_path, "", "pb meta file path"); -void print_usage(std::string progname) { - std::cout << progname << " is the Doris File tool." 
<< std::endl; - std::cout << "Usage:" << std::endl; - std::cout << "./meta_tool --operation=get_header --tablet_id=tabletid --schema_hash=schemahash" << std::endl; - std::cout << "./meta_tool --operation=flag" << std::endl; - std::cout << "./meta_tool --operation=load_header --json_header_path=path" << std::endl; - std::cout << "./meta_tool --operation=delete_header --tablet_id=tabletid --schema_hash=schemahash" << std::endl; - std::cout << "./meta_tool --root_path=rootpath --operation=rollback" << std::endl; - std::cout << "./meta_tool --operation=show_header --pb_header_path=path" << std::endl; +std::string get_usage(const std::string& progname) { + std::stringstream ss; + ss << progname << " is the Doris BE Meta tool.\n"; + ss << "Stop BE first before use this tool.\n"; + ss << "Usage:\n"; + ss << "./meta_tool --operation=get_meta --root_path=/path/to/storage/path --tablet_id=tabletid --schema_hash=schemahash\n"; + ss << "./meta_tool --operation=load_meta --root_path=/path/to/storage/path --json_meta_path=path\n"; + ss << "./meta_tool --operation=delete_meta --root_path=/path/to/storage/path --tablet_id=tabletid --schema_hash=schemahash\n"; + ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; + return ss.str(); } -int main(int argc, char** argv) { - google::ParseCommandLineFlags(&argc, &argv, true); +void show_meta() { + TabletMeta tablet_meta; + OLAPStatus s = tablet_meta.create_from_file(FLAGS_pb_meta_path); + if (s != OLAP_SUCCESS){ + std::cout << "load pb meta file:" << FLAGS_pb_meta_path << " failed" + << ", status:" << s << std::endl; + return; + } + std::string json_meta; + json2pb::Pb2JsonOptions json_options; + json_options.pretty_json = true; + doris::TabletMetaPB tablet_meta_pb; + tablet_meta.to_meta_pb(&tablet_meta_pb); + json2pb::ProtoMessageToJson(tablet_meta_pb, &json_meta, json_options); + std::cout << json_meta << std::endl; +} - std::string root_path = FLAGS_root_path; - if (FLAGS_root_path == "") { - std::cout << "empty root path" << std::endl; - print_usage(argv[0]); - return -1; - } else if (FLAGS_root_path.find("/") != 0) { - // relative path - char dir[PATH_MAX] = {0}; - readlink("/proc/self/exe", dir, PATH_MAX); - std::string path_prefix(dir); - path_prefix = path_prefix.substr(0, path_prefix.rfind("/") + 1); - std::string root_path_postfix = FLAGS_root_path; - // trim tailing / - if (root_path_postfix.rfind("/") == (root_path_postfix.size() -1)) { - root_path_postfix = root_path_postfix.substr(0, root_path_postfix.size() -1); - } +void get_meta(DataDir *data_dir) { + std::string value; + OLAPStatus s = TabletMetaManager::get_json_meta(data_dir, FLAGS_tablet_id, FLAGS_schema_hash, &value); + if (s == doris::OLAP_ERR_META_KEY_NOT_FOUND) { + std::cout << "no tablet meta for tablet_id:" << FLAGS_tablet_id + << ", schema_hash:" << FLAGS_schema_hash << std::endl; + return; + } + std::cout << value << std::endl; +} - root_path = path_prefix + root_path_postfix; +void load_meta(DataDir *data_dir) { + // load json tablet meta into meta + OLAPStatus s = TabletMetaManager::load_json_meta(data_dir, FLAGS_json_meta_path); + if (s != OLAP_SUCCESS) { + std::cout << "load meta failed, status:" << s << std::endl; + return; } - std::unique_ptr store(new(std::nothrow) OlapStore(root_path)); - if (store.get() == NULL) { - std::cout << "new store failed" << std::endl; - return -1; + std::cout << "load meta successfully" << std::endl; +} + +void delete_meta(DataDir *data_dir) { + OLAPStatus s = TabletMetaManager::remove(data_dir, FLAGS_tablet_id, FLAGS_schema_hash); + 
if (s != OLAP_SUCCESS) { + std::cout << "delete tablet meta failed for tablet_id:" << FLAGS_tablet_id + << ", schema_hash:" << FLAGS_schema_hash + << ", status:" << s << std::endl; + return; } - Status st = store->load(); - if (!st.ok()) { - std::cout << "store load failed" << std::endl; - return -1; + std::cout << "delete meta successfully" << std::endl; +} + +int main(int argc, char **argv) { + std::string usage = get_usage(argv[0]); + gflags::SetUsageMessage(usage); + google::ParseCommandLineFlags(&argc, &argv, true); + + if (FLAGS_operation == "show_meta") { + show_meta(); } + else { + // operations that need root path should be written here + std::set valid_operations = { + "get_meta", + "load_meta", + "delete_meta" + }; + if (valid_operations.find(FLAGS_operation) == valid_operations.end()) { + std::cout << "invalid operation:" << FLAGS_operation << std::endl; + return -1; + } - if (FLAGS_operation == "get_header") { - std::string value; - OLAPStatus s = OlapHeaderManager::get_json_header(store.get(), FLAGS_tablet_id, FLAGS_schema_hash, &value); - if (s == doris::OLAP_ERR_META_KEY_NOT_FOUND) { - std::cout << "no header for tablet_id:" << FLAGS_tablet_id - << " schema_hash:" << FLAGS_schema_hash; - return 0; + path root_path(FLAGS_root_path); + try { + root_path = canonical(root_path); } - std::cout << value << std::endl; - } else if (FLAGS_operation == "flag") { - bool converted = false; - OLAPStatus s = OlapHeaderManager::get_header_converted(store.get(), converted); - if (s != OLAP_SUCCESS) { - std::cout << "get header converted flag failed" << std::endl; + catch (...) { + std::cout << "invalid root path:" << FLAGS_root_path << std::endl; return -1; } - std::cout << "is_header_converted is " << converted << std::endl; - } else if (FLAGS_operation == "load_header") { - OLAPStatus s = OlapHeaderManager::load_json_header(store.get(), FLAGS_json_header_path); - if (s != OLAP_SUCCESS) { - std::cout << "load header failed" << std::endl; + + std::unique_ptr data_dir(new (std::nothrow) DataDir(root_path.string())); + if (data_dir == nullptr) { + std::cout << "new data dir failed" << std::endl; return -1; } - std::cout << "load header successfully" << std::endl; - } else if (FLAGS_operation == "delete_header") { - OLAPStatus s = OlapHeaderManager::remove(store.get(), FLAGS_tablet_id, FLAGS_schema_hash); - if (s != OLAP_SUCCESS) { - std::cout << "delete header failed for tablet_id:" << FLAGS_tablet_id - << " schema_hash:" << FLAGS_schema_hash << std::endl; + Status st = data_dir->init(); + if (!st.ok()) { + std::cout << "data_dir load failed" << std::endl; return -1; } - std::cout << "delete header successfully" << std::endl; - } else if (FLAGS_operation == "rollback") { - auto rollback_func = [&root_path](long tablet_id, - long schema_hash, const std::string& value) -> bool { - OLAPHeader olap_header; - bool parsed = olap_header.ParseFromString(value); - if (!parsed) { - std::cout << "parse header failed"; - return true; - } - std::string tablet_id_str = std::to_string(tablet_id); - std::string schema_hash_path = root_path + "/data/" + std::to_string(olap_header.shard()) - + "/" + tablet_id_str + "/" + std::to_string(schema_hash); - std::string header_file_path = schema_hash_path + "/" + tablet_id_str + ".hdr"; - std::cout << "save header to path:" << header_file_path << std::endl; - OLAPStatus s = olap_header.save(header_file_path); - if (s != OLAP_SUCCESS) { - std::cout << "save header file to path:" << header_file_path << " failed" << std::endl; - } - return true; - }; - 
OlapHeaderManager::traverse_headers(store->get_meta(), rollback_func); - } else if (FLAGS_operation == "show_header") { - OLAPHeader header(FLAGS_pb_header_path); - OLAPStatus s = header.load_and_init(); - if (s != OLAP_SUCCESS) { - std::cout << "load pb header file:" << FLAGS_pb_header_path << " failed" << std::endl; + + if (FLAGS_operation == "get_meta") { + get_meta(data_dir.get()); + } else if (FLAGS_operation == "load_meta") { + load_meta(data_dir.get()); + } else if (FLAGS_operation == "delete_meta") { + delete_meta(data_dir.get()); + } else { + std::cout << "invalid operation:" << FLAGS_operation << "\n" + << usage << std::endl; return -1; } - std::string json_header; - json2pb::Pb2JsonOptions json_options; - json_options.pretty_json = true; - json2pb::ProtoMessageToJson(header, &json_header, json_options); - std::cout << "header:" << std::endl; - std::cout << json_header << std::endl; - } else { - std::cout << "invalid operation:" << FLAGS_operation << std::endl; - print_usage(argv[0]); - return -1; } + gflags::ShutDownCommandLineFlags(); return 0; } diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp index 3a8158e720c360..57e9c35ce76621 100644 --- a/be/src/util/doris_metrics.cpp +++ b/be/src/util/doris_metrics.cpp @@ -64,7 +64,6 @@ IntCounter DorisMetrics::create_rollup_requests_failed; IntCounter DorisMetrics::storage_migrate_requests_total; IntCounter DorisMetrics::delete_requests_total; IntCounter DorisMetrics::delete_requests_failed; -IntCounter DorisMetrics::cancel_delete_requests_total; IntCounter DorisMetrics::clone_requests_total; IntCounter DorisMetrics::clone_requests_failed; @@ -174,7 +173,6 @@ void DorisMetrics::initialize( REGISTER_ENGINE_REQUEST_METRIC(storage_migrate, total, storage_migrate_requests_total); REGISTER_ENGINE_REQUEST_METRIC(delete, total, delete_requests_total); REGISTER_ENGINE_REQUEST_METRIC(delete, failed, delete_requests_failed); - REGISTER_ENGINE_REQUEST_METRIC(cancel_delete, total, cancel_delete_requests_total); REGISTER_ENGINE_REQUEST_METRIC(clone, total, clone_requests_total); REGISTER_ENGINE_REQUEST_METRIC(clone, failed, clone_requests_failed); diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h index d0582d2b8f0bf6..71808667b7c89f 100644 --- a/be/src/util/doris_metrics.h +++ b/be/src/util/doris_metrics.h @@ -83,7 +83,6 @@ class DorisMetrics { static IntCounter storage_migrate_requests_total; static IntCounter delete_requests_total; static IntCounter delete_requests_failed; - static IntCounter cancel_delete_requests_total; static IntCounter clone_requests_total; static IntCounter clone_requests_failed; diff --git a/be/src/util/once.h b/be/src/util/once.h new file mode 100644 index 00000000000000..4b528f8998603c --- /dev/null +++ b/be/src/util/once.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_UTIL_ONCE_H +#define DORIS_BE_SRC_UTIL_ONCE_H + +#include + +#include "olap/olap_common.h" + +namespace doris { + +// Similar to the KuduOnceDynamic class, but accepts a lambda function. +class DorisInitOnce { +public: + DorisInitOnce() + : _init_succeeded(false) {} + + // If the underlying `once_flag` has yet to be invoked, invokes the provided + // lambda and stores its return value. Otherwise, returns the stored Status. + template + OLAPStatus init(Fn fn) { + std::call_once(_once_flag, [this, fn] { + _status = fn(); + if (OLAP_SUCCESS == _status) { + _init_succeeded.store(true, std::memory_order_release); + } + }); + return _status; + } + + // std::memory_order_acquire here and std::memory_order_release in + // init(), taken together, mean that threads can safely synchronize on + // _init_succeeded. + bool init_succeeded() const { + return _init_succeeded.load(std::memory_order_acquire); + } + +private: + std::atomic _init_succeeded; + std::once_flag _once_flag; + OLAPStatus _status; +}; + +} // namespace doris + +#endif // DORIS_BE_SRC_UTIL_ONCE_H diff --git a/be/src/util/uid_util.h b/be/src/util/uid_util.h index d7a73979876c4d..0806dab0afb4a3 100644 --- a/be/src/util/uid_util.h +++ b/be/src/util/uid_util.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_UTIL_UID_UTIL_H #include +#include #include #include @@ -42,6 +43,22 @@ inline void to_hex(T val, char* buf) { } } +template +inline void from_hex(T* ret, const std::string& buf) { + T val = 0; + for (int i = 0; i < buf.length(); ++i) { + int buf_val = 0; + if (buf.c_str()[i] >= '0' && buf.c_str()[i] <= '9') + buf_val = buf.c_str()[i] - '0'; + else { + buf_val = buf.c_str()[i] - 'a' + 10; + } + val <<= 4; + val = val | buf_val; + } + *ret = val; +} + struct UniqueId { int64_t hi; int64_t lo; @@ -54,6 +71,10 @@ struct UniqueId { UniqueId(int64_t hi_, int64_t lo_) : hi(hi_), lo(lo_) { } UniqueId(const TUniqueId& tuid) : hi(tuid.hi), lo(tuid.lo) { } UniqueId(const PUniqueId& puid) : hi(puid.hi()), lo(puid.lo()) { } + UniqueId(const std::string& hi_str, const std::string& lo_str) { + from_hex(&hi, hi_str); + from_hex(&lo, lo_str); + } ~UniqueId() noexcept { } std::string to_string() const { @@ -64,14 +85,29 @@ struct UniqueId { return {buf, 33}; } + // std::map std::set needs this operator + bool operator<(const UniqueId& right) const { + if (hi != right.hi) { + return hi < right.hi; + } else { + return lo < right.lo; + } + } + + // std::unordered_map need this api size_t hash(size_t seed = 0) const { return doris::HashUtil::hash(this, sizeof(*this), seed); } + // std::unordered_map need this api bool operator==(const UniqueId& rhs) const { return hi == rhs.hi && lo == rhs.lo; } + bool operator!=(const UniqueId& rhs) const { + return hi != rhs.hi || lo != rhs.lo; + } + TUniqueId to_thrift() const { TUniqueId tid; tid.__set_hi(hi); diff --git a/be/test/agent/CMakeLists.txt b/be/test/agent/CMakeLists.txt index 9f2e8e8ae9c3c1..e0586476e1b120 100644 --- a/be/test/agent/CMakeLists.txt +++ b/be/test/agent/CMakeLists.txt @@ -24,6 +24,4 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/agent") # ADD_BE_TEST(agent_server_test) ADD_BE_TEST(cgroups_mgr_test) #ADD_BE_TEST(heartbeat_server_test) -# ADD_BE_TEST(pusher_test) -# ADD_BE_TEST(task_worker_pool_test) ADD_BE_TEST(utils_test) diff --git a/be/test/agent/agent_server_test.cpp b/be/test/agent/agent_server_test.cpp index 7f29f6aa161610..b10463d06a64e9 100644 --- 
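+// --- illustrative sketch, not part of the patch -----------------------------
+// DorisInitOnce (util/once.h above) wraps std::call_once and caches the
+// lambda's OLAPStatus, so repeated init() calls return the first result. A
+// component with one-shot initialization could use it like this; the class
+// and member names are placeholders:
+class OnceInitedComponent {
+public:
+    OLAPStatus init() {
+        return _init_once.init([this] {
+            // expensive one-time setup goes here; its status is cached
+            return OLAP_SUCCESS;
+        });
+    }
+    bool inited() const { return _init_once.init_succeeded(); }
+
+private:
+    DorisInitOnce _init_once;
+};
+// ----------------------------------------------------------------------------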
a/be/test/agent/agent_server_test.cpp +++ b/be/test/agent/agent_server_test.cpp @@ -58,21 +58,21 @@ TEST(SubmitTasksTest, TestSubmitTasks){ // Master info inited, submit task tasks.clear(); - TAgentTaskRequest create_table_task; + TAgentTaskRequest create_tablet_task; TCreateTabletReq create_tablet_req; - create_table_task.task_type = TTaskType::CREATE; - create_table_task.__set_create_tablet_req(create_tablet_req); - tasks.push_back(create_table_task); - TAgentTaskRequest drop_table_task; + create_tablet_task.task_type = TTaskType::CREATE; + create_tablet_task.__set_create_tablet_req(create_tablet_req); + tasks.push_back(create_tablet_task); + TAgentTaskRequest drop_tablet_task; TDropTabletReq drop_tablet_req; - drop_table_task.task_type = TTaskType::DROP; - drop_table_task.__set_drop_tablet_req(drop_tablet_req); - tasks.push_back(drop_table_task); - TAgentTaskRequest alter_table_task; + drop_tablet_task.task_type = TTaskType::DROP; + drop_tablet_task.__set_drop_tablet_req(drop_tablet_req); + tasks.push_back(drop_tablet_task); + TAgentTaskRequest alter_tablet_task; TAlterTabletReq alter_tablet_req; - alter_table_task.task_type = TTaskType::ROLLUP; - alter_table_task.__set_alter_tablet_req(alter_tablet_req); - tasks.push_back(alter_table_task); + alter_tablet_task.task_type = TTaskType::ROLLUP; + alter_tablet_task.__set_alter_tablet_req(alter_tablet_req); + tasks.push_back(alter_tablet_task); TAgentTaskRequest clone_task; TCloneReq clone_req; clone_task.task_type = TTaskType::CLONE; diff --git a/be/test/agent/heartbeat_server_test.cpp b/be/test/agent/heartbeat_server_test.cpp index 5eaf1c50fce56d..76c575b9b0523d 100644 --- a/be/test/agent/heartbeat_server_test.cpp +++ b/be/test/agent/heartbeat_server_test.cpp @@ -21,7 +21,6 @@ #include "gen_cpp/HeartbeatService_types.h" #include "gen_cpp/Types_types.h" #include "agent/heartbeat_server.h" -#include "olap/mock_olap_rootpath.h" #include "util/logging.h" using ::testing::_; @@ -42,27 +41,6 @@ TEST(HeartbeatTest, TestHeartbeat){ ori_master_info.network_address.port = 0; HeartbeatServer heartbeat_server(&ori_master_info); - MockOLAPRootPath mock_olap_rootpath; - OLAPRootPath* ori_olap_rootpath; - ori_olap_rootpath = heartbeat_server._olap_rootpath_instance; - heartbeat_server._olap_rootpath_instance = &mock_olap_rootpath; - - // No cluster id yet - EXPECT_CALL(mock_olap_rootpath, set_cluster_id(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - TMasterInfo master_info; - master_info.cluster_id = 1; - master_info.epoch = 10; - master_info.network_address.hostname = "host"; - master_info.network_address.port = 12345; - heartbeat_server.heartbeat(heartbeat_result, master_info); - EXPECT_EQ(TStatusCode::RUNTIME_ERROR, heartbeat_result.status.status_code); - - // New cluster heartbeat - EXPECT_CALL(mock_olap_rootpath, set_cluster_id(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); heartbeat_server.heartbeat(heartbeat_result, master_info); EXPECT_EQ(TStatusCode::OK, heartbeat_result.status.status_code); EXPECT_EQ(master_info.epoch, heartbeat_server._epoch); diff --git a/be/test/agent/mock_pusher.h b/be/test/agent/mock_pusher.h index ff10607df8710c..a2ff736017603c 100644 --- a/be/test/agent/mock_pusher.h +++ b/be/test/agent/mock_pusher.h @@ -19,7 +19,6 @@ #define DORIS_BE_SRC_AGENT_MOCK_MOCK_PUSHER_H #include "gmock/gmock.h" -#include "agent/pusher.h" namespace doris { diff --git a/be/test/agent/pusher_test.cpp b/be/test/agent/pusher_test.cpp deleted file mode 100644 index 96aa5f9b8282cf..00000000000000 --- 
a/be/test/agent/pusher_test.cpp +++ /dev/null @@ -1,278 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include "boost/filesystem.hpp" -#include "gtest/gtest.h" -#include "gmock/gmock.h" -#include "agent/mock_file_downloader.h" -#include "agent/mock_utils.h" -#include "agent/pusher.h" -#include "olap/mock_command_executor.h" -#include "olap/olap_define.h" -#include "olap/olap_table.h" -#include "util/logging.h" - -using ::testing::_; -using ::testing::Return; -using ::testing::ReturnPointee; -using ::testing::SetArgPointee; -using std::string; -using std::vector; - -namespace doris { - -MockFileDownloader::MockFileDownloader(const FileDownloaderParam& param):FileDownloader(param) { -} - -TEST(PusherTest, TestInit) { - TPushReq push_req; - push_req.tablet_id = 1; - push_req.schema_hash = 12345; - Pusher pusher(nullptr, push_req); - - OLAPEngine* tmp = NULL; - MockCommandExecutor mock_command_executor; - tmp = pusher._engine; - pusher._engine = &mock_command_executor; - - OLAPTable* olap_table = NULL; - // not init, can not get olap table - EXPECT_CALL(mock_command_executor, get_table(1, 12345)) - .Times(1) - .WillOnce(Return(std::shared_ptr(olap_table))); - AgentStatus ret = pusher.init(); - EXPECT_EQ(DORIS_PUSH_INVALID_TABLE, ret); - - // not init, can get olap table, and empty remote path - olap_table = new OLAPTable(new OLAPHeader("./test_data/header"), nullptr); - EXPECT_CALL(mock_command_executor, get_table(1, 12345)) - .Times(1) - .WillOnce(Return(std::shared_ptr(olap_table))); - ret = pusher.init(); - EXPECT_EQ(DORIS_SUCCESS, ret); - EXPECT_TRUE(pusher._is_init); - - // has inited - ret = pusher.init(); - EXPECT_EQ(DORIS_SUCCESS, ret); - pusher._engine = tmp; - - // not inited, remote path not empty - string http_file_path = "http://xx"; - string root_path_name = "./test_data/data"; - olap_table = new OLAPTable(new OLAPHeader("./test_data/header"), nullptr); - push_req.__set_http_file_path(http_file_path); - Pusher pusher2(nullptr, push_req); - tmp = pusher2._engine; - pusher2._engine = &mock_command_executor; - olap_table->_storage_root_path = root_path_name; - EXPECT_CALL(mock_command_executor, get_table(1, 12345)) - .Times(1) - .WillOnce(Return(std::shared_ptr(olap_table))); - ret = pusher2.init(); - EXPECT_EQ(DORIS_SUCCESS, ret); - EXPECT_TRUE(pusher2._is_init); - EXPECT_STREQ(http_file_path.c_str(), pusher2._downloader_param.remote_file_path.c_str()); - EXPECT_EQ(0, strncmp( - pusher2._downloader_param.local_file_path.c_str(), - root_path_name.c_str(), - strlen(root_path_name.c_str()))); - - pusher2._engine = tmp; -} - -TEST(PusherTest, TestGetTmpFileDir) { - TPushReq push_req; - Pusher pusher(nullptr, push_req); - - // download path not exist - string root_path = "./test_data/dpp_download_file"; - 
string download_path; - AgentStatus ret = pusher._get_tmp_file_dir(root_path, &download_path); - EXPECT_EQ(DORIS_SUCCESS, ret); - EXPECT_STREQ("./test_data/dpp_download_file/dpp_download", download_path.c_str()); - - // download path exist - ret = pusher._get_tmp_file_dir(root_path, &download_path); - EXPECT_EQ(DORIS_SUCCESS, ret); -} - -TEST(PusherTest, TestDownloadFile){ - TPushReq push_req; - Pusher pusher(nullptr, push_req); - - // download success - FileDownloader::FileDownloaderParam param; - MockFileDownloader mock_file_downloader(param); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - pusher._file_downloader = &mock_file_downloader; - AgentStatus ret = pusher._download_file(); - EXPECT_EQ(DORIS_SUCCESS, ret); - - // download failed - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(1) - .WillOnce(Return(DORIS_ERROR)); - ret = pusher._download_file(); - EXPECT_EQ(DORIS_ERROR, ret); -} - -TEST(PusherTest, TestGetFileNameFromPath) { - TPushReq push_req; - Pusher pusher(nullptr, push_req); - - string file_path = "/file_path/file_name"; - string file_name; - pusher._get_file_name_from_path(file_path, &file_name); - EXPECT_EQ(0, strncmp(file_name.c_str(), "file_name_", 10)); -} - -TEST(PusherTest, TestProcess) { - TPushReq push_req; - Pusher pusher(nullptr, push_req); - vector tablet_infos; - - // not init - AgentStatus ret = pusher.process(&tablet_infos); - EXPECT_EQ(DORIS_ERROR, ret); - - // init, remote file empty, push success, delete download file - pusher._is_init = true; - pusher._downloader_param.local_file_path = "./test_data/download_file"; - MockCommandExecutor mock_command_executor; - OLAPEngine* tmp; - tmp = pusher._engine; - pusher._engine = &mock_command_executor; - EXPECT_CALL(mock_command_executor, push(push_req, &tablet_infos)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - - FILE* fp = fopen(pusher._downloader_param.local_file_path.c_str(), "w"); - if (fp != NULL) { - fclose(fp); - fp = NULL; - } - boost::filesystem::path download_file_path(pusher._downloader_param.local_file_path); - EXPECT_TRUE(boost::filesystem::exists(download_file_path)); - - ret = pusher.process(&tablet_infos); - EXPECT_EQ(DORIS_SUCCESS, ret); - EXPECT_FALSE(boost::filesystem::exists(download_file_path)); - - // init, remote file empty, push failed, delete download file - EXPECT_CALL(mock_command_executor, push(push_req, &tablet_infos)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - - fp = fopen(pusher._downloader_param.local_file_path.c_str(), "w"); - if (fp != NULL) { - fclose(fp); - fp = NULL; - } - EXPECT_TRUE(boost::filesystem::exists(download_file_path)); - - ret = pusher.process(&tablet_infos); - EXPECT_EQ(DORIS_ERROR, ret); - EXPECT_FALSE(boost::filesystem::exists(download_file_path)); - - pusher._engine = tmp; - - // init, remote file not empty, not set file length - push_req.__set_http_file_path("http://xxx"); - Pusher pusher2(nullptr, push_req); - pusher2._is_init = true; - FileDownloader::FileDownloaderParam param; - MockFileDownloader mock_file_downloader(param); - pusher2._file_downloader = &mock_file_downloader; - - // init, remote file not empty, get remote file length success, timeout - time_t now = time(NULL); - pusher2._push_req.timeout = now - 100; - ret = pusher2.process(&tablet_infos); - EXPECT_EQ(DORIS_PUSH_TIME_OUT, ret); - - // init, remote file not empty, get remote file length success, download file failed - now = time(NULL); - pusher2._push_req.timeout = now + 100; - 
pusher2._download_status = DORIS_ERROR; - ret = pusher2.process(&tablet_infos); - EXPECT_EQ(DORIS_ERROR, ret); - - // init, remote file not empty, get remote file length success, download file success - // size diff - string file_path = "./test_data/download_file"; - fp = fopen(file_path.c_str(), "w"); - fputs("doris be test", fp); - fclose(fp); - boost::filesystem::path local_file_path(file_path); - uint64_t local_file_size = boost::filesystem::file_size(local_file_path); - now = time(NULL); - pusher2._push_req.timeout = now + 100; - pusher2._download_status = DORIS_SUCCESS; - pusher2._push_req.__set_http_file_size(local_file_size + 1); - pusher2._downloader_param.local_file_path = file_path; - ret = pusher2.process(&tablet_infos); - EXPECT_EQ(DORIS_FILE_DOWNLOAD_FAILED, ret); - - // init, remote file not empty, get remote file length success, download file success - // size same, push failed - fp = fopen(file_path.c_str(), "w"); - fputs("doris be test", fp); - fclose(fp); - now = time(NULL); - tmp = pusher2._engine; - pusher2._engine = &mock_command_executor; - pusher2._push_req.timeout = now + 100; - pusher2._push_req.__set_http_file_size(local_file_size); - EXPECT_CALL(mock_command_executor, push(_, &tablet_infos)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - ret = pusher2.process(&tablet_infos); - EXPECT_EQ(DORIS_ERROR, ret); - - // init, remote file not empty, get remote file length success, download file success - // size same, push success - fp = fopen(file_path.c_str(), "w"); - fputs("doris be test", fp); - fclose(fp); - now = time(NULL); - pusher2._push_req.timeout = now + 100; - EXPECT_CALL(mock_command_executor, push(_, &tablet_infos)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - ret = pusher2.process(&tablet_infos); - EXPECT_EQ(DORIS_SUCCESS, ret); - - pusher2._engine = tmp; -} - -} // namespace doris - -int main(int argc, char **argv) { - std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; - if (!doris::config::init(conffile.c_str(), false)) { - fprintf(stderr, "error read config file. \n"); - return -1; - } - doris::init_glog("be-test"); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/be/test/agent/task_worker_pool_test.cpp b/be/test/agent/task_worker_pool_test.cpp deleted file mode 100644 index b1f276e7ce9c18..00000000000000 --- a/be/test/agent/task_worker_pool_test.cpp +++ /dev/null @@ -1,1913 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include -#include -#include -#include -#include -#include "gtest/gtest.h" -#include "gmock/gmock.h" -#include "agent/file_downloader.h" -#include "agent/mock_file_downloader.h" -#include "agent/mock_pusher.h" -#include "agent/mock_utils.h" -#include "agent/mock_task_worker_pool.h" -#include "agent/task_worker_pool.h" -#include "agent/utils.h" -#include "olap/mock_command_executor.h" -#include "util/logging.h" -#include "runtime/exec_env.h" - -using ::testing::_; -using ::testing::Return; -using ::testing::SetArgPointee; -using std::string; -using std::deque; - -namespace doris { - -MockFileDownloader::MockFileDownloader(const FileDownloaderParam& param):FileDownloader(param) { -} - -MockPusher::MockPusher(const TPushReq& push_req) : Pusher(push_req) { -}; - -MockAgentServerClient::MockAgentServerClient(const TBackend backend) - : AgentServerClient(backend) { -} - -MockMasterServerClient::MockMasterServerClient( - const TMasterInfo& master_info, - FrontendServiceClientCache* client_cache) : MasterServerClient(master_info, client_cache) { -} - -TEST(TaskWorkerPoolTest, TestStart) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool_create_table( - TaskWorkerPool::TaskWorkerType::CREATE_TABLE, - &env, - master_info); - task_worker_pool_create_table.start(); - EXPECT_EQ(task_worker_pool_create_table._worker_count, config::create_table_worker_count); - - TaskWorkerPool task_worker_pool_drop_table( - TaskWorkerPool::TaskWorkerType::DROP_TABLE, - &env, - master_info); - task_worker_pool_drop_table.start(); - EXPECT_EQ(task_worker_pool_create_table._worker_count, config::drop_table_worker_count); - - TaskWorkerPool task_worker_pool_push( - TaskWorkerPool::TaskWorkerType::PUSH, - &env, - master_info); - task_worker_pool_push.start(); - EXPECT_EQ(task_worker_pool_push._worker_count, config::push_worker_count_normal_priority - + config::push_worker_count_high_priority); - - TaskWorkerPool task_worker_pool_publish_version( - TaskWorkerPool::TaskWorkerType::PUBLISH_VERSION, - &env, - master_info); - task_worker_pool_publish_version.start(); - EXPECT_EQ(task_worker_pool_publish_version._worker_count, config::publish_version_worker_count); - - TaskWorkerPool task_worker_pool_alter_table( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - task_worker_pool_alter_table.start(); - EXPECT_EQ(task_worker_pool_alter_table._worker_count, config::alter_table_worker_count); - - TaskWorkerPool task_worker_pool_clone( - TaskWorkerPool::TaskWorkerType::CLONE, - &env, - master_info); - task_worker_pool_clone.start(); - EXPECT_EQ(task_worker_pool_clone._worker_count, config::clone_worker_count); - - TaskWorkerPool task_worker_pool_cancel_delete_data( - TaskWorkerPool::TaskWorkerType::CANCEL_DELETE_DATA, - &env, - master_info); - task_worker_pool_cancel_delete_data.start(); - EXPECT_EQ( - task_worker_pool_cancel_delete_data._worker_count, - config::cancel_delete_data_worker_count); - - TaskWorkerPool task_worker_pool_report_task( - TaskWorkerPool::TaskWorkerType::REPORT_TASK, - &env, - master_info); - task_worker_pool_report_task.start(); - EXPECT_EQ(task_worker_pool_report_task._worker_count, REPORT_TASK_WORKER_COUNT); - - TaskWorkerPool task_worker_pool_report_disk_state( - TaskWorkerPool::TaskWorkerType::REPORT_DISK_STATE, - &env, - master_info); - task_worker_pool_report_disk_state.start(); - EXPECT_EQ(task_worker_pool_report_disk_state._worker_count, REPORT_DISK_STATE_WORKER_COUNT); - - TaskWorkerPool task_worker_pool_report_olap_table( - 
TaskWorkerPool::TaskWorkerType::REPORT_OLAP_TABLE, - &env, - master_info); - task_worker_pool_report_olap_table.start(); - EXPECT_EQ(task_worker_pool_report_olap_table._worker_count, REPORT_OLAP_TABLE_WORKER_COUNT); - - TaskWorkerPool task_worker_pool_upload( - TaskWorkerPool::TaskWorkerType::UPLOAD, - &env, - master_info); - task_worker_pool_upload.start(); - EXPECT_EQ(task_worker_pool_upload._worker_count, config::upload_worker_count); - - TaskWorkerPool task_worker_pool_make_snapshot( - TaskWorkerPool::TaskWorkerType::MAKE_SNAPSHOT, - &env, - master_info); - task_worker_pool_make_snapshot.start(); - EXPECT_EQ(task_worker_pool_make_snapshot._worker_count, config::make_snapshot_worker_count); - - TaskWorkerPool task_worker_pool_release_snapshot( - TaskWorkerPool::TaskWorkerType::RELEASE_SNAPSHOT, - &env, - master_info); - task_worker_pool_release_snapshot.start(); - EXPECT_EQ(task_worker_pool_release_snapshot._worker_count, - config::release_snapshot_worker_count); -} - -TEST(TaskWorkerPoolTest, TestSubmitTask) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - // Record signature success - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::ROLLUP; - agent_task_request.signature = 123456; - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - - // Record same signature - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - - task_worker_pool._s_task_signatures[agent_task_request.task_type].clear(); -} - -TEST(TaskWorkerPoolTest, TestRecordTaskInfo) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - TTaskType::type task_type = TTaskType::ROLLUP; - // Record signature success - bool ret = task_worker_pool._record_task_info(task_type, 123456, "root"); - EXPECT_TRUE(ret); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[task_type].size()); - - // Record same signature - ret = task_worker_pool._record_task_info(task_type, 123456, "root"); - EXPECT_FALSE(ret); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[task_type].size()); - - // Record different signature - ret = task_worker_pool._record_task_info(task_type, 123457, ""); - EXPECT_TRUE(ret); - EXPECT_EQ(2, task_worker_pool._s_task_signatures[task_type].size()); - - TMasterInfo master_info2; - TaskWorkerPool task_worker_pool2( - TaskWorkerPool::TaskWorkerType::PUSH, - &env, - master_info2); - TTaskType::type task_type2 = TTaskType::PUSH; - - // Record push task info - ret = task_worker_pool._record_task_info(task_type2, 223456, "root"); - EXPECT_TRUE(ret); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[task_type2].size()); - EXPECT_EQ(1, task_worker_pool._s_total_task_user_count[task_type2]["root"]); - EXPECT_EQ(1, task_worker_pool._s_total_task_count[task_type2]); - - // Record same signature push task - ret = task_worker_pool._record_task_info(task_type2, 223456, "user"); - EXPECT_FALSE(ret); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[task_type2].size()); - EXPECT_EQ(1, task_worker_pool._s_total_task_user_count[task_type2]["root"]); - EXPECT_EQ(1, task_worker_pool._s_total_task_count[task_type2]); - - // Record diff signature same user - ret = task_worker_pool._record_task_info(task_type2, 223457, "root"); - EXPECT_TRUE(ret); - EXPECT_EQ(2, task_worker_pool._s_task_signatures[task_type2].size()); - 
EXPECT_EQ(2, task_worker_pool._s_total_task_user_count[task_type2]["root"]); - EXPECT_EQ(2, task_worker_pool._s_total_task_count[task_type2]); - - // Record diff signature diff user - ret = task_worker_pool._record_task_info(task_type2, 223458, "user"); - EXPECT_TRUE(ret); - EXPECT_EQ(3, task_worker_pool._s_task_signatures[task_type2].size()); - EXPECT_EQ(1, task_worker_pool._s_total_task_user_count[task_type2]["user"]); - EXPECT_EQ(3, task_worker_pool._s_total_task_count[task_type2]); -} - -TEST(TaskWorkerPoolTest, TestRemoveTaskInfo) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - TTaskType::type task_type = TTaskType::ROLLUP; - EXPECT_EQ(2, task_worker_pool._s_task_signatures[task_type].size()); - task_worker_pool._remove_task_info(task_type, 123456, "root"); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[task_type].size()); - task_worker_pool._remove_task_info(task_type, 123457, "root"); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[task_type].size()); - - TTaskType::type task_type_push = TTaskType::PUSH; - task_worker_pool._s_running_task_user_count[task_type_push]["root"] = 2; - task_worker_pool._s_running_task_user_count[task_type_push]["user"] = 1; - - EXPECT_EQ(3, task_worker_pool._s_task_signatures[task_type_push].size()); - EXPECT_EQ(2, task_worker_pool._s_total_task_user_count[task_type_push]["root"]); - EXPECT_EQ(1, task_worker_pool._s_total_task_user_count[task_type_push]["user"]); - EXPECT_EQ(3, task_worker_pool._s_total_task_count[task_type_push]); - task_worker_pool._remove_task_info(task_type_push, 223456, "root"); - EXPECT_EQ(2, task_worker_pool._s_task_signatures[task_type_push].size()); - EXPECT_EQ(1, task_worker_pool._s_total_task_user_count[task_type_push]["root"]); - EXPECT_EQ(2, task_worker_pool._s_total_task_count[task_type_push]); - EXPECT_EQ(1, task_worker_pool._s_running_task_user_count[task_type_push]["root"]); - task_worker_pool._remove_task_info(task_type_push, 223457, "root"); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[task_type_push].size()); - EXPECT_EQ(0, task_worker_pool._s_total_task_user_count[task_type_push]["root"]); - EXPECT_EQ(1, task_worker_pool._s_total_task_count[task_type_push]); - EXPECT_EQ(0, task_worker_pool._s_running_task_user_count[task_type_push]["root"]); - task_worker_pool._remove_task_info(task_type_push, 223458, "user"); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[task_type_push].size()); - EXPECT_EQ(0, task_worker_pool._s_total_task_user_count[task_type_push]["user"]); - EXPECT_EQ(0, task_worker_pool._s_total_task_count[task_type_push]); - EXPECT_EQ(0, task_worker_pool._s_running_task_user_count[task_type_push]["user"]); -} - -TEST(TaskWorkerPoolTest, TestGetNextTask) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::PUSH, - &env, - master_info); - - // Add 1 task - int32_t thread_count = 3; - deque tasks; - TAgentTaskRequest task1; - task1.resource_info.user = "root"; - task1.__isset.resource_info = true; - task1.task_type = TTaskType::PUSH; - tasks.push_back(task1); - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["root"] = 1; - task_worker_pool._s_total_task_count[TTaskType::PUSH] = 1; - task_worker_pool._s_running_task_user_count[TTaskType::PUSH]["root"] = 0; - uint32_t ret = task_worker_pool._get_next_task_index(thread_count, tasks, TPriority::NORMAL); - EXPECT_EQ(0, ret); - tasks.erase(tasks.begin() + 0); - - // Add 3 task - 
TAgentTaskRequest task2; - task2.resource_info.user = "root"; - task2.__isset.resource_info = true; - task2.task_type = TTaskType::PUSH; - tasks.push_back(task2); - TAgentTaskRequest task3; - task3.resource_info.user = "root"; - task3.__isset.resource_info = true; - task3.task_type = TTaskType::PUSH; - tasks.push_back(task3); - TAgentTaskRequest task4; - task4.resource_info.user = "user1"; - task4.__isset.resource_info = true; - task4.task_type = TTaskType::PUSH; - tasks.push_back(task4); - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["root"] = 3; - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["user1"] = 1; - task_worker_pool._s_total_task_count[TTaskType::PUSH] = 4; - task_worker_pool._s_running_task_user_count[TTaskType::PUSH]["root"] = 1; - ret = task_worker_pool._get_next_task_index(thread_count, tasks, TPriority::NORMAL); - EXPECT_EQ(0, ret); - tasks.erase(tasks.begin() + 0); - - // Go on - task_worker_pool._s_running_task_user_count[TTaskType::PUSH]["root"] = 2; - ret = task_worker_pool._get_next_task_index(thread_count, tasks, TPriority::NORMAL); - EXPECT_EQ(1, ret); - tasks.erase(tasks.begin() + 1); - - // Add 2 task, 1 root task finished - TAgentTaskRequest task5; - task5.resource_info.user = "user1"; - task5.__isset.resource_info = true; - task5.task_type = TTaskType::PUSH; - tasks.push_back(task5); - TAgentTaskRequest task6; - task6.resource_info.user = "user2"; - task6.__isset.resource_info = true; - task6.task_type = TTaskType::PUSH; - tasks.push_back(task6); - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["root"] = 2; - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["user1"] = 2; - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["user2"] = 1; - task_worker_pool._s_total_task_count[TTaskType::PUSH] = 5; - task_worker_pool._s_running_task_user_count[TTaskType::PUSH]["root"] = 1; - task_worker_pool._s_running_task_user_count[TTaskType::PUSH]["user1"] = 1; - ret = task_worker_pool._get_next_task_index(thread_count, tasks, TPriority::NORMAL); - EXPECT_EQ(2, ret); - tasks.erase(tasks.begin() + 2); - - // User2 task finished, no one task was fit, choose first one - task_worker_pool._s_total_task_user_count[TTaskType::PUSH]["user2"] = 0; - task_worker_pool._s_total_task_count[TTaskType::PUSH] = 4; - ret = task_worker_pool._get_next_task_index(thread_count, tasks, TPriority::NORMAL); - EXPECT_EQ(0, ret); -} - -TEST(TaskWorkerPoolTest, TestFinishTask) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Finish task failed - TFinishTaskRequest finish_task_request; - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(TASK_FINISH_MAX_RETRY) - .WillRepeatedly(Return(DORIS_ERROR)); - task_worker_pool._finish_task(finish_task_request); - - // Finish task success - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._finish_task(finish_task_request); - - task_worker_pool._master_client = original_master_server_client; -} - -#if 0 -TEST(TaskWorkerPoolTest, TestCreateTable) { - TMasterInfo master_info; - 
ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::CREATE; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::CREATE_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Create table failed - EXPECT_CALL(mock_command_executor, create_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._create_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Create table success - EXPECT_CALL(mock_command_executor, create_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._create_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} -#endif - -TEST(TaskWorkerPoolTest, TestDropTableTask) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::DROP; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::DROP_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Drop table failed - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, 
task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._drop_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Drop table success - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._drop_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestSchemaChange) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::SCHEMA_CHANGE; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // New tablet size ok, last schema change status is failed - // Delete failed alter table tablet file failed - TCreateTabletReq create_tablet_req1; - agent_task_request.alter_tablet_req.base_tablet_id = 12345; - agent_task_request.alter_tablet_req.base_schema_hash = 56789; - agent_task_request.alter_tablet_req.__set_new_tablet_req(create_tablet_req1); - - EXPECT_CALL(mock_command_executor, show_alter_table_status( - agent_task_request.alter_tablet_req.base_tablet_id, - agent_task_request.alter_tablet_req.base_schema_hash)) - .Times(1) - .WillOnce(Return(ALTER_TABLE_FAILED)); - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_command_executor, schema_change(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._alter_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // New tablet size ok, last schema change status is failed - // Delete failed alter table tablet file success - // Do schema change failed - EXPECT_CALL(mock_command_executor, show_alter_table_status( - 
agent_task_request.alter_tablet_req.base_tablet_id, - agent_task_request.alter_tablet_req.base_schema_hash)) - .Times(1) - .WillOnce(Return(ALTER_TABLE_FAILED)); - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, schema_change(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._alter_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // New tablet size ok, last schema change status is failed - // Delete failed alter table tablet file success - // Do schema change success, check status failed - EXPECT_CALL(mock_command_executor, show_alter_table_status( - agent_task_request.alter_tablet_req.base_tablet_id, - agent_task_request.alter_tablet_req.base_schema_hash)) - .Times(1) - .WillOnce(Return(ALTER_TABLE_FAILED)); - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, schema_change(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._alter_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // New tablet size ok, last schema change status is ok - // Do schema change success, check status running then success - EXPECT_CALL(mock_command_executor, show_alter_table_status( - agent_task_request.alter_tablet_req.base_tablet_id, - agent_task_request.alter_tablet_req.base_schema_hash)) - .Times(1) - .WillOnce(Return(ALTER_TABLE_FINISHED)); - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(0); - EXPECT_CALL(mock_command_executor, schema_change(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._alter_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestRollup) { - TMasterInfo master_info; - ExecEnv env; - 
TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::ROLLUP; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // New tablet size ok, last rollup status is ok - // Do rollup success, check status running then success - TCreateTabletReq create_tablet_req1; - agent_task_request.alter_tablet_req.base_tablet_id = 12345; - agent_task_request.alter_tablet_req.base_schema_hash = 56789; - agent_task_request.alter_tablet_req.__set_new_tablet_req(create_tablet_req1); - EXPECT_CALL(mock_command_executor, show_alter_table_status( - agent_task_request.alter_tablet_req.base_tablet_id, - agent_task_request.alter_tablet_req.base_schema_hash)) - .Times(1) - .WillOnce(Return(ALTER_TABLE_FINISHED)); - EXPECT_CALL(mock_command_executor, drop_table(_)) - .Times(0); - EXPECT_CALL(mock_command_executor, create_rollup_table(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._alter_table_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestPush) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::PUSH; - agent_task_request.signature = 123456; - agent_task_request.__set_priority(TPriority::HIGH); - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::PUSH, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - TPushReq push_req; - MockPusher mock_pusher(push_req); - Pusher* original_pusher = task_worker_pool._pusher; - task_worker_pool._pusher = &mock_pusher; - - // Push type load, push init failed - agent_task_request.push_req.push_type = TPushType::LOAD; - 
EXPECT_CALL(mock_pusher, init()) - .Times(1) - .WillOnce(Return(DORIS_ERROR)); - EXPECT_CALL(mock_pusher, process(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._push_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Push type load, push init success, push failed - EXPECT_CALL(mock_pusher, init()) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_pusher, process(_)) - .Times(PUSH_MAX_RETRY) - .WillRepeatedly(Return(DORIS_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._push_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Push type load, push init success, push success - EXPECT_CALL(mock_pusher, init()) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_pusher, process(_)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._push_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Push type delete, delete failed - agent_task_request.push_req.push_type = TPushType::DELETE; - EXPECT_CALL(mock_command_executor, delete_data(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._push_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Push type delete, delete success - EXPECT_CALL(mock_command_executor, delete_data(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._push_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = 
original_command_executor; - task_worker_pool._master_client = original_master_server_client; - task_worker_pool._pusher = original_pusher; -} - -TEST(TaskWorkerPoolTest, TestPublishVersionTask) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::PUBLISH_VERSION; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::PUBLISH_VERSION, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // publish version failed - EXPECT_CALL(mock_command_executor, publish_version(_, _)) - .Times(3) - .WillRepeatedly(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._publish_version_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // publish version success - EXPECT_CALL(mock_command_executor, publish_version(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._publish_version_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestClone) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::CLONE; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::CLONE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - TBackend backend; - MockAgentServerClient mock_agent_server_client(backend); - AgentServerClient* original_agent_server_client; - 
original_agent_server_client = task_worker_pool._agent_client; - task_worker_pool._agent_client = &mock_agent_server_client; - FileDownloader::FileDownloaderParam param; - MockFileDownloader mock_file_downloader(param); - FileDownloader* original_file_downloader_ptr; - original_file_downloader_ptr = task_worker_pool._file_downloader_ptr; - task_worker_pool._file_downloader_ptr = &mock_file_downloader; - MockAgentUtils mock_agent_utils; - AgentUtils* original_agent_utils; - original_agent_utils = task_worker_pool._agent_utils; - task_worker_pool._agent_utils = &mock_agent_utils; - - // Tablet has exist - // incremental clone's make snapshot failed - // full clone's make snapshot failed - TCloneReq clone_req; - TBackend backend1; - TBackend backend2; - TBackend backend3; - clone_req.src_backends.push_back(backend1); - clone_req.src_backends.push_back(backend2); - clone_req.src_backends.push_back(backend3); - clone_req.tablet_id = 123; - clone_req.schema_hash = 456; - - TAgentResult agent_result; - agent_result.status.status_code = TStatusCode::INTERNAL_ERROR; - agent_task_request.__set_clone_req(clone_req); - - TSnapshotRequest snapshot_request; - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - - TSnapshotRequest snapshot_request2; - snapshot_request2.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request2.__set_schema_hash(agent_task_request.clone_req.schema_hash); - std::vector missing_versions; - snapshot_request2.__set_missing_version(missing_versions); - - std::shared_ptr olap_table_ok(new OLAPTable(NULL, nullptr)); - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_ok)); - EXPECT_CALL(mock_command_executor, get_info_before_incremental_clone(_, _, _)) - .Times(1); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request2, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(0); - EXPECT_CALL(mock_command_executor, finish_clone(_, _, _, _)) - .Times(0); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet has exist - // incremental clone's make snapshot success - // incremental clone failed - TAgentResult agent_result2; - agent_result2.__set_snapshot_path("path"); - agent_result2.status.status_code = TStatusCode::OK; - - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_ok)); - EXPECT_CALL(mock_command_executor, 
get_info_before_incremental_clone(_, _, _)) - .Times(1) - .WillOnce(Return("./test_data/5/6")); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request2, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - uint64_t file_size = 4; - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, finish_clone(_, _, _, _)) - .Times(1) - .WillOnce(Return(OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet has exist - // incremental clone success - // get tablet info failed - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_ok)); - EXPECT_CALL(mock_command_executor, get_info_before_incremental_clone(_, _, _)) - .Times(1) - .WillOnce(Return("./test_data/5/6")); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request2, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, finish_clone(_, _, _, _)) - .Times(1) - .WillOnce(Return(OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet has exist - // incremental clone's make snapshot failed - // full clone's make snapshot success - // full clone failed - 
EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_ok)); - EXPECT_CALL(mock_command_executor, get_info_before_incremental_clone(_, _, _)) - .Times(1) - .WillOnce(Return("./test_data/5/6")); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request2, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, finish_clone(_, _, _, _)) - .Times(1) - .WillOnce(Return(OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet has exist - // incremental clone's make snapshot failed - // full clone's make snapshot success - // full clone success - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_ok)); - EXPECT_CALL(mock_command_executor, get_info_before_incremental_clone(_, _, _)) - .Times(1) - .WillOnce(Return("./test_data/5/6")); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request2, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, finish_clone(_, _, _, _)) - .Times(1) - .WillOnce(Return(OLAP_SUCCESS)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - 
.WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path failed, do not get tablet info - std::shared_ptr olap_table_null(NULL); - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot failed - agent_result2.__isset.snapshot_path = false; - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(clone_req.src_backends.size()) - .WillOnce(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))) - .WillOnce(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))) - .WillOnce(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(0); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir failed - clone_req.tablet_id = 5; - clone_req.schema_hash = 6; - agent_task_request.__set_clone_req(clone_req); - agent_result2.__set_snapshot_path("path"); - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - 
snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(clone_req.src_backends.size() * DOWNLOAD_FILE_MAX_RETRY) - .WillRepeatedly(Return(DORIS_ERROR)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir success, get remote file length failed - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(clone_req.src_backends.size() * DOWNLOAD_FILE_MAX_RETRY) - .WillRepeatedly(Return(DORIS_ERROR)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir success, get remote file length success - // Download file failed - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) 
- .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(clone_req.src_backends.size() * DOWNLOAD_FILE_MAX_RETRY) - .WillRepeatedly(Return(DORIS_ERROR)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir success, get remote file length success - // Download file success, but file size is wrong - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - file_size = 5; - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(clone_req.src_backends.size() * DOWNLOAD_FILE_MAX_RETRY) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(clone_req.src_backends.size()) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - 
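The removed clone cases above all exercise the same gmock retry pattern: the expected call count for the retried step is scaled by DOWNLOAD_FILE_MAX_RETRY, and WillRepeatedly() pins every attempt to the same failure result, so the test proves the worker really retries the configured number of times before giving up. A minimal standalone sketch of that pattern follows; Downloader, MockDownloader, kMaxRetry and the Status enum are illustrative stand-ins, not the Doris classes used in this diff.

// Standalone sketch of the retry-count expectation used by the removed clone tests.
// Downloader, MockDownloader and kMaxRetry are hypothetical names, not Doris APIs.
#include <gmock/gmock.h>
#include <gtest/gtest.h>

using ::testing::Return;

enum Status { KSUCCESS, KERROR };   // stand-in for DORIS_SUCCESS / DORIS_ERROR

class Downloader {
public:
    virtual ~Downloader() {}
    virtual Status download_file() = 0;
};

class MockDownloader : public Downloader {
public:
    MOCK_METHOD0(download_file, Status());
};

const int kMaxRetry = 3;            // plays the role of DOWNLOAD_FILE_MAX_RETRY

// Retries download_file() up to kMaxRetry times, mirroring the worker's retry loop.
Status download_with_retry(Downloader* downloader) {
    Status status = KERROR;
    for (int i = 0; i < kMaxRetry && status != KSUCCESS; ++i) {
        status = downloader->download_file();
    }
    return status;
}

TEST(RetryPatternSketch, FailsAfterMaxRetries) {
    MockDownloader mock;
    // The expectation counts every retry, exactly like
    // .Times(src_backends.size() * DOWNLOAD_FILE_MAX_RETRY) in the removed test.
    EXPECT_CALL(mock, download_file())
        .Times(kMaxRetry)
        .WillRepeatedly(Return(KERROR));
    EXPECT_EQ(KERROR, download_with_retry(&mock));
}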
EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir success, get remote file length success - // Download file success, load header failed - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(1) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillRepeatedly( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - file_size = 4; - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_command_executor, load_header(_, _, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(0); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir success, get remote file length success - // Download file success, load header success - // Release snapshot failed, get tablet info failed - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(1) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillRepeatedly( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), 
Return(DORIS_SUCCESS))); - file_size = 4; - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_command_executor, load_header(_, _, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_ERROR))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Tablet not exist, obtain root path success, make snapshot success - // List remote dir success, get remote file length success - // Download file success, load header success - // Release snapshot success, get tablet info success - EXPECT_CALL(mock_command_executor, get_table( - agent_task_request.clone_req.tablet_id, - agent_task_request.clone_req.schema_hash)) - .Times(1) - .WillOnce(Return(olap_table_null)); - EXPECT_CALL(mock_command_executor, obtain_shard_path(_, _)) - .Times(1) - .WillOnce( - DoAll(SetArgPointee<1>("./test_data"), - Return(OLAPStatus::OLAP_SUCCESS))); - snapshot_request.__set_tablet_id(agent_task_request.clone_req.tablet_id); - snapshot_request.__set_schema_hash(agent_task_request.clone_req.schema_hash); - EXPECT_CALL(mock_agent_server_client, make_snapshot(snapshot_request, _)) - .Times(1) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result2), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, list_file_dir(_)) - .Times(1) - .WillRepeatedly( - DoAll(SetArgPointee<0>("1.hdr\n1.idx\n1.dat"), Return(DORIS_SUCCESS))); - file_size = 4; - EXPECT_CALL(mock_file_downloader, get_length(_)) - .Times(3) - .WillRepeatedly(DoAll(SetArgPointee<0>(file_size), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_file_downloader, download_file()) - .Times(3) - .WillRepeatedly(Return(DORIS_SUCCESS)); - EXPECT_CALL(mock_command_executor, load_header(_, _, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_agent_server_client, release_snapshot(_, _)) - .Times(1) - .WillRepeatedly(DoAll(SetArgPointee<1>(agent_result), Return(DORIS_SUCCESS))); - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._clone_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; - 
task_worker_pool._agent_client = original_agent_server_client; - task_worker_pool._agent_utils = original_agent_utils; - task_worker_pool._file_downloader_ptr = original_file_downloader_ptr; -} - -TEST(TaskWorkerPoolTest, TestCancelDeleteData) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::CANCEL_DELETE; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::CANCEL_DELETE_DATA, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Cancel delete failed - EXPECT_CALL(mock_command_executor, cancel_delete(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._cancel_delete_data_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // Cancel delete success - EXPECT_CALL(mock_command_executor, cancel_delete(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - task_worker_pool._cancel_delete_data_worker_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestReportTask) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::SCHEMA_CHANGE; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Report failed - EXPECT_CALL(mock_master_server_client, report(_, _)) - 
.Times(1) - .WillOnce(Return(DORIS_ERROR)); - task_worker_pool._report_task_worker_thread_callback(&task_worker_pool); - - // Report success - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._report_task_worker_thread_callback(&task_worker_pool); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestReportDiskState) { - TMasterInfo master_info; - ExecEnv env; - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::SCHEMA_CHANGE; - agent_task_request.signature = 123456; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Get root path failed, report failed -#if 0 - EXPECT_CALL(mock_command_executor, get_all_root_path_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(0); - task_worker_pool._report_disk_state_worker_thread_callback(&task_worker_pool); -#endif - - // Get root path success, report failed - EXPECT_CALL(mock_command_executor, get_all_root_path_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(1) - .WillOnce(Return(DORIS_ERROR)); - task_worker_pool._report_disk_state_worker_thread_callback(&task_worker_pool); - - // Get root path success, report success - EXPECT_CALL(mock_command_executor, get_all_root_path_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._report_disk_state_worker_thread_callback(&task_worker_pool); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestReportOlapTable) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - // Get tablet info failed, report failed - EXPECT_CALL(mock_command_executor, report_all_tablets_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(0); - 
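The report tests above also rely on gmock's zero-count expectation: when the collection step fails, the downstream report(_, _) call is declared with .Times(0), so any stray call to the master client fails the test immediately. A small standalone sketch of that idea, using a hypothetical Reporter interface rather than the Doris mocks, is:

// Standalone sketch of the .Times(0) negative expectation used by the removed report tests.
// Reporter, MockReporter and collect_then_report are hypothetical names, not Doris APIs.
#include <gmock/gmock.h>
#include <gtest/gtest.h>

using ::testing::Return;

enum Status { KSUCCESS, KERROR };   // stand-in for the OLAP_* / DORIS_* result codes

class Reporter {
public:
    virtual ~Reporter() {}
    virtual Status collect() = 0;
    virtual Status report() = 0;
};

class MockReporter : public Reporter {
public:
    MOCK_METHOD0(collect, Status());
    MOCK_METHOD0(report, Status());
};

// Reports only when collection succeeded, mirroring the worker callbacks above.
Status collect_then_report(Reporter* reporter) {
    if (reporter->collect() != KSUCCESS) {
        return KERROR;
    }
    return reporter->report();
}

TEST(NegativeExpectationSketch, ReportSkippedWhenCollectFails) {
    MockReporter mock;
    EXPECT_CALL(mock, collect())
        .Times(1)
        .WillOnce(Return(KERROR));
    // .Times(0) asserts the downstream call never happens on the failure path.
    EXPECT_CALL(mock, report())
        .Times(0);
    EXPECT_EQ(KERROR, collect_then_report(&mock));
}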
task_worker_pool._report_olap_table_worker_thread_callback(&task_worker_pool); - - // Get tablet info success, report failed - EXPECT_CALL(mock_command_executor, report_all_tablets_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(1) - .WillOnce(Return(DORIS_ERROR)); - task_worker_pool._report_olap_table_worker_thread_callback(&task_worker_pool); - - // Get tablet info success, report success - EXPECT_CALL(mock_command_executor, report_all_tablets_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, report(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._report_olap_table_worker_thread_callback(&task_worker_pool); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestMakeSnapshot) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::MAKE_SNAPSHOT, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::MAKE_SNAPSHOT; - agent_task_request.signature = 123456; - - // make snapshot failed - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - EXPECT_CALL(mock_command_executor, make_snapshot(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._make_snapshot_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // make snapshot success - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - EXPECT_CALL(mock_command_executor, make_snapshot(_, _)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._make_snapshot_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestReleaseSnapshot) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::RELEASE_SNAPSHOT, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor 
= task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - FrontendServiceClientCache* client_cache = new FrontendServiceClientCache(); - MockMasterServerClient mock_master_server_client(master_info, client_cache); - MasterServerClient* original_master_server_client; - original_master_server_client = task_worker_pool._master_client; - task_worker_pool._master_client = &mock_master_server_client; - - TAgentTaskRequest agent_task_request; - agent_task_request.task_type = TTaskType::RELEASE_SNAPSHOT; - agent_task_request.signature = 123456; - - // make snapshot failed - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - EXPECT_CALL(mock_command_executor, release_snapshot(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._release_snapshot_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(0, task_worker_pool._tasks.size()); - - // make snapshot success - task_worker_pool.submit_task(agent_task_request); - EXPECT_EQ(1, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - EXPECT_EQ(1, task_worker_pool._tasks.size()); - EXPECT_CALL(mock_command_executor, release_snapshot(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - EXPECT_CALL(mock_master_server_client, finish_task(_, _)) - .Times(1) - .WillOnce(Return(DORIS_SUCCESS)); - task_worker_pool._release_snapshot_thread_callback(&task_worker_pool); - EXPECT_EQ(0, task_worker_pool._s_task_signatures[agent_task_request.task_type].size()); - - task_worker_pool._command_executor = original_command_executor; - task_worker_pool._master_client = original_master_server_client; -} - -TEST(TaskWorkerPoolTest, TestShowAlterTableStatus) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - - // Get tablet info failed - TTabletInfo tablet_info; - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - AgentStatus status = task_worker_pool._get_tablet_info(1, 2, 123456, &tablet_info); - EXPECT_EQ(DORIS_ERROR, status); - - // Get tablet info success - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - status = task_worker_pool._get_tablet_info(1, 2, 123456, &tablet_info); - EXPECT_EQ(DORIS_SUCCESS, status); - - task_worker_pool._command_executor = original_command_executor; -} - -TEST(TaskWorkerPoolTest, TestDropTable) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - - TTabletId tablet_id = 123; - TSchemaHash schema_hash = 456; - 
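Every one of these removed tests follows the same substitute-and-restore idiom: save the pool's real collaborator pointer (_command_executor, _master_client, ...), swap in a mock for the duration of the test body, and put the original back before returning. A standalone sketch of that idiom is below; Worker, Executor and ScopedSwap are hypothetical names, and the RAII guard is one possible refinement rather than the tests' literal save/restore statements.

// Standalone sketch of the "swap in a mock, restore the original" idiom used throughout
// these tests. Worker, Executor, StubExecutor and ScopedSwap are hypothetical names.
#include <gmock/gmock.h>
#include <gtest/gtest.h>

using ::testing::Return;

class Executor {
public:
    virtual ~Executor() {}
    virtual int report_info() = 0;
};

class StubExecutor : public Executor {
public:
    int report_info() override { return 0; }
};

class MockExecutor : public Executor {
public:
    MOCK_METHOD0(report_info, int());
};

struct Worker {
    Executor* executor;                       // collaborator the tests replace
    int run() { return executor->report_info(); }
};

// RAII helper: substitutes a replacement and restores the original pointer on scope exit.
class ScopedSwap {
public:
    ScopedSwap(Executor** slot, Executor* replacement)
            : _slot(slot), _original(*slot) {
        *_slot = replacement;
    }
    ~ScopedSwap() { *_slot = _original; }

private:
    Executor** _slot;
    Executor* _original;
};

TEST(SwapIdiomSketch, RestoresOriginalExecutor) {
    StubExecutor real;
    Worker worker{&real};
    MockExecutor mock;
    {
        ScopedSwap guard(&worker.executor, &mock);
        EXPECT_CALL(mock, report_info())
            .Times(1)
            .WillOnce(Return(42));
        EXPECT_EQ(42, worker.run());
    }
    EXPECT_EQ(&real, worker.executor);        // original pointer is back in place
}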
EXPECT_CALL(mock_command_executor, show_alter_table_status(tablet_id, schema_hash)) - .Times(1) - .WillOnce(Return(ALTER_TABLE_RUNNING)); - AlterTableStatus status = task_worker_pool._show_alter_table_status(tablet_id, schema_hash); - EXPECT_EQ(ALTER_TABLE_RUNNING, status); - - task_worker_pool._command_executor = original_command_executor; -} - -TEST(TaskWorkerPoolTest, TestGetTabletInfo) { - TMasterInfo master_info; - ExecEnv env; - TaskWorkerPool task_worker_pool( - TaskWorkerPool::TaskWorkerType::ALTER_TABLE, - &env, - master_info); - - MockCommandExecutor mock_command_executor; - CommandExecutor* original_command_executor; - original_command_executor = task_worker_pool._command_executor; - task_worker_pool._command_executor = &mock_command_executor; - - // Report tablet info failed - TTabletInfo tablet_info; - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_ERR_OTHER_ERROR)); - AgentStatus status = task_worker_pool._get_tablet_info(1, 2, 123456, &tablet_info); - EXPECT_EQ(DORIS_ERROR, status); - - // Report tablet info success - EXPECT_CALL(mock_command_executor, report_tablet_info(_)) - .Times(1) - .WillOnce(Return(OLAPStatus::OLAP_SUCCESS)); - status = task_worker_pool._get_tablet_info(1, 2, 123456, &tablet_info); - EXPECT_EQ(DORIS_SUCCESS, status); - - task_worker_pool._command_executor = original_command_executor; -} - -} - -int main(int argc, char** argv) { - std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; - if (!doris::config::init(conffile.c_str(), false)) { - fprintf(stderr, "error read config file. \n"); - return -1; - } - doris::init_glog("be-test"); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index 76d7cecf04d552..709442e2164a61 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -46,13 +46,13 @@ endif() ADD_BE_TEST(broker_reader_test) ADD_BE_TEST(broker_scanner_test) ADD_BE_TEST(broker_scan_node_test) +ADD_BE_TEST(tablet_info_test) +ADD_BE_TEST(tablet_sink_test) ADD_BE_TEST(es_scan_node_test) ADD_BE_TEST(es_http_scan_node_test) ADD_BE_TEST(es_predicate_test) ADD_BE_TEST(es_query_builder_test) ADD_BE_TEST(es_scan_reader_test) -ADD_BE_TEST(olap_table_info_test) -ADD_BE_TEST(olap_table_sink_test) #ADD_BE_TEST(schema_scan_node_test) #ADD_BE_TEST(schema_scanner_test) ##ADD_BE_TEST(set_executor_test) diff --git a/be/test/exec/new_olap_scan_node_test.cpp b/be/test/exec/new_olap_scan_node_test.cpp index 03fe6bdfad00d1..b59bcc15d08699 100644 --- a/be/test/exec/new_olap_scan_node_test.cpp +++ b/be/test/exec/new_olap_scan_node_test.cpp @@ -52,7 +52,7 @@ class TestOlapScanNode : public testing::Test { } void TearDown() { - OLAPEngine::get_instance()->clear(); + StorageEngine::get_instance()->clear(); SessionManager::get_instance()->delete_session_by_fd(123); system("rm -rf ./testrun"); @@ -71,19 +71,17 @@ class TestOlapScanNode : public testing::Test { unused_flag_path.c_str(), unused_flag_path.size()); - OLAPRootPath::get_instance()->init(); + StorageEngine::get_instance()->_lru_cache = newLRU_cache(10000); - OLAPEngine::get_instance()->_lru_cache = newLRU_cache(10000); - - _olap_header = new - OLAPHeader("./testrun/case3/clickuserid_online_userid_type_planid_unitid_winfoid.hdr"); - _olap_header->load(); - _olap_table = new OLAPTable(_olap_header); - _olap_table->load_indices(); - _olap_table->_root_path_name = "./testrun/case3"; + _tablet_meta = new + 
TabletMeta("./testrun/case3/clickuserid_online_userid_type_planid_unitid_winfoid.hdr"); + _tablet_meta->load(); + tablet = new Tablet(_tablet_meta); + tablet->load_indices(); + tablet->_root_path_name = "./testrun/case3"; TableDescription description("fc", "clickuserid_online", "userid_type_planid_unitid_winfoid"); - OLAPEngine::get_instance()->add_table(description, _olap_table); + StorageEngine::get_instance()->add_table(description, tablet); // init session manager SessionManager::get_instance()->init(); @@ -272,8 +270,8 @@ class TestOlapScanNode : public testing::Test { } private: - OLAPHeader* _olap_header; - OLAPTable* _olap_table; + TabletMeta* _tablet_meta; + Tablet* tablet; TPlanNode _tnode; ObjectPool _obj_pool; diff --git a/be/test/exec/olap_table_info_test.cpp b/be/test/exec/tablet_info_test.cpp similarity index 99% rename from be/test/exec/olap_table_info_test.cpp rename to be/test/exec/tablet_info_test.cpp index 5360e466701169..3c60e1d50637d5 100644 --- a/be/test/exec/olap_table_info_test.cpp +++ b/be/test/exec/tablet_info_test.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "exec/olap_table_info.h" +#include "exec/tablet_info.h" #include diff --git a/be/test/exec/olap_table_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp similarity index 99% rename from be/test/exec/olap_table_sink_test.cpp rename to be/test/exec/tablet_sink_test.cpp index a63278803ea237..0033a587db0c74 100644 --- a/be/test/exec/olap_table_sink_test.cpp +++ b/be/test/exec/tablet_sink_test.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "exec/olap_table_sink.h" +#include "exec/tablet_sink.h" #include diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 780a5e20c9fad8..99233d931e9ebb 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -42,7 +42,6 @@ ADD_BE_TEST(skiplist_test) ADD_BE_TEST(delta_writer_test) ADD_BE_TEST(serialize_test) ADD_BE_TEST(olap_meta_test) -ADD_BE_TEST(olap_header_manager_test) ADD_BE_TEST(field_info_test) ADD_BE_TEST(rowset/segment_v2/bitshuffle_page_test) ADD_BE_TEST(rowset/segment_v2/plain_page_test) @@ -51,3 +50,10 @@ ADD_BE_TEST(rowset/segment_v2/column_reader_writer_test) ADD_BE_TEST(rowset/segment_v2/encoding_info_test) ADD_BE_TEST(rowset/segment_v2/ordinal_page_index_test) ADD_BE_TEST(rowset/segment_v2/rle_page_test) +ADD_BE_TEST(tablet_meta_manager_test) +ADD_BE_TEST(tablet_mgr_test) +ADD_BE_TEST(rowset/rowset_meta_manager_test) +ADD_BE_TEST(rowset/rowset_meta_test) +ADD_BE_TEST(rowset/alpha_rowset_test) +ADD_BE_TEST(olap_snapshot_converter_test) +ADD_BE_TEST(txn_manager_test) diff --git a/be/test/olap/bit_field_test.cpp b/be/test/olap/bit_field_test.cpp index 1c603120fdc15d..6ec41573c320f1 100755 --- a/be/test/olap/bit_field_test.cpp +++ b/be/test/olap/bit_field_test.cpp @@ -20,8 +20,8 @@ #include "olap/byte_buffer.h" #include "olap/out_stream.h" #include "olap/in_stream.h" -#include "olap/bit_field_reader.h" -#include "olap/bit_field_writer.h" +#include "olap/rowset/bit_field_reader.h" +#include "olap/rowset/bit_field_writer.h" #include "util/logging.h" namespace doris { diff --git a/be/test/olap/column_reader_test.cpp b/be/test/olap/column_reader_test.cpp index 3574e6f794c1c5..60e53e483fd315 100644 --- a/be/test/olap/column_reader_test.cpp +++ b/be/test/olap/column_reader_test.cpp @@ -19,12 +19,13 @@ #include "olap/byte_buffer.h" #include "olap/stream_name.h" -#include "olap/column_reader.h" 
-#include "olap/column_writer.h" +#include "olap/rowset/column_reader.h" +#include "olap/rowset/column_writer.h" #include "olap/field.h" #include "olap/olap_define.h" #include "olap/olap_common.h" #include "olap/row_cursor.h" +#include "olap/row_block.h" #include "runtime/mem_pool.h" #include "runtime/string_value.hpp" #include "runtime/vectorized_row_batch.h" @@ -98,7 +99,7 @@ class TestColumn : public testing::Test { _length_buffers.clear(); } - void CreateColumnWriter(const std::vector &tablet_schema) { + void CreateColumnWriter(const TabletSchema& tablet_schema) { _column_writer = ColumnWriter::create( 0, tablet_schema, _stream_factory, 1024, BLOOM_FILTER_DEFAULT_FPP); @@ -106,7 +107,7 @@ class TestColumn : public testing::Test { ASSERT_EQ(_column_writer->init(), OLAP_SUCCESS); } - void CreateColumnReader(const std::vector &tablet_schema) { + void CreateColumnReader(const TabletSchema& tablet_schema) { UniqueIdEncodingMap encodings; encodings[0] = ColumnEncodingMessage(); encodings[0].set_kind(ColumnEncodingMessage::DIRECT); @@ -115,7 +116,7 @@ class TestColumn : public testing::Test { } void CreateColumnReader( - const std::vector &tablet_schema, + const TabletSchema& tablet_schema, UniqueIdEncodingMap &encodings) { UniqueIdToColumnIdMap included; included[0] = 0; @@ -123,10 +124,10 @@ class TestColumn : public testing::Test { segment_included[0] = 0; _column_reader = ColumnReader::create(0, - tablet_schema, - included, - segment_included, - encodings); + tablet_schema, + included, + segment_included, + encodings); ASSERT_TRUE(_column_reader != NULL); @@ -200,29 +201,31 @@ class TestColumn : public testing::Test { } ASSERT_EQ(_column_reader->init( - &_map_in_streams, - 1024, - _mem_pool.get(), - &_stats), OLAP_SUCCESS); + &_map_in_streams, + 1024, + _mem_pool.get(), + &_stats), OLAP_SUCCESS); } - void SetFieldInfo(FieldInfo &field_info, - std::string name, - FieldType type, - FieldAggregationMethod aggregation, - uint32_t length, - bool is_allow_null, - bool is_key) { - field_info.name = name; - field_info.type = type; - field_info.aggregation = aggregation; - field_info.length = length; - field_info.is_allow_null = is_allow_null; - field_info.is_key = is_key; - field_info.precision = 1000; - field_info.frac = 10000; - field_info.unique_id = 0; - field_info.is_bf_column = false; + void SetTabletSchemaWithOneColumn(std::string name, + std::string type, + std::string aggregation, + uint32_t length, + bool is_allow_null, + bool is_key, TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + ColumnPB* column = tablet_schema_pb.add_column(); + column->set_unique_id(0); + column->set_name(name); + column->set_type(type); + column->set_is_key(is_key); + column->set_is_nullable(is_allow_null); + column->set_length(length); + column->set_aggregation(aggregation); + column->set_precision(1000); + column->set_frac(1000); + column->set_is_bf_column(false); + tablet_schema->init_from_pb(tablet_schema_pb); } void create_and_save_last_position() { @@ -261,23 +264,20 @@ class TestColumn : public testing::Test { TEST_F(TestColumn, VectorizedTinyColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); 
RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -317,23 +317,22 @@ TEST_F(TestColumn, VectorizedTinyColumnWithoutPresent) { TEST_F(TestColumn, SeekTinyColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - false, - true); - tablet_schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + false, + true, + &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -404,23 +403,22 @@ TEST_F(TestColumn, SeekTinyColumnWithoutPresent) { TEST_F(TestColumn, SkipTinyColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - false, - true); - tablet_schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + false, + true, + &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -460,23 +458,21 @@ TEST_F(TestColumn, SkipTinyColumnWithoutPresent) { TEST_F(TestColumn, VectorizedTinyColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + true, + true, + &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -514,23 +510,21 @@ TEST_F(TestColumn, VectorizedTinyColumnWithPresent) { TEST_F(TestColumn, TinyColumnIndex) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + true, + true, + &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -568,23 +562,21 @@ TEST_F(TestColumn, TinyColumnIndex) { TEST_F(TestColumn, SeekTinyColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema 
tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + true, + true, + &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -654,23 +646,20 @@ TEST_F(TestColumn, SeekTinyColumnWithPresent) { TEST_F(TestColumn, SkipTinyColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("TinyColumn"), - OLAP_FIELD_TYPE_TINYINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 1, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "TinyColumn", + "TINYINT", + "REPLACE", + 1, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -710,23 +699,20 @@ TEST_F(TestColumn, SkipTinyColumnWithPresent) { TEST_F(TestColumn, VectorizedShortColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("ShortColumn"), - OLAP_FIELD_TYPE_SMALLINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 2, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "ShortColumn", + "SMALLINT", + "REPLACE", + 2, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -764,23 +750,20 @@ TEST_F(TestColumn, VectorizedShortColumnWithoutPresent) { TEST_F(TestColumn, SeekShortColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("ShortColumn"), - OLAP_FIELD_TYPE_SMALLINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 2, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "ShortColumn", + "SMALLINT", + "REPLACE", + 2, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -849,23 +832,20 @@ TEST_F(TestColumn, SeekShortColumnWithoutPresent) { TEST_F(TestColumn, SkipShortColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("ShortColumn"), - OLAP_FIELD_TYPE_SMALLINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 2, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "ShortColumn", + "SMALLINT", + "REPLACE", + 2, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -905,23 +885,20 @@ TEST_F(TestColumn, SkipShortColumnWithoutPresent) { TEST_F(TestColumn, SeekShortColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - 
SetFieldInfo(field_info, - std::string("ShortColumn"), - OLAP_FIELD_TYPE_SMALLINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 2, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "ShortColumn", + "SMALLINT", + "REPLACE", + 2, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -987,23 +964,21 @@ TEST_F(TestColumn, SeekShortColumnWithPresent) { TEST_F(TestColumn, VectorizedShortColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("ShortColumn"), - OLAP_FIELD_TYPE_SMALLINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 2, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "ShortColumn", + "SMALLINT", + "REPLACE", + 2, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -1043,23 +1018,20 @@ TEST_F(TestColumn, VectorizedShortColumnWithPresent) { TEST_F(TestColumn, SkipShortColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("ShortColumn"), - OLAP_FIELD_TYPE_SMALLINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 2, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "ShortColumn", + "SMALLINT", + "REPLACE", + 2, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -1098,23 +1070,20 @@ TEST_F(TestColumn, SkipShortColumnWithPresent) { TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("IntColumn"), - OLAP_FIELD_TYPE_INT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "IntColumn", + "INT", + "REPLACE", + 4, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; block.init(block_info); @@ -1152,23 +1121,20 @@ TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) { TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("IntColumn"), - OLAP_FIELD_TYPE_INT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "IntColumn", + "INT", + "REPLACE", + 4, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ 
-1208,23 +1174,20 @@ TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) { TEST_F(TestColumn, VectorizedIntColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("IntColumn"), - OLAP_FIELD_TYPE_INT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "IntColumn", + "INT", + "REPLACE", + 4, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1265,23 +1228,20 @@ TEST_F(TestColumn, VectorizedIntColumnWithPresent) { TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("LongColumnWithoutPresent"), - OLAP_FIELD_TYPE_BIGINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 8, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "LongColumnWithoutPresent", + "BIGINT", + "REPLACE", + 8, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1320,23 +1280,20 @@ TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) { TEST_F(TestColumn, VectorizedLongColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("LongColumnWithPresent"), - OLAP_FIELD_TYPE_BIGINT, - OLAP_FIELD_AGGREGATION_REPLACE, - 8, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "LongColumnWithPresent", + "BIGINT", + "REPLACE", + 8, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1377,23 +1334,20 @@ TEST_F(TestColumn, VectorizedLongColumnWithPresent) { TEST_F(TestColumn, VectorizedFloatColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("FloatColumnWithoutPresent"), - OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "FloatColumnWithoutPresent", + "FLOAT", + "REPLACE", + 4, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1433,23 +1387,21 @@ TEST_F(TestColumn, VectorizedFloatColumnWithoutPresent) { TEST_F(TestColumn, VectorizedFloatColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("FloatColumnWithPresent"), - OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + 
"FloatColumnWithPresent", + "FLOAT", + "REPLACE", + 4, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1490,23 +1442,21 @@ TEST_F(TestColumn, VectorizedFloatColumnWithPresent) { TEST_F(TestColumn, SeekFloatColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("FloatColumnWithPresent"), - OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "FloatColumnWithPresent", + "FLOAT", + "REPLACE", + 4, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1563,23 +1513,21 @@ TEST_F(TestColumn, SeekFloatColumnWithPresent) { TEST_F(TestColumn, SkipFloatColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("FloatColumnWithPresent"), - OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, - 4, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "FloatColumnWithPresent", + "FLOAT", + "REPLACE", + 4, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1616,23 +1564,21 @@ TEST_F(TestColumn, SkipFloatColumnWithPresent) { TEST_F(TestColumn, VectorizedDoubleColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DoubleColumnWithoutPresent"), - OLAP_FIELD_TYPE_DOUBLE, - OLAP_FIELD_AGGREGATION_REPLACE, - 8, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DoubleColumnWithoutPresent", + "DOUBLE", + "REPLACE", + 8, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1672,23 +1618,21 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithoutPresent) { TEST_F(TestColumn, VectorizedDoubleColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DoubleColumnWithPresent"), - OLAP_FIELD_TYPE_DOUBLE, - OLAP_FIELD_AGGREGATION_REPLACE, - 8, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DoubleColumnWithPresent", + "DOUBLE", + "REPLACE", + 8, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1730,23 +1674,21 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithPresent) { TEST_F(TestColumn, 
VectorizedDatetimeColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DatetimeColumnWithoutPresent"), - OLAP_FIELD_TYPE_DATETIME, - OLAP_FIELD_AGGREGATION_REPLACE, - 8, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DatetimeColumnWithoutPresent", + "DATETIME", + "REPLACE", + 8, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1779,23 +1721,21 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithoutPresent) { TEST_F(TestColumn, VectorizedDatetimeColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DatetimeColumnWithoutPresent"), - OLAP_FIELD_TYPE_DATETIME, - OLAP_FIELD_AGGREGATION_REPLACE, - 8, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DatetimeColumnWithoutPresent", + "DATETIME", + "REPLACE", + 8, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1843,23 +1783,21 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithPresent) { TEST_F(TestColumn, VectorizedDateColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DateColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_DATE, - OLAP_FIELD_AGGREGATION_REPLACE, - 3, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DateColumnWithoutoutPresent", + "DATE", + "REPLACE", + 3, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1891,23 +1829,21 @@ TEST_F(TestColumn, VectorizedDateColumnWithoutPresent) { TEST_F(TestColumn, VectorizedDateColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DateColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_DATE, - OLAP_FIELD_AGGREGATION_REPLACE, - 3, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DateColumnWithoutoutPresent", + "DATE", + "REPLACE", + 3, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -1955,23 +1891,21 @@ TEST_F(TestColumn, VectorizedDateColumnWithPresent) { TEST_F(TestColumn, VectorizedDecimalColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DecimalColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, - 12, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( 
+ "DecimalColumnWithoutoutPresent", + "DECIMAL", + "REPLACE", + 12, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2016,23 +1950,21 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithoutPresent) { TEST_F(TestColumn, VectorizedDecimalColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DecimalColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, - 12, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DecimalColumnWithoutoutPresent", + "DECIMAL", + "REPLACE", + 12, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2077,23 +2009,21 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithPresent) { TEST_F(TestColumn, SkipDecimalColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DecimalColumnWithPresent"), - OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, - 12, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DecimalColumnWithPresent", + "DECIMAL", + "REPLACE", + 12, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2134,23 +2064,20 @@ TEST_F(TestColumn, SkipDecimalColumnWithPresent) { TEST_F(TestColumn, SeekDecimalColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DecimalColumnWithPresent"), - OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, - 12, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "DecimalColumnWithPresent", + "DECIMAL", + "REPLACE", + 12, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2217,18 +2144,16 @@ TEST_F(TestColumn, SeekDecimalColumnWithPresent) { } TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) { - // init table schema - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("LargeIntColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_LARGEINT, - OLAP_FIELD_AGGREGATION_SUM, - 16, - false, - true); - tablet_schema.push_back(field_info); - + // init tablet schema + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "LargeIntColumnWithoutoutPresent", + "LARGEINT", + "SUM", + 16, + false, + true, &tablet_schema); // test data string value1 = "100000000000000000000000000000000000000"; string value2 = "-170141183460469231731687303715884105728"; @@ -2238,7 +2163,7 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) { RowCursor write_row; 
write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2281,17 +2206,17 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) { } TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) { - // init table schema - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("LargeIntColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_LARGEINT, - OLAP_FIELD_AGGREGATION_SUM, - 16, - true, - true); - tablet_schema.push_back(field_info); + // init tablet schema + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "LargeIntColumnWithoutoutPresent", + "LARGEINT", + "SUM", + 16, + true, + true, &tablet_schema); + // test data string value1 = "100000000000000000000000000000000000000"; @@ -2302,7 +2227,7 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) { RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2360,18 +2285,16 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) { } TEST_F(TestColumn, SkipLargeIntColumnWithPresent) { - // init table schema - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("LargeIntColumnWithPresent"), - OLAP_FIELD_TYPE_LARGEINT, - OLAP_FIELD_AGGREGATION_SUM, - 16, - true, - true); - tablet_schema.push_back(field_info); - + // init tablet schema + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "LargeIntColumnWithPresent", + "LARGEINT", + "SUM", + 16, + true, + true, &tablet_schema); // test data string value1 = "100000000000000000000000000000000000000"; string value2 = "-170141183460469231731687303715884105728"; @@ -2381,7 +2304,7 @@ TEST_F(TestColumn, SkipLargeIntColumnWithPresent) { RowCursor write_row; write_row.init(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2422,16 +2345,15 @@ TEST_F(TestColumn, SkipLargeIntColumnWithPresent) { TEST_F(TestColumn, VectorizedDirectVarcharColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DirectVarcharColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 10, - false, - true); - tablet_schema.push_back(field_info); + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DirectVarcharColumnWithoutoutPresent", + "VARCHAR", + "REPLACE", + 10, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); @@ -2439,7 +2361,7 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithoutPresent) { write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2496,24 +2418,21 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithoutPresent) { TEST_F(TestColumn, VectorizedDirectVarcharColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DirectVarcharColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 10, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + 
"DirectVarcharColumnWithoutoutPresent", + "VARCHAR", + "REPLACE", + 10, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2556,24 +2475,22 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithPresent) { TEST_F(TestColumn, SkipDirectVarcharColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DirectVarcharColumnWithPresent"), - OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 10, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DirectVarcharColumnWithPresent", + "VARCHAR", + "REPLACE", + 10, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2616,24 +2533,22 @@ TEST_F(TestColumn, SkipDirectVarcharColumnWithPresent) { TEST_F(TestColumn, SeekDirectVarcharColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DirectVarcharColumnWithPresent"), - OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 10, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DirectVarcharColumnWithPresent", + "VARCHAR", + "REPLACE", + 10, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2698,24 +2613,22 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithoutPresent) { TEST_F(TestColumn, SeekDirectVarcharColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DirectVarcharColumnWithPresent"), - OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 10, - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + + SetTabletSchemaWithOneColumn( + "DirectVarcharColumnWithPresent", + "VARCHAR", + "REPLACE", + 10, + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2780,24 +2693,21 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithPresent) { TEST_F(TestColumn, VectorizedStringColumnWithoutPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("VarcharColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_CHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - strlen("abcde"), - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "VarcharColumnWithoutoutPresent", + "CHAR", + "REPLACE", + strlen("abcde"), + 
false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2854,24 +2764,21 @@ TEST_F(TestColumn, VectorizedStringColumnWithoutPresent) { TEST_F(TestColumn, VectorizedStringColumnWithPresent) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("VarcharColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_CHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - strlen("abcde"), - true, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "VarcharColumnWithoutoutPresent", + "CHAR", + "REPLACE", + strlen("abcde"), + true, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -2912,24 +2819,21 @@ TEST_F(TestColumn, VectorizedStringColumnWithPresent) { TEST_F(TestColumn, VectorizedStringColumnWithoutoutPresent2) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("VarcharColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_CHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 20, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "VarcharColumnWithoutoutPresent", + "CHAR", + "REPLACE", + 20, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -3006,24 +2910,21 @@ TEST_F(TestColumn, VectorizedStringColumnWithoutoutPresent2) { TEST_F(TestColumn, VectorizedDirectVarcharColumnWith65533) { // write data - std::vector tablet_schema; - FieldInfo field_info; - SetFieldInfo(field_info, - std::string("DirectVarcharColumnWithoutoutPresent"), - OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, - 65535, - false, - true); - tablet_schema.push_back(field_info); - + TabletSchema tablet_schema; + SetTabletSchemaWithOneColumn( + "DirectVarcharColumnWithoutoutPresent", + "VARCHAR", + "REPLACE", + 65535, + false, + true, &tablet_schema); CreateColumnWriter(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); write_row.allocate_memory_for_string_type(tablet_schema); - RowBlock block(tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 10000; block.init(block_info); @@ -3085,4 +2986,3 @@ int main(int argc, char** argv) { ret = RUN_ALL_TESTS(); return ret; } - diff --git a/be/test/olap/comparison_predicate_test.cpp b/be/test/olap/comparison_predicate_test.cpp index 3494b64ae8b2ad..8d582a70c09fc2 100644 --- a/be/test/olap/comparison_predicate_test.cpp +++ b/be/test/olap/comparison_predicate_test.cpp @@ -99,24 +99,28 @@ public: \ delete _vectorized_batch; \ } \ } \ - void SetFieldInfo(FieldInfo &field_info, std::string name, \ - FieldType type, FieldAggregationMethod aggregation, \ - uint32_t length, bool is_allow_null, bool is_key) { \ - field_info.name = name; \ 
- field_info.type = type; \ - field_info.aggregation = aggregation; \ - field_info.length = length; \ - field_info.is_allow_null = is_allow_null; \ - field_info.is_key = is_key; \ - field_info.precision = 1000; \ - field_info.frac = 10000; \ - field_info.unique_id = 0; \ - field_info.is_bf_column = false; \ + void SetTabletSchema(std::string name, \ + const std::string& type, const std::string& aggregation, \ + uint32_t length, bool is_allow_null, bool is_key, TabletSchema* tablet_schema) { \ + TabletSchemaPB tablet_schema_pb; \ + static int id = 0; \ + ColumnPB* column = tablet_schema_pb.add_column(); \ + column->set_unique_id(++id); \ + column->set_name(name); \ + column->set_type(type); \ + column->set_is_key(is_key); \ + column->set_is_nullable(is_allow_null); \ + column->set_length(length); \ + column->set_aggregation(aggregation); \ + column->set_precision(1000); \ + column->set_frac(1000); \ + column->set_is_bf_column(false); \ + tablet_schema->init_from_pb(tablet_schema_pb); \ } \ - void InitVectorizedBatch(const std::vector& schema, \ + void InitVectorizedBatch(const TabletSchema* tablet_schema, \ const std::vector&ids, \ int size) { \ - _vectorized_batch = new VectorizedRowBatch(schema, ids, size); \ + _vectorized_batch = new VectorizedRowBatch(tablet_schema, ids, size); \ _vectorized_batch->set_size(size); \ } \ std::unique_ptr _mem_tracker; \ @@ -129,17 +133,15 @@ TEST_PREDICATE_DEFINITION(TestLessPredicate) #define TEST_EQUAL_PREDICATE(TYPE, TYPE_NAME, FIELD_TYPE) \ TEST_F(TestEqualPredicate, TYPE_NAME##_COLUMN) { \ - std::vector schema; \ - FieldInfo field_info; \ - SetFieldInfo(field_info, std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, \ - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); \ - schema.push_back(field_info); \ + TabletSchema tablet_schema; \ + SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, \ + "REPLACE", 1, false, true, &tablet_schema); \ int size = 10; \ std::vector return_columns; \ - for (int i = 0; i < schema.size(); ++i) { \ + for (int i = 0; i < tablet_schema.num_columns(); ++i) { \ return_columns.push_back(i); \ } \ - InitVectorizedBatch(schema, return_columns, size); \ + InitVectorizedBatch(&tablet_schema, return_columns, size); \ ColumnVector* col_vector = _vectorized_batch->column(0); \ \ /* for no nulls */ \ @@ -176,24 +178,22 @@ TEST_F(TestEqualPredicate, TYPE_NAME##_COLUMN) { \ ASSERT_EQ(*(col_data + sel[0]), 5); \ } \ -TEST_EQUAL_PREDICATE(int8_t, TINYINT, OLAP_FIELD_TYPE_TINYINT) -TEST_EQUAL_PREDICATE(int16_t, SMALLINT, OLAP_FIELD_TYPE_SMALLINT) -TEST_EQUAL_PREDICATE(int32_t, INT, OLAP_FIELD_TYPE_INT) -TEST_EQUAL_PREDICATE(int64_t, BIGINT, OLAP_FIELD_TYPE_BIGINT) -TEST_EQUAL_PREDICATE(int128_t, LARGEINT, OLAP_FIELD_TYPE_LARGEINT) +TEST_EQUAL_PREDICATE(int8_t, TINYINT, "TINYINT") +TEST_EQUAL_PREDICATE(int16_t, SMALLINT, "SMALLINT") +TEST_EQUAL_PREDICATE(int32_t, INT, "INT") +TEST_EQUAL_PREDICATE(int64_t, BIGINT, "BIGINT") +TEST_EQUAL_PREDICATE(int128_t, LARGEINT, "LARGEINT") TEST_F(TestEqualPredicate, FLOAT_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("FLOAT_COLUMN"), OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - 
InitVectorizedBatch(schema, return_columns, size);
+    InitVectorizedBatch(&tablet_schema, return_columns, size);
     ColumnVector* col_vector = _vectorized_batch->column(0);
     // for no nulls
@@ -231,17 +231,15 @@ TEST_F(TestEqualPredicate, FLOAT_COLUMN) {
 }
 TEST_F(TestEqualPredicate, DOUBLE_COLUMN) {
-    std::vector<FieldInfo> schema;
-    FieldInfo field_info;
-    SetFieldInfo(field_info, std::string("DOUBLE_COLUMN"), OLAP_FIELD_TYPE_DOUBLE,
-            OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true);
-    schema.push_back(field_info);
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE",
+            "REPLACE", 1, false, true, &tablet_schema);
     int size = 10;
     std::vector<uint32_t> return_columns;
-    for (int i = 0; i < schema.size(); ++i) {
+    for (int i = 0; i < tablet_schema.num_columns(); ++i) {
         return_columns.push_back(i);
     }
-    InitVectorizedBatch(schema, return_columns, size);
+    InitVectorizedBatch(&tablet_schema, return_columns, size);
     ColumnVector* col_vector = _vectorized_batch->column(0);
     // for no nulls
@@ -279,17 +277,15 @@ TEST_F(TestEqualPredicate, DOUBLE_COLUMN) {
 }
 TEST_F(TestEqualPredicate, DECIMAL_COLUMN) {
-    std::vector<FieldInfo> schema;
-    FieldInfo field_info;
-    SetFieldInfo(field_info, std::string("DECIMAL_COLUMN"), OLAP_FIELD_TYPE_DECIMAL,
-            OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true);
-    schema.push_back(field_info);
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL",
+            "REPLACE", 1, false, true, &tablet_schema);
     int size = 10;
     std::vector<uint32_t> return_columns;
-    for (int i = 0; i < schema.size(); ++i) {
+    for (int i = 0; i < tablet_schema.num_columns(); ++i) {
         return_columns.push_back(i);
     }
-    InitVectorizedBatch(schema, return_columns, size);
+    InitVectorizedBatch(&tablet_schema, return_columns, size);
     ColumnVector* col_vector = _vectorized_batch->column(0);
     // for no nulls
@@ -330,17 +326,15 @@ TEST_F(TestEqualPredicate, DECIMAL_COLUMN) {
 }
 TEST_F(TestEqualPredicate, STRING_COLUMN) {
-    std::vector<FieldInfo> schema;
-    FieldInfo field_info;
-    SetFieldInfo(field_info, std::string("STRING_COLUMN"), OLAP_FIELD_TYPE_VARCHAR,
-            OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true);
-    schema.push_back(field_info);
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR",
+            "REPLACE", 1, false, true, &tablet_schema);
     int size = 10;
     std::vector<uint32_t> return_columns;
-    for (int i = 0; i < schema.size(); ++i) {
+    for (int i = 0; i < tablet_schema.num_columns(); ++i) {
         return_columns.push_back(i);
     }
-    InitVectorizedBatch(schema, return_columns, size);
+    InitVectorizedBatch(&tablet_schema, return_columns, size);
     ColumnVector* col_vector = _vectorized_batch->column(0);
     // for no nulls
@@ -398,17 +392,15 @@ TEST_F(TestEqualPredicate, STRING_COLUMN) {
 }
 TEST_F(TestEqualPredicate, DATE_COLUMN) {
-    std::vector<FieldInfo> schema;
-    FieldInfo field_info;
-    SetFieldInfo(field_info, std::string("DATE_COLUMN"), OLAP_FIELD_TYPE_DATE,
-            OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true);
-    schema.push_back(field_info);
+    TabletSchema tablet_schema;
+    SetTabletSchema(std::string("DATE_COLUMN"), "DATE",
+            "REPLACE", 1, false, true, &tablet_schema);
     int size = 6;
     std::vector<uint32_t> return_columns;
-    for (int i = 0; i < schema.size(); ++i) {
+    for (int i = 0; i < tablet_schema.num_columns(); ++i) {
         return_columns.push_back(i);
     }
-    InitVectorizedBatch(schema, return_columns, size);
+    InitVectorizedBatch(&tablet_schema, return_columns, size);
     ColumnVector* col_vector = _vectorized_batch->column(0);
     // for no nulls
@@ -461,17 +453,15 @@ TEST_F(TestEqualPredicate,
DATETIME_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATETIME_COLUMN"), OLAP_FIELD_TYPE_DATETIME, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -525,17 +515,15 @@ TEST_F(TestEqualPredicate, DATETIME_COLUMN) { #define TEST_LESS_PREDICATE(TYPE, TYPE_NAME, FIELD_TYPE) \ TEST_F(TestLessPredicate, TYPE_NAME##_COLUMN) { \ - std::vector schema; \ - FieldInfo field_info; \ - SetFieldInfo(field_info, std::string("TYPE_NAME_COLUMN"), FIELD_TYPE, \ - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); \ - schema.push_back(field_info); \ + TabletSchema tablet_schema; \ + SetTabletSchema(std::string("TYPE_NAME_COLUMN"), FIELD_TYPE, \ + "REPLACE", 1, false, true, &tablet_schema); \ int size = 10; \ std::vector return_columns; \ - for (int i = 0; i < schema.size(); ++i) { \ + for (int i = 0; i < tablet_schema.num_columns(); ++i) { \ return_columns.push_back(i); \ } \ - InitVectorizedBatch(schema, return_columns, size); \ + InitVectorizedBatch(&tablet_schema, return_columns, size); \ ColumnVector* col_vector = _vectorized_batch->column(0); \ \ /* for no nulls */ \ @@ -580,24 +568,22 @@ TEST_F(TestLessPredicate, TYPE_NAME##_COLUMN) { \ ASSERT_EQ(sum, 4); \ } \ -TEST_LESS_PREDICATE(int8_t, TINYINT, OLAP_FIELD_TYPE_TINYINT) -TEST_LESS_PREDICATE(int16_t, SMALLINT, OLAP_FIELD_TYPE_SMALLINT) -TEST_LESS_PREDICATE(int32_t, INT, OLAP_FIELD_TYPE_INT) -TEST_LESS_PREDICATE(int64_t, BIGINT, OLAP_FIELD_TYPE_BIGINT) -TEST_LESS_PREDICATE(int128_t, LARGEINT, OLAP_FIELD_TYPE_LARGEINT) +TEST_LESS_PREDICATE(int8_t, TINYINT, "TINYINT") +TEST_LESS_PREDICATE(int16_t, SMALLINT, "SMALLINT") +TEST_LESS_PREDICATE(int32_t, INT, "INT") +TEST_LESS_PREDICATE(int64_t, BIGINT, "BIGINT") +TEST_LESS_PREDICATE(int128_t, LARGEINT, "LARGEINT") TEST_F(TestLessPredicate, FLOAT_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("FLOAT_COLUMN"), OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -644,17 +630,15 @@ TEST_F(TestLessPredicate, FLOAT_COLUMN) { } TEST_F(TestLessPredicate, DOUBLE_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DOUBLE_COLUMN"), OLAP_FIELD_TYPE_DOUBLE, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - 
for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -701,17 +685,15 @@ TEST_F(TestLessPredicate, DOUBLE_COLUMN) { } TEST_F(TestLessPredicate, DECIMAL_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DECIMAL_COLUMN"), OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -762,17 +744,15 @@ TEST_F(TestLessPredicate, DECIMAL_COLUMN) { } TEST_F(TestLessPredicate, STRING_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("STRING_COLUMN"), OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -828,17 +808,15 @@ TEST_F(TestLessPredicate, STRING_COLUMN) { } TEST_F(TestLessPredicate, DATE_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATE_COLUMN"), OLAP_FIELD_TYPE_DATE, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DATE_COLUMN"), "DATE", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -888,17 +866,16 @@ TEST_F(TestLessPredicate, DATE_COLUMN) { } TEST_F(TestLessPredicate, DATETIME_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATETIME_COLUMN"), OLAP_FIELD_TYPE_DATETIME, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + TabletColumn tablet_column; + SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); 
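The SetTabletSchema helper used throughout these tests is defined inside a backslash-continued test macro, which makes the new pattern hard to scan in diff form. The sketch below restates the same TabletSchemaPB-based construction as a standalone function; the ColumnPB setters are exactly the ones used in the macro, while the include paths, the function name, and the fixed unique_id of 0 are illustrative assumptions.

#include <cstdint>
#include <string>

#include "gen_cpp/olap_file.pb.h"   // TabletSchemaPB, ColumnPB (assumed include path)
#include "olap/tablet_schema.h"     // TabletSchema (assumed include path)

namespace doris {

// Builds a single-column TabletSchema the way the rewritten test helpers do.
void set_single_column_schema(const std::string& name,
                              const std::string& type,         // e.g. "DATE", "VARCHAR"
                              const std::string& aggregation,  // e.g. "REPLACE", "SUM"
                              uint32_t length,
                              bool is_allow_null,
                              bool is_key,
                              TabletSchema* tablet_schema) {
    TabletSchemaPB tablet_schema_pb;
    ColumnPB* column = tablet_schema_pb.add_column();
    column->set_unique_id(0);              // the test macro uses a static counter here
    column->set_name(name);
    column->set_type(type);                // plain strings replace OLAP_FIELD_TYPE_*
    column->set_aggregation(aggregation);  // and OLAP_FIELD_AGGREGATION_* enum values
    column->set_is_key(is_key);
    column->set_is_nullable(is_allow_null);
    column->set_length(length);
    tablet_schema->init_from_pb(tablet_schema_pb);
}

}  // namespace doris

The same shape is what SetTabletSchemaWithOneColumn relies on in column_reader_test.cpp further up, and it is why the call sites now hand &tablet_schema to RowBlock and InitVectorizedBatch instead of a std::vector of FieldInfo.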
ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index 87437fc4045105..ef840bd32ccef5 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -28,7 +28,7 @@ #include "olap/delete_handler.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" #include "olap/push_handler.h" #include "olap/utils.h" #include "olap/options.h" @@ -42,7 +42,7 @@ using google::protobuf::RepeatedPtrField; namespace doris { static const uint32_t MAX_PATH_LEN = 1024; -static OLAPEngine* k_engine = nullptr; +static StorageEngine* k_engine = nullptr; void set_up() { char buffer[MAX_PATH_LEN]; @@ -56,7 +56,7 @@ void set_up() { doris::EngineOptions options; options.store_paths = paths; - doris::OLAPEngine::open(options, &k_engine); + doris::StorageEngine::open(options, &k_engine); } void tear_down() { @@ -151,8 +151,6 @@ void set_default_create_tablet_request(TCreateTabletReq* request) { void set_default_push_request(TPushReq* request) { request->tablet_id = 10003; request->schema_hash = 270068375; - request->__set_version(2); - request->__set_version_hash(1); request->timeout = 86400; request->push_type = TPushType::LOAD; } @@ -160,42 +158,29 @@ void set_default_push_request(TPushReq* request) { class TestDeleteConditionHandler : public testing::Test { protected: void SetUp() { - // Create local data dir for OLAPEngine. + // Create local data dir for StorageEngine. char buffer[MAX_PATH_LEN]; getcwd(buffer, MAX_PATH_LEN); config::storage_root_path = string(buffer) + "/data_delete_condition"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - // OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. // create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); - res = k_engine->create_table(_create_tablet); + res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - _olap_table = k_engine->get_table( + tablet = k_engine->tablet_manager()->get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); - ASSERT_TRUE(_olap_table.get() != NULL); - _tablet_path = _olap_table->tablet_path(); - } - - OLAPStatus push_empty_delta(int32_t version) { - // push data - TPushReq push_req; - set_default_push_request(&push_req); - push_req.version = version; - push_req.version_hash = version; - std::vector tablets_info; - return k_engine->push(push_req, &tablets_info); + ASSERT_TRUE(tablet.get() != NULL); + _tablet_path = tablet->tablet_path(); } void TearDown() { // Remove all dir. 
- _olap_table.reset(); - OLAPEngine::get_instance()->drop_table( + tablet.reset(); + StorageEngine::instance()->tablet_manager()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_path.c_str(), F_OK)) { sleep(1); @@ -203,10 +188,8 @@ class TestDeleteConditionHandler : public testing::Test { ASSERT_EQ(OLAP_SUCCESS, remove_all_dir(config::storage_root_path)); } - typedef RepeatedPtrField del_cond_array; - std::string _tablet_path; - OLAPTablePtr _olap_table; + TabletSharedPtr tablet; TCreateTabletReq _create_tablet; DeleteConditionHandler _delete_condition_handler; }; @@ -234,82 +217,23 @@ TEST_F(TestDeleteConditionHandler, StoreCondSucceed) { condition.condition_values.push_back("5"); conditions.push_back(condition); - success_res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); + DeletePredicatePB del_pred; + success_res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); ASSERT_EQ(OLAP_SUCCESS, success_res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); // 验证存储在header中的过滤条件正确 - const del_cond_array& delete_conditions = _olap_table->delete_data_conditions(); - ASSERT_EQ(size_t(1), delete_conditions.size()); - EXPECT_EQ(3, delete_conditions.Get(0).version()); - ASSERT_EQ(size_t(3), delete_conditions.Get(0).sub_conditions_size()); - EXPECT_STREQ("k1=1", delete_conditions.Get(0).sub_conditions(0).c_str()); - EXPECT_STREQ("k2>>3", delete_conditions.Get(0).sub_conditions(1).c_str()); - EXPECT_STREQ("k2<=5", delete_conditions.Get(0).sub_conditions(2).c_str()); - - // 再次存储相同版本号(版本号为3)的过滤条件 - conditions.clear(); - condition.column_name = "k1"; - condition.condition_op = "!="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - success_res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); - ASSERT_EQ(OLAP_SUCCESS, success_res); - - // 验证存储相同版本号的过滤条件情况下,新的过滤条件替换掉旧的过滤条件 - const del_cond_array& new_delete_conditions = _olap_table->delete_data_conditions(); - ASSERT_EQ(size_t(1), new_delete_conditions.size()); - EXPECT_EQ(3, new_delete_conditions.Get(0).version()); - ASSERT_EQ(size_t(1), new_delete_conditions.Get(0).sub_conditions_size()); - EXPECT_STREQ("k1!=1", new_delete_conditions.Get(0).sub_conditions(0).c_str()); - - // 第三次存储不同版本号(版本号为4)的过滤条件 - conditions.clear(); - condition.column_name = "k1"; - condition.condition_op = "!="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - condition.column_name = "k1"; - condition.condition_op = "!="; - condition.condition_values.clear(); - condition.condition_values.push_back("2"); - conditions.push_back(condition); - - success_res = _delete_condition_handler.store_cond(_olap_table, 4, conditions); - ASSERT_EQ(OLAP_SUCCESS, success_res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(4)); - - const del_cond_array& all_delete_conditions = _olap_table->delete_data_conditions(); - ASSERT_EQ(size_t(2), all_delete_conditions.size()); - EXPECT_EQ(3, all_delete_conditions.Get(0).version()); - ASSERT_EQ(size_t(1), all_delete_conditions.Get(0).sub_conditions_size()); - EXPECT_STREQ("k1!=1", all_delete_conditions.Get(0).sub_conditions(0).c_str()); - EXPECT_EQ(4, all_delete_conditions.Get(1).version()); - ASSERT_EQ(size_t(2), all_delete_conditions.Get(1).sub_conditions_size()); - EXPECT_STREQ("k1!=1", all_delete_conditions.Get(1).sub_conditions(0).c_str()); - EXPECT_STREQ("k1!=2", 
all_delete_conditions.Get(1).sub_conditions(1).c_str()); + ASSERT_EQ(size_t(3), del_pred.sub_predicates_size()); + EXPECT_STREQ("k1=1", del_pred.sub_predicates(0).c_str()); + EXPECT_STREQ("k2>>3", del_pred.sub_predicates(1).c_str()); + EXPECT_STREQ("k2<=5", del_pred.sub_predicates(2).c_str()); } -// 检测参数不正确的情况,包括:空的过滤条件字符串,以及负的版本号 +// 检测参数不正确的情况,包括:空的过滤条件字符串 TEST_F(TestDeleteConditionHandler, StoreCondInvalidParameters) { // 空的过滤条件 std::vector conditions; - OLAPStatus failed_res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); - ASSERT_EQ(OLAP_ERR_DELETE_INVALID_PARAMETERS, failed_res); - - // 负的版本号: -10 - TCondition condition; - condition.column_name = "k1"; - condition.condition_op = "="; - condition.condition_values.clear(); - condition.condition_values.push_back("2"); - conditions.push_back(condition); - - failed_res = _delete_condition_handler.store_cond(_olap_table, -10, conditions); + DeletePredicatePB del_pred; + OLAPStatus failed_res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred);; ASSERT_EQ(OLAP_ERR_DELETE_INVALID_PARAMETERS, failed_res); } @@ -323,8 +247,8 @@ TEST_F(TestDeleteConditionHandler, StoreCondNonexistentColumn) { condition.condition_values.clear(); condition.condition_values.push_back("2"); conditions.push_back(condition); - - OLAPStatus failed_res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); + DeletePredicatePB del_pred; + OLAPStatus failed_res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred);; ASSERT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, failed_res); // 'v'是value列 @@ -335,168 +259,37 @@ TEST_F(TestDeleteConditionHandler, StoreCondNonexistentColumn) { condition.condition_values.push_back("5"); conditions.push_back(condition); - failed_res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); + failed_res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred);; ASSERT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, failed_res); } -// 只删除特定版本的过滤条件 -TEST_F(TestDeleteConditionHandler, DeleteCondRemoveOneCondition) { - OLAPStatus res; - std::vector conditions; - TCondition condition; - condition.column_name = "k1"; - condition.condition_op = "="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - condition.column_name = "k2"; - condition.condition_op = ">"; - condition.condition_values.clear(); - condition.condition_values.push_back("3"); - conditions.push_back(condition); - - condition.column_name = "k2"; - condition.condition_op = "<="; - condition.condition_values.clear(); - condition.condition_values.push_back("5"); - conditions.push_back(condition); - - res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); - - conditions.clear(); - condition.column_name = "k1"; - condition.condition_op = "!="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - res = _delete_condition_handler.store_cond(_olap_table, 4, conditions); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(4)); - - conditions.clear(); - condition.column_name = "k2"; - condition.condition_op = ">="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - res = 
_delete_condition_handler.store_cond(_olap_table, 5, conditions); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(5)); - - // 删除版本号为8的过滤条件 - res = _delete_condition_handler.delete_cond(_olap_table, 5, false); - ASSERT_EQ(OLAP_SUCCESS, res); - - const del_cond_array& all_delete_conditions = _olap_table->delete_data_conditions(); - ASSERT_EQ(size_t(2), all_delete_conditions.size()); - - EXPECT_EQ(3, all_delete_conditions.Get(0).version()); - ASSERT_EQ(size_t(3), all_delete_conditions.Get(0).sub_conditions_size()); - EXPECT_STREQ("k1=1", all_delete_conditions.Get(0).sub_conditions(0).c_str()); - EXPECT_STREQ("k2>>3", all_delete_conditions.Get(0).sub_conditions(1).c_str()); - EXPECT_STREQ("k2<=5", all_delete_conditions.Get(0).sub_conditions(2).c_str()); - - EXPECT_EQ(4, all_delete_conditions.Get(1).version()); - ASSERT_EQ(size_t(1), all_delete_conditions.Get(1).sub_conditions_size()); - EXPECT_STREQ("k1!=1", all_delete_conditions.Get(1).sub_conditions(0).c_str()); -} - -// 删除特定版本以及版本比它小的过滤条件 -TEST_F(TestDeleteConditionHandler, DeleteCondRemovBelowCondition) { - OLAPStatus res; - std::vector conditions; - TCondition condition; - condition.column_name = "k1"; - condition.condition_op = "="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - condition.column_name = "k2"; - condition.condition_op = ">"; - condition.condition_values.clear(); - condition.condition_values.push_back("3"); - conditions.push_back(condition); - - condition.column_name = "k2"; - condition.condition_op = "<="; - condition.condition_values.clear(); - condition.condition_values.push_back("5"); - conditions.push_back(condition); - - res = _delete_condition_handler.store_cond(_olap_table, 3, conditions); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); - - conditions.clear(); - condition.column_name = "k1"; - condition.condition_op = "!="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - res = _delete_condition_handler.store_cond(_olap_table, 4, conditions); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(4)); - - conditions.clear(); - condition.column_name = "k2"; - condition.condition_op = ">="; - condition.condition_values.clear(); - condition.condition_values.push_back("1"); - conditions.push_back(condition); - - res = _delete_condition_handler.store_cond(_olap_table, 5, conditions); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(5)); - - // 删除版本号为7以及版本号小于7的过滤条件 - res = _delete_condition_handler.delete_cond(_olap_table, 4, true); - ASSERT_EQ(OLAP_SUCCESS, res); - - const del_cond_array& all_delete_conditions = _olap_table->delete_data_conditions(); - ASSERT_EQ(size_t(1), all_delete_conditions.size()); - - EXPECT_EQ(5, all_delete_conditions.Get(0).version()); - ASSERT_EQ(size_t(1), all_delete_conditions.Get(0).sub_conditions_size()); - EXPECT_STREQ("k2>=1", all_delete_conditions.Get(0).sub_conditions(0).c_str()); -} - // 测试删除条件值不符合类型要求 class TestDeleteConditionHandler2 : public testing::Test { protected: void SetUp() { - // Create local data dir for OLAPEngine. + // Create local data dir for StorageEngine. 
char buffer[MAX_PATH_LEN]; getcwd(buffer, MAX_PATH_LEN); config::storage_root_path = string(buffer) + "/data_delete_condition"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - // OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. // create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); - res = k_engine->create_table(_create_tablet); + res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - _olap_table = k_engine->get_table( + tablet = k_engine->tablet_manager()->get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); - ASSERT_TRUE(_olap_table.get() != NULL); - _tablet_path = _olap_table->tablet_path(); + ASSERT_TRUE(tablet.get() != NULL); + _tablet_path = tablet->tablet_path(); } void TearDown() { // Remove all dir. - _olap_table.reset(); - OLAPEngine::get_instance()->drop_table( + tablet.reset(); + StorageEngine::instance()->tablet_manager()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_path.c_str(), F_OK)) { sleep(1); @@ -504,11 +297,10 @@ class TestDeleteConditionHandler2 : public testing::Test { ASSERT_EQ(OLAP_SUCCESS, remove_all_dir(config::storage_root_path)); } - typedef RepeatedPtrField del_cond_array; - std::string _tablet_path; - OLAPTablePtr _olap_table; + TabletSharedPtr tablet; TCreateTabletReq _create_tablet; + DeleteConditionHandler _delete_condition_handler; }; TEST_F(TestDeleteConditionHandler2, ValidConditionValue) { @@ -542,8 +334,9 @@ TEST_F(TestDeleteConditionHandler2, ValidConditionValue) { condition.condition_values.push_back("-1"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); + ASSERT_EQ(OLAP_SUCCESS, res); // k5类型为int128 conditions.clear(); @@ -553,8 +346,9 @@ TEST_F(TestDeleteConditionHandler2, ValidConditionValue) { condition.condition_values.push_back("1"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_2; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_2); + ASSERT_EQ(OLAP_SUCCESS, res); // k9类型为decimal, precision=6, frac=3 conditions.clear(); @@ -564,23 +358,27 @@ TEST_F(TestDeleteConditionHandler2, ValidConditionValue) { condition.condition_values.push_back("2.3"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_3; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_3); + ASSERT_EQ(OLAP_SUCCESS, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2"); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_4; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_4); + ASSERT_EQ(OLAP_SUCCESS, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-2"); - res = cond_handler.store_cond(_olap_table, 2, conditions); - 
EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_5; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_5); + ASSERT_EQ(OLAP_SUCCESS, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-2.3"); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_6; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_6); + ASSERT_EQ(OLAP_SUCCESS, res); // k10,k11类型分别为date, datetime conditions.clear(); @@ -596,8 +394,9 @@ TEST_F(TestDeleteConditionHandler2, ValidConditionValue) { condition.condition_values.push_back("2014-01-01 00:00:00"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_7; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_7); + ASSERT_EQ(OLAP_SUCCESS, res); // k12,k13类型分别为string(64), varchar(64) conditions.clear(); @@ -613,8 +412,9 @@ TEST_F(TestDeleteConditionHandler2, ValidConditionValue) { condition.condition_values.push_back("YWFhYQ=="); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 2, conditions); - EXPECT_EQ(OLAP_SUCCESS, res); + DeletePredicatePB del_pred_8; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_8); + ASSERT_EQ(OLAP_SUCCESS, res); } TEST_F(TestDeleteConditionHandler2, InvalidConditionValue) { @@ -630,133 +430,155 @@ TEST_F(TestDeleteConditionHandler2, InvalidConditionValue) { condition.condition_values.push_back("1000"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_1; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_1); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k1的值越下界,k1类型为int8 conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-1000"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_2; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_2); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k2的值越上界,k2类型为int16 conditions[0].condition_values.clear(); conditions[0].column_name = "k2"; conditions[0].condition_values.push_back("32768"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_3; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_3); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k2的值越下界,k2类型为int16 conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-32769"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_4; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_4); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k3的值越上界,k3类型为int32 conditions[0].condition_values.clear(); conditions[0].column_name = "k3"; conditions[0].condition_values.push_back("2147483648"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_5; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_5); 
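The replacements in this file drop DeleteConditionHandler::store_cond(_olap_table, version, conditions) in favor of a two-step flow: generate_delete_predicate() validates the TConditions against the tablet schema and fills a DeletePredicatePB, and the caller then attaches that predicate to the tablet under an explicit version with add_delete_predicate(), as the DeleteHandler tests further below do. A sketch of the combined flow follows; it only uses calls that appear in these tests, but the include paths, the wrapper function itself, and the exact type of the version parameter are assumptions.

#include <vector>

#include "gen_cpp/olap_file.pb.h"   // DeletePredicatePB (assumed include path)
#include "olap/delete_handler.h"    // DeleteConditionHandler (assumed include path)
#include "olap/tablet.h"            // TabletSharedPtr (assumed include path)

namespace doris {

// Illustrative wrapper that mirrors the sequence the tests run by hand.
OLAPStatus add_one_delete_predicate(TabletSharedPtr tablet, int64_t version) {
    TCondition condition;
    condition.column_name = "k1";
    condition.condition_op = "=";
    condition.condition_values.push_back("1");

    std::vector<TCondition> conditions;
    conditions.push_back(condition);

    // Step 1: check the conditions against the schema and build the predicate.
    // Unknown columns or out-of-range values yield OLAP_ERR_DELETE_INVALID_CONDITION.
    DeletePredicatePB del_pred;
    DeleteConditionHandler handler;
    OLAPStatus res = handler.generate_delete_predicate(
            tablet->tablet_schema(), conditions, &del_pred);
    if (res != OLAP_SUCCESS) {
        return res;
    }

    // Step 2: attach the predicate to the tablet under a version number,
    // replacing the per-version bookkeeping that store_cond() kept in the header.
    return tablet->add_delete_predicate(del_pred, version);
}

}  // namespace doris

Readers then rebuild the filter with _delete_handler.init(tablet->tablet_schema(), tablet->delete_predicates(), version) and query it through is_filter_data(version, row), which is the pattern the TestDeleteHandler cases below exercise.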
EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k3的值越下界,k3类型为int32 conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-2147483649"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_6; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_6); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k4的值越上界,k2类型为int64 conditions[0].condition_values.clear(); conditions[0].column_name = "k4"; conditions[0].condition_values.push_back("9223372036854775808"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_7; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_7); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k4的值越下界,k1类型为int64 conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-9223372036854775809"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_8; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_8); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k5的值越上界,k5类型为int128 conditions[0].condition_values.clear(); conditions[0].column_name = "k5"; conditions[0].condition_values.push_back("170141183460469231731687303715884105728"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_9; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_9); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k5的值越下界,k5类型为int128 conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("-170141183460469231731687303715884105729"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_10; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_10); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k9整数部分长度过长,k9类型为decimal, precision=6, frac=3 conditions[0].condition_values.clear(); conditions[0].column_name = "k9"; - conditions[0].condition_values.push_back("1234.5"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + conditions[0].condition_values.push_back("12347876.5"); + DeletePredicatePB del_pred_11; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_11); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k9小数部分长度过长,k9类型为decimal, precision=6, frac=3 conditions[0].condition_values.clear(); - conditions[0].condition_values.push_back("1.2345"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + conditions[0].condition_values.push_back("1.2345678"); + DeletePredicatePB del_pred_12; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_12); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k9没有小数部分,但包含小数点 conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("1."); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_13; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_13); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k10类型的过滤值不符合对应格式,k10为date conditions[0].condition_values.clear(); conditions[0].column_name = "k10"; conditions[0].condition_values.push_back("20130101"); 
- res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_14; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_14); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-64-01"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_15; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_15); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-01-40"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_16; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_16); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k11类型的过滤值不符合对应格式,k11为datetime conditions[0].condition_values.clear(); conditions[0].column_name = "k11"; conditions[0].condition_values.push_back("20130101 00:00:00"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_17; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_17); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-64-01 00:00:00"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_18; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_18); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-01-40 00:00:00"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_19; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_19); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-01-01 24:00:00"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_20; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_20); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-01-01 00:60:00"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_21; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_21); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); conditions[0].condition_values.push_back("2013-01-01 00:00:60"); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_22; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_22); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); // 测试k12和k13类型的过滤值过长,k12,k13类型分别为string(64), varchar(64) @@ -765,7 +587,8 @@ TEST_F(TestDeleteConditionHandler2, InvalidConditionValue) { conditions[0].condition_values.push_back("YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYW" "FhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYW" "FhYWFhYWFhYWFhYWFhYWFhYWFhYWE=;k13=YWFhYQ=="); - res = 
cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_23; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_23); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); conditions[0].condition_values.clear(); @@ -773,53 +596,41 @@ TEST_F(TestDeleteConditionHandler2, InvalidConditionValue) { conditions[0].condition_values.push_back("YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYW" "FhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYW" "FhYWFhYWFhYWFhYWFhYWFhYWFhYWE=;k13=YWFhYQ=="); - res = cond_handler.store_cond(_olap_table, 2, conditions); + DeletePredicatePB del_pred_24; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_24); EXPECT_EQ(OLAP_ERR_DELETE_INVALID_CONDITION, res); } class TestDeleteHandler : public testing::Test { protected: void SetUp() { - // Create local data dir for OLAPEngine. + // Create local data dir for StorageEngine. char buffer[MAX_PATH_LEN]; getcwd(buffer, MAX_PATH_LEN); config::storage_root_path = string(buffer) + "/data_delete_condition"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - // OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. // create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); - res = k_engine->create_table(_create_tablet); + res = k_engine->create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - _olap_table = k_engine->get_table( + tablet = k_engine->tablet_manager()->get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); - ASSERT_TRUE(_olap_table.get() != NULL); - _tablet_path = _olap_table->tablet_path(); - - _data_row_cursor.init(_olap_table->tablet_schema()); - _data_row_cursor.allocate_memory_for_string_type(_olap_table->tablet_schema()); - } + ASSERT_TRUE(tablet != nullptr); + _tablet_path = tablet->tablet_path(); - OLAPStatus push_empty_delta(int32_t version) { - // push data - TPushReq push_req; - set_default_push_request(&push_req); - push_req.version = version; - push_req.version_hash = version; - std::vector tablets_info; - return k_engine->push(push_req, &tablets_info); + _data_row_cursor.init(tablet->tablet_schema()); + _data_row_cursor.allocate_memory_for_string_type(tablet->tablet_schema()); } void TearDown() { // Remove all dir. 
- _olap_table.reset(); + tablet.reset(); _delete_handler.finalize(); - OLAPEngine::get_instance()->drop_table( + StorageEngine::instance()->tablet_manager()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_path.c_str(), F_OK)) { sleep(1); @@ -827,13 +638,12 @@ class TestDeleteHandler : public testing::Test { ASSERT_EQ(OLAP_SUCCESS, remove_all_dir(config::storage_root_path)); } - typedef RepeatedPtrField del_cond_array; - std::string _tablet_path; RowCursor _data_row_cursor; - OLAPTablePtr _olap_table; + TabletSharedPtr tablet; TCreateTabletReq _create_tablet; DeleteHandler _delete_handler; + DeleteConditionHandler _delete_condition_handler; }; TEST_F(TestDeleteHandler, InitSuccess) { @@ -841,10 +651,6 @@ TEST_F(TestDeleteHandler, InitSuccess) { std::vector conditions; DeleteConditionHandler delete_condition_handler; - // there are no delete conditions in the header yet - res = _delete_handler.init(_olap_table, 2); - ASSERT_EQ(OLAP_SUCCESS, res); - // add delete conditions to the header file TCondition condition; condition.column_name = "k1"; @@ -865,9 +671,11 @@ TEST_F(TestDeleteHandler, InitSuccess) { condition.condition_values.push_back("5"); conditions.push_back(condition); - res = delete_condition_handler.store_cond(_olap_table, 3, conditions); + DeletePredicatePB del_pred; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); + ASSERT_EQ(OLAP_SUCCESS, res); + res = tablet->add_delete_predicate(del_pred, 1); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); conditions.clear(); condition.column_name = "k1"; @@ -876,9 +684,11 @@ TEST_F(TestDeleteHandler, InitSuccess) { condition.condition_values.push_back("3"); conditions.push_back(condition); - res = delete_condition_handler.store_cond(_olap_table, 4, conditions); + DeletePredicatePB del_pred_2; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_2); + ASSERT_EQ(OLAP_SUCCESS, res); + res = tablet->add_delete_predicate(del_pred_2, 2); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(4)); conditions.clear(); condition.column_name = "k2"; @@ -887,9 +697,11 @@ TEST_F(TestDeleteHandler, InitSuccess) { condition.condition_values.push_back("1"); conditions.push_back(condition); - res = delete_condition_handler.store_cond(_olap_table, 5, conditions); + DeletePredicatePB del_pred_3; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_3); + ASSERT_EQ(OLAP_SUCCESS, res); + res = tablet->add_delete_predicate(del_pred_3, 3); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(5)); conditions.clear(); condition.column_name = "k2"; @@ -898,19 +710,23 @@ TEST_F(TestDeleteHandler, InitSuccess) { condition.condition_values.push_back("3"); conditions.push_back(condition); - res = delete_condition_handler.store_cond(_olap_table, 6, conditions); + DeletePredicatePB del_pred_4; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_4); + ASSERT_EQ(OLAP_SUCCESS, res); + res = tablet->add_delete_predicate(del_pred_4, 4); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(6)); // fetch the delete conditions whose version is <= 7 from the header file - _delete_handler.finalize(); - res = _delete_handler.init(_olap_table, 4); + res = _delete_handler.init(tablet->tablet_schema(), tablet->delete_predicates(), 4); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(2, _delete_handler.conditions_num()); +
ASSERT_EQ(4, _delete_handler.conditions_num()); vector conds_version = _delete_handler.get_conds_version(); + EXPECT_EQ(4, conds_version.size()); sort(conds_version.begin(), conds_version.end()); - EXPECT_EQ(3, conds_version[0]); - EXPECT_EQ(4, conds_version[1]); + EXPECT_EQ(1, conds_version[0]); + EXPECT_EQ(2, conds_version[1]); + EXPECT_EQ(3, conds_version[2]); + EXPECT_EQ(4, conds_version[3]); _delete_handler.finalize(); } @@ -937,12 +753,15 @@ TEST_F(TestDeleteHandler, FilterDataSubconditions) { condition.condition_values.push_back("4"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 3, conditions); + DeletePredicatePB del_pred; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); + res = tablet->add_delete_predicate(del_pred, 1); // specify version 10 to load all delete conditions in the header (in this case, only condition 1) - _delete_handler.init(_olap_table, 10); + res = _delete_handler.init(tablet->tablet_schema(), tablet->delete_predicates(), 4); + ASSERT_EQ(OLAP_SUCCESS, res); + ASSERT_EQ(1, _delete_handler.conditions_num()); // construct one row of test data vector data_str; @@ -995,9 +814,10 @@ TEST_F(TestDeleteHandler, FilterDataConditions) { condition.condition_values.push_back("4"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 3, conditions); + DeletePredicatePB del_pred; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); + res = tablet->add_delete_predicate(del_pred, 1); // delete condition 2 conditions.clear(); @@ -1007,9 +827,10 @@ TEST_F(TestDeleteHandler, FilterDataConditions) { condition.condition_values.push_back("3"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 4, conditions); + DeletePredicatePB del_pred_2; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_2); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(4)); + res = tablet->add_delete_predicate(del_pred_2, 2); // delete condition 3 conditions.clear(); @@ -1019,12 +840,15 @@ TEST_F(TestDeleteHandler, FilterDataConditions) { condition.condition_values.push_back("5"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 5, conditions); + DeletePredicatePB del_pred_3; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_3); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(5)); + res = tablet->add_delete_predicate(del_pred_3, 3); - // specify version 10 to load all three delete conditions in the header - _delete_handler.init(_olap_table, 10); + // specify version 4 to load all delete conditions in the meta (in this case, only condition 1) + res = _delete_handler.init(tablet->tablet_schema(), tablet->delete_predicates(), 4); + ASSERT_EQ(OLAP_SUCCESS, res); + ASSERT_EQ(3, _delete_handler.conditions_num()); vector data_str; data_str.push_back("4"); @@ -1042,7 +866,7 @@ TEST_F(TestDeleteHandler, FilterDataConditions) { res = _data_row_cursor.from_tuple(tuple); ASSERT_EQ(OLAP_SUCCESS, res); // this row will be filtered out by delete condition 3 - ASSERT_TRUE(_delete_handler.is_filter_data(1, _data_row_cursor)); + ASSERT_TRUE(_delete_handler.is_filter_data(3, _data_row_cursor)); _delete_handler.finalize(); } @@ -1068,9 +892,10 @@ TEST_F(TestDeleteHandler, FilterDataVersion) { condition.condition_values.push_back("4"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 3,
conditions); + DeletePredicatePB del_pred; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(3)); + res = tablet->add_delete_predicate(del_pred, 3); // delete condition 2 conditions.clear(); @@ -1080,12 +905,15 @@ TEST_F(TestDeleteHandler, FilterDataVersion) { condition.condition_values.push_back("3"); conditions.push_back(condition); - res = cond_handler.store_cond(_olap_table, 4, conditions); + DeletePredicatePB del_pred_2; + res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred_2); ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(OLAP_SUCCESS, push_empty_delta(4)); + res = tablet->add_delete_predicate(del_pred_2, 4); - // specify version 10 to load all delete conditions in the header (condition 1 and condition 2) - _delete_handler.init(_olap_table, 10); + // specify version 4 to load all delete conditions in the meta (condition 1 and condition 2) + res = _delete_handler.init(tablet->tablet_schema(), tablet->delete_predicates(), 4); + ASSERT_EQ(OLAP_SUCCESS, res); + ASSERT_EQ(2, _delete_handler.conditions_num()); // construct one row of test data vector data_str; @@ -1103,9 +931,9 @@ TEST_F(TestDeleteHandler, FilterDataVersion) { OlapTuple tuple(data_str); res = _data_row_cursor.from_tuple(tuple); ASSERT_EQ(OLAP_SUCCESS, res); - // if the data version is less than 6, condition 1 takes effect and this row is filtered - ASSERT_TRUE(_delete_handler.is_filter_data(1, _data_row_cursor)); - // if the data version is greater than 6, condition 1 is skipped + // if the data version is less than 3, condition 1 takes effect and this row is filtered + ASSERT_TRUE(_delete_handler.is_filter_data(2, _data_row_cursor)); + // if the data version is greater than 3, condition 1 is skipped ASSERT_FALSE(_delete_handler.is_filter_data(4, _data_row_cursor)); _delete_handler.finalize(); @@ -1114,11 +942,6 @@ TEST_F(TestDeleteHandler, FilterDataVersion) { } // namespace doris int main(int argc, char** argv) { - std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; - if (!doris::config::init(conffile.c_str(), false)) { - fprintf(stderr, "error read config file. 
\n"); - return -1; - } doris::init_glog("be-test"); int ret = doris::OLAP_SUCCESS; testing::InitGoogleTest(&argc, argv); diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp index 94a35277d6f865..f52a4a0f946397 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -25,14 +25,14 @@ #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/Types_types.h" #include "olap/field.h" -#include "olap/olap_engine.h" -#include "olap/olap_table.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" #include "olap/utils.h" #include "runtime/tuple.h" #include "util/descriptor_helper.h" #include "util/logging.h" #include "olap/options.h" -#include "olap/olap_header_manager.h" +#include "olap/tablet_meta_manager.h" namespace doris { @@ -42,7 +42,7 @@ namespace doris { static const uint32_t MAX_RETRY_TIMES = 10; static const uint32_t MAX_PATH_LEN = 1024; -OLAPEngine* k_engine = nullptr; +StorageEngine* k_engine = nullptr; void set_up() { char buffer[MAX_PATH_LEN]; @@ -55,19 +55,21 @@ void set_up() { doris::EngineOptions options; options.store_paths = paths; - doris::OLAPEngine::open(options, &k_engine); + doris::StorageEngine::open(options, &k_engine); } void tear_down() { + delete k_engine; + k_engine = nullptr; system("rm -rf ./data_test"); remove_all_dir(std::string(getenv("DORIS_HOME")) + UNUSED_PREFIX); } -void create_table_request(TCreateTabletReq* request) { - request->tablet_id = 10003; +void create_tablet_request(int64_t tablet_id, int32_t schema_hash, TCreateTabletReq* request) { + request->tablet_id = tablet_id; request->__set_version(1); request->__set_version_hash(0); - request->tablet_schema.schema_hash = 270068375; + request->tablet_schema.schema_hash = schema_hash; request->tablet_schema.short_key_column_count = 6; request->tablet_schema.keys_type = TKeysType::AGG_KEYS; request->tablet_schema.storage_type = TStorageType::COLUMN; @@ -211,7 +213,7 @@ void create_table_request(TCreateTabletReq* request) { request->tablet_schema.columns.push_back(v10); } -TDescriptorTable create_descriptor_table() { +TDescriptorTable create_descriptor_tablet() { TDescriptorTableBuilder dtb; TTupleDescriptorBuilder tuple_builder; @@ -267,30 +269,25 @@ class TestDeltaWriter : public ::testing::Test { ~TestDeltaWriter() { } void SetUp() { - // Create local data dir for OLAPEngine. - char buffer[MAX_PATH_LEN]; - getcwd(buffer, MAX_PATH_LEN); - config::storage_root_path = std::string(buffer) + "/data_push"; - remove_all_dir(config::storage_root_path); - ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - - // Initialize all singleton object. - // OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); + // Create local data dir for StorageEngine. + std::cout << "setup" << std::endl; } void TearDown(){ // Remove all dir. 
- ASSERT_EQ(OLAP_SUCCESS, remove_all_dir(config::storage_root_path)); + std::cout << "tear down" << std::endl; + //doris::tear_down(); + //ASSERT_EQ(OLAP_SUCCESS, remove_all_dir(config::storage_root_path)); } }; TEST_F(TestDeltaWriter, open) { TCreateTabletReq request; - create_table_request(&request); - OLAPStatus res = k_engine->create_table(request); + create_tablet_request(10003, 270068375, &request); + OLAPStatus res = k_engine->create_tablet(request); ASSERT_EQ(OLAP_SUCCESS, res); - TDescriptorTable tdesc_tbl = create_descriptor_table(); + TDescriptorTable tdesc_tbl = create_descriptor_tablet(); ObjectPool obj_pool; DescriptorTbl* desc_tbl = nullptr; DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); @@ -311,17 +308,17 @@ TEST_F(TestDeltaWriter, open) { TDropTabletReq drop_request; auto tablet_id = 10003; auto schema_hash = 270068375; - res = k_engine->drop_table(tablet_id, schema_hash); + res = k_engine->tablet_manager()->drop_tablet(tablet_id, schema_hash); ASSERT_EQ(OLAP_SUCCESS, res); } TEST_F(TestDeltaWriter, write) { TCreateTabletReq request; - create_table_request(&request); - OLAPStatus res = k_engine->create_table(request); + create_tablet_request(10004, 270068376, &request); + OLAPStatus res = k_engine->create_tablet(request); ASSERT_EQ(OLAP_SUCCESS, res); - TDescriptorTable tdesc_tbl = create_descriptor_table(); + TDescriptorTable tdesc_tbl = create_descriptor_tablet(); ObjectPool obj_pool; DescriptorTbl* desc_tbl = nullptr; DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); @@ -330,8 +327,8 @@ TEST_F(TestDeltaWriter, write) { PUniqueId load_id; load_id.set_hi(0); load_id.set_lo(0); - WriteRequest write_req = {10003, 270068375, WriteType::LOAD, - 20001, 30001, load_id, false, tuple_desc}; + WriteRequest write_req = {10004, 270068376, WriteType::LOAD, + 20002, 30002, load_id, false, tuple_desc}; DeltaWriter* delta_writer = nullptr; DeltaWriter::open(&write_req, &delta_writer); ASSERT_NE(delta_writer, nullptr); @@ -384,7 +381,7 @@ TEST_F(TestDeltaWriter, write) { var_ptr = (StringValue*)(tuple->get_slot(slots[18]->tuple_offset())); var_ptr->ptr = arena.Allocate(5); memcpy(var_ptr->ptr, "abcde", 5); - var_ptr->len = 5; + var_ptr->len = 5; DecimalValue val_decimal(1.1); *(DecimalValue*)(tuple->get_slot(slots[19]->tuple_offset())) = val_decimal; @@ -394,255 +391,38 @@ TEST_F(TestDeltaWriter, write) { } res = delta_writer->close(nullptr); - ASSERT_EQ(res, OLAP_SUCCESS); - - // publish version success - OLAPTablePtr table = OLAPEngine::get_instance()->get_table(write_req.tablet_id, write_req.schema_hash); - TPublishVersionRequest publish_req; - publish_req.transaction_id = write_req.transaction_id; - TPartitionVersionInfo info; - info.partition_id = write_req.partition_id; - info.version = table->lastest_version()->end_version() + 1; - info.version_hash = table->lastest_version()->version_hash() + 1; - std::vector partition_version_infos; - partition_version_infos.push_back(info); - publish_req.partition_version_infos = partition_version_infos; - std::vector error_tablet_ids; - res = k_engine->publish_version(publish_req, &error_tablet_ids); - - ASSERT_EQ(1, table->get_num_rows()); - - auto tablet_id = 10003; - auto schema_hash = 270068375; - res = k_engine->drop_table(tablet_id, schema_hash); - ASSERT_EQ(OLAP_SUCCESS, res); -} - -// ######################### ALTER TABLE TEST BEGIN ######################### - -void schema_change_request(const TCreateTabletReq& base_request, TCreateTabletReq* request) { - //linked schema change, add a value column - request->tablet_id = 
base_request.tablet_id + 1; - request->__set_version(base_request.version); - request->__set_version_hash(base_request.version_hash); - request->tablet_schema.schema_hash = base_request.tablet_schema.schema_hash + 1; - request->tablet_schema.short_key_column_count = 3; - request->tablet_schema.storage_type = TStorageType::COLUMN; - - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[2]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[3]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[4]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[5]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[6]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[7]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[8]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[9]); - - TColumn v0; - v0.column_name = "v0"; - v0.column_type.type = TPrimitiveType::BIGINT; - v0.__set_is_key(false); - v0.__set_default_value("0"); - v0.__set_aggregation_type(TAggregationType::SUM); - request->tablet_schema.columns.push_back(v0); - - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[10]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[11]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[12]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[13]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[14]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[15]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[16]); - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[17]); - - TColumn v9; - v9.column_name = "v9"; - v9.__set_is_key(false); - v9.column_type.type = TPrimitiveType::VARCHAR; - v9.column_type.__set_len(130); - v9.__set_aggregation_type(TAggregationType::REPLACE); - request->tablet_schema.columns.push_back(v9); - - request->tablet_schema.columns.push_back(base_request.tablet_schema.columns[19]); -} - -AlterTableStatus show_alter_table_status(const TAlterTabletReq& request) { - AlterTableStatus status = ALTER_TABLE_RUNNING; - uint32_t max_retry = MAX_RETRY_TIMES; - while (max_retry > 0) { - status = k_engine->show_alter_table_status( - request.base_tablet_id, request.base_schema_hash); - if (status != ALTER_TABLE_RUNNING) { break; } - LOG(INFO) << "doing alter table......"; - --max_retry; - sleep(1); - } - return status; -} - -class TestSchemaChange : public ::testing::Test { -public: - TestSchemaChange() { } - ~TestSchemaChange() { } - - void SetUp() { - // Create local data dir for OLAPEngine. - char buffer[MAX_PATH_LEN]; - getcwd(buffer, MAX_PATH_LEN); - config::storage_root_path = std::string(buffer) + "/data_schema_change"; - remove_all_dir(config::storage_root_path); - ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - - // Initialize all singleton object. - // OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - } - - void TearDown(){ - // Remove all dir. - ASSERT_EQ(OLAP_SUCCESS, remove_all_dir(config::storage_root_path)); - } -}; - -TEST_F(TestSchemaChange, schema_change) { - OLAPStatus res = OLAP_SUCCESS; - AlterTableStatus status = ALTER_TABLE_WAITING; - - // 1. Prepare for schema change. 
- // create base table - TCreateTabletReq create_base_tablet; - create_table_request(&create_base_tablet); - res = k_engine->create_table(create_base_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - TDescriptorTable tdesc_tbl = create_descriptor_table(); - ObjectPool obj_pool; - DescriptorTbl* desc_tbl = nullptr; - DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); - TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - - PUniqueId load_id; - load_id.set_hi(0); - load_id.set_lo(0); - WriteRequest write_req = {10003, 270068375, WriteType::LOAD, - 20001, 30001, load_id, false, tuple_desc}; - DeltaWriter* delta_writer = nullptr; - DeltaWriter::open(&write_req, &delta_writer); - ASSERT_NE(delta_writer, nullptr); - - const std::vector& slots = tuple_desc->slots(); - Arena arena; - // streaming load data - { - Tuple* tuple = reinterpret_cast(arena.Allocate(tuple_desc->byte_size())); - memset(tuple, 0, tuple_desc->byte_size()); - *(int8_t*)(tuple->get_slot(slots[0]->tuple_offset())) = -127; - *(int16_t*)(tuple->get_slot(slots[1]->tuple_offset())) = -32767; - *(int32_t*)(tuple->get_slot(slots[2]->tuple_offset())) = -2147483647; - *(int64_t*)(tuple->get_slot(slots[3]->tuple_offset())) = -9223372036854775807L; - - int128_t large_int_value = -90000; - memcpy(tuple->get_slot(slots[4]->tuple_offset()), &large_int_value, sizeof(int128_t)); - - ((DateTimeValue*)(tuple->get_slot(slots[5]->tuple_offset())))->from_date_str("2048-11-10", 10); - ((DateTimeValue*)(tuple->get_slot(slots[6]->tuple_offset())))->from_date_str("2636-08-16 19:39:43", 19); - - StringValue* char_ptr = (StringValue*)(tuple->get_slot(slots[7]->tuple_offset())); - char_ptr->ptr = arena.Allocate(4); - memcpy(char_ptr->ptr, "abcd", 4); - char_ptr->len = 4; - - StringValue* var_ptr = (StringValue*)(tuple->get_slot(slots[8]->tuple_offset())); - var_ptr->ptr = arena.Allocate(5); - memcpy(var_ptr->ptr, "abcde", 5); - var_ptr->len = 5; - - DecimalValue decimal_value(1.1); - *(DecimalValue*)(tuple->get_slot(slots[9]->tuple_offset())) = decimal_value; - - *(int8_t*)(tuple->get_slot(slots[10]->tuple_offset())) = -127; - *(int16_t*)(tuple->get_slot(slots[11]->tuple_offset())) = -32767; - *(int32_t*)(tuple->get_slot(slots[12]->tuple_offset())) = -2147483647; - *(int64_t*)(tuple->get_slot(slots[13]->tuple_offset())) = -9223372036854775807L; - - memcpy(tuple->get_slot(slots[14]->tuple_offset()), &large_int_value, sizeof(int128_t)); - - ((DateTimeValue*)(tuple->get_slot(slots[15]->tuple_offset())))->from_date_str("2048-11-10", 10); - ((DateTimeValue*)(tuple->get_slot(slots[16]->tuple_offset())))->from_date_str("2636-08-16 19:39:43", 19); - - char_ptr = (StringValue*)(tuple->get_slot(slots[17]->tuple_offset())); - char_ptr->ptr = arena.Allocate(4); - memcpy(char_ptr->ptr, "abcd", 4); - char_ptr->len = 4; - - var_ptr = (StringValue*)(tuple->get_slot(slots[18]->tuple_offset())); - var_ptr->ptr = arena.Allocate(5); - memcpy(var_ptr->ptr, "abcde", 5); - var_ptr->len = 5; - - DecimalValue val_decimal(1.1); - *(DecimalValue*)(tuple->get_slot(slots[19]->tuple_offset())) = val_decimal; - - res = delta_writer->write(tuple); + // publish version success + TabletSharedPtr tablet = k_engine->tablet_manager()->get_tablet(write_req.tablet_id, write_req.schema_hash); + std::cout << "before publish, tablet row nums:" << tablet->num_rows() << std::endl; + OlapMeta* meta = tablet->data_dir()->get_meta(); + Version version; + version.first = tablet->rowset_with_max_version()->end_version() + 1; + version.second = tablet->rowset_with_max_version()->end_version() 
+ 1; + std::cout << "start to add rowset version:" << version.first << "-" << version.second << std::endl; + VersionHash version_hash = 2; + std::map tablet_related_rs; + StorageEngine::instance()->txn_manager()->get_txn_related_tablets(write_req.txn_id, write_req.partition_id, &tablet_related_rs); + for (auto& tablet_rs : tablet_related_rs) { + std::cout << "start to publish txn" << std::endl; + RowsetSharedPtr rowset = tablet_rs.second; + res = k_engine->txn_manager()->publish_txn(meta, write_req.partition_id, write_req.txn_id, + write_req.tablet_id, write_req.schema_hash, tablet_rs.first.tablet_uid, + version, version_hash); + ASSERT_EQ(OLAP_SUCCESS, res); + std::cout << "start to add inc rowset:" << rowset->rowset_id() << ", num rows:" << rowset->num_rows() + << ", version:" << rowset->version().first << "-" << rowset->version().second + << ", version_hash:" << rowset->version_hash() + << std::endl; + res = tablet->add_inc_rowset(rowset); ASSERT_EQ(OLAP_SUCCESS, res); } + ASSERT_EQ(1, tablet->num_rows()); - // publish version - res = delta_writer->close(nullptr); - ASSERT_EQ(res, OLAP_SUCCESS); - - // publish version success - OLAPTablePtr table = OLAPEngine::get_instance()->get_table(write_req.tablet_id, write_req.schema_hash); - TPublishVersionRequest publish_req; - publish_req.transaction_id = write_req.transaction_id; - TPartitionVersionInfo info; - info.partition_id = write_req.partition_id; - info.version = table->lastest_version()->end_version() + 1; - info.version_hash = table->lastest_version()->version_hash() + 1; - std::vector partition_version_infos; - partition_version_infos.push_back(info); - publish_req.partition_version_infos = partition_version_infos; - std::vector error_tablet_ids; - res = k_engine->publish_version(publish_req, &error_tablet_ids); - ASSERT_EQ(res, OLAP_SUCCESS); - - // 1. set add column request - TCreateTabletReq create_new_tablet; - schema_change_request(create_base_tablet, &create_new_tablet); - TAlterTabletReq request; - request.__set_base_tablet_id(create_base_tablet.tablet_id); - request.__set_base_schema_hash(create_base_tablet.tablet_schema.schema_hash); - request.__set_new_tablet_req(create_new_tablet); - - // 2. Submit schema change - request.base_schema_hash = create_base_tablet.tablet_schema.schema_hash; - res = k_engine->schema_change(request); - ASSERT_EQ(OLAP_SUCCESS, res); - - // 3. Verify schema change result. - // show schema change status - status = show_alter_table_status(request); - ASSERT_EQ(ALTER_TABLE_FINISHED, status); - - // check new tablet information - TTabletInfo tablet_info; - tablet_info.tablet_id = create_new_tablet.tablet_id; - tablet_info.schema_hash = create_new_tablet.tablet_schema.schema_hash; - res = k_engine->report_tablet_info(&tablet_info); - ASSERT_EQ(OLAP_SUCCESS, res); - ASSERT_EQ(info.version, tablet_info.version); - ASSERT_EQ(info.version_hash, tablet_info.version_hash); - ASSERT_EQ(1, tablet_info.row_count); - - // 4. Retry the same schema change request. 
- res = k_engine->schema_change(request); - ASSERT_EQ(OLAP_SUCCESS, res); - status = k_engine->show_alter_table_status( - request.base_tablet_id, request.base_schema_hash); - ASSERT_EQ(ALTER_TABLE_FINISHED, status); - - auto tablet_id = create_new_tablet.tablet_id; - auto schema_hash = create_new_tablet.tablet_schema.schema_hash; - res = k_engine->drop_table(tablet_id, schema_hash); + auto tablet_id = 10003; + auto schema_hash = 270068375; + res = k_engine->tablet_manager()->drop_tablet(tablet_id, schema_hash); ASSERT_EQ(OLAP_SUCCESS, res); } @@ -658,11 +438,9 @@ int main(int argc, char** argv) { int ret = doris::OLAP_SUCCESS; testing::InitGoogleTest(&argc, argv); doris::CpuInfo::init(); - doris::set_up(); ret = RUN_ALL_TESTS(); doris::tear_down(); - google::protobuf::ShutdownProtobufLibrary(); return ret; } diff --git a/be/test/olap/in_list_predicate_test.cpp b/be/test/olap/in_list_predicate_test.cpp index 5b187d4c34aa58..e34ea89c45f828 100644 --- a/be/test/olap/in_list_predicate_test.cpp +++ b/be/test/olap/in_list_predicate_test.cpp @@ -100,25 +100,30 @@ class TestInListPredicate : public testing::Test { } } - void SetFieldInfo(FieldInfo &field_info, std::string name, - FieldType type, FieldAggregationMethod aggregation, - uint32_t length, bool is_allow_null, bool is_key) { - field_info.name = name; - field_info.type = type; - field_info.aggregation = aggregation; - field_info.length = length; - field_info.is_allow_null = is_allow_null; - field_info.is_key = is_key; - field_info.precision = 1000; - field_info.frac = 10000; - field_info.unique_id = 0; - field_info.is_bf_column = false; + void SetTabletSchema(std::string name, + const std::string& type, const std::string& aggregation, + uint32_t length, bool is_allow_null, bool is_key, TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + static int id = 0; + ColumnPB* column = tablet_schema_pb.add_column(); + column->set_unique_id(++id); + column->set_name(name); + column->set_type(type); + column->set_is_key(is_key); + column->set_is_nullable(is_allow_null); + column->set_length(length); + column->set_aggregation(aggregation); + column->set_precision(1000); + column->set_frac(1000); + column->set_is_bf_column(false); + + tablet_schema->init_from_pb(tablet_schema_pb); } - void InitVectorizedBatch(const std::vector& schema, + void InitVectorizedBatch(const TabletSchema* tablet_schema, const std::vector& ids, int size) { - _vectorized_batch = new VectorizedRowBatch(schema, ids, size); + _vectorized_batch = new VectorizedRowBatch(tablet_schema, ids, size); _vectorized_batch->set_size(size); } std::unique_ptr _mem_tracker; @@ -128,17 +133,15 @@ class TestInListPredicate : public testing::Test { #define TEST_IN_LIST_PREDICATE(TYPE, TYPE_NAME, FIELD_TYPE) \ TEST_F(TestInListPredicate, TYPE_NAME##_COLUMN) { \ - std::vector schema; \ - FieldInfo field_info; \ - SetFieldInfo(field_info, std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, \ - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); \ - schema.push_back(field_info); \ + TabletSchema tablet_schema; \ + SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, \ + "REPLACE", 1, false, true, &tablet_schema); \ int size = 10; \ std::vector return_columns; \ - for (int i = 0; i < schema.size(); ++i) { \ + for (int i = 0; i < tablet_schema.num_columns(); ++i) { \ return_columns.push_back(i); \ } \ - InitVectorizedBatch(schema, return_columns, size); \ + InitVectorizedBatch(&tablet_schema, return_columns, size); \ ColumnVector* col_vector = _vectorized_batch->column(0); \ \ /* for no 
nulls */ \ @@ -181,24 +184,22 @@ TEST_F(TestInListPredicate, TYPE_NAME##_COLUMN) { \ ASSERT_EQ(*(col_data + sel[0]), 5); \ } \ -TEST_IN_LIST_PREDICATE(int8_t, TINYINT, OLAP_FIELD_TYPE_TINYINT) -TEST_IN_LIST_PREDICATE(int16_t, SMALLINT, OLAP_FIELD_TYPE_SMALLINT) -TEST_IN_LIST_PREDICATE(int32_t, INT, OLAP_FIELD_TYPE_INT) -TEST_IN_LIST_PREDICATE(int64_t, BIGINT, OLAP_FIELD_TYPE_BIGINT) -TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, OLAP_FIELD_TYPE_LARGEINT) +TEST_IN_LIST_PREDICATE(int8_t, TINYINT, "TINYINT") +TEST_IN_LIST_PREDICATE(int16_t, SMALLINT, "SMALLINT") +TEST_IN_LIST_PREDICATE(int32_t, INT, "INT") +TEST_IN_LIST_PREDICATE(int64_t, BIGINT, "BIGINT") +TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, "LARGEINT") TEST_F(TestInListPredicate, FLOAT_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("FLOAT_COLUMN"), OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -241,17 +242,15 @@ TEST_F(TestInListPredicate, FLOAT_COLUMN) { } TEST_F(TestInListPredicate, DOUBLE_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DOUBLE_COLUMN"), OLAP_FIELD_TYPE_DOUBLE, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -295,17 +294,15 @@ TEST_F(TestInListPredicate, DOUBLE_COLUMN) { } TEST_F(TestInListPredicate, DECIMAL_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DECIMAL_COLUMN"), OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -358,17 +355,15 @@ TEST_F(TestInListPredicate, DECIMAL_COLUMN) { } TEST_F(TestInListPredicate, CHAR_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("STRING_COLUMN"), OLAP_FIELD_TYPE_CHAR, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("STRING_COLUMN"), "CHAR", + "REPLACE", 1, false, true, 
&tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -443,17 +438,15 @@ TEST_F(TestInListPredicate, CHAR_COLUMN) { } TEST_F(TestInListPredicate, VARCHAR_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("STRING_COLUMN"), OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -526,17 +519,15 @@ TEST_F(TestInListPredicate, VARCHAR_COLUMN) { } TEST_F(TestInListPredicate, DATE_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATE_COLUMN"), OLAP_FIELD_TYPE_DATE, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DATE_COLUMN"), "DATE", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -597,17 +588,15 @@ TEST_F(TestInListPredicate, DATE_COLUMN) { } TEST_F(TestInListPredicate, DATETIME_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATETIME_COLUMN"), OLAP_FIELD_TYPE_DATETIME, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls diff --git a/be/test/olap/mock_command_executor.h b/be/test/olap/mock_command_executor.h index 75b1eed6bb87f7..e2f402139b45a7 100644 --- a/be/test/olap/mock_command_executor.h +++ b/be/test/olap/mock_command_executor.h @@ -19,15 +19,15 @@ #define DORIS_BE_SRC_OLAP_MOCK_MOCK_COMMAND_EXECUTOR_H #include "gmock/gmock.h" -#include "olap/olap_engine.h" +#include "olap/storage_engine.h" namespace doris { -class MockCommandExecutor : public OLAPEngine { +class MockCommandExecutor : public StorageEngine { public: - MOCK_METHOD1(create_table, OLAPStatus(const TCreateTabletReq& request)); - MOCK_METHOD2(get_table, OLAPTablePtr(TTabletId tablet_id, TSchemaHash schema_hash)); - MOCK_METHOD1(drop_table, 
OLAPStatus(const TDropTabletReq& request)); + MOCK_METHOD1(create_tablet, OLAPStatus(const TCreateTabletReq& request)); + MOCK_METHOD2(get_tablet, TabletSharedPtr(TTabletId tablet_id, TSchemaHash schema_hash)); + MOCK_METHOD1(drop_tablet, OLAPStatus(const TDropTabletReq& request)); MOCK_METHOD2( push, OLAPStatus(const TPushReq& request, std::vector* tablet_info_vec)); @@ -35,10 +35,10 @@ class MockCommandExecutor : public OLAPEngine { MOCK_METHOD1( report_all_tablets_info, OLAPStatus(std::map* tablets_info)); - MOCK_METHOD1(create_rollup_table, OLAPStatus(const TAlterTabletReq& request)); + MOCK_METHOD1(create_rollup_tablet, OLAPStatus(const TAlterTabletReq& request)); MOCK_METHOD1(schema_change, OLAPStatus(const TAlterTabletReq& request)); MOCK_METHOD2( - show_alter_table_status, + show_alter_tablet_status, AlterTableStatus(TTabletId tablet_id, TSchemaHash schema_hash)); MOCK_METHOD3( make_snapshot, @@ -84,10 +84,10 @@ class MockCommandExecutor : public OLAPEngine { TVersionHash version_hash, uint32_t* checksum)); MOCK_METHOD1(reload_root_path, OLAPStatus(const std::string& root_paths)); - MOCK_METHOD2(check_table_exist, bool(TTabletId tablet_id, TSchemaHash schema_hash)); + MOCK_METHOD2(check_tablet_exist, bool(TTabletId tablet_id, TSchemaHash schema_hash)); MOCK_METHOD1( - get_all_root_path_info, - OLAPStatus(std::vector* root_paths_info)); + get_all_data_dir_info, + OLAPStatus(std::vector* data_dir_infos)); MOCK_METHOD2( publish_version, OLAPStatus(const TPublishVersionRequest& request, @@ -95,13 +95,13 @@ class MockCommandExecutor : public OLAPEngine { MOCK_METHOD3( get_info_before_incremental_clone, std::string( - OLAPTablePtr tablet, + TabletSharedPtr tablet, int64_t committed_version, std::vector* missing_versions)); MOCK_METHOD4( finish_clone, OLAPStatus( - OLAPTablePtr tablet, + TabletSharedPtr tablet, const std::string& clone_dir, int64_t committed_version, bool is_incremental_clone)); diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp index 52c45975611f20..f59b4a5b27a844 100644 --- a/be/test/olap/null_predicate_test.cpp +++ b/be/test/olap/null_predicate_test.cpp @@ -70,25 +70,29 @@ class TestNullPredicate : public testing::Test { } } - void SetFieldInfo(FieldInfo &field_info, std::string name, - FieldType type, FieldAggregationMethod aggregation, - uint32_t length, bool is_allow_null, bool is_key) { - field_info.name = name; - field_info.type = type; - field_info.aggregation = aggregation; - field_info.length = length; - field_info.is_allow_null = is_allow_null; - field_info.is_key = is_key; - field_info.precision = 1000; - field_info.frac = 10000; - field_info.unique_id = 0; - field_info.is_bf_column = false; + void SetTabletSchema(std::string name, + std::string type, std::string aggregation, + uint32_t length, bool is_allow_null, bool is_key, TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + static int id = 0; + ColumnPB* column = tablet_schema_pb.add_column();; + column->set_unique_id(++id); + column->set_name(name); + column->set_type(type); + column->set_is_key(is_key); + column->set_is_nullable(is_allow_null); + column->set_length(length); + column->set_aggregation(aggregation); + column->set_precision(1000); + column->set_frac(1000); + column->set_is_bf_column(false); + tablet_schema->init_from_pb(tablet_schema_pb); } - void InitVectorizedBatch(const std::vector& schema, + void InitVectorizedBatch(const TabletSchema* tablet_schema, const std::vector&ids, int size) { - _vectorized_batch = new 
VectorizedRowBatch(schema, ids, size); + _vectorized_batch = new VectorizedRowBatch(tablet_schema, ids, size); _vectorized_batch->set_size(size); } std::unique_ptr _mem_tracker; @@ -98,17 +102,15 @@ class TestNullPredicate : public testing::Test { #define TEST_IN_LIST_PREDICATE(TYPE, TYPE_NAME, FIELD_TYPE) \ TEST_F(TestNullPredicate, TYPE_NAME##_COLUMN) { \ - std::vector schema; \ - FieldInfo field_info; \ - SetFieldInfo(field_info, std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, \ - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); \ - schema.push_back(field_info); \ + TabletSchema tablet_schema; \ + SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, \ + "REPLACE", 1, false, true, &tablet_schema); \ int size = 10; \ std::vector return_columns; \ - for (int i = 0; i < schema.size(); ++i) { \ + for (int i = 0; i < tablet_schema.num_columns(); ++i) { \ return_columns.push_back(i); \ } \ - InitVectorizedBatch(schema, return_columns, size); \ + InitVectorizedBatch(&tablet_schema, return_columns, size); \ ColumnVector* col_vector = _vectorized_batch->column(0); \ \ /* for no nulls */ \ @@ -141,24 +143,22 @@ TEST_F(TestNullPredicate, TYPE_NAME##_COLUMN) { \ ASSERT_EQ(_vectorized_batch->size(), 5); \ } \ -TEST_IN_LIST_PREDICATE(int8_t, TINYINT, OLAP_FIELD_TYPE_TINYINT) -TEST_IN_LIST_PREDICATE(int16_t, SMALLINT, OLAP_FIELD_TYPE_SMALLINT) -TEST_IN_LIST_PREDICATE(int32_t, INT, OLAP_FIELD_TYPE_INT) -TEST_IN_LIST_PREDICATE(int64_t, BIGINT, OLAP_FIELD_TYPE_BIGINT) -TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, OLAP_FIELD_TYPE_LARGEINT) +TEST_IN_LIST_PREDICATE(int8_t, TINYINT, "TINYINT") +TEST_IN_LIST_PREDICATE(int16_t, SMALLINT, "SMALLINT") +TEST_IN_LIST_PREDICATE(int32_t, INT, "INT") +TEST_IN_LIST_PREDICATE(int64_t, BIGINT, "BIGINT") +TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, "LARGEINT") TEST_F(TestNullPredicate, FLOAT_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("FLOAT_COLUMN"), OLAP_FIELD_TYPE_FLOAT, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -191,17 +191,15 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) { } TEST_F(TestNullPredicate, DOUBLE_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DOUBLE_COLUMN"), OLAP_FIELD_TYPE_DOUBLE, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -235,17 +233,15 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) { } TEST_F(TestNullPredicate, DECIMAL_COLUMN) { - std::vector schema; - FieldInfo field_info; - 
SetFieldInfo(field_info, std::string("DECIMAL_COLUMN"), OLAP_FIELD_TYPE_DECIMAL, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -282,17 +278,15 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) { } TEST_F(TestNullPredicate, STRING_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("STRING_COLUMN"), OLAP_FIELD_TYPE_VARCHAR, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", + "REPLACE", 1, false, true, &tablet_schema); int size = 10; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -339,17 +333,15 @@ TEST_F(TestNullPredicate, STRING_COLUMN) { } TEST_F(TestNullPredicate, DATE_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATE_COLUMN"), OLAP_FIELD_TYPE_DATE, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DATE_COLUMN"), "DATE", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls @@ -394,17 +386,15 @@ TEST_F(TestNullPredicate, DATE_COLUMN) { } TEST_F(TestNullPredicate, DATETIME_COLUMN) { - std::vector schema; - FieldInfo field_info; - SetFieldInfo(field_info, std::string("DATETIME_COLUMN"), OLAP_FIELD_TYPE_DATETIME, - OLAP_FIELD_AGGREGATION_REPLACE, 1, false, true); - schema.push_back(field_info); + TabletSchema tablet_schema; + SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", + "REPLACE", 1, false, true, &tablet_schema); int size = 6; std::vector return_columns; - for (int i = 0; i < schema.size(); ++i) { + for (int i = 0; i < tablet_schema.num_columns(); ++i) { return_columns.push_back(i); } - InitVectorizedBatch(schema, return_columns, size); + InitVectorizedBatch(&tablet_schema, return_columns, size); ColumnVector* col_vector = _vectorized_batch->column(0); // for no nulls diff --git a/be/test/olap/olap_header_manager_test.cpp b/be/test/olap/olap_header_manager_test.cpp deleted file mode 100755 index 0774a92c7b7cfb..00000000000000 --- a/be/test/olap/olap_header_manager_test.cpp +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include -#include -#include - -#include "olap/store.h" -#include "olap/olap_header_manager.h" -#include "olap/olap_define.h" -#include "util/file_utils.h" - -#ifndef BE_TEST -#define BE_TEST -#endif - -using std::string; - -namespace doris { - -const std::string header_path = "./be/test/olap/test_data/header.txt"; - -class OlapHeaderManagerTest : public testing::Test { -public: - virtual void SetUp() { - _root_path = "./ut_dir/olap_header_mgr_test"; - FileUtils::remove_all(_root_path); - FileUtils::create_dir(_root_path); - _store = new(std::nothrow) OlapStore(_root_path); - ASSERT_NE(nullptr, _store); - Status st = _store->load(); - ASSERT_TRUE(st.ok()); - ASSERT_TRUE(boost::filesystem::exists(_root_path + "/meta")); - - std::ifstream infile(header_path); - char buffer[1024]; - while (!infile.eof()) { - infile.getline(buffer, 1024); - _json_header = _json_header + buffer + "\n"; - } - _json_header = _json_header.substr(0, _json_header.size() - 1); - _json_header = _json_header.substr(0, _json_header.size() - 1); - std::cout << "set up finish" << std::endl; - } - - virtual void TearDown() { - delete _store; - ASSERT_TRUE(boost::filesystem::remove_all(_root_path)); - } - -private: - std::string _root_path; - OlapStore* _store; - std::string _json_header; -}; - -TEST_F(OlapHeaderManagerTest, TestConvertedFlag) { - bool converted_flag; - OLAPStatus s = OlapHeaderManager::get_header_converted(_store, converted_flag); - ASSERT_EQ(false, converted_flag); - s = OlapHeaderManager::set_converted_flag(_store); - ASSERT_EQ(OLAP_SUCCESS, s); - s = OlapHeaderManager::get_header_converted(_store, converted_flag); - ASSERT_EQ(true, converted_flag); -} - -TEST_F(OlapHeaderManagerTest, TestSaveAndGetAndRemove) { - const TTabletId tablet_id = 20487; - const TSchemaHash schema_hash = 1520686811; - OLAPHeader header; - bool ret = json2pb::JsonToProtoMessage(_json_header, &header); - ASSERT_TRUE(ret); - OLAPStatus s = OlapHeaderManager::save(_store, tablet_id, schema_hash, &header); - ASSERT_EQ(OLAP_SUCCESS, s); - std::string json_header_read; - s = OlapHeaderManager::get_json_header(_store, tablet_id, schema_hash, &json_header_read); - ASSERT_EQ(OLAP_SUCCESS, s); - ASSERT_EQ(_json_header, json_header_read); - s = OlapHeaderManager::remove(_store, tablet_id, schema_hash); - ASSERT_EQ(OLAP_SUCCESS, s); - OLAPHeader header_read; - s = OlapHeaderManager::get_header(_store, tablet_id, schema_hash, &header_read); - ASSERT_EQ(OLAP_ERR_META_KEY_NOT_FOUND, s); -} - -TEST_F(OlapHeaderManagerTest, TestLoad) { - const TTabletId tablet_id = 20487; - const TSchemaHash schema_hash = 1520686811; - OLAPStatus s = OlapHeaderManager::load_json_header(_store, header_path); - ASSERT_EQ(OLAP_SUCCESS, s); - std::string json_header_read; - s = OlapHeaderManager::get_json_header(_store, tablet_id, 
schema_hash, &json_header_read); - ASSERT_EQ(OLAP_SUCCESS, s); - ASSERT_EQ(_json_header, json_header_read); -} - -} // namespace doris - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/be/test/olap/olap_meta_test.cpp b/be/test/olap/olap_meta_test.cpp index fd33321dff2717..8cf7c3fde2dbb5 100644 --- a/be/test/olap/olap_meta_test.cpp +++ b/be/test/olap/olap_meta_test.cpp @@ -68,12 +68,12 @@ TEST_F(OlapMetaTest, TestPutAndGet) { OLAPStatus s = _meta->put(META_COLUMN_FAMILY_INDEX, key, value); ASSERT_EQ(OLAP_SUCCESS, s); std::string value_get; - s = _meta->get(META_COLUMN_FAMILY_INDEX, key, value_get); + s = _meta->get(META_COLUMN_FAMILY_INDEX, key, &value_get); ASSERT_EQ(OLAP_SUCCESS, s); ASSERT_EQ(value, value_get); // abnormal cases - s = _meta->get(META_COLUMN_FAMILY_INDEX, "key_not_exist", value_get); + s = _meta->get(META_COLUMN_FAMILY_INDEX, "key_not_exist", &value_get); ASSERT_EQ(OLAP_ERR_META_KEY_NOT_FOUND, s); } @@ -84,7 +84,7 @@ TEST_F(OlapMetaTest, TestRemove) { OLAPStatus s = _meta->put(META_COLUMN_FAMILY_INDEX, key, value); ASSERT_EQ(OLAP_SUCCESS, s); std::string value_get; - s = _meta->get(META_COLUMN_FAMILY_INDEX, key, value_get); + s = _meta->get(META_COLUMN_FAMILY_INDEX, key, &value_get); ASSERT_EQ(OLAP_SUCCESS, s); ASSERT_EQ(value, value_get); s = _meta->remove(META_COLUMN_FAMILY_INDEX, key); diff --git a/be/test/olap/olap_reader_test.cpp b/be/test/olap/olap_reader_test.cpp index 8a731b6fb16039..d7f9261b114b7f 100755 --- a/be/test/olap/olap_reader_test.cpp +++ b/be/test/olap/olap_reader_test.cpp @@ -115,14 +115,14 @@ void set_default_create_tablet_request(TCreateTabletReq* request) { // SQL for generate data(./be/test/olap/test_data/all_types_1000): // -// create table delete_test_row (k1 tinyint, k2 int, k3 varchar(64), +// create tablet delete_test_row (k1 tinyint, k2 int, k3 varchar(64), // k4 date, k5 datetime, k6 decimal(6,3), k7 smallint default "0", // k8 char(16) default "char", v bigint sum) engine=olap distributed by // random buckets 1 properties ("storage_type" = "row"); // // load label label1 (data infile // ("hdfs://host:port/dir") -// into table `delete_test_row` (k1,k2,v,k3,k4,k5,k6)); +// into tablet `delete_test_row` (k1,k2,v,k3,k4,k5,k6)); void set_default_push_request(TPushReq* request) { request->tablet_id = 10003; request->schema_hash = 1508825676; @@ -146,7 +146,7 @@ class TestOLAPReaderRow : public testing::Test { void TearDown() { // Remove all dir. - OLAPEngine::get_instance()->drop_table( + StorageEngine::get_instance()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_path.c_str(), F_OK)) { sleep(1); @@ -155,22 +155,19 @@ class TestOLAPReaderRow : public testing::Test { } void init_olap() { - // Create local data dir for OLAPEngine. - config::storage_root_path = "./test_run/row_table"; + // Create local data dir for StorageEngine. + config::storage_root_path = "./test_run/row_tablet"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. 
// create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); CommandExecutor command_executor = CommandExecutor(); - res = command_executor.create_table(_create_tablet); + res = command_executor.create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - OLAPTablePtr tablet = command_executor.get_table( + TabletSharedPtr tablet = command_executor.get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -190,21 +187,21 @@ class TestOLAPReaderRow : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -220,7 +217,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -236,7 +233,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k2"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int32_t); } ++i; @@ -252,7 +249,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k3"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -268,7 +265,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k4"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -284,7 +281,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k5"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -300,7 +297,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k6"); - 
t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(decimal12_t); } ++i; @@ -316,7 +313,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k7"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int16_t); } ++i; @@ -332,7 +329,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k8"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -348,11 +345,11 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -360,9 +357,9 @@ class TestOLAPReaderRow : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); } void init_scan_node_k1_v() { @@ -372,21 +369,21 @@ class TestOLAPReaderRow : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -402,7 +399,7 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -418,11 +415,11 @@ class TestOLAPReaderRow : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - 
t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -430,9 +427,9 @@ class TestOLAPReaderRow : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); } private: @@ -681,7 +678,7 @@ class TestOLAPReaderColumn : public testing::Test { void TearDown() { // Remove all dir. - OLAPEngine::get_instance()->drop_table( + StorageEngine::get_instance()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_path.c_str(), F_OK)) { sleep(1); @@ -690,23 +687,20 @@ class TestOLAPReaderColumn : public testing::Test { } void init_olap() { - // Create local data dir for OLAPEngine. - config::storage_root_path = "./test_run/column_table"; + // Create local data dir for StorageEngine. + config::storage_root_path = "./test_run/column_tablet"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. // create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); _create_tablet.tablet_schema.storage_type = TStorageType::COLUMN; CommandExecutor command_executor = CommandExecutor(); - res = command_executor.create_table(_create_tablet); + res = command_executor.create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - OLAPTablePtr tablet = command_executor.get_table( + TabletSharedPtr tablet = command_executor.get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -726,21 +720,21 @@ class TestOLAPReaderColumn : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -756,7 +750,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + 
t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -772,7 +766,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k2"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int32_t); } ++i; @@ -788,7 +782,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k3"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -804,7 +798,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k4"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -820,7 +814,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k5"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -836,7 +830,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k6"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(decimal12_t); } ++i; @@ -852,7 +846,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k7"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int16_t); } ++i; @@ -868,7 +862,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k8"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -884,11 +878,11 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -896,9 +890,9 @@ class TestOLAPReaderColumn : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); } @@ -909,21 +903,21 @@ class TestOLAPReaderColumn : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - 
t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -939,7 +933,7 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -955,11 +949,11 @@ class TestOLAPReaderColumn : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -967,9 +961,9 @@ class TestOLAPReaderColumn : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); } private: @@ -1165,7 +1159,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { void TearDown() { // Remove all dir. - OLAPEngine::get_instance()->drop_table( + StorageEngine::get_instance()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_path.c_str(), F_OK)) { sleep(1); @@ -1174,22 +1168,19 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { } void init_olap() { - // Create local data dir for OLAPEngine. - config::storage_root_path = "./test_run/row_table"; + // Create local data dir for StorageEngine. + config::storage_root_path = "./test_run/row_tablet"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. 
// create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); CommandExecutor command_executor = CommandExecutor(); - res = command_executor.create_table(_create_tablet); + res = command_executor.create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - OLAPTablePtr tablet = command_executor.get_table( + TabletSharedPtr tablet = command_executor.get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_path = tablet->tablet_path(); @@ -1226,21 +1217,21 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -1256,7 +1247,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -1272,7 +1263,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k2"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int32_t); } ++i; @@ -1288,7 +1279,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k3"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -1304,7 +1295,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k4"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -1320,7 +1311,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k5"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -1336,7 +1327,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { 
t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k6"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(decimal12_t); } ++i; @@ -1352,7 +1343,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k7"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int16_t); } ++i; @@ -1368,7 +1359,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k8"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -1384,11 +1375,11 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -1396,9 +1387,9 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); } @@ -1409,21 +1400,21 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -1439,7 +1430,7 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -1455,11 +1446,11 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_slot_desc.__set_slotIdx(i); 
t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -1467,9 +1458,9 @@ class TestOLAPReaderColumnDeleteCondition : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); } private: diff --git a/be/test/olap/olap_snapshot_converter_test.cpp b/be/test/olap/olap_snapshot_converter_test.cpp new file mode 100755 index 00000000000000..e6db1f6a3d1319 --- /dev/null +++ b/be/test/olap/olap_snapshot_converter_test.cpp @@ -0,0 +1,242 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
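+
+// Covers OlapSnapshotConverter round-tripping between the legacy snapshot
+// format and the new rowset-based format, as exercised below:
+//   * to_new_snapshot(): parse olap_header.json into an OLAPHeaderMessage via
+//     json2pb and convert it into a TabletMetaPB plus pending rowset metas,
+//     then check that every visible, incremental and pending delta of the old
+//     header shows up in rs_metas / inc_rs_metas / the pending rowset list,
+//     and that each converted meta can back an AlphaRowset that inits, loads
+//     and cleans up the old-format files.
+//   * to_old_snapshot(): convert the TabletMetaPB back into an
+//     OLAPHeaderMessage and check that version ranges and version hashes
+//     survive the round trip.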
+ +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "olap/lru_cache.h" +#include "olap/olap_meta.h" +#include "olap/olap_snapshot_converter.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/txn_manager.h" +#include "olap/new_status.h" +#include +#include "boost/filesystem.hpp" +#include "json2pb/json_to_pb.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; +using std::string; + +namespace doris { + +class OlapSnapshotConverterTest : public testing::Test { +public: + virtual void SetUp() { + auto cache = new_lru_cache(config::file_descriptor_cache_capacity); + FileHandler::set_fd_cache(cache); + string test_engine_data_path = "./be/test/olap/test_data/converter_test_data/data"; + _engine_data_path = "./be/test/olap/test_data/converter_test_data/tmp"; + boost::filesystem::remove_all(_engine_data_path); + create_dirs(_engine_data_path); + _data_dir = new DataDir(_engine_data_path, 1000000000); + _data_dir->init(); + _meta_path = "./meta"; + string tmp_data_path = _engine_data_path + "/data"; + if (boost::filesystem::exists(tmp_data_path)) { + boost::filesystem::remove_all(tmp_data_path); + } + copy_dir(test_engine_data_path, tmp_data_path); + _tablet_id = 15007; + _schema_hash = 368169781; + _tablet_data_path = tmp_data_path + + "/" + std::to_string(0) + + "/" + std::to_string(_tablet_id) + + "/" + std::to_string(_schema_hash); + if (boost::filesystem::exists(_meta_path)) { + boost::filesystem::remove_all(_meta_path); + } + ASSERT_TRUE(boost::filesystem::create_directory(_meta_path)); + ASSERT_TRUE(boost::filesystem::exists(_meta_path)); + _meta = new(std::nothrow) OlapMeta(_meta_path); + ASSERT_NE(nullptr, _meta); + OLAPStatus st = _meta->init(); + ASSERT_TRUE(st == OLAP_SUCCESS); + } + + virtual void TearDown() { + delete _meta; + delete _data_dir; + if (boost::filesystem::exists(_meta_path)) { + ASSERT_TRUE(boost::filesystem::remove_all(_meta_path)); + } + if (boost::filesystem::exists(_engine_data_path)) { + ASSERT_TRUE(boost::filesystem::remove_all(_engine_data_path)); + } + } + +private: + DataDir* _data_dir; + OlapMeta* _meta; + std::string _json_rowset_meta; + TxnManager _txn_mgr; + std::string _engine_data_path; + std::string _meta_path; + int64_t _tablet_id; + int32_t _schema_hash; + string _tablet_data_path; +}; + +TEST_F(OlapSnapshotConverterTest, ToNewAndToOldSnapshot) { + // --- start to convert old snapshot to new snapshot + string header_file_path = _tablet_data_path + "/" + "olap_header.json"; + std::ifstream infile(header_file_path); + string buffer; + std::string json_header; + while (getline(infile, buffer)) { + json_header = json_header + buffer; + } + boost::algorithm::trim(json_header); + OLAPHeaderMessage header_msg; + bool ret = json2pb::JsonToProtoMessage(json_header, &header_msg); + ASSERT_TRUE(ret); + OlapSnapshotConverter converter; + TabletMetaPB tablet_meta_pb; + vector pending_rowsets; + OLAPStatus status = converter.to_new_snapshot(header_msg, _tablet_data_path, _tablet_data_path, + *_data_dir, &tablet_meta_pb, &pending_rowsets, true); + ASSERT_TRUE(status == OLAP_SUCCESS); + + TabletSchema tablet_schema; + tablet_schema.init_from_pb(tablet_meta_pb.schema()); + string data_path_prefix = _data_dir->get_absolute_tablet_path(&tablet_meta_pb, true); + // check converted new tabletmeta pb and its files + // check visible delta + 
ASSERT_TRUE(tablet_meta_pb.rs_metas().size() == header_msg.delta().size()); + for (auto& pdelta : header_msg.delta()) { + int64_t start_version = pdelta.start_version(); + int64_t end_version = pdelta.end_version(); + int64_t version_hash = pdelta.version_hash(); + bool found = false; + for (auto& visible_rowset : tablet_meta_pb.rs_metas()) { + if (visible_rowset.start_version() == start_version + && visible_rowset.end_version() == end_version + && visible_rowset.version_hash() == version_hash) { + found = true; + } + } + ASSERT_TRUE(found); + } + for (auto& visible_rowset : tablet_meta_pb.rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(visible_rowset); + AlphaRowset rowset(&tablet_schema, data_path_prefix, _data_dir, alpha_rowset_meta); + ASSERT_TRUE(rowset.init() == OLAP_SUCCESS); + ASSERT_TRUE(rowset.load() == OLAP_SUCCESS); + std::vector old_files; + rowset.remove_old_files(&old_files); + } + // check incremental delta + ASSERT_TRUE(tablet_meta_pb.inc_rs_metas().size() == header_msg.incremental_delta().size()); + for (auto& pdelta : header_msg.incremental_delta()) { + int64_t start_version = pdelta.start_version(); + int64_t end_version = pdelta.end_version(); + int64_t version_hash = pdelta.version_hash(); + bool found = false; + for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) { + if (inc_rowset.start_version() == start_version + && inc_rowset.end_version() == end_version + && inc_rowset.version_hash() == version_hash) { + found = true; + } + } + ASSERT_TRUE(found); + } + for (auto& inc_rowset : tablet_meta_pb.inc_rs_metas()) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(inc_rowset); + AlphaRowset rowset(&tablet_schema, data_path_prefix, _data_dir, alpha_rowset_meta); + ASSERT_TRUE(rowset.init() == OLAP_SUCCESS); + ASSERT_TRUE(rowset.load() == OLAP_SUCCESS); + AlphaRowset tmp_rowset(&tablet_schema, data_path_prefix + "/incremental_delta", + _data_dir, alpha_rowset_meta); + ASSERT_TRUE(tmp_rowset.init() == OLAP_SUCCESS); + std::vector old_files; + tmp_rowset.remove_old_files(&old_files); + } + // check pending delta + ASSERT_TRUE(pending_rowsets.size() == header_msg.pending_delta().size()); + for (auto& pdelta : header_msg.pending_delta()) { + int64_t partition_id = pdelta.partition_id(); + int64_t transaction_id = pdelta.transaction_id(); + bool found = false; + for (auto& pending_rowset : pending_rowsets) { + if (pending_rowset.partition_id() == partition_id + && pending_rowset.txn_id() == transaction_id + && pending_rowset.tablet_uid().hi() == tablet_meta_pb.tablet_uid().hi() + && pending_rowset.tablet_uid().lo() == tablet_meta_pb.tablet_uid().lo()) { + found = true; + } + } + ASSERT_TRUE(found); + } + for (auto& pending_rowset : pending_rowsets) { + RowsetMetaSharedPtr alpha_rowset_meta(new AlphaRowsetMeta()); + alpha_rowset_meta->init_from_pb(pending_rowset); + AlphaRowset rowset(&tablet_schema, data_path_prefix, _data_dir, alpha_rowset_meta); + ASSERT_TRUE(rowset.init() == OLAP_SUCCESS); + ASSERT_TRUE(rowset.load() == OLAP_SUCCESS); + std::vector old_files; + rowset.remove_old_files(&old_files); + } + + // old files are removed, then convert new snapshot to old snapshot + OLAPHeaderMessage old_header_msg; + status = converter.to_old_snapshot(tablet_meta_pb, _tablet_data_path, + _tablet_data_path, &old_header_msg); + ASSERT_TRUE(status == OLAP_SUCCESS); + for (auto& pdelta : header_msg.delta()) { + bool found = false; + for (auto& converted_pdelta : 
old_header_msg.delta()) { + if (converted_pdelta.start_version() == pdelta.start_version() + && converted_pdelta.end_version() == pdelta.end_version() + && converted_pdelta.version_hash() == pdelta.version_hash()) { + found = true; + } + } + ASSERT_TRUE(found); + } + for (auto& pdelta : header_msg.incremental_delta()) { + bool found = false; + for (auto& converted_pdelta : old_header_msg.incremental_delta()) { + if (converted_pdelta.start_version() == pdelta.start_version() + && converted_pdelta.end_version() == pdelta.end_version() + && converted_pdelta.version_hash() == pdelta.version_hash()) { + found = true; + } + } + ASSERT_TRUE(found); + } +} + +} // namespace doris + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/row_block_test.cpp b/be/test/olap/row_block_test.cpp index 833ca65a1f404a..785bc1f16c6367 100644 --- a/be/test/olap/row_block_test.cpp +++ b/be/test/olap/row_block_test.cpp @@ -22,7 +22,7 @@ #include "exprs/expr.h" #include "exprs/expr_context.h" #include "olap/row_block.h" -#include "olap/olap_table.h" +#include "olap/tablet.h" #include "runtime/runtime_state.h" #include "util/debug_util.h" #include "util/logging.h" @@ -45,46 +45,51 @@ class TestRowBlock : public testing::Test { } }; -TEST_F(TestRowBlock, init) { - std::vector fields; +void init_tablet_schema(TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; { // k1: bigint { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); + ColumnPB* column_1 = tablet_schema_pb.add_column(); + column_1->set_unique_id(1); + column_1->set_name("k1"); + column_1->set_type("BIGINT"); + column_1->set_is_key(true); + column_1->set_length(8); + column_1->set_aggregation("NONE"); } // k2: char { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); + ColumnPB* column_2 = tablet_schema_pb.add_column(); + column_2->set_unique_id(2); + column_2->set_name("k2"); + column_2->set_type("CHAR"); + column_2->set_is_key(true); + column_2->set_length(10); + column_2->set_aggregation("NONE"); } // k3: varchar { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); + ColumnPB* column_3 = tablet_schema_pb.add_column(); + column_3->set_unique_id(3); + column_3->set_name("k3"); + column_3->set_type("VARCHAR"); + column_3->set_is_key(true); + column_3->set_length(20); + column_3->set_aggregation("NONE"); } } + tablet_schema->init_from_pb(tablet_schema_pb); +} + +TEST_F(TestRowBlock, init) { + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); { // has nullbyte - RowBlock block(fields); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); @@ -92,20 +97,18 @@ TEST_F(TestRowBlock, init) { } { // has nullbyte - RowBlock block(fields); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = false; auto res = block.init(block_info); 
ASSERT_EQ(OLAP_SUCCESS, res); ASSERT_EQ(9 + 17 + 17, block._mem_row_bytes); } { - RowBlock block(fields); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; block_info.column_ids.push_back(1); auto res = block.init(block_info); @@ -119,50 +122,17 @@ TEST_F(TestRowBlock, init) { } TEST_F(TestRowBlock, write_and_read) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); RowCursor row; - row.init(fields); + row.init(tablet_schema); for (int i = 0; i < 5; ++i) { block.get_row(i, &row); @@ -194,50 +164,17 @@ TEST_F(TestRowBlock, write_and_read) { } TEST_F(TestRowBlock, write_and_read_without_nullbyte) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = false; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); RowCursor row; - row.init(fields); + row.init(tablet_schema); for (int i = 0; i < 5; ++i) { block.get_row(i, &row); @@ -269,50 +206,17 @@ TEST_F(TestRowBlock, write_and_read_without_nullbyte) { } TEST_F(TestRowBlock, compress_failed) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - 
info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); RowCursor row; - row.init(fields); + row.init(tablet_schema); for (int i = 0; i < 5; ++i) { block.get_row(i, &row); @@ -341,50 +245,17 @@ TEST_F(TestRowBlock, compress_failed) { } TEST_F(TestRowBlock, decompress_failed) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); RowCursor row; - row.init(fields); + row.init(tablet_schema); for (int i = 0; i < 5; ++i) { block.get_row(i, &row); @@ -413,50 +284,17 @@ TEST_F(TestRowBlock, decompress_failed) { } TEST_F(TestRowBlock, find_row) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); RowCursor row; - row.init(fields); + row.init(tablet_schema); for (int i = 0; i < 5; ++i) { block.get_row(i, &row); @@ -488,7 +326,7 @@ TEST_F(TestRowBlock, find_row) { { RowCursor find_row; - find_row.init(fields); + find_row.init(tablet_schema); for (int i = 0; i < 5; ++i) { // bigint { @@ -575,44 +413,11 @@ TEST_F(TestRowBlock, find_row) { } TEST_F(TestRowBlock, clear) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = 
"k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); @@ -625,44 +430,11 @@ TEST_F(TestRowBlock, clear) { } TEST_F(TestRowBlock, pos_limit) { - std::vector fields; - { - // k1: bigint - { - FieldInfo info; - info.name = "k1"; - info.type = OLAP_FIELD_TYPE_BIGINT; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 8; - info.is_key = true; - fields.push_back(info); - } - // k2: char - { - FieldInfo info; - info.name = "k2"; - info.type = OLAP_FIELD_TYPE_CHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 10; - info.is_key = true; - fields.push_back(info); - } - // k3: varchar - { - FieldInfo info; - info.name = "k3"; - info.type = OLAP_FIELD_TYPE_VARCHAR; - info.aggregation = OLAP_FIELD_AGGREGATION_NONE; - info.length = 20; - info.is_key = true; - fields.push_back(info); - } - } - // - RowBlock block(fields); + TabletSchema tablet_schema; + init_tablet_schema(&tablet_schema); + RowBlock block(&tablet_schema); RowBlockInfo block_info; block_info.row_num = 1024; - block_info.data_file_type = COLUMN_ORIENTED_FILE; block_info.null_supported = true; auto res = block.init(block_info); ASSERT_EQ(OLAP_SUCCESS, res); diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp index 604126d0f0cd05..1f4fae9659e0d5 100644 --- a/be/test/olap/row_cursor_test.cpp +++ b/be/test/olap/row_cursor_test.cpp @@ -18,220 +18,232 @@ #include #include "olap/row_cursor.h" +#include "olap/tablet_schema.h" #include "runtime/mem_tracker.h" #include "runtime/mem_pool.h" #include "util/logging.h" namespace doris { -void set_tablet_schema_for_init(std::vector* tablet_schema) { - FieldInfo k1; - k1.name = "k1"; - k1.type = OLAP_FIELD_TYPE_TINYINT; - k1.length = 1; - k1.is_key = true; - k1.index_length = 1; - k1.is_allow_null = true; - tablet_schema->push_back(k1); - - FieldInfo k2; - k2.name = "k2"; - k2.type = OLAP_FIELD_TYPE_SMALLINT; - k2.length = 2; - k2.default_value = "0"; - k2.is_key = true; - k2.index_length = 2; - k2.is_allow_null = true; - tablet_schema->push_back(k2); - - FieldInfo k3; - k3.name = "k3"; - k3.type = OLAP_FIELD_TYPE_INT; - k3.length = 4; - k3.is_key = true; - k3.index_length = 4; - k3.is_allow_null = true; - tablet_schema->push_back(k3); - - FieldInfo k4; - k4.name = "k4"; - k4.type = OLAP_FIELD_TYPE_DATE; - k4.length = 3; - k4.is_key = true; - k4.index_length = 3; - k4.is_allow_null = true; - tablet_schema->push_back(k4); - - FieldInfo k5; - k5.name = "k5"; - k5.type = OLAP_FIELD_TYPE_DATETIME; - k5.length = 8; - k5.is_key = true; - k5.index_length = 8; - k5.is_allow_null = true; - tablet_schema->push_back(k5); - - FieldInfo k6; - k6.name = "k6"; - k6.type = OLAP_FIELD_TYPE_DECIMAL; - k6.length = 12; - k6.precision = 6; - k6.frac = 3; - k6.is_key = true; - k6.index_length = 12; - k6.is_allow_null = true; - tablet_schema->push_back(k6); - - FieldInfo k7; - k7.name = "k7"; - k7.type = 
OLAP_FIELD_TYPE_CHAR; - k7.length = 4; - k7.default_value = "char"; - k7.is_key = true; - k7.index_length = 4; - k7.is_allow_null = true; - tablet_schema->push_back(k7); - - FieldInfo v1; - v1.name = "v1"; - v1.type = OLAP_FIELD_TYPE_BIGINT; - v1.length = 8; - v1.aggregation = OLAP_FIELD_AGGREGATION_SUM; - v1.is_key = false; - v1.is_allow_null = true; - tablet_schema->push_back(v1); - - FieldInfo v2; - v2.name = "v2"; - v2.type = OLAP_FIELD_TYPE_VARCHAR; - v2.length = 16 + OLAP_STRING_MAX_BYTES; - v2.aggregation = OLAP_FIELD_AGGREGATION_REPLACE; - v2.is_key = false; - v2.is_allow_null = true; - tablet_schema->push_back(v2); - - FieldInfo v3; - v3.name = "v3"; - v3.type = OLAP_FIELD_TYPE_LARGEINT; - v3.length = 16; - v3.aggregation = OLAP_FIELD_AGGREGATION_MAX; - v3.is_key = false; - v3.is_allow_null = true; - tablet_schema->push_back(v3); - - FieldInfo v4; - v4.name = "v4"; - v4.type = OLAP_FIELD_TYPE_DECIMAL; - v4.length = 12; - v4.aggregation = OLAP_FIELD_AGGREGATION_MIN; - v4.is_key = false; - v4.is_allow_null = true; - tablet_schema->push_back(v4); - - FieldInfo v5; - v5.name = "v5"; - v5.type = OLAP_FIELD_TYPE_HLL; - v5.length = HLL_COLUMN_DEFAULT_LEN; - v5.aggregation = OLAP_FIELD_AGGREGATION_HLL_UNION; - v5.is_key = false; - v5.is_allow_null = true; - tablet_schema->push_back(v5); +void set_tablet_schema_for_init(TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + ColumnPB* column_1 = tablet_schema_pb.add_column(); + column_1->set_unique_id(1); + column_1->set_name("column_1"); + column_1->set_type("TINYINT"); + column_1->set_is_key(true); + column_1->set_is_nullable(true); + column_1->set_length(1); + column_1->set_index_length(1); + + ColumnPB* column_2 = tablet_schema_pb.add_column(); + column_2->set_unique_id(2); + column_2->set_name("column_2"); + column_2->set_type("SMALLINT"); + column_2->set_is_key(true); + column_2->set_is_nullable(true); + column_2->set_length(2); + column_2->set_index_length(2); + column_2->set_default_value("0"); + + ColumnPB* column_3 = tablet_schema_pb.add_column(); + column_3->set_unique_id(3); + column_3->set_name("column_3"); + column_3->set_type("INT"); + column_3->set_is_key(true); + column_3->set_is_nullable(true); + column_3->set_length(4); + column_3->set_index_length(4); + + ColumnPB* column_4 = tablet_schema_pb.add_column(); + column_4->set_unique_id(4); + column_4->set_name("column_4"); + column_4->set_type("DATE"); + column_4->set_is_key(true); + column_4->set_is_nullable(true); + column_4->set_length(3); + column_4->set_index_length(3); + + ColumnPB* column_5 = tablet_schema_pb.add_column(); + column_5->set_unique_id(5); + column_5->set_name("column_5"); + column_5->set_type("DATETIME"); + column_5->set_is_key(true); + column_5->set_is_nullable(true); + column_5->set_length(8); + column_5->set_index_length(8); + + ColumnPB* column_6 = tablet_schema_pb.add_column(); + column_6->set_unique_id(6); + column_6->set_name("column_6"); + column_6->set_type("DECIMAL"); + column_6->set_is_key(true); + column_6->set_is_nullable(true); + column_6->set_length(12); + column_6->set_index_length(12); + column_6->set_frac(3); + column_6->set_precision(6); + + ColumnPB* column_7 = tablet_schema_pb.add_column(); + column_7->set_unique_id(7); + column_7->set_name("column_7"); + column_7->set_type("CHAR"); + column_7->set_is_key(true); + column_7->set_is_nullable(true); + column_7->set_length(4); + column_7->set_index_length(4); + column_7->set_default_value("char"); + + ColumnPB* column_8 = tablet_schema_pb.add_column(); + 
column_8->set_unique_id(8); + column_8->set_name("column_8"); + column_8->set_type("BIGINT"); + column_8->set_is_nullable(true); + column_8->set_length(8); + column_8->set_aggregation("SUM"); + column_8->set_is_key(false); + + ColumnPB* column_9 = tablet_schema_pb.add_column(); + column_9->set_unique_id(9); + column_9->set_name("column_9"); + column_9->set_type("VARCHAR"); + column_9->set_is_nullable(true); + column_9->set_length(16 + OLAP_STRING_MAX_BYTES); + column_9->set_aggregation("REPLACE"); + column_9->set_is_key(false); + + ColumnPB* column_10 = tablet_schema_pb.add_column(); + column_10->set_unique_id(10); + column_10->set_name("column_10"); + column_10->set_type("LARGEINT"); + column_10->set_is_nullable(true); + column_10->set_length(16); + column_10->set_aggregation("MAX"); + column_10->set_is_key(false); + + ColumnPB* column_11 = tablet_schema_pb.add_column(); + column_11->set_unique_id(11); + column_11->set_name("column_11"); + column_11->set_type("DECIMAL"); + column_11->set_is_nullable(true); + column_11->set_length(12); + column_11->set_aggregation("MIN"); + column_11->set_is_key(false); + + ColumnPB* column_12 = tablet_schema_pb.add_column(); + column_12->set_unique_id(12); + column_12->set_name("column_12"); + column_12->set_type("HLL"); + column_12->set_is_nullable(true); + column_12->set_length(HLL_COLUMN_DEFAULT_LEN); + column_12->set_aggregation("HLL_UNION"); + column_12->set_is_key(false); + + tablet_schema->init_from_pb(tablet_schema_pb); } -void set_tablet_schema_for_scan_key(std::vector* tablet_schema) { - FieldInfo k1; - k1.name = "k1"; - k1.type = OLAP_FIELD_TYPE_CHAR; - k1.length = 4; - k1.index_length = 4; - k1.default_value = "char"; - k1.is_key = true; - k1.is_allow_null = true; - tablet_schema->push_back(k1); - - FieldInfo k2; - k2.name = "k2"; - k2.type = OLAP_FIELD_TYPE_VARCHAR; - k2.length = 16 + OLAP_STRING_MAX_BYTES; - k2.index_length = 20; - k2.is_key = true; - k2.is_allow_null = true; - tablet_schema->push_back(k2); - - FieldInfo v1; - v1.name = "v1"; - v1.type = OLAP_FIELD_TYPE_LARGEINT; - v1.length = 16; - v1.aggregation = OLAP_FIELD_AGGREGATION_MAX; - v1.is_key = false; - v1.is_allow_null = true; - tablet_schema->push_back(v1); - - FieldInfo v2; - v2.name = "v2"; - v2.type = OLAP_FIELD_TYPE_DECIMAL; - v2.length = 12; - v2.aggregation = OLAP_FIELD_AGGREGATION_MIN; - v2.is_key = false; - v2.is_allow_null = true; - tablet_schema->push_back(v2); +void set_tablet_schema_for_scan_key(TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + + ColumnPB* column_1 = tablet_schema_pb.add_column(); + column_1->set_unique_id(1); + column_1->set_name("column_1"); + column_1->set_type("CHAR"); + column_1->set_is_key(true); + column_1->set_is_nullable(true); + column_1->set_length(4); + column_1->set_index_length(4); + column_1->set_default_value("char"); + + ColumnPB* column_2 = tablet_schema_pb.add_column(); + column_2->set_unique_id(2); + column_2->set_name("column_2"); + column_2->set_type("VARCHAR"); + column_2->set_is_key(true); + column_2->set_is_nullable(true); + column_2->set_length(16 + OLAP_STRING_MAX_BYTES); + column_2->set_index_length(20); + + ColumnPB* column_3 = tablet_schema_pb.add_column(); + column_3->set_unique_id(3); + column_3->set_name("column_3"); + column_3->set_type("LARGEINT"); + column_3->set_is_nullable(true); + column_3->set_length(16); + column_3->set_aggregation("MAX"); + column_3->set_is_key(false); + + ColumnPB* column_4 = tablet_schema_pb.add_column(); + column_4->set_unique_id(9); + column_4->set_name("column_4"); + 
column_4->set_type("DECIMAL"); + column_4->set_is_nullable(true); + column_4->set_length(12); + column_4->set_aggregation("MIN"); + column_4->set_is_key(false); + + tablet_schema->init_from_pb(tablet_schema_pb); } -void set_tablet_schema_for_cmp_and_aggregate(std::vector* tablet_schema) { - FieldInfo k1; - k1.name = "k1"; - k1.type = OLAP_FIELD_TYPE_CHAR; - k1.length = 4; - k1.default_value = "char"; - k1.is_key = true; - k1.index_length = 4; - k1.is_allow_null = true; - tablet_schema->push_back(k1); - - FieldInfo k2; - k2.name = "k2"; - k2.type = OLAP_FIELD_TYPE_INT; - k2.length = 4; - k2.is_key = true; - k2.index_length = 4; - k2.is_allow_null = true; - tablet_schema->push_back(k2); - - FieldInfo v1; - v1.name = "v1"; - v1.type = OLAP_FIELD_TYPE_LARGEINT; - v1.length = 16; - v1.aggregation = OLAP_FIELD_AGGREGATION_SUM; - v1.is_key = false; - v1.is_allow_null = true; - tablet_schema->push_back(v1); - - FieldInfo v2; - v2.name = "v2"; - v2.type = OLAP_FIELD_TYPE_DOUBLE; - v2.length = 8; - v2.aggregation = OLAP_FIELD_AGGREGATION_MIN; - v2.is_key = false; - v2.is_allow_null = true; - tablet_schema->push_back(v2); - - FieldInfo v3; - v3.name = "v3"; - v3.type = OLAP_FIELD_TYPE_DECIMAL; - v3.length = 12; - v3.aggregation = OLAP_FIELD_AGGREGATION_MAX; - v3.is_key = false; - v3.is_allow_null = true; - tablet_schema->push_back(v3); - - FieldInfo v4; - v4.name = "v4"; - v4.type = OLAP_FIELD_TYPE_VARCHAR; - v4.length = 16 + OLAP_STRING_MAX_BYTES; - v4.aggregation = OLAP_FIELD_AGGREGATION_REPLACE; - v4.is_key = false; - v4.is_allow_null = true; - tablet_schema->push_back(v4); +void set_tablet_schema_for_cmp_and_aggregate(TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + + ColumnPB* column_1 = tablet_schema_pb.add_column(); + column_1->set_unique_id(1); + column_1->set_name("column_1"); + column_1->set_type("CHAR"); + column_1->set_is_key(true); + column_1->set_is_nullable(true); + column_1->set_length(4); + column_1->set_index_length(4); + column_1->set_default_value("char"); + + ColumnPB* column_2 = tablet_schema_pb.add_column(); + column_2->set_unique_id(2); + column_2->set_name("column_2"); + column_2->set_type("INT"); + column_2->set_is_key(true); + column_2->set_is_nullable(true); + column_2->set_length(4); + column_2->set_index_length(4); + + ColumnPB* column_3 = tablet_schema_pb.add_column(); + column_3->set_unique_id(3); + column_3->set_name("column_3"); + column_3->set_type("LARGEINT"); + column_3->set_is_nullable(true); + column_3->set_length(16); + column_3->set_aggregation("SUM"); + column_3->set_is_key(false); + + ColumnPB* column_4 = tablet_schema_pb.add_column(); + column_4->set_unique_id(9); + column_4->set_name("column_4"); + column_4->set_type("DOUBLE"); + column_4->set_is_nullable(true); + column_4->set_length(8); + column_4->set_aggregation("MIN"); + column_4->set_is_key(false); + + ColumnPB* column_5 = tablet_schema_pb.add_column(); + column_5->set_unique_id(3); + column_5->set_name("column_5"); + column_5->set_type("DECIMAL"); + column_5->set_is_nullable(true); + column_5->set_length(12); + column_5->set_aggregation("MAX"); + column_5->set_is_key(false); + + ColumnPB* column_6 = tablet_schema_pb.add_column(); + column_6->set_unique_id(9); + column_6->set_name("column_6"); + column_6->set_type("VARCHAR"); + column_6->set_is_nullable(true); + column_6->set_length(16 + OLAP_STRING_MAX_BYTES); + column_6->set_aggregation("REPLACE"); + column_6->set_is_key(false); + + tablet_schema->init_from_pb(tablet_schema_pb); } class TestRowCursor : public testing::Test { @@ 
-250,7 +262,7 @@ class TestRowCursor : public testing::Test { }; TEST_F(TestRowCursor, InitRowCursor) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_init(&tablet_schema); RowCursor row; OLAPStatus res = row.init(tablet_schema); @@ -260,7 +272,7 @@ TEST_F(TestRowCursor, InitRowCursor) { } TEST_F(TestRowCursor, InitRowCursorWithColumnCount) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_init(&tablet_schema); RowCursor row; OLAPStatus res = row.init(tablet_schema, 5); @@ -272,11 +284,11 @@ TEST_F(TestRowCursor, InitRowCursorWithColumnCount) { } TEST_F(TestRowCursor, InitRowCursorWithColIds) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_init(&tablet_schema); std::vector col_ids; - for (size_t i = 0; i < tablet_schema.size() / 2; ++i) { + for (size_t i = 0; i < tablet_schema.num_columns() / 2; ++i) { col_ids.push_back(i * 2); } @@ -288,7 +300,7 @@ TEST_F(TestRowCursor, InitRowCursorWithColIds) { } TEST_F(TestRowCursor, InitRowCursorWithScanKey) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_scan_key(&tablet_schema); std::vector scan_keys; @@ -311,7 +323,7 @@ TEST_F(TestRowCursor, InitRowCursorWithScanKey) { } TEST_F(TestRowCursor, SetMinAndMaxKey) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_init(&tablet_schema); RowCursor min_row; @@ -321,7 +333,7 @@ TEST_F(TestRowCursor, SetMinAndMaxKey) { res = min_row.build_min_key(); ASSERT_EQ(res, OLAP_SUCCESS); - for (size_t i = 0; i < tablet_schema.size(); ++i) { + for (size_t i = 0; i < tablet_schema.num_columns(); ++i) { ASSERT_TRUE(min_row.is_min(i)); } @@ -332,7 +344,7 @@ TEST_F(TestRowCursor, SetMinAndMaxKey) { } TEST_F(TestRowCursor, EqualAndCompare) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_cmp_and_aggregate(&tablet_schema); RowCursor left; @@ -373,7 +385,7 @@ TEST_F(TestRowCursor, EqualAndCompare) { } TEST_F(TestRowCursor, IndexCmp) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_cmp_and_aggregate(&tablet_schema); RowCursor left; @@ -414,7 +426,7 @@ TEST_F(TestRowCursor, IndexCmp) { } TEST_F(TestRowCursor, FullKeyCmp) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_cmp_and_aggregate(&tablet_schema); RowCursor left; @@ -454,7 +466,7 @@ TEST_F(TestRowCursor, FullKeyCmp) { } TEST_F(TestRowCursor, AggregateWithoutNull) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_cmp_and_aggregate(&tablet_schema); RowCursor row; @@ -514,7 +526,7 @@ TEST_F(TestRowCursor, AggregateWithoutNull) { } TEST_F(TestRowCursor, AggregateWithNull) { - std::vector tablet_schema; + TabletSchema tablet_schema; set_tablet_schema_for_cmp_and_aggregate(&tablet_schema); RowCursor row; diff --git a/be/test/olap/rowset/alpha_rowset_test.cpp b/be/test/olap/rowset/alpha_rowset_test.cpp new file mode 100644 index 00000000000000..5dbf85d541d015 --- /dev/null +++ b/be/test/olap/rowset/alpha_rowset_test.cpp @@ -0,0 +1,266 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "boost/filesystem.hpp" +#include "json2pb/json_to_pb.h" +#include "util/logging.h" +#include "olap/olap_meta.h" +#include "olap/rowset/rowset_writer_context.h" +#include "olap/rowset/rowset_reader_context.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_writer.h" +#include "olap/rowset/alpha_rowset_reader.h" +#include "olap/data_dir.h" +#include "olap/storage_engine.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; +using std::string; + +namespace doris { + +static const uint32_t MAX_PATH_LEN = 1024; + +StorageEngine* k_engine = nullptr; + +void set_up() { + config::path_gc_check = false; + char buffer[MAX_PATH_LEN]; + getcwd(buffer, MAX_PATH_LEN); + config::storage_root_path = std::string(buffer) + "/data_test"; + remove_all_dir(config::storage_root_path); + OLAPStatus res = create_dir(config::storage_root_path); + ASSERT_EQ(OLAP_SUCCESS, res); + std::vector paths; + paths.emplace_back(config::storage_root_path, -1); + std::string data_path = config::storage_root_path + "/data"; + res = create_dir(data_path); + ASSERT_EQ(OLAP_SUCCESS, res); + std::string shard_path = data_path + "/0"; + res = create_dir(shard_path); + ASSERT_EQ(OLAP_SUCCESS, res); + std::string tablet_path = shard_path + "/12345"; + res = create_dir(tablet_path); + ASSERT_EQ(OLAP_SUCCESS, res); + std::string schema_hash_path = tablet_path + "/1111"; + res = create_dir(schema_hash_path); + ASSERT_EQ(OLAP_SUCCESS, res); + + doris::EngineOptions options; + options.store_paths = paths; + doris::StorageEngine::open(options, &k_engine); +} + +void tear_down() { + delete k_engine; + k_engine = nullptr; + remove_all_dir(config::storage_root_path); + remove_all_dir(std::string(getenv("DORIS_HOME")) + UNUSED_PREFIX); +} + +void create_rowset_writer_context(TabletSchema* tablet_schema, DataDir* data_dir, + RowsetWriterContext* rowset_writer_context) { + rowset_writer_context->rowset_id = 10000; + rowset_writer_context->tablet_id = 12345; + rowset_writer_context->tablet_schema_hash = 1111; + rowset_writer_context->partition_id = 10; + rowset_writer_context->rowset_type = ALPHA_ROWSET; + rowset_writer_context->rowset_path_prefix = config::storage_root_path + "/data/0/12345/1111"; + rowset_writer_context->rowset_state = VISIBLE; + rowset_writer_context->data_dir = data_dir; + rowset_writer_context->tablet_schema = tablet_schema; + rowset_writer_context->version.first = 0; + rowset_writer_context->version.second = 1; + rowset_writer_context->version_hash = 110; +} + +void create_rowset_reader_context(TabletSchema* tablet_schema, const std::vector* return_columns, + const DeleteHandler* delete_handler, std::vector* predicates, + std::set* load_bf_columns, Conditions* conditions, RowsetReaderContext* rowset_reader_context) { + rowset_reader_context->reader_type = READER_ALTER_TABLE; + rowset_reader_context->tablet_schema = tablet_schema; + rowset_reader_context->preaggregation = false; + 
rowset_reader_context->return_columns = return_columns; + rowset_reader_context->delete_handler = delete_handler; + rowset_reader_context->is_using_cache = false; + rowset_reader_context->lower_bound_keys = nullptr; + rowset_reader_context->is_lower_keys_included = nullptr; + rowset_reader_context->upper_bound_keys = nullptr; + rowset_reader_context->is_upper_keys_included = nullptr; + rowset_reader_context->predicates = predicates; + rowset_reader_context->load_bf_columns = load_bf_columns; + rowset_reader_context->conditions = conditions; + rowset_reader_context->lru_cache = k_engine->index_stream_lru_cache(); +} + +void create_tablet_schema(KeysType keys_type, TabletSchema* tablet_schema) { + TabletSchemaPB tablet_schema_pb; + tablet_schema_pb.set_keys_type(keys_type); + tablet_schema_pb.set_num_short_key_columns(2); + tablet_schema_pb.set_num_rows_per_row_block(1024); + tablet_schema_pb.set_compress_kind(COMPRESS_NONE); + tablet_schema_pb.set_next_column_unique_id(4); + + ColumnPB* column_1 = tablet_schema_pb.add_column(); + column_1->set_unique_id(1); + column_1->set_name("k1"); + column_1->set_type("INT"); + column_1->set_is_key(true); + column_1->set_is_nullable(true); + column_1->set_is_bf_column(false); + + ColumnPB* column_2 = tablet_schema_pb.add_column(); + column_2->set_unique_id(2); + column_2->set_name("k2"); + column_2->set_type("VARCHAR"); + column_2->set_length(20); + column_2->set_is_key(true); + column_2->set_is_nullable(true); + column_2->set_is_bf_column(false); + + ColumnPB* column_3 = tablet_schema_pb.add_column(); + column_3->set_unique_id(3); + column_3->set_name("v1"); + column_3->set_type("INT"); + column_3->set_is_key(false); + column_3->set_is_nullable(false); + column_3->set_is_bf_column(false); + column_3->set_aggregation("SUM"); + + tablet_schema->init_from_pb(tablet_schema_pb); +} + +class AlphaRowsetTest : public testing::Test { +public: + virtual void SetUp() { + set_up(); + _data_dir = k_engine->get_store(config::storage_root_path); + ASSERT_TRUE(_data_dir != nullptr); + _alpha_rowset_writer = new(std::nothrow) AlphaRowsetWriter(); + _mem_tracker.reset(new MemTracker(-1)); + _mem_pool.reset(new MemPool(_mem_tracker.get())); + } + + virtual void TearDown() { + delete _alpha_rowset_writer; + _alpha_rowset_writer = nullptr; + tear_down(); + } + +private: + DataDir* _data_dir; + AlphaRowsetWriter* _alpha_rowset_writer; + std::unique_ptr _mem_tracker; + std::unique_ptr _mem_pool; +}; +/* +TEST_F(AlphaRowsetTest, TestAlphaRowsetWriter) { + TabletSchema tablet_schema; + create_tablet_schema(AGG_KEYS, &tablet_schema); + RowsetWriterContext rowset_writer_context; + create_rowset_writer_context(&tablet_schema, _data_dir, &rowset_writer_context); + _alpha_rowset_writer->init(rowset_writer_context); + RowCursor row; + OLAPStatus res = row.init(tablet_schema); + ASSERT_EQ(OLAP_SUCCESS, res); + + int32_t field_0 = 10; + row.set_field_content(0, reinterpret_cast(&field_0), _mem_pool.get()); + Slice field_1("well"); + row.set_field_content(1, reinterpret_cast(&field_1), _mem_pool.get()); + int32_t field_2 = 100; + row.set_field_content(2, reinterpret_cast(&field_2), _mem_pool.get()); + _alpha_rowset_writer->add_row(&row); + _alpha_rowset_writer->flush(); + RowsetSharedPtr alpha_rowset = _alpha_rowset_writer->build(); + ASSERT_TRUE(alpha_rowset != nullptr); + ASSERT_EQ(10000, alpha_rowset->rowset_id()); + ASSERT_EQ(1, alpha_rowset->num_rows()); +} +*/ +TEST_F(AlphaRowsetTest, TestAlphaRowsetReader) { + TabletSchema tablet_schema; + create_tablet_schema(AGG_KEYS, 
&tablet_schema); + RowsetWriterContext rowset_writer_context; + create_rowset_writer_context(&tablet_schema, _data_dir, &rowset_writer_context); + _alpha_rowset_writer->init(rowset_writer_context); + RowCursor row; + OLAPStatus res = row.init(tablet_schema); + ASSERT_EQ(OLAP_SUCCESS, res); + + int32_t field_0 = 10; + row.set_field_content(0, reinterpret_cast(&field_0), _mem_pool.get()); + Slice field_1("well"); + row.set_field_content(1, reinterpret_cast(&field_1), _mem_pool.get()); + int32_t field_2 = 100; + row.set_field_content(2, reinterpret_cast(&field_2), _mem_pool.get()); + _alpha_rowset_writer->add_row(&row); + _alpha_rowset_writer->flush(); + RowsetSharedPtr alpha_rowset = _alpha_rowset_writer->build(); + ASSERT_TRUE(alpha_rowset != nullptr); + ASSERT_EQ(10000, alpha_rowset->rowset_id()); + ASSERT_EQ(1, alpha_rowset->num_rows()); + RowsetReaderSharedPtr rowset_reader = alpha_rowset->create_reader(); + ASSERT_TRUE(rowset_reader != nullptr); + std::vector return_columns; + for (int i = 0; i < tablet_schema.num_columns(); ++i) { + return_columns.push_back(i); + } + DeleteHandler delete_handler; + DelPredicateArray predicate_array; + res = delete_handler.init(tablet_schema, predicate_array, 4); + ASSERT_EQ(OLAP_SUCCESS, res); + RowsetReaderContext rowset_reader_context; + + std::set load_bf_columns; + std::vector predicates; + Conditions conditions; + create_rowset_reader_context(&tablet_schema, &return_columns, &delete_handler, + &predicates, &load_bf_columns, &conditions, &rowset_reader_context); + res = rowset_reader->init(&rowset_reader_context); + ASSERT_EQ(OLAP_SUCCESS, res); + RowBlock* row_block = nullptr; + res = rowset_reader->next_block(&row_block); + ASSERT_EQ(OLAP_SUCCESS, res); + ASSERT_EQ(1, row_block->remaining()); +} + +} // namespace doris + +int main(int argc, char **argv) { + std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; + if (!doris::config::init(conffile.c_str(), false)) { + fprintf(stderr, "error read config file. \n"); + return -1; + } + doris::init_glog("be-test"); + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + google::protobuf::ShutdownProtobufLibrary(); + return ret; +} diff --git a/be/test/olap/rowset/rowset_meta_manager_test.cpp b/be/test/olap/rowset/rowset_meta_manager_test.cpp new file mode 100644 index 00000000000000..63c5125d49e370 --- /dev/null +++ b/be/test/olap/rowset/rowset_meta_manager_test.cpp @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "olap/olap_meta.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/new_status.h" +#include "boost/filesystem.hpp" +#include "json2pb/json_to_pb.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; +using std::string; + +namespace doris { + +const std::string rowset_meta_path = "./be/test/olap/test_data/rowset_meta.json"; + +class RowsetMetaManagerTest : public testing::Test { +public: + virtual void SetUp() { + std::string meta_path = "./meta"; + ASSERT_TRUE(boost::filesystem::create_directory(meta_path)); + _meta = new(std::nothrow) OlapMeta(meta_path); + ASSERT_NE(nullptr, _meta); + OLAPStatus st = _meta->init(); + ASSERT_TRUE(st == OLAP_SUCCESS); + ASSERT_TRUE(boost::filesystem::exists("./meta")); + + std::ifstream infile(rowset_meta_path); + char buffer[1024]; + while (!infile.eof()) { + infile.getline(buffer, 1024); + _json_rowset_meta = _json_rowset_meta + buffer + "\n"; + } + _json_rowset_meta = _json_rowset_meta.substr(0, _json_rowset_meta.size() - 1); + _json_rowset_meta = _json_rowset_meta.substr(0, _json_rowset_meta.size() - 1); + _tablet_uid = TabletUid(10, 10); + } + + virtual void TearDown() { + delete _meta; + ASSERT_TRUE(boost::filesystem::remove_all("./meta")); + } + +private: + OlapMeta* _meta; + std::string _json_rowset_meta; + TabletUid _tablet_uid; +}; + +TEST_F(RowsetMetaManagerTest, TestSaveAndGetAndRemove) { + uint64_t rowset_id = 10000; + RowsetMeta rowset_meta; + rowset_meta.init_from_json(_json_rowset_meta); + ASSERT_EQ(rowset_meta.rowset_id(), rowset_id); + OLAPStatus status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, &rowset_meta); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_TRUE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid, rowset_id)); + std::string json_rowset_meta_read; + status = RowsetMetaManager::get_json_rowset_meta(_meta, _tablet_uid, rowset_id, &json_rowset_meta_read); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_EQ(_json_rowset_meta, json_rowset_meta_read); + status = RowsetMetaManager::remove(_meta, _tablet_uid, rowset_id); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_FALSE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid, rowset_id)); + RowsetMetaSharedPtr rowset_meta_read(new RowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, rowset_id, rowset_meta_read); + ASSERT_TRUE(status != OLAP_SUCCESS); +} + +TEST_F(RowsetMetaManagerTest, TestLoad) { + uint64_t rowset_id = 10000; + OLAPStatus status = RowsetMetaManager::load_json_rowset_meta(_meta, rowset_meta_path); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_TRUE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid, rowset_id)); + std::string json_rowset_meta_read; + status = RowsetMetaManager::get_json_rowset_meta(_meta, _tablet_uid, rowset_id, &json_rowset_meta_read); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_EQ(_json_rowset_meta, json_rowset_meta_read); +} + +} // namespace doris + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/rowset/rowset_meta_test.cpp b/be/test/olap/rowset/rowset_meta_test.cpp new file mode 100644 index 00000000000000..56f2059b386ed4 --- /dev/null +++ b/be/test/olap/rowset/rowset_meta_test.cpp @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
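// A minimal sketch (reviewer annotation, not part of the patch) of the metadata
// round trip RowsetMetaManagerTest exercises above. It reuses only the
// RowsetMetaManager calls shown in that test; the helper name is hypothetical,
// and an already-initialized OlapMeta plus a RowsetMeta parsed from JSON are
// assumed.
static OLAPStatus rowset_meta_roundtrip_sketch(OlapMeta* meta, const TabletUid& uid,
                                               uint64_t rowset_id, RowsetMeta* rowset_meta) {
    // Persist the meta under (tablet_uid, rowset_id) and verify it is visible.
    OLAPStatus st = RowsetMetaManager::save(meta, uid, rowset_id, rowset_meta);
    if (st != OLAP_SUCCESS) { return st; }
    if (!RowsetMetaManager::check_rowset_meta(meta, uid, rowset_id)) {
        return OLAP_ERR_META_KEY_NOT_FOUND;
    }
    // Read it back in JSON form, then remove it so a later lookup fails.
    std::string json_rowset_meta;
    st = RowsetMetaManager::get_json_rowset_meta(meta, uid, rowset_id, &json_rowset_meta);
    if (st != OLAP_SUCCESS) { return st; }
    return RowsetMetaManager::remove(meta, uid, rowset_id);
}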
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "olap/olap_meta.h" +#include "olap/rowset/rowset_meta.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "boost/filesystem.hpp" +#include "json2pb/json_to_pb.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; +using std::string; + +namespace doris { + +const std::string rowset_meta_path = "./be/test/olap/test_data/rowset.json"; + +class RowsetMetaTest : public testing::Test { +public: + virtual void SetUp() { + std::string meta_path = "./meta"; + ASSERT_TRUE(boost::filesystem::create_directory(meta_path)); + _meta = new(std::nothrow) OlapMeta(meta_path); + ASSERT_NE(nullptr, _meta); + OLAPStatus st = _meta->init(); + ASSERT_TRUE(st == OLAP_SUCCESS); + ASSERT_TRUE(boost::filesystem::exists("./meta")); + + std::ifstream infile(rowset_meta_path); + char buffer[1024]; + while (!infile.eof()) { + infile.getline(buffer, 1024); + _json_rowset_meta = _json_rowset_meta + buffer + "\n"; + } + _json_rowset_meta = _json_rowset_meta.substr(0, _json_rowset_meta.size() - 1); + _json_rowset_meta = _json_rowset_meta.substr(0, _json_rowset_meta.size() - 1); + } + + virtual void TearDown() { + delete _meta; + ASSERT_TRUE(boost::filesystem::remove_all("./meta")); + } + +private: + OlapMeta* _meta; + std::string _json_rowset_meta; +}; + +void do_check(RowsetMeta rowset_meta) { + ASSERT_EQ(540081, rowset_meta.rowset_id()); + ASSERT_EQ(15673, rowset_meta.tablet_id()); + ASSERT_EQ(4042, rowset_meta.txn_id()); + ASSERT_EQ(567997577, rowset_meta.tablet_schema_hash()); + ASSERT_EQ(ALPHA_ROWSET, rowset_meta.rowset_type()); + ASSERT_EQ(VISIBLE, rowset_meta.rowset_state()); + ASSERT_EQ(2, rowset_meta.start_version()); + ASSERT_EQ(2, rowset_meta.end_version()); + ASSERT_EQ(8391828013814912580, rowset_meta.version_hash()); + ASSERT_EQ(3929, rowset_meta.num_rows()); + ASSERT_EQ(84699, rowset_meta.total_disk_size()); + ASSERT_EQ(84464, rowset_meta.data_disk_size()); + ASSERT_EQ(235, rowset_meta.index_disk_size()); + ASSERT_EQ(false, rowset_meta.empty()); + ASSERT_EQ(1553765670, rowset_meta.creation_time()); +} + +TEST_F(RowsetMetaTest, TestInit) { + RowsetMeta rowset_meta; + ASSERT_TRUE(rowset_meta.init_from_json(_json_rowset_meta)); + do_check(rowset_meta); + RowsetMetaPB rowset_meta_pb; + rowset_meta.to_rowset_pb(&rowset_meta_pb); + RowsetMeta rowset_meta_2; + rowset_meta_2.init_from_pb(rowset_meta_pb); + do_check(rowset_meta_2); + std::string value = ""; + rowset_meta_pb.SerializeToString(&value); + RowsetMeta rowset_meta_3; + rowset_meta_3.init(value); + do_check(rowset_meta_3); +} + +TEST_F(RowsetMetaTest, TestInitWithInvalidData) { + RowsetMeta rowset_meta; + ASSERT_FALSE(rowset_meta.init_from_json("invalid json meta data")); + 
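    // (Reviewer annotation, not part of the patch: the second entry point
    // checked below is init(), which TestInit above feeds a serialized
    // RowsetMetaPB string; both the JSON path and the protobuf path are
    // expected to reject malformed input.)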
ASSERT_FALSE(rowset_meta.init("invalid pb meta data")); +} + +void do_check_for_alpha(AlphaRowsetMeta alpha_rowset_meta) { + ASSERT_EQ(540081, alpha_rowset_meta.rowset_id()); + ASSERT_EQ(15673, alpha_rowset_meta.tablet_id()); + ASSERT_EQ(4042, alpha_rowset_meta.txn_id()); + ASSERT_EQ(567997577, alpha_rowset_meta.tablet_schema_hash()); + ASSERT_EQ(ALPHA_ROWSET, alpha_rowset_meta.rowset_type()); + ASSERT_EQ(VISIBLE, alpha_rowset_meta.rowset_state()); + ASSERT_EQ(2, alpha_rowset_meta.start_version()); + ASSERT_EQ(2, alpha_rowset_meta.end_version()); + ASSERT_EQ(8391828013814912580, alpha_rowset_meta.version_hash()); + ASSERT_EQ(3929, alpha_rowset_meta.num_rows()); + ASSERT_EQ(84699, alpha_rowset_meta.total_disk_size()); + ASSERT_EQ(84464, alpha_rowset_meta.data_disk_size()); + ASSERT_EQ(235, alpha_rowset_meta.index_disk_size()); + ASSERT_EQ(false, alpha_rowset_meta.empty()); + ASSERT_EQ(1553765670, alpha_rowset_meta.creation_time()); + std::vector segment_groups; + alpha_rowset_meta.get_segment_groups(&segment_groups); + ASSERT_EQ(2, segment_groups.size()); +} + +TEST_F(RowsetMetaTest, TestAlphaRowsetMeta) { + AlphaRowsetMeta rowset_meta; + rowset_meta.init_from_json(_json_rowset_meta); + do_check_for_alpha(rowset_meta); + RowsetMetaPB rowset_meta_pb; + rowset_meta.to_rowset_pb(&rowset_meta_pb); + AlphaRowsetMeta rowset_meta_2; + rowset_meta_2.init_from_pb(rowset_meta_pb); + do_check_for_alpha(rowset_meta_2); + std::string value = ""; + rowset_meta_pb.SerializeToString(&value); + AlphaRowsetMeta rowset_meta_3; + rowset_meta_3.init(value); + do_check_for_alpha(rowset_meta_3); +} + +TEST_F(RowsetMetaTest, TestAlphaRowsetMetaAdd) { + AlphaRowsetMeta rowset_meta; + rowset_meta.init_from_json(_json_rowset_meta); + do_check_for_alpha(rowset_meta); + SegmentGroupPB new_segment_group; + new_segment_group.set_segment_group_id(88888); + new_segment_group.set_num_segments(3); + new_segment_group.set_empty(true); + new_segment_group.set_index_size(100); + new_segment_group.set_data_size(1000); + new_segment_group.set_num_rows(1000); + rowset_meta.add_segment_group(new_segment_group); + std::vector segment_groups; + rowset_meta.get_segment_groups(&segment_groups); + ASSERT_EQ(3, segment_groups.size()); + std::string meta_pb_string = ""; + ASSERT_TRUE(rowset_meta.serialize(&meta_pb_string)); + AlphaRowsetMeta rowset_meta_2; + ASSERT_TRUE(rowset_meta_2.init(meta_pb_string)); + segment_groups.clear(); + rowset_meta_2.get_segment_groups(&segment_groups); + ASSERT_EQ(3, segment_groups.size()); +} + +TEST_F(RowsetMetaTest, TestAlphaRowsetMetaClear) { + AlphaRowsetMeta rowset_meta; + rowset_meta.init_from_json(_json_rowset_meta); + do_check_for_alpha(rowset_meta); + rowset_meta.clear_segment_group(); + std::vector segment_groups; + rowset_meta.get_segment_groups(&segment_groups); + ASSERT_EQ(0, segment_groups.size()); + std::string meta_pb_string = ""; + ASSERT_TRUE(rowset_meta.serialize(&meta_pb_string)); + AlphaRowsetMeta rowset_meta_2; + ASSERT_TRUE(rowset_meta_2.init(meta_pb_string)); + segment_groups.clear(); + rowset_meta_2.get_segment_groups(&segment_groups); + ASSERT_EQ(0, segment_groups.size()); +} + +} // namespace doris + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/run_length_byte_test.cpp b/be/test/olap/run_length_byte_test.cpp index 416037947be576..d1a75beeb715b9 100755 --- a/be/test/olap/run_length_byte_test.cpp +++ b/be/test/olap/run_length_byte_test.cpp @@ -21,9 +21,9 @@ #include "olap/out_stream.h" #include 
"olap/in_stream.h" #include "olap/file_stream.h" -#include "olap/run_length_byte_writer.h" -#include "olap/run_length_byte_reader.h" -#include "olap/column_reader.h" +#include "olap/rowset/run_length_byte_writer.h" +#include "olap/rowset/run_length_byte_reader.h" +#include "olap/rowset/column_reader.h" #include "olap/stream_index_reader.h" #include "olap/stream_index_writer.h" #include "util/logging.h" diff --git a/be/test/olap/run_length_integer_test.cpp b/be/test/olap/run_length_integer_test.cpp index d6bba95493db1f..9f56cc1fbf1a90 100755 --- a/be/test/olap/run_length_integer_test.cpp +++ b/be/test/olap/run_length_integer_test.cpp @@ -20,8 +20,8 @@ #include "olap/byte_buffer.h" #include "olap/out_stream.h" #include "olap/in_stream.h" -#include "olap/run_length_integer_writer.h" -#include "olap/run_length_integer_reader.h" +#include "olap/rowset/run_length_integer_writer.h" +#include "olap/rowset/run_length_integer_reader.h" #include "olap/stream_index_writer.h" #include "olap/stream_index_reader.h" #include "util/logging.h" diff --git a/be/test/olap/stream_index_test.cpp b/be/test/olap/stream_index_test.cpp index ead34f83c26b70..9eb74a4c84296c 100755 --- a/be/test/olap/stream_index_test.cpp +++ b/be/test/olap/stream_index_test.cpp @@ -20,9 +20,9 @@ #include "olap/olap_cond.h" #include "olap/olap_define.h" -#include "olap/olap_engine.h" -#include "olap/olap_header.h" -#include "olap/olap_table.h" +#include "olap/storage_engine.h" +#include "olap/tablet_meta.h" +#include "olap/tablet.h" #include "olap/olap_common.h" #include "olap/row_cursor.h" #include "olap/wrapper_field.h" diff --git a/be/test/olap/tablet_meta_manager_test.cpp b/be/test/olap/tablet_meta_manager_test.cpp new file mode 100755 index 00000000000000..5a39d73066dee9 --- /dev/null +++ b/be/test/olap/tablet_meta_manager_test.cpp @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include + +#include +#include +#include + +#include "olap/tablet_meta_manager.h" +#include "olap/olap_define.h" +#include "util/file_utils.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using std::string; + +namespace doris { + +const std::string meta_path = "./be/test/olap/test_data/header.txt"; + +class TabletMetaManagerTest : public testing::Test { +public: + virtual void SetUp() { + std::string root_path = "./store"; + ASSERT_TRUE(boost::filesystem::create_directory(root_path)); + _data_dir = new(std::nothrow) DataDir(root_path); + ASSERT_NE(nullptr, _data_dir); + Status st = _data_dir->init(); + ASSERT_TRUE(st.ok()); + ASSERT_TRUE(boost::filesystem::exists(root_path + "/meta")); + + std::ifstream infile(meta_path); + char buffer[1024]; + while (!infile.eof()) { + infile.getline(buffer, 1024); + _json_header = _json_header + buffer + "\n"; + } + _json_header = _json_header.substr(0, _json_header.size() - 1); + _json_header = _json_header.substr(0, _json_header.size() - 1); + } + + virtual void TearDown() { + delete _data_dir; + ASSERT_TRUE(boost::filesystem::remove_all("./store")); + } + +private: + DataDir* _data_dir; + std::string _json_header; +}; + +TEST_F(TabletMetaManagerTest, TestSaveAndGetAndRemove) { + const TTabletId tablet_id = 15672; + const TSchemaHash schema_hash = 567997577; + TabletMetaPB tablet_meta_pb; + bool ret = json2pb::JsonToProtoMessage(_json_header, &tablet_meta_pb); + ASSERT_TRUE(ret); + + std::string meta_binary; + tablet_meta_pb.SerializeToString(&meta_binary); + TabletMetaSharedPtr tablet_meta(new TabletMeta()); + OLAPStatus s = tablet_meta->deserialize(meta_binary); + ASSERT_EQ(OLAP_SUCCESS, s); + + s = TabletMetaManager::save(_data_dir, tablet_id, schema_hash, tablet_meta); + ASSERT_EQ(OLAP_SUCCESS, s); + std::string json_meta_read; + s = TabletMetaManager::get_json_meta(_data_dir, tablet_id, schema_hash, &json_meta_read); + ASSERT_EQ(OLAP_SUCCESS, s); + ASSERT_EQ(_json_header, json_meta_read); + s = TabletMetaManager::remove(_data_dir, tablet_id, schema_hash); + ASSERT_EQ(OLAP_SUCCESS, s); + TabletMetaSharedPtr meta_read(new TabletMeta()); + s = TabletMetaManager::get_meta(_data_dir, tablet_id, schema_hash, meta_read); + ASSERT_EQ(OLAP_ERR_META_KEY_NOT_FOUND, s); +} + +TEST_F(TabletMetaManagerTest, TestLoad) { + const TTabletId tablet_id = 15672; + const TSchemaHash schema_hash = 567997577; + OLAPStatus s = TabletMetaManager::load_json_meta(_data_dir, meta_path); + ASSERT_EQ(OLAP_SUCCESS, s); + std::string json_meta_read; + s = TabletMetaManager::get_json_meta(_data_dir, tablet_id, schema_hash, &json_meta_read); + ASSERT_EQ(OLAP_SUCCESS, s); + ASSERT_EQ(_json_header, json_meta_read); +} + +} // namespace doris + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/tablet_mgr_test.cpp b/be/test/olap/tablet_mgr_test.cpp new file mode 100644 index 00000000000000..498fa25cd9bfce --- /dev/null +++ b/be/test/olap/tablet_mgr_test.cpp @@ -0,0 +1,220 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "olap/olap_meta.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/tablet_meta_manager.h" +#include "olap/txn_manager.h" +#include "olap/new_status.h" +#include "boost/filesystem.hpp" +#include "json2pb/json_to_pb.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; +using std::string; + +namespace doris { + +class TabletMgrTest : public testing::Test { +public: + virtual void SetUp() { + auto cache = new_lru_cache(config::file_descriptor_cache_capacity); + FileHandler::set_fd_cache(cache); + string test_engine_data_path = "./be/test/olap/test_data/converter_test_data/data"; + _engine_data_path = "./be/test/olap/test_data/converter_test_data/tmp"; + boost::filesystem::remove_all(_engine_data_path); + create_dirs(_engine_data_path); + _data_dir = new DataDir(_engine_data_path, 1000000000); + _data_dir->init(); + _meta_path = "./meta"; + string tmp_data_path = _engine_data_path + "/data"; + if (boost::filesystem::exists(tmp_data_path)) { + boost::filesystem::remove_all(tmp_data_path); + } + copy_dir(test_engine_data_path, tmp_data_path); + _tablet_id = 15007; + _schema_hash = 368169781; + _tablet_data_path = tmp_data_path + + "/" + std::to_string(0) + + "/" + std::to_string(_tablet_id) + + "/" + std::to_string(_schema_hash); + if (boost::filesystem::exists(_meta_path)) { + boost::filesystem::remove_all(_meta_path); + } + ASSERT_TRUE(boost::filesystem::create_directory(_meta_path)); + ASSERT_TRUE(boost::filesystem::exists(_meta_path)); + _meta = new(std::nothrow) OlapMeta(_meta_path); + ASSERT_NE(nullptr, _meta); + OLAPStatus st = _meta->init(); + ASSERT_TRUE(st == OLAP_SUCCESS); + } + + virtual void TearDown() { + delete _meta; + delete _data_dir; + if (boost::filesystem::exists(_meta_path)) { + ASSERT_TRUE(boost::filesystem::remove_all(_meta_path)); + } + if (boost::filesystem::exists(_engine_data_path)) { + ASSERT_TRUE(boost::filesystem::remove_all(_engine_data_path)); + } + _tablet_mgr.clear(); + } + +private: + DataDir* _data_dir; + OlapMeta* _meta; + std::string _json_rowset_meta; + TxnManager _txn_mgr; + std::string _engine_data_path; + std::string _meta_path; + int64_t _tablet_id; + int32_t _schema_hash; + string _tablet_data_path; + TabletManager _tablet_mgr; +}; + +TEST_F(TabletMgrTest, CreateTablet) { + TColumnType col_type; + col_type.__set_type(TPrimitiveType::SMALLINT); + TColumn col1; + col1.__set_column_name("col1"); + col1.__set_column_type(col_type); + col1.__set_is_key(true); + std::vector cols; + cols.push_back(col1); + TTabletSchema tablet_schema; + tablet_schema.__set_short_key_column_count(1); + tablet_schema.__set_schema_hash(3333); + tablet_schema.__set_keys_type(TKeysType::AGG_KEYS); + tablet_schema.__set_storage_type(TStorageType::COLUMN); + tablet_schema.__set_columns(cols); + TCreateTabletReq create_tablet_req; + create_tablet_req.__set_tablet_schema(tablet_schema); + 
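    // (Reviewer annotation, not part of the patch: the request is completed
    // below with tablet_id / version / version_hash, after which create_tablet
    // is expected to create the tablet directory and persist its meta -- the
    // assertions verify this via tablet_path(), TabletMetaManager::get_meta,
    // an idempotent retry, and a conflicting schema hash that must fail with
    // OLAP_ERR_CE_TABLET_ID_EXIST.)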
create_tablet_req.__set_tablet_id(111); + create_tablet_req.__set_version(2); + create_tablet_req.__set_version_hash(3333); + vector data_dirs; + data_dirs.push_back(_data_dir); + OLAPStatus create_st = _tablet_mgr.create_tablet(create_tablet_req, data_dirs); + ASSERT_TRUE(create_st == OLAP_SUCCESS); + TabletSharedPtr tablet = _tablet_mgr.get_tablet(111, 3333); + ASSERT_TRUE(tablet != nullptr); + // check dir exist + bool dir_exist = check_dir_existed(tablet->tablet_path()); + ASSERT_TRUE(dir_exist); + // check meta has this tablet + TabletMetaSharedPtr new_tablet_meta(new TabletMeta()); + OLAPStatus check_meta_st = TabletMetaManager::get_meta(_data_dir, 111, 3333, new_tablet_meta); + ASSERT_TRUE(check_meta_st == OLAP_SUCCESS); + + // retry create should be successfully + create_st = _tablet_mgr.create_tablet(create_tablet_req, data_dirs); + ASSERT_TRUE(create_st == OLAP_SUCCESS); + + // create tablet with different schema hash should be error + tablet_schema.__set_schema_hash(4444); + create_tablet_req.__set_tablet_schema(tablet_schema); + create_st = _tablet_mgr.create_tablet(create_tablet_req, data_dirs); + ASSERT_TRUE(create_st == OLAP_ERR_CE_TABLET_ID_EXIST); +} + + +TEST_F(TabletMgrTest, DropTablet) { + TColumnType col_type; + col_type.__set_type(TPrimitiveType::SMALLINT); + TColumn col1; + col1.__set_column_name("col1"); + col1.__set_column_type(col_type); + col1.__set_is_key(true); + std::vector cols; + cols.push_back(col1); + TTabletSchema tablet_schema; + tablet_schema.__set_short_key_column_count(1); + tablet_schema.__set_schema_hash(3333); + tablet_schema.__set_keys_type(TKeysType::AGG_KEYS); + tablet_schema.__set_storage_type(TStorageType::COLUMN); + tablet_schema.__set_columns(cols); + TCreateTabletReq create_tablet_req; + create_tablet_req.__set_tablet_schema(tablet_schema); + create_tablet_req.__set_tablet_id(111); + create_tablet_req.__set_version(2); + create_tablet_req.__set_version_hash(3333); + vector data_dirs; + data_dirs.push_back(_data_dir); + OLAPStatus create_st = _tablet_mgr.create_tablet(create_tablet_req, data_dirs); + ASSERT_TRUE(create_st == OLAP_SUCCESS); + TabletSharedPtr tablet = _tablet_mgr.get_tablet(111, 3333); + ASSERT_TRUE(tablet != nullptr); + + // drop unexist tablet will be success + OLAPStatus drop_st = _tablet_mgr.drop_tablet(111, 4444, false); + ASSERT_TRUE(drop_st == OLAP_SUCCESS); + tablet = _tablet_mgr.get_tablet(111, 3333); + ASSERT_TRUE(tablet != nullptr); + + // drop exist tablet will be success + drop_st = _tablet_mgr.drop_tablet(111, 3333, false); + ASSERT_TRUE(drop_st == OLAP_SUCCESS); + tablet = _tablet_mgr.get_tablet(111, 3333); + ASSERT_TRUE(tablet == nullptr); + tablet = _tablet_mgr.get_tablet(111, 3333, true); + ASSERT_TRUE(tablet != nullptr); + + // check dir exist + std::string tablet_path = tablet->tablet_path(); + bool dir_exist = check_dir_existed(tablet_path); + ASSERT_TRUE(dir_exist); + + // do trash sweep, tablet will not be garbage collected + // because tablet ptr referenced it + OLAPStatus trash_st = _tablet_mgr.start_trash_sweep(); + ASSERT_TRUE(trash_st == OLAP_SUCCESS); + tablet = _tablet_mgr.get_tablet(111, 3333, true); + ASSERT_TRUE(tablet != nullptr); + dir_exist = check_dir_existed(tablet_path); + ASSERT_TRUE(dir_exist); + + // reset tablet ptr + tablet.reset(); + trash_st = _tablet_mgr.start_trash_sweep(); + ASSERT_TRUE(trash_st == OLAP_SUCCESS); + tablet = _tablet_mgr.get_tablet(111, 3333, true); + ASSERT_TRUE(tablet == nullptr); + dir_exist = check_dir_existed(tablet_path); + ASSERT_TRUE(!dir_exist); +} + +} 
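// A condensed sketch (reviewer annotation, not part of the patch) of the tablet
// lifecycle the CreateTablet and DropTablet tests above walk through, limited
// to the TabletManager calls they already use. The helper name is hypothetical,
// the (111, 3333) ids are the test's own, and the data_dirs element type is
// assumed to be DataDir*, matching the _data_dir pointer pushed into it above.
static void tablet_lifecycle_sketch(TabletManager* tablet_mgr,
                                    const TCreateTabletReq& request,
                                    std::vector<DataDir*> data_dirs) {
    // Create the tablet and look it up by (tablet_id, schema_hash).
    OLAPStatus st = tablet_mgr->create_tablet(request, data_dirs);
    if (st != OLAP_SUCCESS) { return; }
    TabletSharedPtr tablet = tablet_mgr->get_tablet(111, 3333);
    // Dropping only marks the tablet; its directory survives until trash sweep
    // runs and no TabletSharedPtr references it any more.
    tablet_mgr->drop_tablet(111, 3333, false);
    tablet.reset();
    tablet_mgr->start_trash_sweep();
}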
// namespace doris + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_2_2_6029593056193292005_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_2_2_6029593056193292005_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_2_2_6029593056193292005_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_2_2_6029593056193292005_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_2_2_6029593056193292005_0_0.idx new file mode 100644 index 00000000000000..2bd4791285d20c Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_2_2_6029593056193292005_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_3_3_7368336314652758588_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_3_3_7368336314652758588_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_3_3_7368336314652758588_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_3_3_7368336314652758588_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_3_3_7368336314652758588_0_0.idx new file mode 100644 index 00000000000000..95b9ae10a10714 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_3_3_7368336314652758588_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_4_4_9172793704282665912_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_4_4_9172793704282665912_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_4_4_9172793704282665912_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_4_4_9172793704282665912_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_4_4_9172793704282665912_0_0.idx new file mode 100644 index 00000000000000..2bd4791285d20c Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/15007_4_4_9172793704282665912_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_2_2_6029593056193292005_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_2_2_6029593056193292005_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_2_2_6029593056193292005_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_2_2_6029593056193292005_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_2_2_6029593056193292005_0_0.idx new file mode 100644 index 00000000000000..2bd4791285d20c Binary files /dev/null and 
b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_2_2_6029593056193292005_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_3_3_7368336314652758588_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_3_3_7368336314652758588_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_3_3_7368336314652758588_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_3_3_7368336314652758588_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_3_3_7368336314652758588_0_0.idx new file mode 100644 index 00000000000000..95b9ae10a10714 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_3_3_7368336314652758588_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_4_4_9172793704282665912_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_4_4_9172793704282665912_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_4_4_9172793704282665912_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_4_4_9172793704282665912_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_4_4_9172793704282665912_0_0.idx new file mode 100644 index 00000000000000..2bd4791285d20c Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/incremental_delta/15007_4_4_9172793704282665912_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/olap_header.json b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/olap_header.json new file mode 100644 index 00000000000000..fa6c58f7e339c8 --- /dev/null +++ b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/olap_header.json @@ -0,0 +1,676 @@ +{ + "num_rows_per_data_block": 1024, + "cumulative_layer_point": 2, + "num_short_key_fields": 5, + "column": [ + { + "name": "k1", + "type": "INT", + "aggregation": "NONE", + "length": 4, + "is_key": true, + "index_length": 4, + "is_allow_null": true, + "unique_id": 0, + "is_root_column": true + }, + { + "name": "k2", + "type": "SMALLINT", + "aggregation": "NONE", + "length": 2, + "is_key": true, + "index_length": 2, + "is_allow_null": true, + "unique_id": 1, + "is_root_column": true + }, + { + "name": "k3", + "type": "TINYINT", + "aggregation": "NONE", + "length": 1, + "is_key": true, + "index_length": 1, + "is_allow_null": true, + "unique_id": 2, + "is_root_column": true + }, + { + "name": "k4", + "type": "BIGINT", + "aggregation": "NONE", + "length": 8, + "is_key": true, + "index_length": 8, + "is_allow_null": true, + "unique_id": 3, + "is_root_column": true + }, + { + "name": "k5", + "type": "DECIMAL", + "aggregation": "NONE", + "length": 12, + "is_key": true, + "index_length": 12, + "precision": 9, + "frac": 3, + "is_allow_null": true, + "unique_id": 4, + "is_root_column": true + }, + { + "name": "k6", + "type": "CHAR", + "aggregation": 
"NONE", + "length": 5, + "is_key": true, + "index_length": 5, + "is_allow_null": true, + "unique_id": 5, + "is_root_column": true + }, + { + "name": "k10", + "type": "DATE", + "aggregation": "NONE", + "length": 3, + "is_key": true, + "index_length": 3, + "is_allow_null": true, + "unique_id": 6, + "is_root_column": true + }, + { + "name": "k11", + "type": "DATETIME", + "aggregation": "NONE", + "length": 8, + "is_key": true, + "index_length": 8, + "is_allow_null": true, + "unique_id": 7, + "is_root_column": true + }, + { + "name": "k7", + "type": "VARCHAR", + "aggregation": "NONE", + "length": 22, + "is_key": true, + "index_length": 20, + "is_allow_null": true, + "unique_id": 8, + "is_root_column": true + }, + { + "name": "k8", + "type": "DOUBLE", + "aggregation": "MAX", + "length": 8, + "is_key": false, + "index_length": 8, + "is_allow_null": true, + "unique_id": 9, + "is_root_column": true + }, + { + "name": "k9", + "type": "FLOAT", + "aggregation": "SUM", + "length": 4, + "is_key": false, + "index_length": 4, + "is_allow_null": true, + "unique_id": 10, + "is_root_column": true + } + ], + "creation_time": 1553152125, + "selectivity": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "data_file_type": "COLUMN_ORIENTED_FILE", + "next_column_unique_id": 11, + "compress_kind": "COMPRESS_LZ4", + "segment_size": 268435456, + "keys_type": "AGG_KEYS", + "delta": [ + { + "start_version": 0, + "end_version": 1, + "version_hash": 0, + "creation_time": 1553152125, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 0, + "index_size": 0, + "data_size": 0, + "num_rows": 0, + "empty": true + } + ] + }, + { + "start_version": 2, + "end_version": 2, + "version_hash": 6029593056193292005, + "creation_time": 1553152255, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 229, + "data_size": 4186, + "num_rows": 3315, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + }, + { + "start_version": 3, + "end_version": 3, + "version_hash": 7368336314652758588, + "creation_time": 1553152260, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 229, + "data_size": 4186, + "num_rows": 3315, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": 
"MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + }, + { + "start_version": 4, + "end_version": 4, + "version_hash": 9172793704282665912, + "creation_time": 1553152268, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 229, + "data_size": 4186, + "num_rows": 3315, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + } + ], + "pending_delta": [ + { + "partition_id": 15005, + "transaction_id": 10007, + "creation_time": 1553152325, + "pending_segment_group": [ + { + "pending_segment_group_id": 0, + "num_segments": 1, + "load_id": { + "hi": -6248051641982818523, + "lo": -4026637950854708082 + }, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + }, + { + "partition_id": 15005, + "transaction_id": 10008, + "creation_time": 1553152332, + "pending_segment_group": [ + { + "pending_segment_group_id": 0, + "num_segments": 1, + "load_id": { + "hi": 8955644356935812351, + "lo": 5235253922991912895 + }, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + } + ], + "incremental_delta": [ + { + "start_version": 2, + "end_version": 2, + "version_hash": 6029593056193292005, + "creation_time": 1553152255, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 1, + 
"index_size": 229, + "data_size": 4186, + "num_rows": 3315, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + }, + { + "start_version": 3, + "end_version": 3, + "version_hash": 7368336314652758588, + "creation_time": 1553152260, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 229, + "data_size": 4186, + "num_rows": 3315, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + }, + { + "start_version": 4, + "end_version": 4, + "version_hash": 9172793704282665912, + "creation_time": 1553152268, + "segment_group": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 229, + "data_size": 4186, + "num_rows": 3315, + "column_pruning": [ + { + "min": "LTEyOA==", + "max": "MTI2", + "null_flag": false + }, + { + "min": "MTk4OQ==", + "max": "MjAxMg==", + "null_flag": false + }, + { + "min": "MA==", + "max": "MA==", + "null_flag": true + }, + { + "min": "MTEwMTE4OTI=", + "max": "MTEwMTE5MDI=", + "null_flag": false + }, + { + "min": "MTIzLjEyMzAwMDAwMA==", + "max": "MTIzLjEyMzAwMDAwMA==", + "null_flag": false + }, + { + "min": "ZmFsc2U=", + "max": "dHJ1ZQA=", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMQ==", + "max": "MTk4OS0wMy0yMQ==", + "null_flag": false + }, + { + "min": "MTk4OS0wMy0yMSAxMzowMDowMA==", + "max": "MTk4OS0wMy0yMSAxNDowMDowMA==", + "null_flag": false + }, + { + "min": "d2FuZ2ppbmcwNA==", + "max": "d3VsaW4wNA==", + "null_flag": false + } + ], + "empty": false + } + ] + } + ], + "tablet_id": 15007, + "schema_hash": 368169781, + "shard": 0 +} \ No newline at end of file diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10007_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10007_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10007_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10007_0_0.idx 
b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10007_0_0.idx new file mode 100644 index 00000000000000..95b9ae10a10714 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10007_0_0.idx differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10008_0_0.dat b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10008_0_0.dat new file mode 100644 index 00000000000000..2d2944c24716c5 Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10008_0_0.dat differ diff --git a/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10008_0_0.idx b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10008_0_0.idx new file mode 100644 index 00000000000000..402f05835709af Binary files /dev/null and b/be/test/olap/test_data/converter_test_data/data/0/15007/368169781/pending_delta/10008_0_0.idx differ diff --git a/be/test/olap/test_data/header.txt b/be/test/olap/test_data/header.txt index a657c2c40b12b2..f596d7f2793259 100644 --- a/be/test/olap/test_data/header.txt +++ b/be/test/olap/test_data/header.txt @@ -1,152 +1,188 @@ { - "num_rows_per_data_block": 1024, - "file_version": [ + "table_id": 15670, + "partition_id": 15671, + "tablet_id": 15672, + "schema_hash": 567997577, + "shard_id": 34, + "creation_time": 1553765664, + "cumulative_layer_point": 2, + "tablet_state": "PB_NOTREADY", + "schema": { + "keys_type": "AGG_KEYS", + "column": [ + { + "unique_id": 0, + "name": "k1", + "type": "BIGINT", + "is_key": true, + "aggregation": "NONE", + "is_nullable": false, + "length": 8, + "index_length": 8 + }, + { + "unique_id": 1, + "name": "v1", + "type": "HLL", + "is_key": false, + "aggregation": "HLL_UNION", + "is_nullable": false, + "default_value": "MA==", + "length": 16387, + "index_length": 16 + }, + { + "unique_id": 2, + "name": "v2", + "type": "INT", + "is_key": false, + "aggregation": "SUM", + "is_nullable": false, + "length": 4, + "index_length": 4 + } + ], + "num_short_key_columns": 1, + "num_rows_per_row_block": 1024, + "compress_kind": "COMPRESS_LZ4", + "next_column_unique_id": 3 + }, + "rs_metas": [ { - "num_segments": 1, + "rowset_id": 540072, + "tablet_id": 15673, + "tablet_schema_hash": 567997577, + "rowset_type": "ALPHA_ROWSET", + "rowset_state": "VISIBLE", "start_version": 0, "end_version": 1, "version_hash": 0, - "max_timestamp": 0, - "index_size": 67, - "data_size": 477, "num_rows": 0, - "creation_time": 1534750461, - "delta_pruning": { - "column_pruning": [ - { - "min": "OTk5OS0xMi0zMQ==", - "max": "MC0wMC0wMA==", - "null_flag": false - }, - { - "min": "MjE0NzQ4MzY0Nw==", - "max": "MA==", - "null_flag": false - }, + "total_disk_size": 0, + "data_disk_size": 0, + "index_disk_size": 0, + "empty": true, + "creation_time": 1553765664, + "alpha_rowset_extra_meta_pb": { + "segment_groups": [ { - "min": "MzI3Njc=", - "max": "MA==", - "null_flag": false - }, - { - "min": "/w==", - "max": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "null_flag": false + "segment_group_id": 0, + "num_segments": 0, + "index_size": 0, + "data_size": 0, + "num_rows": 0, + "empty": true } ] } }, { - "num_segments": 1, + "rowset_id": 540081, + "tablet_id": 15673, + "txn_id": 4042, + "tablet_schema_hash": 567997577, + "rowset_type": "ALPHA_ROWSET", + "rowset_state": "VISIBLE", "start_version": 2, "end_version": 2, - "version_hash": 0, - "max_timestamp": 
0, - "index_size": 67, - "data_size": 477, - "num_rows": 0, - "creation_time": 1534750461, - "delta_pruning": { - "column_pruning": [ + "version_hash": 8391828013814912580, + "num_rows": 3929, + "total_disk_size": 84699, + "data_disk_size": 84464, + "index_disk_size": 235, + "empty": false, + "load_id": { + "hi": -5350970832824939812, + "lo": -6717994719194512122 + }, + "creation_time": 1553765670, + "alpha_rowset_extra_meta_pb": { + "segment_groups": [ { - "min": "OTk5OS0xMi0zMQ==", - "max": "MC0wMC0wMA==", - "null_flag": false - }, - { - "min": "MjE0NzQ4MzY0Nw==", - "max": "MA==", - "null_flag": false - }, - { - "min": "MzI3Njc=", - "max": "MA==", - "null_flag": false - }, - { - "min": "/w==", - "max": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "null_flag": false + "segment_group_id": 0, + "num_segments": 1, + "index_size": 132, + "data_size": 576, + "num_rows": 5, + "zone_maps": [ + { + "min": "MQ==", + "max": "NQ==", + "null_flag": false + }, + { + "min": "MQ==", + "max": "Mw==", + "null_flag": false + }, + { + "min": "J2J1c2gn", + "max": "J3RvbSc=", + "null_flag": false + } + ], + "empty": false } ] } } ], - "cumulative_layer_point": 2, - "num_short_key_fields": 4, - "column": [ - { - "name": "event_day", - "type": "DATE", - "aggregation": "NONE", - "length": 3, - "is_key": true, - "index_length": 3, - "is_allow_null": true, - "unique_id": 0, - "is_root_column": true - }, + "inc_rs_metas": [ { - "name": "siteid", - "type": "INT", - "aggregation": "NONE", - "length": 4, - "is_key": true, - "default_value": "10", - "index_length": 4, - "is_allow_null": true, - "unique_id": 1, - "is_root_column": true - }, - { - "name": "citycode", - "type": "SMALLINT", - "aggregation": "NONE", - "length": 2, - "is_key": true, - "index_length": 2, - "is_allow_null": true, - "unique_id": 2, - "is_root_column": true - }, - { - "name": "username", - "type": "VARCHAR", - "aggregation": "NONE", - "length": 34, - "is_key": true, - "default_value": "", - "index_length": 20, - "is_allow_null": true, - "unique_id": 3, - "is_root_column": true - }, - { - "name": "pv", - "type": "BIGINT", - "aggregation": "SUM", - "length": 8, - "is_key": false, - "default_value": "0", - "index_length": 8, - "is_allow_null": true, - "unique_id": 4, - "is_root_column": true + "rowset_id": 540081, + "tablet_id": 15673, + "txn_id": 4042, + "tablet_schema_hash": 567997577, + "rowset_type": "ALPHA_ROWSET", + "rowset_state": "VISIBLE", + "start_version": 2, + "end_version": 2, + "version_hash": 8391828013814912580, + "num_rows": 3929, + "total_disk_size": 84699, + "data_disk_size": 84464, + "index_disk_size": 235, + "empty": false, + "load_id": { + "hi": -5350970832824939812, + "lo": -6717994719194512122 + }, + "creation_time": 1553765670, + "alpha_rowset_extra_meta_pb": { + "segment_groups": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 132, + "data_size": 576, + "num_rows": 5, + "zone_maps": [ + { + "min": "MQ==", + "max": "NQ==", + "null_flag": false + }, + { + "min": "MQ==", + "max": "Mw==", + "null_flag": false + }, + { + "min": "J2J1c2gn", + "max": "J3RvbSc=", + "null_flag": false + } + ], + "empty": false + } + ] + } } ], - "creation_time": 1534750461, - "selectivity": [ - 1, - 1, - 1, - 1 - ], - "data_file_type": "COLUMN_ORIENTED_FILE", - "next_column_unique_id": 5, - "compress_kind": "COMPRESS_LZ4", - "segment_size": 268435456, - "keys_type": "AGG_KEYS", - "tablet_id": 20487, - "schema_hash": 1520686811, - "shard": 0 + "in_restore_mode": false, + "tablet_uid": { + "hi": 10, + "lo": 10 + }, + 
"end_rowset_id": 10000 } diff --git a/be/test/olap/test_data/rowset.json b/be/test/olap/test_data/rowset.json new file mode 100644 index 00000000000000..d45ac1fb66f7f6 --- /dev/null +++ b/be/test/olap/test_data/rowset.json @@ -0,0 +1,75 @@ +{ + "rowset_id": 540081, + "tablet_id": 15673, + "txn_id": 4042, + "tablet_schema_hash": 567997577, + "rowset_type": "ALPHA_ROWSET", + "rowset_state": "VISIBLE", + "start_version": 2, + "end_version": 2, + "version_hash": 8391828013814912580, + "num_rows": 3929, + "total_disk_size": 84699, + "data_disk_size": 84464, + "index_disk_size": 235, + "empty": false, + "load_id": { + "hi": -5350970832824939812, + "lo": -6717994719194512122 + }, + "creation_time": 1553765670, + "alpha_rowset_extra_meta_pb": { + "segment_groups": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 132, + "data_size": 576, + "num_rows": 5, + "zone_maps": [ + { + "min": "MQ==", + "max": "NQ==", + "null_flag": false + }, + { + "min": "MQ==", + "max": "Mw==", + "null_flag": false + }, + { + "min": "J2J1c2gn", + "max": "J3RvbSc=", + "null_flag": false + } + ], + "empty": false + }, + { + "segment_group_id": 1, + "num_segments": 1, + "index_size": 132, + "data_size": 576, + "num_rows": 5, + "zone_maps": [ + { + "min": "MQ==", + "max": "NQ==", + "null_flag": false + }, + { + "min": "MQ==", + "max": "Mw==", + "null_flag": false + }, + { + "min": "J2J1c2gn", + "max": "J3RvbSc=", + "null_flag": false + } + ], + "empty": false + } + ] + } +} diff --git a/be/test/olap/test_data/rowset_meta.json b/be/test/olap/test_data/rowset_meta.json new file mode 100644 index 00000000000000..91c8c73eccaede --- /dev/null +++ b/be/test/olap/test_data/rowset_meta.json @@ -0,0 +1,49 @@ +{ + "rowset_id": 10000, + "tablet_id": 12046, + "tablet_schema_hash": 365187263, + "rowset_type": "ALPHA_ROWSET", + "rowset_state": "VISIBLE", + "start_version": 0, + "end_version": 1, + "version_hash": 0, + "num_rows": 0, + "total_disk_size": 0, + "data_disk_size": 0, + "index_disk_size": 0, + "empty": true, + "creation_time": 1552911435, + "tablet_uid": { + "hi": 10, + "lo": 10 + }, + "alpha_rowset_extra_meta_pb": { + "segment_groups": [ + { + "segment_group_id": 0, + "num_segments": 1, + "index_size": 132, + "data_size": 576, + "num_rows": 5, + "zone_maps": [ + { + "min": "MQ==", + "max": "NQ==", + "null_flag": false + }, + { + "min": "MQ==", + "max": "Mw==", + "null_flag": false + }, + { + "min": "J2J1c2gn", + "max": "J3RvbSc=", + "null_flag": false + } + ], + "empty": false + } + ] + } +} diff --git a/be/test/olap/test_data/rowset_meta2.json b/be/test/olap/test_data/rowset_meta2.json new file mode 100644 index 00000000000000..15af654e153e31 --- /dev/null +++ b/be/test/olap/test_data/rowset_meta2.json @@ -0,0 +1,12 @@ +{ + "rowset_id": 10001, + "tablet_id": 20487, + "tablet_schema_hash": 1520686811, + "rowset_type": "ALPHA_ROWSET", + "rowset_state": "VISIBLE", + "start_version": 2, + "end_version": 3, + "row_number": 123456, + "total_disk_size": 100000, + "data_disk_size": 95000 +} diff --git a/be/test/olap/txn_manager_test.cpp b/be/test/olap/txn_manager_test.cpp new file mode 100644 index 00000000000000..9996f63cb12b38 --- /dev/null +++ b/be/test/olap/txn_manager_test.cpp @@ -0,0 +1,259 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "olap/olap_meta.h" +#include "olap/rowset/rowset_meta_manager.h" +#include "olap/rowset/alpha_rowset.h" +#include "olap/rowset/alpha_rowset_meta.h" +#include "olap/txn_manager.h" +#include "olap/new_status.h" +#include "boost/filesystem.hpp" +#include "json2pb/json_to_pb.h" + +#ifndef BE_TEST +#define BE_TEST +#endif + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; +using std::string; + +namespace doris { + +const std::string rowset_meta_path = "./be/test/olap/test_data/rowset_meta.json"; +const std::string rowset_meta_path_2 = "./be/test/olap/test_data/rowset_meta2.json"; + +class TxnManagerTest : public testing::Test { +public: + virtual void SetUp() { + std::string meta_path = "./meta"; + boost::filesystem::remove_all("./meta"); + ASSERT_TRUE(boost::filesystem::create_directory(meta_path)); + _meta = new(std::nothrow) OlapMeta(meta_path); + ASSERT_NE(nullptr, _meta); + OLAPStatus st = _meta->init(); + ASSERT_TRUE(st == OLAP_SUCCESS); + ASSERT_TRUE(boost::filesystem::exists("./meta")); + load_id.set_hi(0); + load_id.set_lo(0); + // init rowset meta 1 + std::ifstream infile(rowset_meta_path); + char buffer[1024]; + while (!infile.eof()) { + infile.getline(buffer, 1024); + _json_rowset_meta = _json_rowset_meta + buffer + "\n"; + } + _json_rowset_meta = _json_rowset_meta.substr(0, _json_rowset_meta.size() - 1); + + uint64_t rowset_id = 10000; + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + rowset_meta->init_from_json(_json_rowset_meta); + ASSERT_EQ(rowset_meta->rowset_id(), rowset_id); + _alpha_rowset.reset(new AlphaRowset(nullptr, rowset_meta_path, nullptr, rowset_meta)); + _alpha_rowset_same_id.reset(new AlphaRowset(nullptr, rowset_meta_path, nullptr, rowset_meta)); + + // init rowset meta 2 + _json_rowset_meta = ""; + std::ifstream infile2(rowset_meta_path_2); + char buffer2[1024]; + while (!infile2.eof()) { + infile2.getline(buffer2, 1024); + _json_rowset_meta = _json_rowset_meta + buffer2 + "\n"; + std::cout << _json_rowset_meta << std::endl; + } + _json_rowset_meta = _json_rowset_meta.substr(0, _json_rowset_meta.size() - 1); + rowset_id = 10001; + RowsetMetaSharedPtr rowset_meta2(new AlphaRowsetMeta()); + rowset_meta2->init_from_json(_json_rowset_meta); + ASSERT_EQ(rowset_meta2->rowset_id(), rowset_id); + _alpha_rowset_diff_id.reset(new AlphaRowset(nullptr, rowset_meta_path_2, nullptr, rowset_meta2)); + _tablet_uid = TabletUid(10, 10); + } + + virtual void TearDown() { + delete _meta; + ASSERT_TRUE(boost::filesystem::remove_all("./meta")); + } + +private: + OlapMeta* _meta; + std::string _json_rowset_meta; + TxnManager _txn_mgr; + TPartitionId partition_id = 1123; + TTransactionId transaction_id = 111; + TTabletId tablet_id = 222; + SchemaHash schema_hash = 333; + TabletUid _tablet_uid; + PUniqueId load_id; + RowsetSharedPtr _alpha_rowset; + RowsetSharedPtr 
_alpha_rowset_same_id; + RowsetSharedPtr _alpha_rowset_diff_id; +}; + +TEST_F(TxnManagerTest, PrepareNewTxn) { + OLAPStatus status = _txn_mgr.prepare_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id); + ASSERT_TRUE(status == OLAP_SUCCESS); +} + +// 1. prepare txn +// 2. commit txn +// 3. should be success +TEST_F(TxnManagerTest, CommitTxnWithPrepare) { + OLAPStatus status = _txn_mgr.prepare_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id); + _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, _alpha_rowset->rowset_id(), rowset_meta); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_TRUE(rowset_meta->rowset_id() == _alpha_rowset->rowset_id()); +} + +// 1. commit without prepare +// 2. should success +TEST_F(TxnManagerTest, CommitTxnWithNoPrepare) { + OLAPStatus status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); +} + +// 1. commit twice with different rowset id +// 2. should failed +TEST_F(TxnManagerTest, CommitTxnTwiceWithDiffRowsetId) { + OLAPStatus status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset_diff_id, false); + ASSERT_TRUE(status != OLAP_SUCCESS); +} + +// 1. commit twice with same rowset id +// 2. should success +TEST_F(TxnManagerTest, CommitTxnTwiceWithSameRowsetId) { + OLAPStatus status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset_same_id, false); + ASSERT_TRUE(status == OLAP_SUCCESS); +} + +// 1. prepare twice should be success +TEST_F(TxnManagerTest, PrepareNewTxnTwice) { + OLAPStatus status = _txn_mgr.prepare_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.prepare_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id); + ASSERT_TRUE(status == OLAP_SUCCESS); +} + +// 1. txn could be rollbacked if it is not committed +TEST_F(TxnManagerTest, RollbackNotCommittedTxn) { + OLAPStatus status = _txn_mgr.prepare_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.rollback_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid); + ASSERT_TRUE(status == OLAP_SUCCESS); + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, _alpha_rowset->rowset_id(), rowset_meta); + ASSERT_TRUE(status != OLAP_SUCCESS); +} + +// 1. 
txn could not be rollbacked if it is committed +TEST_F(TxnManagerTest, RollbackCommittedTxn) { + OLAPStatus status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.rollback_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid); + ASSERT_FALSE(status == OLAP_SUCCESS); + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, _alpha_rowset->rowset_id(), rowset_meta); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_TRUE(rowset_meta->rowset_id() == _alpha_rowset->rowset_id()); +} + +// 1. publish version success +TEST_F(TxnManagerTest, PublishVersionSuccessful) { + OLAPStatus status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); + Version new_version(10,11); + VersionHash new_versionhash = 123; + status = _txn_mgr.publish_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, new_version, new_versionhash); + ASSERT_TRUE(status == OLAP_SUCCESS); + + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, _alpha_rowset->rowset_id(), rowset_meta); + ASSERT_TRUE(status == OLAP_SUCCESS); + ASSERT_TRUE(rowset_meta->rowset_id() == _alpha_rowset->rowset_id()); + ASSERT_TRUE(rowset_meta->start_version() == 10); + ASSERT_TRUE(rowset_meta->end_version() == 11); +} + +// 1. publish version failed if not found related txn and rowset +TEST_F(TxnManagerTest, PublishNotExistedTxn) { + Version new_version(10,11); + VersionHash new_versionhash = 123; + OLAPStatus status = _txn_mgr.publish_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, new_version, new_versionhash); + ASSERT_TRUE(status != OLAP_SUCCESS); +} + +TEST_F(TxnManagerTest, DeletePreparedTxn) { + OLAPStatus status = _txn_mgr.prepare_txn(partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.delete_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid); + ASSERT_TRUE(status == OLAP_SUCCESS); +} + +TEST_F(TxnManagerTest, DeleteCommittedTxn) { + OLAPStatus status = _txn_mgr.commit_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid, load_id, _alpha_rowset, false); + ASSERT_TRUE(status == OLAP_SUCCESS); + RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, _alpha_rowset->rowset_id(), rowset_meta); + ASSERT_TRUE(status == OLAP_SUCCESS); + status = _txn_mgr.delete_txn(_meta, partition_id, transaction_id, + tablet_id, schema_hash, _tablet_uid); + ASSERT_TRUE(status == OLAP_SUCCESS); + RowsetMetaSharedPtr rowset_meta2(new AlphaRowsetMeta()); + status = RowsetMetaManager::get_rowset_meta(_meta, _tablet_uid, _alpha_rowset->rowset_id(), rowset_meta2); + ASSERT_TRUE(status != OLAP_SUCCESS); +} + +} // namespace doris + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/vectorized_olap_reader_test.cpp b/be/test/olap/vectorized_olap_reader_test.cpp index 99816e6bd509fd..545367caabd6e4 100644 --- a/be/test/olap/vectorized_olap_reader_test.cpp +++ b/be/test/olap/vectorized_olap_reader_test.cpp @@ -118,14 +118,14 @@ void 
set_default_create_tablet_request(TCreateTabletReq* request) { // SQL for generate data(./be/test/olap/test_data/all_types_1000): // -// create table delete_test_row (k1 tinyint, k2 int, k3 varchar(64), +// create tablet delete_test_row (k1 tinyint, k2 int, k3 varchar(64), // k4 date, k5 datetime, k6 decimal(6,3), k7 smallint default "0", // k8 char(16) default "char", v bigint sum) engine=olap distributed by // random buckets 1 properties ("storage_type" = "row"); // // load label label1 (data infile // ("hdfs://host:port/dir") -// into table `delete_test_row` (k1,k2,v,k3,k4,k5,k6)); +// into tablet `delete_test_row` (k1,k2,v,k3,k4,k5,k6)); void set_default_push_request(TPushReq* request) { request->tablet_id = 10003; request->schema_hash = 1508825676; @@ -148,7 +148,7 @@ class TestVectorizedOLAPReader : public testing::Test { void TearDown() { // Remove all dir. - OLAPEngine::get_instance()->drop_table( + StorageEngine::get_instance()->drop_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); while (0 == access(_tablet_name.c_str(), F_OK)) { sleep(1); @@ -157,22 +157,19 @@ class TestVectorizedOLAPReader : public testing::Test { } void init_olap_row() { - // Create local data dir for OLAPEngine. - config::storage_root_path = "./test_run/row_table"; + // Create local data dir for StorageEngine. + config::storage_root_path = "./test_run/row_tablet"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. // create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); CommandExecutor command_executor = CommandExecutor(); - res = command_executor.create_table(_create_tablet); + res = command_executor.create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - OLAPTablePtr tablet = command_executor.get_table( + TabletSharedPtr tablet = command_executor.get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_name = tablet->tablet_name(); @@ -186,23 +183,20 @@ class TestVectorizedOLAPReader : public testing::Test { } void init_olap_column() { - // Create local data dir for OLAPEngine. - config::storage_root_path = "./test_run/column_table"; + // Create local data dir for StorageEngine. + config::storage_root_path = "./test_run/column_tablet"; remove_all_dir(config::storage_root_path); ASSERT_EQ(create_dir(config::storage_root_path), OLAP_SUCCESS); - // Initialize all singleton object. - OLAPRootPath::get_instance()->reload_root_paths(config::storage_root_path.c_str()); - // 1. Prepare for query split key. 
// create base tablet OLAPStatus res = OLAP_SUCCESS; set_default_create_tablet_request(&_create_tablet); _create_tablet.tablet_schema.storage_type = TStorageType::COLUMN; CommandExecutor command_executor = CommandExecutor(); - res = command_executor.create_table(_create_tablet); + res = command_executor.create_tablet(_create_tablet); ASSERT_EQ(OLAP_SUCCESS, res); - OLAPTablePtr tablet = command_executor.get_table( + TabletSharedPtr tablet = command_executor.get_tablet( _create_tablet.tablet_id, _create_tablet.tablet_schema.schema_hash); ASSERT_TRUE(tablet.get() != NULL); _tablet_name = tablet->tablet_name(); @@ -222,21 +216,21 @@ class TestVectorizedOLAPReader : public testing::Test { //ExecEnv* exec_env = new ExecEnv(); //_runtime_stat.init(fragment_id, query_options, "test", exec_env); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; + TDescriptorTable t_desc_tablet; + + // tablet descriptors + TTableDescriptor t_tablet_desc; + + t_tablet_desc.id = 0; + t_tablet_desc.tableType = TTableType::OLAP_TABLE; + t_tablet_desc.numCols = 0; + t_tablet_desc.numClusteringCols = 0; + t_tablet_desc.olapTable.tableName = ""; + t_tablet_desc.tableName = ""; + t_tablet_desc.dbName = ""; + t_tablet_desc.__isset.mysqlTable = true; + t_desc_tablet.tableDescriptors.push_back(t_tablet_desc); + t_desc_tablet.__isset.tableDescriptors = true; // TSlotDescriptor int offset = 1; int i = 0; @@ -252,7 +246,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k1"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int8_t); } ++i; @@ -268,7 +262,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k2"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int32_t); } ++i; @@ -284,7 +278,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k3"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -300,7 +294,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k4"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -316,7 +310,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k5"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(DateTimeValue); } ++i; @@ -332,7 +326,7 @@ class TestVectorizedOLAPReader : public testing::Test { 
t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k6"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(decimal12_t); } ++i; @@ -348,7 +342,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k7"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int16_t); } ++i; @@ -364,7 +358,7 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("k8"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(StringValue); } ++i; @@ -380,11 +374,11 @@ class TestVectorizedOLAPReader : public testing::Test { t_slot_desc.__set_slotIdx(i); t_slot_desc.__set_isMaterialized(true); t_slot_desc.__set_colName("v"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); + t_desc_tablet.slotDescriptors.push_back(t_slot_desc); offset += sizeof(int64_t); } - t_desc_table.__isset.slotDescriptors = true; + t_desc_tablet.__isset.slotDescriptors = true; // TTupleDescriptor TTupleDescriptor t_tuple_desc; t_tuple_desc.id = 0; @@ -392,9 +386,9 @@ class TestVectorizedOLAPReader : public testing::Test { t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + t_desc_tablet.tupleDescriptors.push_back(t_tuple_desc); - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + DescriptorTbl::create(&_obj_pool, t_desc_tablet, &_desc_tbl); std::vector row_tuples; row_tuples.push_back(0); diff --git a/be/test/runtime/snapshot_loader_test.cpp b/be/test/runtime/snapshot_loader_test.cpp index 4e63275e0468ac..4e024553cc67a5 100644 --- a/be/test/runtime/snapshot_loader_test.cpp +++ b/be/test/runtime/snapshot_loader_test.cpp @@ -90,11 +90,11 @@ TEST_F(SnapshotLoaderTest, NormalCase) { st = loader._replace_tablet_id("1234_2_5_12345_1.dat", 5678, &new_name); ASSERT_TRUE(st.ok()); - ASSERT_EQ("5678_2_5_12345_1.dat", new_name); + ASSERT_EQ("1234_2_5_12345_1.dat", new_name); st = loader._replace_tablet_id("1234_2_5_12345_1.idx", 5678, &new_name); ASSERT_TRUE(st.ok()); - ASSERT_EQ("5678_2_5_12345_1.idx", new_name); + ASSERT_EQ("1234_2_5_12345_1.idx", new_name); st = loader._replace_tablet_id("1234_2_5_12345_1.xxx", 5678, &new_name); ASSERT_FALSE(st.ok()); diff --git a/be/test/util/doris_metrics_test.cpp b/be/test/util/doris_metrics_test.cpp index 1765ffaafa9b12..08631c818f8c20 100644 --- a/be/test/util/doris_metrics_test.cpp +++ b/be/test/util/doris_metrics_test.cpp @@ -227,14 +227,6 @@ TEST_F(DorisMetricsTest, Normal) { ASSERT_TRUE(metric != nullptr); ASSERT_STREQ("22", ((SimpleMetric*)metric)->to_string().c_str()); } - { - DorisMetrics::cancel_delete_requests_total.increment(23); - auto metric = metrics->get_metric("engine_requests_total", - MetricLabels().add("type", "cancel_delete") - .add("status", "total")); - ASSERT_TRUE(metric != nullptr); - ASSERT_STREQ("23", ((SimpleMetric*)metric)->to_string().c_str()); - } // comapction { DorisMetrics::base_compaction_deltas_total.increment(30); diff --git a/be/test/util/uid_util_test.cpp b/be/test/util/uid_util_test.cpp index 2175fc917a652e..9aa9add4560740 100644 --- a/be/test/util/uid_util_test.cpp +++ 
b/be/test/util/uid_util_test.cpp @@ -38,6 +38,8 @@ TEST_F(UidUtilTest, UniqueId) { UniqueId id(123456789, 987654321); std::string hex_str = id.to_string(); ASSERT_STREQ("00000000075bcd15-000000003ade68b1", hex_str.c_str()); + UniqueId id2("00000000075bcd15", "000000003ade68b1"); + ASSERT_TRUE(id == id2); } { PUniqueId puid; @@ -46,6 +48,8 @@ TEST_F(UidUtilTest, UniqueId) { UniqueId id(puid); std::string hex_str = id.to_string(); ASSERT_STREQ("002bdc546291f4b1-015ee2a321ce7d15", hex_str.c_str()); + UniqueId id2("002bdc546291f4b1", "015ee2a321ce7d15"); + ASSERT_TRUE(id == id2); } { TUniqueId tuid; @@ -54,14 +58,35 @@ TEST_F(UidUtilTest, UniqueId) { UniqueId id(tuid); std::string hex_str = id.to_string(); ASSERT_STREQ("002bdc546291f4b1-015ee2a321ce7d15", hex_str.c_str()); + UniqueId id2("002bdc546291f4b1", "015ee2a321ce7d15"); + ASSERT_TRUE(id == id2); } { TUniqueId tuid; tuid.__set_hi(12345678987654321); tuid.__set_lo(98765432123456789); + UniqueId id(tuid); + std::stringstream ss; + ss << id; + ASSERT_STREQ("002bdc546291f4b1-015ee2a321ce7d15", ss.str().c_str()); + UniqueId id2("002bdc546291f4b1", "015ee2a321ce7d15"); + ASSERT_TRUE(id == id2); + } + + { + TUniqueId tuid; + tuid.__set_hi(12345678987654321); + tuid.__set_lo(98765432123456789); + UniqueId id(tuid); std::stringstream ss; - ss << UniqueId(tuid); + ss << id; ASSERT_STREQ("002bdc546291f4b1-015ee2a321ce7d15", ss.str().c_str()); + UniqueId id2("002bdc546291f4b1", "015ee2a321ce7d15"); + ASSERT_TRUE(id == id2); + ASSERT_FALSE(id != id2); + UniqueId id3("002bdc546291f4b1", "015ee2a321ce7d16"); + ASSERT_TRUE(id != id3); + ASSERT_FALSE(id == id3); } } diff --git a/fe/src/main/java/org/apache/doris/alter/RollupHandler.java b/fe/src/main/java/org/apache/doris/alter/RollupHandler.java index 7eebb549571c34..5467977fc58202 100644 --- a/fe/src/main/java/org/apache/doris/alter/RollupHandler.java +++ b/fe/src/main/java/org/apache/doris/alter/RollupHandler.java @@ -600,11 +600,13 @@ protected void runOneCycle() { break; } case FINISHING: { - // check if previous load job finished + // check previous load job finished if (rollupJob.isPreviousLoadFinished()) { - // if all previous load jobs are finished, then send clear alter tasks to all related be + // if all previous load job finished, then send clear alter tasks to all related be + LOG.info("previous txn finished, try to send clear txn task"); int res = rollupJob.checkOrResendClearTasks(); if (res != 0) { + LOG.info("send clear txn task return {}", res); if (res == -1) { LOG.warn("rollup job is in finishing state, but could not finished, " + "just finish it, maybe a fatal error {}", rollupJob); diff --git a/fe/src/main/java/org/apache/doris/analysis/StringLiteral.java b/fe/src/main/java/org/apache/doris/analysis/StringLiteral.java index 06adb6391e3a33..c0f1546a01a35a 100644 --- a/fe/src/main/java/org/apache/doris/analysis/StringLiteral.java +++ b/fe/src/main/java/org/apache/doris/analysis/StringLiteral.java @@ -68,7 +68,7 @@ public int compareLiteral(LiteralExpr expr) { return 1; } - // compare string with utf-8 byte array, same with DM,BE,OLAPENGINE + // compare string with utf-8 byte array, same with DM,BE,StorageEngine byte[] thisBytes = null; byte[] otherBytes = null; try { diff --git a/fe/src/main/java/org/apache/doris/catalog/Table.java b/fe/src/main/java/org/apache/doris/catalog/Table.java index 9bd8c4e438b1a7..aedad489ff059d 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/src/main/java/org/apache/doris/catalog/Table.java @@ -267,8 +267,9 @@ public String 
toString() { /* * 1. Only schedule OLAP table. * 2. If table is colocate with other table, not schedule it. - * 3. if table's state is not NORMAL, we will schedule it, but will only repair VERSION_IMCOMPLETE status, - * this will be checked in TabletScheduler. + * 3. if table's state is ROLLUP or SCHEMA_CHANGE, but alter job's state is FINISHING, we should also + * schedule the tablet to repair it(only for VERSION_IMCOMPLETE case, this will be checked in + * TabletScheduler). */ public boolean needSchedule() { if (type != TableType.OLAP) { diff --git a/fe/src/main/java/org/apache/doris/clone/TabletChecker.java b/fe/src/main/java/org/apache/doris/clone/TabletChecker.java index 4923dd5aa3e5d4..d2e9d643a99a2d 100644 --- a/fe/src/main/java/org/apache/doris/clone/TabletChecker.java +++ b/fe/src/main/java/org/apache/doris/clone/TabletChecker.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Partition.PartitionState; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.catalog.Tablet; @@ -199,6 +200,11 @@ private void checkTablets() { OlapTable olapTbl = (OlapTable) table; for (Partition partition : olapTbl.getPartitions()) { + if (partition.getState() != PartitionState.NORMAL) { + // when alter job is in FINISHING state, partition state will be set to NORMAL, + // and we can schedule the tablets in it. + continue; + } boolean isInPrios = isInPrios(dbId, table.getId(), partition.getId()); boolean prioPartIsHealthy = true; for (MaterializedIndex idx : partition.getMaterializedIndices()) { diff --git a/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java index b3c3de10c65fa1..9f2186480be149 100644 --- a/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Partition.PartitionState; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.catalog.Tablet; @@ -513,11 +514,11 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) throw new SchedException(Status.UNRECOVERABLE, "table's state is not NORMAL"); } - if (statusPair.first != TabletStatus.VERSION_INCOMPLETE && tableState != OlapTableState.NORMAL) { - // If table is under ALTER process, do not allow to add or delete replica. + if (statusPair.first != TabletStatus.VERSION_INCOMPLETE && partition.getState() != PartitionState.NORMAL) { + // If table is under ALTER process(before FINISHING), do not allow to add or delete replica. // VERSION_INCOMPLETE will repair the replica in place, which is allowed. 
throw new SchedException(Status.UNRECOVERABLE, - "table's state is not NORMAL but tablet status is " + statusPair.first.name()); + "table is in alter process, but tablet status is " + statusPair.first.name()); } tabletCtx.setTabletStatus(statusPair.first); diff --git a/fe/src/main/java/org/apache/doris/load/Load.java b/fe/src/main/java/org/apache/doris/load/Load.java index c6e19a1b0939ff..c43b41f2850762 100644 --- a/fe/src/main/java/org/apache/doris/load/Load.java +++ b/fe/src/main/java/org/apache/doris/load/Load.java @@ -87,7 +87,6 @@ import org.apache.doris.task.AgentTask; import org.apache.doris.task.AgentTaskExecutor; import org.apache.doris.task.AgentTaskQueue; -import org.apache.doris.task.CancelDeleteTask; import org.apache.doris.task.PushTask; import org.apache.doris.thrift.TEtlState; import org.apache.doris.thrift.TMiniLoadRequest; @@ -3452,26 +3451,6 @@ public void deleteOld(DeleteStmt stmt) throws DdlException { } finally { db.writeUnlock(); } - } catch (Exception e) { - // cancel delete - // need not save cancel delete task in AgentTaskQueue - AgentBatchTask cancelDeleteBatchTask = new AgentBatchTask(); - for (AgentTask task : deleteBatchTask.getAllTasks()) { - PushTask pushTask = (PushTask) task; - CancelDeleteTask cancelDeleteTask = - new CancelDeleteTask(task.getBackendId(), task.getDbId(), task.getTableId(), - task.getPartitionId(), task.getIndexId(), task.getTabletId(), - pushTask.getSchemaHash(), pushTask.getVersion(), - pushTask.getVersionHash()); - cancelDeleteBatchTask.addTask(cancelDeleteTask); - } - if (cancelDeleteBatchTask.getTaskNum() > 0) { - AgentTaskExecutor.submit(cancelDeleteBatchTask); - } - - String failMsg = "delete fail, " + e.getMessage(); - LOG.warn(failMsg); - throw new DdlException(failMsg); } finally { // clear tasks List tasks = deleteBatchTask.getAllTasks(); diff --git a/fe/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/src/main/java/org/apache/doris/master/MasterImpl.java index 35305222aa343e..3da5a9ac74dbe0 100644 --- a/fe/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/src/main/java/org/apache/doris/master/MasterImpl.java @@ -116,7 +116,7 @@ public TMasterResult finishTask(TFinishTaskRequest request) throws TException { AgentTask task = AgentTaskQueue.getTask(backendId, taskType, signature); if (task == null) { if (taskType != TTaskType.DROP && taskType != TTaskType.STORAGE_MEDIUM_MIGRATE - && taskType != TTaskType.CANCEL_DELETE && taskType != TTaskType.RELEASE_SNAPSHOT) { + && taskType != TTaskType.RELEASE_SNAPSHOT) { String errMsg = "cannot find task. 
type: " + taskType + ", backendId: " + backendId + ", signature: " + signature; LOG.warn(errMsg); @@ -132,7 +132,7 @@ public TMasterResult finishTask(TFinishTaskRequest request) throws TException { // We start to let FE perceive the task's error msg if (taskType != TTaskType.MAKE_SNAPSHOT && taskType != TTaskType.UPLOAD && taskType != TTaskType.DOWNLOAD && taskType != TTaskType.MOVE - && taskType != TTaskType.CLONE) { + && taskType != TTaskType.CLONE && taskType != TTaskType.PUBLISH_VERSION) { return result; } } @@ -570,6 +570,10 @@ private void finishPublishVersion(AgentTask task, TFinishTaskRequest request) { publishVersionTask.addErrorTablets(errorTabletIds); publishVersionTask.setIsFinished(true); + if (request.getTask_status().getStatus_code() != TStatusCode.OK) { + // not remove the task from queue and be will retry + return; + } AgentTaskQueue.removeTask(publishVersionTask.getBackendId(), publishVersionTask.getTaskType(), publishVersionTask.getSignature()); diff --git a/fe/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/src/main/java/org/apache/doris/master/ReportHandler.java index 2cebeefd980204..d8295b43e31d53 100644 --- a/fe/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/src/main/java/org/apache/doris/master/ReportHandler.java @@ -77,6 +77,7 @@ import org.apache.logging.log4j.Logger; import org.apache.thrift.TException; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -864,7 +865,7 @@ private static void handleClearTransactions(ListMultimap transaction for (Long transactionId : transactionsToClear.keySet()) { ClearTransactionTask clearTransactionTask = new ClearTransactionTask(backendId, transactionId, - transactionsToClear.get(transactionId)); + new ArrayList(transactionsToClear.get(transactionId))); batchTask.addTask(clearTransactionTask); AgentTaskQueue.addTask(clearTransactionTask); } diff --git a/fe/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 06601366bfb19e..e4b6269ec744d3 100644 --- a/fe/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -395,7 +395,7 @@ private void turnOffPreAgg(AggregateInfo aggInfo, SelectStmt selectStmt, Analyze if (!slot.getColumn().isKey()) { if (conjunctSlotIds.contains(slot.getId())) { turnOffReason = "conjunct on " + slot.getColumn().getName() + - " which is OlapEngine value column"; + " which is StorageEngine value column"; valueColumnValidate = false; break; } @@ -502,7 +502,7 @@ private void turnOffPreAgg(AggregateInfo aggInfo, SelectStmt selectStmt, Analyze if (aggExpr.getFnName().getFunction().equalsIgnoreCase("MAX") && aggExpr.getFnName().getFunction().equalsIgnoreCase("MIN")) { returnColumnValidate = false; - turnOffReason = "the type of agg on OlapEngine's Key column should only be MAX or MIN." + turnOffReason = "the type of agg on StorageEngine's Key column should only be MAX or MIN." 
+ "agg expr: " + aggExpr.toSql(); break; } @@ -568,7 +568,7 @@ private void turnOffPreAgg(AggregateInfo aggInfo, SelectStmt selectStmt, Analyze for (SlotDescriptor slot : selectStmt.getTableRefs().get(0).getDesc().getSlots()) { if (!slot.getColumn().isKey()) { if (groupSlotIds.contains(slot.getId())) { - turnOffReason = "groupExpr contains OlapEngine's Value"; + turnOffReason = "groupExpr contains StorageEngine's Value"; groupExprValidate = false; break; } diff --git a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java index 4f3ace34ce1703..1a2597b972ae2e 100644 --- a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java +++ b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java @@ -24,7 +24,6 @@ import org.apache.doris.thrift.TAgentServiceVersion; import org.apache.doris.thrift.TAgentTaskRequest; import org.apache.doris.thrift.TAlterTabletReq; -import org.apache.doris.thrift.TCancelDeleteDataReq; import org.apache.doris.thrift.TCheckConsistencyReq; import org.apache.doris.thrift.TClearAlterTaskRequest; import org.apache.doris.thrift.TClearTransactionTaskRequest; @@ -209,15 +208,6 @@ private TAgentTaskRequest toAgentTaskRequest(AgentTask task) { tAgentTaskRequest.setResource_info(schemaChangeTask.getResourceInfo()); return tAgentTaskRequest; } - case CANCEL_DELETE: { - CancelDeleteTask cancelDeleteTask = (CancelDeleteTask) task; - TCancelDeleteDataReq request = cancelDeleteTask.toThrift(); - if (LOG.isDebugEnabled()) { - LOG.debug(request.toString()); - } - tAgentTaskRequest.setCancel_delete_data_req(request); - return tAgentTaskRequest; - } case STORAGE_MEDIUM_MIGRATE: { StorageMediaMigrationTask migrationTask = (StorageMediaMigrationTask) task; TStorageMediumMigrateReq request = migrationTask.toThrift(); diff --git a/fe/src/main/java/org/apache/doris/task/CancelDeleteTask.java b/fe/src/main/java/org/apache/doris/task/CancelDeleteTask.java deleted file mode 100644 index 9357c052e551d8..00000000000000 --- a/fe/src/main/java/org/apache/doris/task/CancelDeleteTask.java +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.task; - -import org.apache.doris.thrift.TCancelDeleteDataReq; -import org.apache.doris.thrift.TTaskType; - -public class CancelDeleteTask extends AgentTask { - private int schemaHash; - private long version; - private long versionHash; - - public CancelDeleteTask(long backendId, long dbId, long tableId, long partitionId, long indexId, - long tabletId, int schemaHash, long version, long versionHash) { - super(null, backendId, TTaskType.CANCEL_DELETE, dbId, tableId, partitionId, indexId, tabletId); - - this.schemaHash = schemaHash; - this.version = version; - this.versionHash = versionHash; - } - - public TCancelDeleteDataReq toThrift() { - TCancelDeleteDataReq request = new TCancelDeleteDataReq(tabletId, schemaHash, version, versionHash); - return request; - } - - public int getSchemaHash() { - return schemaHash; - } - - public long getVersion() { - return version; - } - - public long getVersionHash() { - return versionHash; - } -} diff --git a/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java index de535b84919541..4cb938d698f9db 100644 --- a/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -133,6 +133,8 @@ public TCreateTabletReq toThrift() { if (inRestoreMode) { createTabletReq.setIn_restore_mode(true); } + createTabletReq.setTable_id(tableId); + createTabletReq.setPartition_id(partitionId); return createTabletReq; } diff --git a/fe/src/main/java/org/apache/doris/task/CreateRollupTask.java b/fe/src/main/java/org/apache/doris/task/CreateRollupTask.java index c8a71d7f309a94..a43fc78be842c5 100644 --- a/fe/src/main/java/org/apache/doris/task/CreateRollupTask.java +++ b/fe/src/main/java/org/apache/doris/task/CreateRollupTask.java @@ -107,6 +107,8 @@ public TAlterTabletReq toThrift() { tSchema.setBloom_filter_fpp(bfFpp); } createTabletReq.setTablet_schema(tSchema); + createTabletReq.setTable_id(tableId); + createTabletReq.setPartition_id(partitionId); tAlterTabletReq.setNew_tablet_req(createTabletReq); diff --git a/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java b/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java index e1b0ec58cc340b..9dbb47b0ffaa19 100644 --- a/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java +++ b/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java @@ -58,11 +58,12 @@ public List getPartitionVersionInfos() { return partitionVersionInfos; } - public List getErrorTablets() { + public synchronized List getErrorTablets() { return errorTablets; } - public void addErrorTablets(List errorTablets) { + public synchronized void addErrorTablets(List errorTablets) { + this.errorTablets.clear(); if (errorTablets == null) { return; } diff --git a/fe/src/main/java/org/apache/doris/task/SchemaChangeTask.java b/fe/src/main/java/org/apache/doris/task/SchemaChangeTask.java index b201f9ad2cb7fe..8dc9ff9d6c32f6 100644 --- a/fe/src/main/java/org/apache/doris/task/SchemaChangeTask.java +++ b/fe/src/main/java/org/apache/doris/task/SchemaChangeTask.java @@ -99,6 +99,8 @@ public TAlterTabletReq toThrift() { tSchema.setBloom_filter_fpp(bfFpp); } createTabletReq.setTablet_schema(tSchema); + createTabletReq.setTable_id(tableId); + createTabletReq.setPartition_id(partitionId); tAlterTabletReq.setNew_tablet_req(createTabletReq); diff --git a/fe/src/main/java/org/apache/doris/task/SnapshotTask.java b/fe/src/main/java/org/apache/doris/task/SnapshotTask.java index 
ac3bb851e049a0..185790f54e7955 100644 --- a/fe/src/main/java/org/apache/doris/task/SnapshotTask.java +++ b/fe/src/main/java/org/apache/doris/task/SnapshotTask.java @@ -79,6 +79,7 @@ public TSnapshotRequest toThrift() { request.setVersion(version); request.setVersion_hash(versionHash); request.setList_files(true); + request.setPreferred_snapshot_version(2); return request; } } diff --git a/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java b/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java index f64a01344fedbd..1477fe927e3fed 100644 --- a/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java +++ b/fe/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java @@ -757,8 +757,8 @@ public boolean isPreviousTransactionsFinished(long endTransactionId, long dbId) continue; } if (entry.getKey() <= endTransactionId) { - LOG.info("txn is still running: {}, checking end txn id: {}", - entry.getValue(), endTransactionId); + LOG.info("find a running txn with txn_id={}, less than schema change txn_id {}", + entry.getKey(), endTransactionId); return false; } } diff --git a/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java b/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java index 271e5ae4ef1605..129c03948bd531 100644 --- a/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java +++ b/fe/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java @@ -54,7 +54,7 @@ protected void runOneCycle() { try { publishVersion(); } catch (Throwable t) { - LOG.error("errors while publish version to all backends, {}", t); + LOG.error("errors while publish version to all backends", t); } } @@ -146,8 +146,18 @@ private void publishVersion() throws UserException { } else { for (long tabletId : errorTablets) { // tablet inverted index also contains rollingup index + // if tablet meta not contains the tablet, skip this tablet because this tablet is dropped + // from fe + if (tabletInvertedIndex.getTabletMeta(tabletId) == null) { + continue; + } Replica replica = tabletInvertedIndex.getReplica(tabletId, publishVersionTask.getBackendId()); - transErrorReplicas.add(replica); + if (replica != null) { + transErrorReplicas.add(replica); + } else { + LOG.info("could not find related replica with tabletid={}, backendid={}", + tabletId, publishVersionTask.getBackendId()); + } } } } else { @@ -178,7 +188,12 @@ private void publishVersion() throws UserException { if (errorPartitionIds.contains(partitionId)) { Replica replica = tabletInvertedIndex.getReplica(tabletId, unfinishedTask.getBackendId()); - transErrorReplicas.add(replica); + if (replica != null) { + transErrorReplicas.add(replica); + } else { + LOG.info("could not find related replica with tabletid={}, backendid={}", + tabletId, unfinishedTask.getBackendId()); + } } } } diff --git a/fe/src/test/java/org/apache/doris/task/AgentTaskTest.java b/fe/src/test/java/org/apache/doris/task/AgentTaskTest.java index 2df873efc7ad77..1a47123a2754dd 100644 --- a/fe/src/test/java/org/apache/doris/task/AgentTaskTest.java +++ b/fe/src/test/java/org/apache/doris/task/AgentTaskTest.java @@ -138,11 +138,6 @@ public void setUp() throws AnalysisException { new SchemaChangeTask(null, backendId1, dbId, tableId, partitionId, indexId1, tabletId1, replicaId1, columns, schemaHash2, schemaHash1, shortKeyNum, storageType, null, 0, TKeysType.AGG_KEYS); - - // cancel delete - cancelDeleteTask = - new CancelDeleteTask(backendId1, dbId, tableId, partitionId, indexId1, 
tabletId1, - schemaHash1, version, versionHash); } @Test @@ -214,12 +209,6 @@ public void toThriftTest() throws Exception { Assert.assertEquals(TTaskType.SCHEMA_CHANGE, request6.getTask_type()); Assert.assertEquals(schemaChangeTask.getSignature(), request6.getSignature()); Assert.assertNotNull(request6.getAlter_tablet_req()); - - // cancel delete - TAgentTaskRequest request9 = (TAgentTaskRequest) toAgentTaskRequest.invoke(agentBatchTask, cancelDeleteTask); - Assert.assertEquals(TTaskType.CANCEL_DELETE, request9.getTask_type()); - Assert.assertEquals(cancelDeleteTask.getSignature(), request9.getSignature()); - Assert.assertNotNull(request9.getCancel_delete_data_req()); } @Test diff --git a/gensrc/proto/olap_common.proto b/gensrc/proto/olap_common.proto index d37baf557bb333..3ac7c53a810dab 100644 --- a/gensrc/proto/olap_common.proto +++ b/gensrc/proto/olap_common.proto @@ -22,22 +22,22 @@ package doris; option java_package = "org.apache.doris.proto"; message ColumnMessage { - required string name = 1; - required string type = 2; - required string aggregation = 3; - required uint32 length = 4; - required bool is_key = 5; - optional string default_value = 6; - optional string referenced_column = 7; - optional uint32 index_length = 8; - optional uint32 precision = 9 [default = 27]; - optional uint32 frac = 10 [default = 9]; - optional bool is_allow_null = 11 [default=false]; - optional uint32 unique_id = 12; - repeated uint32 sub_column = 13; - optional bool is_root_column = 14 [default=false]; + required string name = 1; // ColumnPB.name + required string type = 2; // ColumnPB.type + required string aggregation = 3; // ColumnPB.aggregation + required uint32 length = 4; // ColumnPB.length + required bool is_key = 5; // ColumnPB.is_key + optional string default_value = 6; // ColumnPB.default_value + optional string referenced_column = 7; // ColumnPB. 
+ optional uint32 index_length = 8; // ColumnPB.index_length + optional uint32 precision = 9 [default = 27]; // ColumnPB.precision + optional uint32 frac = 10 [default = 9]; // ColumnPB.frac + optional bool is_allow_null = 11 [default=false]; // ColumnPB.is_nullable + optional uint32 unique_id = 12; // ColumnPB.unique_id + repeated uint32 sub_column = 13; // not used + optional bool is_root_column = 14 [default=false]; // not used // is bloom filter column - optional bool is_bf_column = 15 [default=false]; + optional bool is_bf_column = 15 [default=false]; // ColumnPB.is_bf_column } enum CompressKind { diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 3cfe66843f5eb0..96002b13980ba4 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -24,14 +24,14 @@ option java_package = "org.apache.doris.proto"; import "olap_common.proto"; import "types.proto"; -message ColumnPruning { +message ZoneMap { required bytes min = 1; required bytes max = 2; optional bool null_flag = 3; } message DeltaPruning { - repeated ColumnPruning column_pruning = 1; + repeated ZoneMap zone_maps = 1; } // define OLAP FileVersion Message, Base, delta and cumulative delta @@ -53,37 +53,80 @@ message FileVersionMessage { // Deprecated, Use PDelta instead } message PDelta { - required int64 start_version = 1; - required int64 end_version = 2; - required int64 version_hash = 3; - required int64 creation_time = 4; - repeated PSegmentGroup segment_group = 5; - optional DeleteConditionMessage delete_condition = 6; + required int64 start_version = 1; // RowsetMetaPB.start_version + required int64 end_version = 2; // RowsetMetaPB.end_version + required int64 version_hash = 3; // RowsetMetaPB.version_hash + required int64 creation_time = 4; // RowsetMetaPB.creation_time + repeated SegmentGroupPB segment_group = 5; // RowsetMetaPB.extra_properties + optional DeletePredicatePB delete_condition = 6; // RowsetMetaPB.delete_predicate } -message PSegmentGroup { +enum RowsetTypePB { + ALPHA_ROWSET = 0; // the original Doris columnar storage format + BETA_ROWSET = 1; // the new columnar storage format +} + +enum RowsetStatePB { + PREPARED = 0; // the rowset is being written + COMMITTED = 1; // the rowset is fully written but not yet visible to users; a rowset in this state must not be deleted by the BE on its own, only on instruction from the FE + VISIBLE = 2; // the rowset is already visible to users +} + +message RowsetMetaPB { + required int64 rowset_id = 1; + optional int64 partition_id = 2; + optional int64 tablet_id = 3; + optional int64 txn_id = 4; + optional int32 tablet_schema_hash = 5; // tablet id and schema hash could find tablet + optional RowsetTypePB rowset_type = 6; // alpha + optional RowsetStatePB rowset_state = 7; + optional int64 start_version = 8; // PDelta.start_version + optional int64 end_version = 9; // PDelta.end_version + optional int64 version_hash = 10; // PDelta.version_hash + optional int64 num_rows = 11; // calculated from segment group + optional int64 total_disk_size = 12; // calculated by index + data + optional int64 data_disk_size = 13; // calculated sum(segmentgroup.data_size) + optional int64 index_disk_size = 14; // calculated sum(segmentgroup.index_size) + // column min/max/null flag statistic info + repeated ZoneMap zone_maps = 15; // not set, not used currently + optional DeletePredicatePB delete_predicate = 16; + optional bool empty = 17; // calculated from segment group + optional PUniqueId load_id = 18; + optional bool delete_flag = 19; // not set, get from olap index header msg?
+ optional int64 creation_time = 20; // PDelta.creation_time + optional PUniqueId tablet_uid = 21; + // spare field id for future use + optional AlphaRowsetExtraMetaPB alpha_rowset_extra_meta_pb = 50; +} + +message AlphaRowsetExtraMetaPB { + repeated SegmentGroupPB segment_groups = 1; +} + +message SegmentGroupPB { required int32 segment_group_id = 1; required int32 num_segments = 2; required int64 index_size = 3; required int64 data_size = 4; required int64 num_rows = 5; - repeated ColumnPruning column_pruning = 6; + repeated ZoneMap zone_maps = 6; optional bool empty = 7; + optional PUniqueId load_id = 8; } message PPendingDelta { - required int64 partition_id = 1; - required int64 transaction_id = 2; - required int64 creation_time = 3; - repeated PPendingSegmentGroup pending_segment_group = 4; - optional DeleteConditionMessage delete_condition = 5; + required int64 partition_id = 1; // RowsetMetaPB.partition_id + required int64 transaction_id = 2; // RowsetMetaPB.txn_id + required int64 creation_time = 3; // RowsetMetaPB.creation_time + repeated PendingSegmentGroupPB pending_segment_group = 4; // RowsetMetaPB.extra_properties + optional DeletePredicatePB delete_condition = 5; // RowsetMetaPB.delete_predicate } -message PPendingSegmentGroup { +message PendingSegmentGroupPB { required int32 pending_segment_group_id = 1; required int32 num_segments = 2; required PUniqueId load_id = 3; - repeated ColumnPruning column_pruning = 4; + repeated ZoneMap zone_maps = 4; optional bool empty = 5; } @@ -109,40 +152,133 @@ enum KeysType { AGG_KEYS = 2; } -message DeleteConditionMessage { +message DeletePredicatePB { required int32 version = 1; - repeated string sub_conditions = 2; + repeated string sub_predicates = 2; } message OLAPHeaderMessage { - required uint32 num_rows_per_data_block = 1; + required uint32 num_rows_per_data_block = 1; // TabletSchemaPB.num_rows_per_row_block - repeated FileVersionMessage file_version = 2; // Deprecated. Use PDelta instead. + repeated FileVersionMessage file_version = 2; // Deprecated. Use PDelta instead after stream load - required int32 cumulative_layer_point = 3; - required uint32 num_short_key_fields = 4; - repeated ColumnMessage column = 5; - required int64 creation_time = 6; + required int32 cumulative_layer_point = 3; // TabletMetaPB.cumulative_layer_point + required uint32 num_short_key_fields = 4; // TabletSchemaPB.num_short_key_columns + repeated ColumnMessage column = 5; // TabletSchemaPB.column + required int64 creation_time = 6; // TabletMetaPB.creation_time repeated int32 selectivity = 7; // Deprecated. - optional SchemaChangeStatusMessage schema_change_status = 8; - optional DataFileType data_file_type = 9 [default = OLAP_DATA_FILE]; - optional uint32 next_column_unique_id = 10 [default = 0]; - optional CompressKind compress_kind = 11 [default = COMPRESS_LZO]; - optional uint32 segment_size = 12 [default = 4292870144]; - repeated DeleteConditionMessage delete_data_conditions = 13; + optional SchemaChangeStatusMessage schema_change_status = 8; // TabletMetaPB.alter_task + optional DataFileType data_file_type = 9 [default = OLAP_DATA_FILE]; // ? only column oriented + optional uint32 next_column_unique_id = 10 [default = 0]; // TabletSchemaPB.next_column_unique_id + optional CompressKind compress_kind = 11 [default = COMPRESS_LZO]; // TabletSchemaPB.compress_kind + optional uint32 segment_size = 12 [default = 4292870144]; // ? 
not used + repeated DeletePredicatePB delete_data_conditions = 13; // not serialized any more, just read from PDelta // bloom filter false positive probability - optional double bf_fpp = 14; - optional KeysType keys_type = 15; - repeated PDelta delta = 16; - repeated PPendingDelta pending_delta = 17; - repeated PDelta incremental_delta = 18; + optional double bf_fpp = 14; // TabletSchemaPB.bf_fpp + optional KeysType keys_type = 15; // TabletSchemaPB.keys_type + repeated PDelta delta = 16; // TabletMetaPB.rs_metas + repeated PPendingDelta pending_delta = 17; // need write to olap meta store + repeated PDelta incremental_delta = 18; // TabletMetaPB.inc_rs_metas // if true, this tablet will not do compaction, // and does not create init version - optional bool in_restore_mode = 19 [default = false]; - optional int64 tablet_id = 20; - optional int32 schema_hash = 21; - optional uint64 shard = 22; + optional bool in_restore_mode = 19 [default = false]; // TabletMetaPB.is_restore_mode + optional int64 tablet_id = 20; // TabletMetaPB.tablet_id + optional int32 schema_hash = 21; // TabletMetaPB.schema_hash? int32 vs int64 + optional uint64 shard = 22; // TabletMetaPB.shard_id? int64 vs int32 +} + +enum AlterTabletState { + ALTER_PREPARED = 0; + ALTER_RUNNING = 1; + ALTER_FINISHED = 2; + ALTER_FAILED = 3; +} + +enum AlterTabletType { + SCHEMA_CHANGE = 1; + ROLLUP = 2; +} + +message AlterTabletPB { + optional AlterTabletState alter_state = 1; + required int64 related_tablet_id = 2; + optional int32 related_schema_hash = 3; + optional AlterTabletType alter_type = 4; +} + +enum ColumnType { + TINYINT = 0; + SMALLINT = 1; + INT = 2; + BIGINT = 3; + LARGEINT = 4; + FLOAT = 5; + DOUBLE = 6; + DECIMAL = 7; + CHAR = 8; + VARCHAR = 9; + HLL = 10; + DATE = 11; + DATETIME = 12; +} + +message ColumnPB { + required int32 unique_id = 1; // ColumnMessage.unique_id + optional string name = 2; // ColumnMessage.name + required string type = 3; // ColumnMessage.type + optional bool is_key = 4; // ColumnMessage.is_key + optional string aggregation = 5; // ColumnMessage.aggregation + optional bool is_nullable = 6; // ColumnMessage.is_allow_null + optional bytes default_value = 7; // ColumnMessage.default_value ? + optional int32 precision = 8; // ColumnMessage.precision + optional int32 frac = 9; // ColumnMessage.frac + optional int32 length = 10; // ColumnMessage.length + optional int32 index_length = 11; // ColumnMessage.index_length + optional bool is_bf_column = 12; // ColumnMessage.is_bf_column + optional int32 referenced_column_id = 13; // + optional string referenced_column = 14; // ColumnMessage.referenced_column? + +} + +message TabletSchemaPB { + optional KeysType keys_type = 1; // OLAPHeaderMessage.keys_type + repeated ColumnPB column = 2; // OLAPHeaderMessage.column + optional int32 num_short_key_columns = 3; // OLAPHeaderMessage.num_short_key_fields + optional int32 num_rows_per_row_block = 4; // OLAPHeaderMessage.num_rows_per_data_block + optional CompressKind compress_kind = 5; // OLAPHeaderMessage.compress_kind + optional double bf_fpp = 6; // OLAPHeaderMessage.bf_fpp + optional uint32 next_column_unique_id = 7; // OLAPHeaderMessage.next_column_unique_id +} + +enum TabletStatePB { + PB_NOTREADY = 0; // under alter table, rollup, clone + PB_RUNNING = 1; + PB_TOMBSTONED = 2; + PB_STOPPED = 3; + PB_SHUTDOWN = 4; +} + +message TabletMetaPB { + optional int64 table_id = 1; // ? + optional int64 partition_id = 2; // ? 
+ optional int64 tablet_id = 3; // OlapHeaderMessage.tablet_id + optional int32 schema_hash = 4; // OlapHeaderMessage.schema_hash + optional int32 shard_id = 5; // OlapHeaderMessage.shard + optional int64 creation_time = 6; // OlapHeaderMessage.creation_time + optional int64 cumulative_layer_point = 7; // OlapHeaderMessage.cumulative_layer_point + + optional TabletStatePB tablet_state = 8; + optional TabletSchemaPB schema = 9; + repeated RowsetMetaPB rs_metas = 10; + repeated RowsetMetaPB inc_rs_metas = 11; + optional AlterTabletPB alter_task = 12; + // if true, this tablet will not do compaction, + // and does not create init version + optional bool in_restore_mode = 13 [default = false]; // OlapHeaderMessage.in_restore_mode + // a uniqued id to identified tablet with same tablet_id and schema hash + optional PUniqueId tablet_uid = 14; + optional int64 end_rowset_id = 15; } message OLAPIndexHeaderMessage { @@ -163,4 +299,3 @@ message OLAPDataHeaderMessage { message OLAPRawDeltaHeaderMessage { required int32 schema_hash = 2; } - diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index 5d037a5010c9eb..2d71ca4965881d 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -51,6 +51,8 @@ struct TCreateTabletReq { // this new tablet should be colocate with base tablet 7: optional Types.TTabletId base_tablet_id 8: optional Types.TSchemaHash base_schema_hash + 9: optional i64 table_id + 10: optional i64 partition_id } struct TDropTabletReq { @@ -120,6 +122,7 @@ struct TStorageMediumMigrateReq { } struct TCancelDeleteDataReq { + // deprecated 1: required Types.TTabletId tablet_id 2: required Types.TSchemaHash schema_hash 3: required Types.TVersion version @@ -157,6 +160,7 @@ struct TSnapshotRequest { 7: optional bool list_files // if all nodes has been upgraded, it can be removed. 
8: optional bool allow_incremental_clone + 9: optional i32 preferred_snapshot_version = 1 // request preferred snapshot version, default value is 1 for old version be } struct TReleaseSnapshotRequest { @@ -233,12 +237,16 @@ struct TAgentTaskRequest { 22: optional TMoveDirReq move_dir_req 23: optional TRecoverTabletReq recover_tablet_req 24: optional TAlterTabletReqV2 alter_tablet_req_v2 + 25: optional i64 recv_time; // time the task is inserted to queue } struct TAgentResult { 1: required Status.TStatus status 2: optional string snapshot_path 3: optional bool allow_incremental_clone + // the snapshot that be has done according + // to the preferred snapshot version that client requests + 4: optional i32 snapshot_version = 1 } struct TTopicItem { diff --git a/gensrc/thrift/MetricDefs.thrift b/gensrc/thrift/MetricDefs.thrift index 66db6314ae3b68..d12e5394069aab 100644 --- a/gensrc/thrift/MetricDefs.thrift +++ b/gensrc/thrift/MetricDefs.thrift @@ -706,20 +706,20 @@ const map TMetricDefs = "contexts": [ "PALO_BE" ], - "description": "Looking count of olapengine's lru cache.", + "description": "Looking count of StorageEngine's lru cache.", "key": "palo_be.olap.lru_cache.lookup_count", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine Lru Cache Lookup Count", + "label": "StorageEngine Lru Cache Lookup Count", "units": Metrics.TUnit.NONE }, "palo_be.olap.lru_cache.hit_count": { "contexts": [ "PALO_BE" ], - "description": "Hit count of olapengine's lru cache.", + "description": "Hit count of StorageEngine's lru cache.", "key": "palo_be.olap.lru_cache.hit_count", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine Lru Cache Hit Count", + "label": "StorageEngine Lru Cache Hit Count", "units": Metrics.TUnit.NONE }, "palo_be.olap.push_count": { @@ -729,7 +729,7 @@ const map TMetricDefs = "description": "Pushing count over the life of the Palo Be process.", "key": "palo_be.olap.push_count", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine Pushing Count", + "label": "StorageEngine Pushing Count", "units": Metrics.TUnit.NONE }, "palo_be.olap.fetch_count": { @@ -739,7 +739,7 @@ const map TMetricDefs = "description": "Fetch count over the life of the Palo Be process.", "key": "palo_be.olap.fetch_count", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine Fetch Count", + "label": "StorageEngine Fetch Count", "units": Metrics.TUnit.NONE }, "palo_be.olap.request_count": { @@ -749,7 +749,7 @@ const map TMetricDefs = "description": "Request count over the life of the Palo Be process.", "key": "palo_be.olap.request_count", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine Request Count", + "label": "StorageEngine Request Count", "units": Metrics.TUnit.NONE }, "palo_be.olap.be_merge.delta_num": { @@ -759,7 +759,7 @@ const map TMetricDefs = "description": "Base compaction num over the life of the Palo Be process.", "key": "palo_be.olap.be_merge.delta_num", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine base compatcion num", + "label": "StorageEngine base compatcion num", "units": Metrics.TUnit.NONE }, "palo_be.olap.be_merge_size": { @@ -769,7 +769,7 @@ const map TMetricDefs = "description": "Base compaction size over the life of the Palo Be process.", "key": "palo_be.olap.be_merge_size", "kind": Metrics.TMetricKind.COUNTER, - "label": "OlapEngine base compatcion size", + "label": "StorageEngine base compatcion size", "units": Metrics.TUnit.NONE }, "palo_be.olap.ce_merge.delta_num": { @@ -779,7 +779,7 @@ const map TMetricDefs = "description": "Cumulative 
diff --git a/gensrc/thrift/MetricDefs.thrift b/gensrc/thrift/MetricDefs.thrift
index 66db6314ae3b68..d12e5394069aab 100644
--- a/gensrc/thrift/MetricDefs.thrift
+++ b/gensrc/thrift/MetricDefs.thrift
@@ -706,20 +706,20 @@ const map TMetricDefs =
     "contexts": [
       "PALO_BE"
     ],
-    "description": "Looking count of olapengine's lru cache.",
+    "description": "Lookup count of StorageEngine's lru cache.",
     "key": "palo_be.olap.lru_cache.lookup_count",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine Lru Cache Lookup Count",
+    "label": "StorageEngine Lru Cache Lookup Count",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.lru_cache.hit_count": {
     "contexts": [
       "PALO_BE"
     ],
-    "description": "Hit count of olapengine's lru cache.",
+    "description": "Hit count of StorageEngine's lru cache.",
     "key": "palo_be.olap.lru_cache.hit_count",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine Lru Cache Hit Count",
+    "label": "StorageEngine Lru Cache Hit Count",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.push_count": {
@@ -729,7 +729,7 @@ const map TMetricDefs =
     "description": "Pushing count over the life of the Palo Be process.",
     "key": "palo_be.olap.push_count",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine Pushing Count",
+    "label": "StorageEngine Pushing Count",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.fetch_count": {
@@ -739,7 +739,7 @@ const map TMetricDefs =
     "description": "Fetch count over the life of the Palo Be process.",
     "key": "palo_be.olap.fetch_count",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine Fetch Count",
+    "label": "StorageEngine Fetch Count",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.request_count": {
@@ -749,7 +749,7 @@ const map TMetricDefs =
     "description": "Request count over the life of the Palo Be process.",
     "key": "palo_be.olap.request_count",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine Request Count",
+    "label": "StorageEngine Request Count",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.be_merge.delta_num": {
@@ -759,7 +759,7 @@ const map TMetricDefs =
     "description": "Base compaction num over the life of the Palo Be process.",
     "key": "palo_be.olap.be_merge.delta_num",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine base compatcion num",
+    "label": "StorageEngine base compaction num",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.be_merge_size": {
@@ -769,7 +769,7 @@ const map TMetricDefs =
     "description": "Base compaction size over the life of the Palo Be process.",
     "key": "palo_be.olap.be_merge_size",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine base compatcion size",
+    "label": "StorageEngine base compaction size",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.ce_merge.delta_num": {
@@ -779,7 +779,7 @@ const map TMetricDefs =
     "description": "Cumulative compaction num over the life of the Palo Be process.",
     "key": "palo_be.olap.ce_merge.delta_num",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine cumulative compatcion num",
+    "label": "StorageEngine cumulative compaction num",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.olap.ce_merge_size": {
@@ -789,7 +789,7 @@ const map TMetricDefs =
     "description": "Cumulative compaction size over the life of the Palo Be process.",
     "key": "palo_be.olap.ce_merge_size",
     "kind": Metrics.TMetricKind.COUNTER,
-    "label": "OlapEngine cumulative compatcion size",
+    "label": "StorageEngine cumulative compaction size",
     "units": Metrics.TUnit.NONE
   },
   "palo_be.thrift_server.PaloBackend.connections_in_use": {
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 262e6416311be7..bf868c13455e1f 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -145,7 +145,7 @@ enum TTaskType {
     STORAGE_MEDIUM_MIGRATE,
     ROLLUP,
     SCHEMA_CHANGE,
-    CANCEL_DELETE,
+    CANCEL_DELETE, // Deprecated
     MAKE_SNAPSHOT,
     RELEASE_SNAPSHOT,
     CHECK_CONSISTENCY,
diff --git a/run-ut.sh b/run-ut.sh
index eda8bf8931b913..7c7563160a2d6e 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -155,7 +155,7 @@ ${DORIS_TEST_BINARY_DIR}/util/coding_test
 ${DORIS_TEST_BINARY_DIR}/util/faststring_test
 ${DORIS_TEST_BINARY_DIR}/util/tdigest_test

-## Running common Unittest
+# Running common Unittest
 ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test

 ## Running exprs unit test
@@ -185,10 +185,10 @@ ${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test
 ${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test
 ${DORIS_TEST_BINARY_DIR}/exec/es_scan_reader_test
 ${DORIS_TEST_BINARY_DIR}/exec/es_query_builder_test
-${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test
-${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test
+${DORIS_TEST_BINARY_DIR}/exec/tablet_info_test
+${DORIS_TEST_BINARY_DIR}/exec/tablet_sink_test

-## Running runtime Unittest
+# Running runtime Unittest
 ${DORIS_TEST_BINARY_DIR}/runtime/fragment_mgr_test
 ${DORIS_TEST_BINARY_DIR}/runtime/decimal_value_test
 ${DORIS_TEST_BINARY_DIR}/runtime/datetime_value_test
@@ -209,7 +209,7 @@ ${DORIS_TEST_BINARY_DIR}/http/http_utils_test
 ${DORIS_TEST_BINARY_DIR}/http/stream_load_test
 ${DORIS_TEST_BINARY_DIR}/http/http_client_test

-# Running OLAPEngine Unittest
+# Running StorageEngine Unittest
 ${DORIS_TEST_BINARY_DIR}/olap/bit_field_test
 ${DORIS_TEST_BINARY_DIR}/olap/byte_buffer_test
 ${DORIS_TEST_BINARY_DIR}/olap/run_length_byte_test
@@ -229,10 +229,17 @@ ${DORIS_TEST_BINARY_DIR}/olap/column_reader_test
 ${DORIS_TEST_BINARY_DIR}/olap/row_cursor_test
 ${DORIS_TEST_BINARY_DIR}/olap/skiplist_test
 ${DORIS_TEST_BINARY_DIR}/olap/serialize_test
-${DORIS_TEST_BINARY_DIR}/olap/olap_header_manager_test
+
+# Running tablet meta Unittest
+${DORIS_TEST_BINARY_DIR}/olap/tablet_meta_manager_test
+${DORIS_TEST_BINARY_DIR}/olap/tablet_mgr_test
 ${DORIS_TEST_BINARY_DIR}/olap/olap_meta_test
 ${DORIS_TEST_BINARY_DIR}/olap/delta_writer_test
 ${DORIS_TEST_BINARY_DIR}/olap/field_info_test
+${DORIS_TEST_BINARY_DIR}/olap/olap_snapshot_converter_test
+${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_meta_manager_test
+${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_meta_test
+${DORIS_TEST_BINARY_DIR}/olap/rowset/alpha_rowset_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test
@@ -240,6 +247,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/plain_page_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/binary_plain_page_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/column_reader_writer_test
 ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/rle_page_test
+${DORIS_TEST_BINARY_DIR}/olap/txn_manager_test

 # Running routine load test
 ${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test
@@ -253,8 +261,5 @@ fi
 cp -r ${DORIS_HOME}/be/test/agent/test_data ${DORIS_TEST_BINARY_DIR}/agent/
 cd ${DORIS_TEST_BINARY_DIR}/agent
 # ./agent_server_test
-# ./file_downloader_test
 #./heartbeat_server_test
-#./pusher_test
 ./utils_test
-#./task_worker_pool_test