From 895f132d57b1fda8edef25cab06ffd144c937185 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Thu, 13 Jun 2019 15:15:56 +0800 Subject: [PATCH 1/4] Add migration lock when migration --- be/src/olap/delta_writer.cpp | 8 +++++++ be/src/olap/push_handler.cpp | 8 +++++++ be/src/olap/schema_change.cpp | 9 +++++++ be/src/olap/tablet.h | 2 ++ be/src/olap/task/engine_clone_task.cpp | 4 ++++ .../task/engine_storage_migration_task.cpp | 15 ++++++++++++ be/src/olap/txn_manager.cpp | 23 ------------------ be/src/olap/txn_manager.h | 4 ---- be/src/olap/utils.h | 24 +++++++++---------- 9 files changed, 58 insertions(+), 39 deletions(-) diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index c160391c9e2cb7..8f1678a05beddd 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -79,6 +79,10 @@ OLAPStatus DeltaWriter::init() { } { + ReadLock base_migration_rlock(_tablet->get_migration_lock_ptr(), true); + if (!base_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } MutexLock push_lock(_tablet->get_push_lock()); RETURN_NOT_OK(StorageEngine::instance()->txn_manager()->prepare_txn( _req.partition_id, _req.txn_id, @@ -99,6 +103,10 @@ OLAPStatus DeltaWriter::init() { << ", schema_hash: " << new_schema_hash; return OLAP_ERR_TABLE_NOT_FOUND; } + ReadLock new_migration_rlock(_new_tablet->get_migration_lock_ptr(), true); + if (!new_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } StorageEngine::instance()->txn_manager()->prepare_txn( _req.partition_id, _req.txn_id, new_tablet_id, new_schema_hash, _new_tablet->tablet_uid(), _req.load_id); diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 182fe8c1b0d7be..222313fa173750 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -85,6 +85,10 @@ OLAPStatus PushHandler::_do_streaming_ingestion( if (tablet == nullptr) { return OLAP_ERR_TABLE_NOT_FOUND; } + ReadLock base_migration_rlock(tablet->get_migration_lock_ptr(), true); + if (!base_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } tablet->obtain_push_lock(); PUniqueId load_id; load_id.set_hi(0); @@ -130,6 +134,10 @@ OLAPStatus PushHandler::_do_streaming_ingestion( << "tablet=" << tablet->full_name() << " related_tablet=" << related_tablet->full_name(); } else { + ReadLock new_migration_rlock(related_tablet->get_migration_lock_ptr(), true); + if (!new_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } PUniqueId load_id; load_id.set_hi(0); load_id.set_lo(0); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 02f25ee303853a..26c9e981762319 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1194,6 +1194,15 @@ OLAPStatus SchemaChangeHandler::process_alter_tablet(AlterTabletType type, return OLAP_ERR_TABLE_CREATE_META_ERROR; } + ReadLock base_migration_rlock(base_tablet->get_migration_lock_ptr(), true); + if (!base_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + ReadLock new_migration_rlock(new_tablet->get_migration_lock_ptr(), true); + if (!new_migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + base_tablet->obtain_push_lock(); base_tablet->obtain_header_wrlock(); new_tablet->obtain_header_wrlock(); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index 6596f36efd7255..238f607a2cc7f8 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -175,6 +175,8 @@ class Tablet : public std::enable_shared_from_this { inline void obtain_cumulative_lock() { _cumulative_lock.lock(); } inline void release_cumulative_lock() { _cumulative_lock.unlock(); } + inline RWMutex* get_migration_lock_ptr() { return &_migration_lock; } + // operation for compaction bool can_do_compaction(); const uint32_t calc_cumulative_compaction_score() const; diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 013a38dff67fe7..44ebe44dd19e00 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -64,6 +64,10 @@ OLAPStatus EngineCloneTask::execute() { bool is_new_tablet = tablet == nullptr; // try to repair a tablet with missing version if (tablet != nullptr) { + ReadLock migration_rlock(tablet->get_migration_lock_ptr(), true); + if (!migration_rlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } LOG(INFO) << "clone tablet exist yet, begin to incremental clone. " << "signature:" << _signature << ", tablet_id:" << _clone_req.tablet_id diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 9d7e4b3d7b0e2c..5535463b7fefb7 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -69,6 +69,21 @@ OLAPStatus EngineStorageMigrationTask::_storage_medium_migrate( return OLAP_SUCCESS; } + WriteLock migration_wlock(tablet->get_migration_lock_ptr(), true); + if (!migration_wlock.own_lock()) { + return OLAP_ERR_RWLOCK_ERROR; + } + + int64_t partition_id; + std::set transaction_ids; + StorageEngine::instance()->txn_manager()->get_tablet_related_txns(tablet->tablet_id(), + tablet->schema_hash(), tablet->tablet_uid(), &partition_id, &transaction_ids); + if (transaction_ids.size() > 0) { + LOG(WARNING) << "could not migration because has unfinished txns, " + << " tablet=" << tablet->full_name(); + return OLAP_ERR_HEADER_HAS_PENDING_DATA; + } + tablet->obtain_push_lock(); // TODO(ygl): the tablet should not under schema change or rollup or load diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index 6316e170586d9a..fef2e98e5e6ed3 100755 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -444,29 +444,6 @@ bool TxnManager::has_txn(TPartitionId partition_id, TTransactionId transaction_i return found; } - -bool TxnManager::has_committed_txn(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid) { - - pair key(partition_id, transaction_id); - TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); - ReadLock rdlock(_get_txn_lock(transaction_id)); - ReadLock txn_rdlock(&_txn_map_lock); - auto it = _txn_tablet_map.find(key); - if (it != _txn_tablet_map.end()) { - auto load_itr = it->second.find(tablet_info); - if (load_itr != it->second.end()) { - // found load for txn,tablet - // case 1: user commit rowset, then the load id must be equal - TabletTxnInfo& load_info = load_itr->second; - if (load_info.rowset != nullptr) { - return true; - } - } - } - return false; -} - bool TxnManager::get_expire_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, std::vector* transaction_ids) { if (transaction_ids == nullptr) { diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h index fa1f533a525092..9928815bebadfd 100755 --- a/be/src/olap/txn_manager.h +++ b/be/src/olap/txn_manager.h @@ -111,10 +111,6 @@ class TxnManager { // just check if the txn exists bool has_txn(TPartitionId partition_id, TTransactionId transaction_id, TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); - - // check if the txn exists and has related rowset - bool has_committed_txn(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); bool get_expire_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, std::vector* transaction_ids); diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 0e1a3d5090332d..9447c811dac8ea 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -246,25 +246,25 @@ class RWMutex { class ReadLock { public: explicit ReadLock(RWMutex* mutex, bool try_lock = false) - : _mutex(mutex), own_lock(false) { + : _mutex(mutex), locked(false) { if (try_lock) { - own_lock = this->_mutex->tryrdlock() == OLAP_SUCCESS; + locked = this->_mutex->tryrdlock() == OLAP_SUCCESS; } else { this->_mutex->rdlock(); - own_lock = true; + locked = true; } } ~ReadLock() { - if (own_lock) { + if (locked) { this->_mutex->unlock(); } } - bool has_own_lock() { return own_lock; } + bool own_lock() { return locked; } private: RWMutex* _mutex; - bool own_lock; + bool locked; DISALLOW_COPY_AND_ASSIGN(ReadLock); }; @@ -276,25 +276,25 @@ class ReadLock { class WriteLock { public: explicit WriteLock(RWMutex* mutex, bool try_lock = false) - : _mutex(mutex), own_lock(false) { + : _mutex(mutex), locked(false) { if (try_lock) { - own_lock = this->_mutex->trywrlock() == OLAP_SUCCESS; + locked = this->_mutex->trywrlock() == OLAP_SUCCESS; } else { this->_mutex->wrlock(); - own_lock = true; + locked = true; } } ~WriteLock() { - if (own_lock) { + if (locked) { this->_mutex->unlock(); } } - bool has_own_lock() { return own_lock; } + bool own_lock() { return locked; } private: RWMutex* _mutex; - bool own_lock; + bool locked; DISALLOW_COPY_AND_ASSIGN(WriteLock); }; From cc1e25c09ab6ef908b9b5801337d8ba7943904d5 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Thu, 13 Jun 2019 19:13:41 +0800 Subject: [PATCH 2/4] Use TRY_LOCK instead of true --- be/src/olap/delta_writer.cpp | 4 ++-- be/src/olap/task/engine_storage_migration_task.cpp | 2 +- be/src/olap/utils.h | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 8f1678a05beddd..d593aef5921a54 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -79,7 +79,7 @@ OLAPStatus DeltaWriter::init() { } { - ReadLock base_migration_rlock(_tablet->get_migration_lock_ptr(), true); + ReadLock base_migration_rlock(_tablet->get_migration_lock_ptr(), TRY_LOCK); if (!base_migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } @@ -103,7 +103,7 @@ OLAPStatus DeltaWriter::init() { << ", schema_hash: " << new_schema_hash; return OLAP_ERR_TABLE_NOT_FOUND; } - ReadLock new_migration_rlock(_new_tablet->get_migration_lock_ptr(), true); + ReadLock new_migration_rlock(_new_tablet->get_migration_lock_ptr(), TRY_LOCK); if (!new_migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 5535463b7fefb7..fb1f2b944274a8 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -69,7 +69,7 @@ OLAPStatus EngineStorageMigrationTask::_storage_medium_migrate( return OLAP_SUCCESS; } - WriteLock migration_wlock(tablet->get_migration_lock_ptr(), true); + WriteLock migration_wlock(tablet->get_migration_lock_ptr(), TRY_LOCK); if (!migration_wlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 9447c811dac8ea..bbb5f82e7db688 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -41,6 +41,8 @@ #include "olap/olap_common.h" #include "olap/olap_define.h" +#define TRY_LOCK true + namespace doris { void write_log_info(char* buf, size_t buf_len, const char* fmt, ...); From e45e185f2366da10a7168bba8759330872636a01 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Thu, 13 Jun 2019 19:29:08 +0800 Subject: [PATCH 3/4] Not print failure log when there is no suitable versions --- be/src/olap/cumulative_compaction.cpp | 2 +- be/src/olap/storage_engine.cpp | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 55fcbcee006525..db9b3757d9fb4f 100755 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -189,7 +189,7 @@ OLAPStatus CumulativeCompaction::_calculate_need_merged_versions() { Versions delta_versions; OLAPStatus res = _get_delta_versions(&delta_versions); if (res != OLAP_SUCCESS) { - LOG(INFO) << "failed to get delta versions."; + LOG(INFO) << "failed to get delta versions. res=" << res; return res; } diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 96fb5be0ca3b87..41c1280d63f6d2 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -572,11 +572,10 @@ void StorageEngine::perform_cumulative_compaction(DataDir* data_dir) { if (res != OLAP_SUCCESS) { if (res != OLAP_ERR_CUMULATIVE_REPEAT_INIT && res != OLAP_ERR_CE_TRY_CE_LOCK_ERROR) { best_tablet->set_last_compaction_failure_time(UnixMillis()); - LOG(WARNING) << "failed to init cumulative compaction" - << ", table=" << best_tablet->full_name() - << ", res=" << res; - if (res != OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS) { + LOG(WARNING) << "failed to init cumulative compaction" + << ", table=" << best_tablet->full_name() + << ", res=" << res; DorisMetrics::cumulative_compaction_request_failed.increment(1); } } From 67d17ba8341ded531dad5dd3eeb39cda494c8679 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Thu, 13 Jun 2019 19:35:16 +0800 Subject: [PATCH 4/4] Use TRY_LOCK instead of true --- be/src/olap/push_handler.cpp | 4 ++-- be/src/olap/schema_change.cpp | 4 ++-- be/src/olap/task/engine_clone_task.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 222313fa173750..6edea96eb02805 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -85,7 +85,7 @@ OLAPStatus PushHandler::_do_streaming_ingestion( if (tablet == nullptr) { return OLAP_ERR_TABLE_NOT_FOUND; } - ReadLock base_migration_rlock(tablet->get_migration_lock_ptr(), true); + ReadLock base_migration_rlock(tablet->get_migration_lock_ptr(), TRY_LOCK); if (!base_migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } @@ -134,7 +134,7 @@ OLAPStatus PushHandler::_do_streaming_ingestion( << "tablet=" << tablet->full_name() << " related_tablet=" << related_tablet->full_name(); } else { - ReadLock new_migration_rlock(related_tablet->get_migration_lock_ptr(), true); + ReadLock new_migration_rlock(related_tablet->get_migration_lock_ptr(), TRY_LOCK); if (!new_migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 26c9e981762319..29c9c1890b1ae0 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1194,11 +1194,11 @@ OLAPStatus SchemaChangeHandler::process_alter_tablet(AlterTabletType type, return OLAP_ERR_TABLE_CREATE_META_ERROR; } - ReadLock base_migration_rlock(base_tablet->get_migration_lock_ptr(), true); + ReadLock base_migration_rlock(base_tablet->get_migration_lock_ptr(), TRY_LOCK); if (!base_migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } - ReadLock new_migration_rlock(new_tablet->get_migration_lock_ptr(), true); + ReadLock new_migration_rlock(new_tablet->get_migration_lock_ptr(), TRY_LOCK); if (!new_migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; } diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 44ebe44dd19e00..503bbc9c89d896 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -64,7 +64,7 @@ OLAPStatus EngineCloneTask::execute() { bool is_new_tablet = tablet == nullptr; // try to repair a tablet with missing version if (tablet != nullptr) { - ReadLock migration_rlock(tablet->get_migration_lock_ptr(), true); + ReadLock migration_rlock(tablet->get_migration_lock_ptr(), TRY_LOCK); if (!migration_rlock.own_lock()) { return OLAP_ERR_RWLOCK_ERROR; }