From b4f77be7f1a0b2b19b5e72892911996ab29b97e0 Mon Sep 17 00:00:00 2001 From: freemandealer Date: Mon, 4 Nov 2024 16:17:25 +0800 Subject: [PATCH 1/2] [fix](cloud) fix read cache block file return stat NOT_FOUND Previously, cache blocks could be downloaded as different types than the target type we intended to read, leading to false cache misses. E.g., a block might be downloaded as an 'idx' type when the current context expected a 'ttl' type. The root cause of this problem is the original design encoding meta info such as type and expiration time into cache block file path and readers of this cache block file have inconsistent view of the type so they use different name to locate file and run into error in the end. This commit tries other type if the initial type failed to locate the file (return NOT_FOUND). Be ware that this is a nasty quick fix. We will elimite the metadate encoded in the file path in the near future to get rid of all the path related problems. Signed-off-by: freemandealer --- be/src/common/config.cpp | 2 - be/src/common/config.h | 2 - be/src/io/cache/cached_remote_file_reader.cpp | 2 + be/src/io/cache/fs_file_cache_storage.cpp | 56 ++++++++++++------- be/src/io/cache/fs_file_cache_storage.h | 3 + 5 files changed, 42 insertions(+), 23 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 32604a65e58dae..7c8abfeb8f46a1 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1016,8 +1016,6 @@ DEFINE_mInt64(file_cache_ttl_valid_check_interval_second, "0"); // zero for not // If true, evict the ttl cache using LRU when full. // Otherwise, only expiration can evict ttl and new data won't add to cache when full. DEFINE_Bool(enable_ttl_cache_evict_using_lru, "true"); -// rename ttl filename to new format during read, with some performance cost -DEFINE_mBool(translate_to_new_ttl_format_during_read, "false"); DEFINE_mBool(enbale_dump_error_file, "true"); // limit the max size of error log on disk DEFINE_mInt64(file_cache_error_log_limit_bytes, "209715200"); // 200MB diff --git a/be/src/common/config.h b/be/src/common/config.h index c4fa4f9467280a..d6a581a7614c8d 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1063,8 +1063,6 @@ DECLARE_mInt64(file_cache_ttl_valid_check_interval_second); // If true, evict the ttl cache using LRU when full. // Otherwise, only expiration can evict ttl and new data won't add to cache when full. DECLARE_Bool(enable_ttl_cache_evict_using_lru); -// rename ttl filename to new format during read, with some performance cost -DECLARE_Bool(translate_to_new_ttl_format_during_read); DECLARE_mBool(enbale_dump_error_file); // limit the max size of error log on disk DECLARE_mInt64(file_cache_error_log_limit_bytes); diff --git a/be/src/io/cache/cached_remote_file_reader.cpp b/be/src/io/cache/cached_remote_file_reader.cpp index 0a46c98390e70f..c9a273c5d368a6 100644 --- a/be/src/io/cache/cached_remote_file_reader.cpp +++ b/be/src/io/cache/cached_remote_file_reader.cpp @@ -292,6 +292,8 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result, size_t* file_offset); } if (!st || block_state != FileBlock::State::DOWNLOADED) { + LOG(WARNING) << "Read data failed from file cache downloaded by others. err=" + << st.msg() << ", block state=" << block_state; size_t bytes_read {0}; stats.hit_cache = false; s3_read_counter << 1; diff --git a/be/src/io/cache/fs_file_cache_storage.cpp b/be/src/io/cache/fs_file_cache_storage.cpp index bacd0820c66099..24ba550df74486 100644 --- a/be/src/io/cache/fs_file_cache_storage.cpp +++ b/be/src/io/cache/fs_file_cache_storage.cpp @@ -160,30 +160,36 @@ Status FSFileCacheStorage::read(const FileCacheKey& key, size_t value_offset, Sl get_path_in_local_cache(get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset, key.meta.type); Status s = fs->open_file(file, &file_reader); - if (!s.ok()) { - if (!s.is() || key.meta.type != FileCacheType::TTL) { - return s; + + // handle the case that the file is not found but actually exists in other type format + // TODO(zhengyu): nasty! better eliminate the type encoding in file name in the future + if (!s.ok() && !s.is()) { + LOG(WARNING) << "open file failed, file=" << file << ", error=" << s.to_string(); + return s; // return other error directly + } else if (!s.ok() && s.is()) { // but handle NOT_FOUND error + auto candidates = get_path_in_local_cache_all_candidates( + get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset); + for (auto& candidate : candidates) { + s = fs->open_file(candidate, &file_reader); + if (s.ok()) { + break; // success with one of there candidates + } } - std::string file_old_format = get_path_in_local_cache_old_ttl_format( - get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset, - key.meta.type); - if (config::translate_to_new_ttl_format_during_read) { - // try to rename the file with old ttl format to new and retry - VLOG(7) << "try to rename the file with old ttl format to new and retry" - << " oldformat=" << file_old_format << " original=" << file; - RETURN_IF_ERROR(fs->rename(file_old_format, file)); - RETURN_IF_ERROR(fs->open_file(file, &file_reader)); - } else { - // try to open the file with old ttl format - VLOG(7) << "try to open the file with old ttl format" - << " oldformat=" << file_old_format << " original=" << file; - RETURN_IF_ERROR(fs->open_file(file_old_format, &file_reader)); + if (!s.ok()) { // still not found, return error + LOG(WARNING) << "open file failed, file=" << file << ", error=" << s.to_string(); + return s; } - } + } // else, s.ok() means open file success + FDCache::instance()->insert_file_reader(fd_key, file_reader); } size_t bytes_read = 0; - RETURN_IF_ERROR(file_reader->read_at(value_offset, buffer, &bytes_read)); + auto s = file_reader->read_at(value_offset, buffer, &bytes_read); + if (!s.ok()) { + LOG(WARNING) << "read file failed, file=" << file_reader->path() + << ", error=" << s.to_string(); + return s; + } DCHECK(bytes_read == buffer.get_size()); return Status::OK(); } @@ -270,6 +276,18 @@ std::string FSFileCacheStorage::get_path_in_local_cache_old_ttl_format(const std return Path(dir) / (std::to_string(offset) + BlockFileCache::cache_type_to_string(type)); } +std::vector FSFileCacheStorage::get_path_in_local_cache_all_candidates( + const std::string& dir, size_t offset) { + std::vector candidates; + std::string base = get_path_in_local_cache(dir, offset, FileCacheType::NORMAL); + candidates.push_back(base); + candidates.push_back(base + "_idx"); + candidates.push_back(base + "_ttl"); + candidates.push_back(base + "_disposable"); + candidates.push_back(base + "_tmp"); + return candidates; +} + std::string FSFileCacheStorage::get_path_in_local_cache(const UInt128Wrapper& value, uint64_t expiration_time) const { auto str = value.to_string(); diff --git a/be/src/io/cache/fs_file_cache_storage.h b/be/src/io/cache/fs_file_cache_storage.h index 23e98f422ac884..a11b1ac495b42e 100644 --- a/be/src/io/cache/fs_file_cache_storage.h +++ b/be/src/io/cache/fs_file_cache_storage.h @@ -75,6 +75,9 @@ class FSFileCacheStorage : public FileCacheStorage { FileCacheType type, bool is_tmp = false); + [[nodiscard]] std::vector get_path_in_local_cache_all_candidates( + const std::string& dir, size_t offset); + [[nodiscard]] static std::string get_path_in_local_cache_old_ttl_format(const std::string& dir, size_t offset, FileCacheType type, From 278e0834d8359312bc487822c782ac490fd6a900 Mon Sep 17 00:00:00 2001 From: freemandealer Date: Wed, 6 Nov 2024 17:22:37 +0800 Subject: [PATCH 2/2] reponse to the reviewer Signed-off-by: freemandealer --- be/src/io/cache/fs_file_cache_storage.cpp | 1 - be/src/io/cache/fs_file_cache_storage.h | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/be/src/io/cache/fs_file_cache_storage.cpp b/be/src/io/cache/fs_file_cache_storage.cpp index 24ba550df74486..8471f6e12b4fa8 100644 --- a/be/src/io/cache/fs_file_cache_storage.cpp +++ b/be/src/io/cache/fs_file_cache_storage.cpp @@ -284,7 +284,6 @@ std::vector FSFileCacheStorage::get_path_in_local_cache_all_candida candidates.push_back(base + "_idx"); candidates.push_back(base + "_ttl"); candidates.push_back(base + "_disposable"); - candidates.push_back(base + "_tmp"); return candidates; } diff --git a/be/src/io/cache/fs_file_cache_storage.h b/be/src/io/cache/fs_file_cache_storage.h index a11b1ac495b42e..ac5d10d043058d 100644 --- a/be/src/io/cache/fs_file_cache_storage.h +++ b/be/src/io/cache/fs_file_cache_storage.h @@ -75,9 +75,6 @@ class FSFileCacheStorage : public FileCacheStorage { FileCacheType type, bool is_tmp = false); - [[nodiscard]] std::vector get_path_in_local_cache_all_candidates( - const std::string& dir, size_t offset); - [[nodiscard]] static std::string get_path_in_local_cache_old_ttl_format(const std::string& dir, size_t offset, FileCacheType type, @@ -104,6 +101,9 @@ class FSFileCacheStorage : public FileCacheStorage { void load_cache_info_into_memory(BlockFileCache* _mgr) const; + [[nodiscard]] std::vector get_path_in_local_cache_all_candidates( + const std::string& dir, size_t offset); + std::string _cache_base_path; std::thread _cache_background_load_thread; const std::shared_ptr& fs = global_local_filesystem();