diff --git a/.gitignore b/.gitignore index ff1ab08882c29c..6f1eabc5c7fcf0 100644 --- a/.gitignore +++ b/.gitignore @@ -100,4 +100,3 @@ data_test lru_cache_test /conf/log4j2-spring.xml -/fe/fe-core/src/test/resources/real-help-resource.zip diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index c69cdae088139c..372b1f05821ef3 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -420,9 +420,6 @@ set_target_properties(k5crypto PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/li add_library(gssapi_krb5 STATIC IMPORTED) set_target_properties(gssapi_krb5 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgssapi_krb5.a) -add_library(hdfs3 STATIC IMPORTED) -set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a) - find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin) if (OS_MACOSX) @@ -685,7 +682,6 @@ set(DORIS_LINK_LIBS ${WL_START_GROUP} Agent Common - Env Exec Exprs Gutil @@ -771,12 +767,37 @@ set(COMMON_THIRDPARTY # put this after lz4 to avoid using lz4 lib in librdkafka librdkafka_cpp librdkafka - hdfs3 xml2 lzma simdjson ) +if (ARCH_AMD64) + add_library(hadoop_hdfs STATIC IMPORTED) + set_target_properties(hadoop_hdfs PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/hadoop_hdfs/native/libhdfs.a) + + add_library(jvm SHARED IMPORTED) + FILE(GLOB_RECURSE LIB_JVM $ENV{JAVA_HOME}/jre/lib/*/libjvm.so) + set_target_properties(jvm PROPERTIES IMPORTED_LOCATION ${LIB_JVM}) + + set(COMMON_THIRDPARTY + ${COMMON_THIRDPARTY} + hadoop_hdfs + jvm + ) + add_definitions(-DUSE_HADOOP_HDFS) +else() + add_library(hdfs3 STATIC IMPORTED) + set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a) + + # TODO: use arm hadoop hdfs to replace this + set(COMMON_THIRDPARTY + ${COMMON_THIRDPARTY} + hdfs3 + ) + add_definitions(-DUSE_LIBHDFS3) +endif() + if (absl_FOUND) set(COMMON_THIRDPARTY ${COMMON_THIRDPARTY} @@ -944,7 +965,6 @@ endif() add_subdirectory(${SRC_DIR}/agent) add_subdirectory(${SRC_DIR}/common) 
-add_subdirectory(${SRC_DIR}/env) add_subdirectory(${SRC_DIR}/exec) add_subdirectory(${SRC_DIR}/exprs) add_subdirectory(${SRC_DIR}/gen_cpp) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 78001d53c307aa..8d7fefda001f41 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -32,9 +32,9 @@ #include "agent/utils.h" #include "common/logging.h" #include "common/status.h" -#include "env/env.h" #include "gen_cpp/Types_types.h" #include "gutil/strings/substitute.h" +#include "io/fs/local_file_system.h" #include "io/fs/s3_file_system.h" #include "olap/data_dir.h" #include "olap/olap_common.h" @@ -53,7 +53,6 @@ #include "runtime/snapshot_loader.h" #include "service/backend_options.h" #include "util/doris_metrics.h" -#include "util/file_utils.h" #include "util/random.h" #include "util/scoped_cleanup.h" #include "util/stopwatch.hpp" @@ -1519,10 +1518,16 @@ void TaskWorkerPool::_make_snapshot_thread_callback() { // list and save all snapshot files // snapshot_path like: data/snapshot/20180417205230.1.86400 // we need to add subdir: tablet_id/schema_hash/ - std::stringstream ss; - ss << snapshot_path << "/" << snapshot_request.tablet_id << "/" - << snapshot_request.schema_hash << "/"; - status = FileUtils::list_files(Env::Default(), ss.str(), &snapshot_files); + std::vector files; + bool exists = true; + io::Path path = fmt::format("{}/{}/{}/", snapshot_path, snapshot_request.tablet_id, + snapshot_request.schema_hash); + status = io::global_local_filesystem()->list(path, true, &files, &exists); + if (status.ok()) { + for (auto& file : files) { + snapshot_files.push_back(file.file_name); + } + } } if (!status.ok()) { LOG_WARNING("failed to make snapshot") @@ -1657,7 +1662,7 @@ Status TaskWorkerPool::_move_dir(const TTabletId tablet_id, const std::string& s return loader.move(src, tablet, overwrite); } -void TaskWorkerPool::_handle_report(const TReportRequest& request, ReportType type) { +void 
TaskWorkerPool::_handle_report(TReportRequest& request, ReportType type) { TMasterResult result; Status status = MasterServerClient::instance()->report(request, &result); bool is_report_success = false; diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h index 3ea664014e24d9..d583a02495b071 100644 --- a/be/src/agent/task_worker_pool.h +++ b/be/src/agent/task_worker_pool.h @@ -203,7 +203,7 @@ class TaskWorkerPool { void _alter_tablet(const TAgentTaskRequest& alter_tablet_request, int64_t signature, const TTaskType::type task_type, TFinishTaskRequest* finish_task_request); - void _handle_report(const TReportRequest& request, ReportType type); + void _handle_report(TReportRequest& request, ReportType type); Status _get_tablet_info(const TTabletId tablet_id, const TSchemaHash schema_hash, int64_t signature, TTabletInfo* tablet_info); diff --git a/be/src/common/config.h b/be/src/common/config.h index aeb565ee792b94..5cb5289ac74bc8 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -874,8 +874,6 @@ CONF_Int32(segcompaction_threshold_segment_num, "10"); // The segment whose row number above the threshold will be compacted during segcompaction CONF_Int32(segcompaction_small_threshold, "1048576"); -CONF_String(jvm_max_heap_size, "1024M"); - // enable java udf and jdbc scannode CONF_Bool(enable_java_support, "true"); diff --git a/be/src/common/configbase.cpp b/be/src/common/configbase.cpp index af20b3726010c2..24e3a44e668b82 100644 --- a/be/src/common/configbase.cpp +++ b/be/src/common/configbase.cpp @@ -29,7 +29,8 @@ #include "common/status.h" #include "gutil/strings/substitute.h" -#include "util/filesystem_util.h" +#include "io/fs/file_writer.h" +#include "io/fs/local_file_system.h" namespace doris { namespace config { @@ -260,28 +261,23 @@ void Properties::set_force(const std::string& key, const std::string& val) { file_conf_map[key] = val; } -bool Properties::dump(const std::string& conffile) { - std::vector files = {conffile}; 
- Status st = FileSystemUtil::remove_paths(files); - if (!st.ok()) { - return false; - } - st = FileSystemUtil::create_file(conffile); - if (!st.ok()) { - return false; - } - - std::ofstream out(conffile); - out << "# THIS IS AN AUTO GENERATED CONFIG FILE.\n"; - out << "# You can modify this file manually, and the configurations in this file\n"; - out << "# will overwrite the configurations in be.conf\n"; - out << "\n"; +Status Properties::dump(const std::string& conffile) { + RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(conffile)); + io::FileWriterPtr file_writer; + RETURN_IF_ERROR(io::global_local_filesystem()->create_file(conffile, &file_writer)); + RETURN_IF_ERROR(file_writer->append("# THIS IS AN AUTO GENERATED CONFIG FILE.\n")); + RETURN_IF_ERROR(file_writer->append( + "# You can modify this file manually, and the configurations in this file\n")); + RETURN_IF_ERROR(file_writer->append("# will overwrite the configurations in be.conf\n\n")); for (auto const& iter : file_conf_map) { - out << iter.first << " = " << iter.second << "\n"; + RETURN_IF_ERROR(file_writer->append(iter.first)); + RETURN_IF_ERROR(file_writer->append(" = ")); + RETURN_IF_ERROR(file_writer->append(iter.second)); + RETURN_IF_ERROR(file_writer->append("\n")); } - out.close(); - return true; + + return file_writer->close(); } template @@ -383,14 +379,14 @@ bool init(const char* conf_file, bool fill_conf_map, bool must_exist, bool set_t (*full_conf_map)[(FIELD).name] = oss.str(); \ } \ if (PERSIST) { \ - persist_config(std::string((FIELD).name), VALUE); \ + RETURN_IF_ERROR(persist_config(std::string((FIELD).name), VALUE)); \ } \ return Status::OK(); \ } // write config to be_custom.conf // the caller need to make sure that the given config is valid -bool persist_config(const std::string& field, const std::string& value) { +Status persist_config(const std::string& field, const std::string& value) { // lock to make sure only one thread can modify the be_custom.conf std::lock_guard 
l(custom_conf_lock); @@ -399,7 +395,7 @@ bool persist_config(const std::string& field, const std::string& value) { Properties tmp_props; if (!tmp_props.load(conffile.c_str(), false)) { LOG(WARNING) << "failed to load " << conffile; - return false; + return Status::InternalError("failed to load conf file: {}", conffile); } tmp_props.set_force(field, value); diff --git a/be/src/common/configbase.h b/be/src/common/configbase.h index f0d9ed7aee0cdf..f3c79519ba9854 100644 --- a/be/src/common/configbase.h +++ b/be/src/common/configbase.h @@ -156,7 +156,7 @@ class Properties { void set_force(const std::string& key, const std::string& val); // dump props to conf file - bool dump(const std::string& conffile); + Status dump(const std::string& conffile); private: std::map file_conf_map; @@ -177,7 +177,7 @@ bool init(const char* conf_file, bool fill_conf_map = false, bool must_exist = t Status set_config(const std::string& field, const std::string& value, bool need_persist = false, bool force = false); -bool persist_config(const std::string& field, const std::string& value); +Status persist_config(const std::string& field, const std::string& value); std::mutex* get_mutable_string_config_lock(); diff --git a/be/src/env/CMakeLists.txt b/be/src/env/CMakeLists.txt deleted file mode 100644 index b52e045e3f0449..00000000000000 --- a/be/src/env/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# where to put generated libraries -set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/env") - -# where to put generated binaries -set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/env") - -add_library(Env STATIC - env.cpp - env_posix.cpp - env_util.cpp -) diff --git a/be/src/env/env.h b/be/src/env/env.h deleted file mode 100644 index 2456df9a325cbb..00000000000000 --- a/be/src/env/env.h +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors - -#pragma once - -#include -#include -#include - -#include "common/status.h" -#include "gen_cpp/Types_types.h" -#include "gen_cpp/olap_file.pb.h" -#include "util/slice.h" - -namespace doris { - -class RandomAccessFile; -class RandomRWFile; -class WritableFile; -class PosixEnv; -class StorageBackend; -struct FilePathDesc; -struct WritableFileOptions; -struct RandomAccessFileOptions; -struct RandomRWFileOptions; - -namespace io { -class FileSystem; -} - -class Env { -public: - // Governs if/how the file is created. 
- // - // enum value | file exists | file does not exist - // -----------------------------+-------------------+-------------------- - // CREATE_OR_OPEN_WITH_TRUNCATE | opens + truncates | creates - // CREATE_OR_OPEN | opens | creates - // MUST_CREATE | fails | creates - // MUST_EXIST | opens | fails - enum OpenMode { CREATE_OR_OPEN_WITH_TRUNCATE, CREATE_OR_OPEN, MUST_CREATE, MUST_EXIST }; - - Env() {} - virtual ~Env() {} - - // Return a default environment suitable for the current operating - // system. Sophisticated users may wish to provide their own Env - // implementation instead of relying on this default environment. - static Env* Default(); - - // Create a brand new random access read-only file with the - // specified name. On success, stores a pointer to the new file in - // *result and returns OK. On failure stores nullptr in *result and - // returns non-OK. If the file does not exist, returns a non-OK - // status. - // - // The returned file may be concurrently accessed by multiple threads. - virtual Status new_random_access_file(const std::string& fname, - std::unique_ptr* result) = 0; - - virtual Status new_random_access_file(const RandomAccessFileOptions& opts, - const std::string& fname, - std::unique_ptr* result) = 0; - - // Create an object that writes to a new file with the specified - // name. Deletes any existing file with the same name and creates a - // new file. On success, stores a pointer to the new file in - // *result and returns OK. On failure stores nullptr in *result and - // returns non-OK. - // - // The returned file will only be accessed by one thread at a time. - virtual Status new_writable_file(const std::string& fname, - std::unique_ptr* result) = 0; - - // Like the previous new_writable_file, but allows options to be - // specified. - virtual Status new_writable_file(const WritableFileOptions& opts, const std::string& fname, - std::unique_ptr* result) = 0; - - // Creates a new readable and writable file. 
If a file with the same name - // already exists on disk, it is deleted. - // - // Some of the methods of the new file may be accessed concurrently, - // while others are only safe for access by one thread at a time. - virtual Status new_random_rw_file(const std::string& fname, - std::unique_ptr* result) = 0; - - // Like the previous new_random_rw_file, but allows options to be specified. - virtual Status new_random_rw_file(const RandomRWFileOptions& opts, const std::string& fname, - std::unique_ptr* result) = 0; - - // Returns OK if the path exists. - // NotFound if the named file does not exist, - // the calling process does not have permission to determine - // whether this file exists, or if the path is invalid. - // IOError if an IO Error was encountered - virtual Status path_exists(const std::string& fname, bool is_dir = false) = 0; - - // Store in *result the names of the children of the specified directory. - // The names are relative to "dir". - // Original contents of *results are dropped. - // Returns OK if "dir" exists and "*result" contains its children. - // NotFound if "dir" does not exist, the calling process does not have - // permission to access "dir", or if "dir" is invalid. - // IOError if an IO Error was encountered - virtual Status get_children(const std::string& dir, std::vector* result) = 0; - - // Iterate the specified directory and call given callback function with child's - // name. This function continues execution until all children have been iterated - // or callback function return false. - // The names are relative to "dir". - // - // The function call extra cost is acceptable. Compared with returning all children - // into a given vector, the performance of this method is 5% worse. However this - // approach is more flexible and efficient in fulfilling other requirements. - // - // Returns OK if "dir" exists. 
- // NotFound if "dir" does not exist, the calling process does not have - // permission to access "dir", or if "dir" is invalid. - // IOError if an IO Error was encountered - virtual Status iterate_dir(const std::string& dir, - const std::function& cb) = 0; - - // Delete the named file. - virtual Status delete_file(const std::string& fname) = 0; - - // Create the specified directory. - // NOTE: It will return error if the path already exist(not necessarily as a directory) - virtual Status create_dir(const std::string& dirname) = 0; - - // Creates directory if missing. - // Return OK if it exists, or successful in Creating. - virtual Status create_dir_if_missing(const std::string& dirname, bool* created = nullptr) = 0; - - // Delete the specified directory. - // NOTE: The dir must be empty. - virtual Status delete_dir(const std::string& dirname) = 0; - - // Synchronize the entry for a specific directory. - virtual Status sync_dir(const std::string& dirname) = 0; - - // Checks if the file is a directory. Returns an error if it doesn't - // exist, otherwise writes true or false into 'is_dir' appropriately. - virtual Status is_directory(const std::string& path, bool* is_dir) = 0; - - // Canonicalize 'path' by applying the following conversions: - // - Converts a relative path into an absolute one using the cwd. - // - Converts '.' and '..' references. - // - Resolves all symbolic links. - // - // All directory entries in 'path' must exist on the filesystem. - virtual Status canonicalize(const std::string& path, std::string* result) = 0; - - virtual Status get_file_size(const std::string& fname, uint64_t* size) = 0; - - // Store the last modification time of fname in *file_mtime. - virtual Status get_file_modified_time(const std::string& fname, uint64_t* file_mtime) = 0; - - // copy path from src to target. - virtual Status copy_path(const std::string& src, const std::string& target) = 0; - // Rename file src to target. 
- virtual Status rename_file(const std::string& src, const std::string& target) = 0; - // Rename dir src to target. - virtual Status rename_dir(const std::string& src, const std::string& target) = 0; - - // create a hard-link - virtual Status link_file(const std::string& /*old_path*/, const std::string& /*new_path*/) = 0; - - // get space info for local and remote system - virtual Status get_space_info(const std::string& path, int64_t* capacity, - int64_t* available) = 0; - - // Create directory of dir_path, - // This function will create directory recursively, - // if dir's parent directory doesn't exist - // - // RETURNS: - // Status::OK() if create directory success or directory already exists - virtual Status create_dirs(const std::string& dirname) = 0; - -private: - static std::shared_ptr _posix_env; -}; - -struct FilePathDesc { - FilePathDesc(const std::string& path) { filepath = path; } - FilePathDesc() {} - TStorageMedium::type storage_medium = TStorageMedium::HDD; - std::string filepath; - std::string remote_path; - std::string storage_name; - io::FileSystem* file_system; - - std::string debug_string() const { - std::stringstream ss; - ss << "storage_medium: " << to_string(storage_medium) << ", local_path: " << filepath; - if (!remote_path.empty()) { - ss << ", storage_name: " << storage_name << ", remote_path: " << remote_path; - } - return ss.str(); - } - // REMOTE_CACHE is the local cache path for remote path, if a data_dir is REMOTE_CACHE, - // it means the tablet in it will be set as a remote path. 
- static bool is_remote(TStorageMedium::type checked_storage_medium) { - return checked_storage_medium == TStorageMedium::S3 || - checked_storage_medium == TStorageMedium::REMOTE_CACHE; - } - bool is_remote() const { return is_remote(storage_medium); } -}; - -class FilePathDescStream { -public: - FilePathDescStream& operator<<(const FilePathDesc& val) { - _filepath_stream << val.filepath; - _storage_medium = val.storage_medium; - _storage_name = val.storage_name; - if (FilePathDesc::is_remote(_storage_medium)) { - _remote_path_stream << val.remote_path; - } - return *this; - } - FilePathDescStream& operator<<(const std::string& val) { - _filepath_stream << val; - if (FilePathDesc::is_remote(_storage_medium)) { - _remote_path_stream << val; - } - return *this; - } - FilePathDescStream& operator<<(uint64_t val) { - _filepath_stream << val; - if (FilePathDesc::is_remote(_storage_medium)) { - _remote_path_stream << val; - } - return *this; - } - FilePathDescStream& operator<<(int64_t val) { - _filepath_stream << val; - if (FilePathDesc::is_remote(_storage_medium)) { - _remote_path_stream << val; - } - return *this; - } - FilePathDescStream& operator<<(uint32_t val) { - _filepath_stream << val; - if (FilePathDesc::is_remote(_storage_medium)) { - _remote_path_stream << val; - } - return *this; - } - FilePathDescStream& operator<<(int32_t val) { - _filepath_stream << val; - if (FilePathDesc::is_remote(_storage_medium)) { - _remote_path_stream << val; - } - return *this; - } - FilePathDesc path_desc() { - FilePathDesc path_desc(_filepath_stream.str()); - path_desc.storage_medium = _storage_medium; - if (FilePathDesc::is_remote(_storage_medium)) { - path_desc.remote_path = _remote_path_stream.str(); - } - path_desc.storage_name = _storage_name; - return path_desc; - } - -private: - TStorageMedium::type _storage_medium = TStorageMedium::HDD; - std::stringstream _filepath_stream; - std::stringstream _remote_path_stream; - std::string _storage_name; -}; - -struct 
RandomAccessFileOptions { - RandomAccessFileOptions() {} -}; - -// Creation-time options for WritableFile -struct WritableFileOptions { - // Call Sync() during Close(). - bool sync_on_close = false; - // See OpenMode for details. - Env::OpenMode mode = Env::CREATE_OR_OPEN_WITH_TRUNCATE; -}; - -// Creation-time options for RWFile -struct RandomRWFileOptions { - // Call Sync() during Close(). - bool sync_on_close = false; - // See OpenMode for details. - Env::OpenMode mode = Env::CREATE_OR_OPEN_WITH_TRUNCATE; -}; - -class RandomAccessFile { -public: - RandomAccessFile() {} - virtual ~RandomAccessFile() {} - - // Read "result.size" bytes from the file starting at "offset". - // Copies the resulting data into "result.data". - // - // If an error was encountered, returns a non-OK status. - // - // This method will internally retry on EINTR and "short reads" in order to - // fully read the requested number of bytes. In the event that it is not - // possible to read exactly 'length' bytes, an IOError is returned. - // - // Safe for concurrent use by multiple threads. - virtual Status read_at(uint64_t offset, const Slice* result) const = 0; - - // Reads up to the "results" aggregate size, based on each Slice's "size", - // from the file starting at 'offset'. The Slices must point to already-allocated - // buffers for the data to be written to. - // - // If an error was encountered, returns a non-OK status. - // - // This method will internally retry on EINTR and "short reads" in order to - // fully read the requested number of bytes. In the event that it is not - // possible to read exactly 'length' bytes, an IOError is returned. - // - // Safe for concurrent use by multiple threads. 
- virtual Status readv_at(uint64_t offset, const Slice* result, size_t res_cnt) const = 0; - - // read all data from this file - virtual Status read_all(std::string* content) const = 0; - - // Return the size of this file - virtual Status size(uint64_t* size) const = 0; - - // Return name of this file - virtual const std::string& file_name() const = 0; -}; - -// A file abstraction for sequential writing. The implementation -// must provide buffering since callers may append small fragments -// at a time to the file. -// Note: To avoid user misuse, WritableFile's API should support only -// one of Append or PositionedAppend. We support only Append here. -class WritableFile { -public: - enum FlushMode { FLUSH_SYNC, FLUSH_ASYNC }; - - WritableFile() {} - virtual ~WritableFile() {} - - // Append data to the end of the file - virtual Status append(const Slice& data) = 0; - - // If possible, uses scatter-gather I/O to efficiently append - // multiple buffers to a file. Otherwise, falls back to regular I/O. - // - // For implementation specific quirks and details, see comments in - // implementation source code (e.g., env_posix.cc) - virtual Status appendv(const Slice* data, size_t cnt) = 0; - - // Pre-allocates 'size' bytes for the file in the underlying filesystem. - // size bytes are added to the current pre-allocated size or to the current - // offset, whichever is bigger. In no case is the file truncated by this - // operation. - // - // On some implementations, preallocation is done without initializing the - // contents of the data blocks (as opposed to writing zeroes), requiring no - // IO to the data blocks. - // - // In no case is the file truncated by this operation. - virtual Status pre_allocate(uint64_t size) = 0; - - virtual Status close() = 0; - - // Flush all dirty data (not metadata) to disk. - // - // If the flush mode is synchronous, will wait for flush to finish and - // return a meaningful status. 
- virtual Status flush(FlushMode mode) = 0; - - virtual Status sync() = 0; - - virtual uint64_t size() const = 0; - - // Returns the filename provided when the WritableFile was constructed. - virtual const std::string& filename() const = 0; - -private: - // No copying allowed - WritableFile(const WritableFile&); - void operator=(const WritableFile&); -}; - -// A file abstraction for random reading and writing. -class RandomRWFile { -public: - enum FlushMode { FLUSH_SYNC, FLUSH_ASYNC }; - RandomRWFile() {} - virtual ~RandomRWFile() {} - - virtual Status read_at(uint64_t offset, const Slice& result) const = 0; - - virtual Status readv_at(uint64_t offset, const Slice* res, size_t res_cnt) const = 0; - - virtual Status write_at(uint64_t offset, const Slice& data) = 0; - - virtual Status writev_at(uint64_t offset, const Slice* data, size_t data_cnt) = 0; - - virtual Status flush(FlushMode mode, uint64_t offset, size_t length) = 0; - - virtual Status sync() = 0; - - virtual Status close() = 0; - - virtual Status size(uint64_t* size) const = 0; - virtual const std::string& filename() const = 0; -}; - -} // namespace doris diff --git a/be/src/env/env_posix.cpp b/be/src/env/env_posix.cpp deleted file mode 100644 index 3407f223e5bb25..00000000000000 --- a/be/src/env/env_posix.cpp +++ /dev/null @@ -1,766 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "env/env_posix.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "env/env.h" -#include "gutil/gscoped_ptr.h" -#include "gutil/macros.h" -#include "gutil/port.h" -#include "gutil/strings/substitute.h" -#include "util/errno.h" -#include "util/slice.h" - -namespace doris { -using namespace ErrorCode; - -using std::string; -using strings::Substitute; - -// Close file descriptor when object goes out of scope. -class ScopedFdCloser { -public: - explicit ScopedFdCloser(int fd) : fd_(fd) {} - - ~ScopedFdCloser() { - int err; - RETRY_ON_EINTR(err, ::close(fd_)); - if (PREDICT_FALSE(err != 0)) { - LOG(WARNING) << "Failed to close fd " << fd_; - } - } - -private: - const int fd_; -}; - -static Status io_error(const std::string& context, int err_number) { - switch (err_number) { - case EACCES: - case ELOOP: - case ENAMETOOLONG: - case ENOENT: - case ENOTDIR: - return Status::NotFound("{} (error {}) {}", context, err_number, - errno_to_string(err_number)); - case EEXIST: - return Status::AlreadyExist("{} (error {}) {}", context, err_number, - errno_to_string(err_number)); - case EOPNOTSUPP: - case EXDEV: // No cross FS links allowed - return Status::NotSupported("{} (error {}) {}", context, err_number, - errno_to_string(err_number)); - case EIO: - LOG(ERROR) << "I/O error, context=" << context; - } - return Status::IOError("{} (error {}) {}", context, err_number, errno_to_string(err_number)); -} - -static Status do_sync(int fd, const string& filename) { -#ifdef __APPLE__ - if 
(fcntl(fd, F_FULLFSYNC) < 0) { - return io_error(filename, errno); - } -#else - if (fdatasync(fd) < 0) { - return io_error(filename, errno); - } -#endif - return Status::OK(); -} - -static Status do_open(const string& filename, Env::OpenMode mode, int* fd) { - int flags = O_RDWR; - switch (mode) { - case Env::CREATE_OR_OPEN_WITH_TRUNCATE: - flags |= O_CREAT | O_TRUNC; - break; - case Env::CREATE_OR_OPEN: - flags |= O_CREAT; - break; - case Env::MUST_CREATE: - flags |= O_CREAT | O_EXCL; - break; - case Env::MUST_EXIST: - break; - default: - return Status::NotSupported("Unknown create mode {}", mode); - } - int f; - RETRY_ON_EINTR(f, open(filename.c_str(), flags, 0666)); - if (f < 0) { - return io_error(filename, errno); - } - *fd = f; - return Status::OK(); -} - -static Status do_readv_at(int fd, const std::string& filename, uint64_t offset, const Slice* res, - size_t res_cnt) { - // Convert the results into the iovec vector to request - // and calculate the total bytes requested - size_t bytes_req = 0; - struct iovec iov[res_cnt]; - for (size_t i = 0; i < res_cnt; i++) { - const Slice& result = res[i]; - bytes_req += result.size; - iov[i] = {result.data, result.size}; - } - - uint64_t cur_offset = offset; - size_t completed_iov = 0; - size_t rem = bytes_req; - while (rem > 0) { - // Never request more than IOV_MAX in one request - size_t iov_count = std::min(res_cnt - completed_iov, static_cast(IOV_MAX)); - ssize_t r; - RETRY_ON_EINTR(r, preadv(fd, iov + completed_iov, iov_count, cur_offset)); - if (PREDICT_FALSE(r < 0)) { - // An error: return a non-ok status. - return io_error(filename, errno); - } - - if (PREDICT_FALSE(r == 0)) { - return Status::EndOfFile("EOF trying to read {} bytes at offset {}", bytes_req, offset); - } - - if (PREDICT_TRUE(r == rem)) { - // All requested bytes were read. This is almost always the case. 
- return Status::OK(); - } - DCHECK_LE(r, rem); - // Adjust iovec vector based on bytes read for the next request - ssize_t bytes_rem = r; - for (size_t i = completed_iov; i < res_cnt; i++) { - if (bytes_rem >= iov[i].iov_len) { - // The full length of this iovec was read - completed_iov++; - bytes_rem -= iov[i].iov_len; - } else { - // Partially read this result. - // Adjust the iov_len and iov_base to request only the missing data. - iov[i].iov_base = static_cast(iov[i].iov_base) + bytes_rem; - iov[i].iov_len -= bytes_rem; - break; // Don't need to adjust remaining iovec's - } - } - cur_offset += r; - rem -= r; - } - DCHECK_EQ(0, rem); - return Status::OK(); -} - -static Status do_writev_at(int fd, const string& filename, uint64_t offset, const Slice* data, - size_t data_cnt, size_t* bytes_written) { - // Convert the results into the iovec vector to request - // and calculate the total bytes requested. - size_t bytes_req = 0; - struct iovec iov[data_cnt]; - for (size_t i = 0; i < data_cnt; i++) { - const Slice& result = data[i]; - bytes_req += result.size; - iov[i] = {result.data, result.size}; - } - - uint64_t cur_offset = offset; - size_t completed_iov = 0; - size_t rem = bytes_req; - while (rem > 0) { - // Never request more than IOV_MAX in one request. - size_t iov_count = std::min(data_cnt - completed_iov, static_cast(IOV_MAX)); - ssize_t w; - RETRY_ON_EINTR(w, pwritev(fd, iov + completed_iov, iov_count, cur_offset)); - if (PREDICT_FALSE(w < 0)) { - // An error: return a non-ok status. - return io_error(filename, errno); - } - - if (PREDICT_TRUE(w == rem)) { - // All requested bytes were read. This is almost always the case. - rem = 0; - break; - } - // Adjust iovec vector based on bytes read for the next request. - ssize_t bytes_rem = w; - for (size_t i = completed_iov; i < data_cnt; i++) { - if (bytes_rem >= iov[i].iov_len) { - // The full length of this iovec was written. 
- completed_iov++; - bytes_rem -= iov[i].iov_len; - } else { - // Partially wrote this result. - // Adjust the iov_len and iov_base to write only the missing data. - iov[i].iov_base = static_cast(iov[i].iov_base) + bytes_rem; - iov[i].iov_len -= bytes_rem; - break; // Don't need to adjust remaining iovec's. - } - } - cur_offset += w; - rem -= w; - } - DCHECK_EQ(0, rem); - *bytes_written = bytes_req; - return Status::OK(); -} - -class PosixRandomAccessFile : public RandomAccessFile { -public: - PosixRandomAccessFile(std::string filename, int fd) : _filename(std::move(filename)), _fd(fd) {} - ~PosixRandomAccessFile() override { - int res; - RETRY_ON_EINTR(res, close(_fd)); - if (res != 0) { - LOG(WARNING) << "close file failed, name=" << _filename - << ", msg=" << errno_to_string(errno); - } - } - - Status read_at(uint64_t offset, const Slice* result) const override { - return readv_at(offset, result, 1); - } - - Status readv_at(uint64_t offset, const Slice* result, size_t res_cnt) const override { - return do_readv_at(_fd, _filename, offset, result, res_cnt); - } - - Status read_all(std::string* content) const override { - std::fstream fs(_filename.c_str(), std::fstream::in); - if (!fs.is_open()) { - RETURN_NOT_OK_STATUS_WITH_WARN( - Status::IOError("failed to open cluster id file {}", _filename), - "open file failed"); - } - std::string data; - fs >> data; - fs.close(); - if ((fs.rdstate() & std::fstream::eofbit) != 0) { - *content = data; - } else { - RETURN_NOT_OK_STATUS_WITH_WARN( - Status::Corruption( - "read_all from file {} is corrupt. 
[eofbit={} failbit={} badbit={}]", - _filename, fs.rdstate() & std::fstream::eofbit, - fs.rdstate() & std::fstream::failbit, - fs.rdstate() & std::fstream::badbit), - "read_all is error"); - } - return Status::OK(); - } - - Status size(uint64_t* size) const override { - struct stat st; - auto res = fstat(_fd, &st); - if (res != 0) { - return io_error(_filename, errno); - } - *size = st.st_size; - return Status::OK(); - } - - const std::string& file_name() const override { return _filename; } - -private: - std::string _filename; - int _fd; -}; - -class PosixWritableFile : public WritableFile { -public: - PosixWritableFile(std::string filename, int fd, uint64_t filesize, bool sync_on_close) - : _filename(std::move(filename)), - _fd(fd), - _sync_on_close(sync_on_close), - _filesize(filesize) {} - - ~PosixWritableFile() override { - WARN_IF_ERROR(close(), "Failed to close file, file=" + _filename); - } - - Status append(const Slice& data) override { return appendv(&data, 1); } - - Status appendv(const Slice* data, size_t cnt) override { - size_t bytes_written = 0; - RETURN_IF_ERROR(do_writev_at(_fd, _filename, _filesize, data, cnt, &bytes_written)); - _filesize += bytes_written; - return Status::OK(); - } - - Status pre_allocate(uint64_t size) override { -#ifdef __APPLE__ - return io_error(_filename, ENOSYS); -#else - uint64_t offset = std::max(_filesize, _pre_allocated_size); - int ret; - RETRY_ON_EINTR(ret, fallocate(_fd, 0, offset, size)); - if (ret != 0) { - if (errno == EOPNOTSUPP) { - LOG(WARNING) << "The filesystem does not support fallocate()."; - } else if (errno == ENOSYS) { - LOG(WARNING) << "The kernel does not implement fallocate()."; - } else { - return io_error(_filename, errno); - } - } - _pre_allocated_size = offset + size; - return Status::OK(); -#endif - } - - Status close() override { - if (_closed) { - return Status::OK(); - } - Status s; - - // If we've allocated more space than we used, truncate to the - // actual size of the file and perform 
Sync(). - if (_filesize < _pre_allocated_size) { - int ret; - RETRY_ON_EINTR(ret, ftruncate(_fd, _filesize)); - if (ret != 0) { - s = io_error(_filename, errno); - _pending_sync = true; - } - } - - if (_sync_on_close) { - Status sync_status = sync(); - if (!sync_status.ok()) { - LOG(ERROR) << "Unable to Sync " << _filename << ": " << sync_status; - if (s.ok()) { - s = sync_status; - } - } - } - - int ret; - RETRY_ON_EINTR(ret, ::close(_fd)); - if (ret < 0) { - if (s.ok()) { - s = io_error(_filename, errno); - } - } - - _closed = true; - return s; - } - - Status flush(FlushMode mode) override { -#if defined(__linux__) - int flags = SYNC_FILE_RANGE_WRITE; - if (mode == FLUSH_SYNC) { - flags |= SYNC_FILE_RANGE_WAIT_BEFORE; - flags |= SYNC_FILE_RANGE_WAIT_AFTER; - } - if (sync_file_range(_fd, 0, 0, flags) < 0) { - return io_error(_filename, errno); - } -#else - if (mode == FLUSH_SYNC && fsync(_fd) < 0) { - return io_error(_filename, errno); - } -#endif - return Status::OK(); - } - - Status sync() override { - if (_pending_sync) { - _pending_sync = false; - RETURN_IF_ERROR(do_sync(_fd, _filename)); - } - return Status::OK(); - } - - uint64_t size() const override { return _filesize; } - const string& filename() const override { return _filename; } - -private: - std::string _filename; - int _fd; - const bool _sync_on_close = false; - bool _pending_sync = false; - bool _closed = false; - uint64_t _filesize = 0; - uint64_t _pre_allocated_size = 0; -}; - -class PosixRandomRWFile : public RandomRWFile { -public: - PosixRandomRWFile(string fname, int fd, bool sync_on_close) - : _filename(std::move(fname)), _fd(fd), _sync_on_close(sync_on_close), _closed(false) {} - - ~PosixRandomRWFile() { WARN_IF_ERROR(close(), "Failed to close " + _filename); } - - Status read_at(uint64_t offset, const Slice& result) const override { - return readv_at(offset, &result, 1); - } - - Status readv_at(uint64_t offset, const Slice* result, size_t res_cnt) const override { - return do_readv_at(_fd, 
_filename, offset, result, res_cnt); - } - - Status write_at(uint64_t offset, const Slice& data) override { - return writev_at(offset, &data, 1); - } - - Status writev_at(uint64_t offset, const Slice* data, size_t data_cnt) override { - size_t bytes_written = 0; - return do_writev_at(_fd, _filename, offset, data, data_cnt, &bytes_written); - } - - Status flush(FlushMode mode, uint64_t offset, size_t length) override { -#if defined(__linux__) - int flags = SYNC_FILE_RANGE_WRITE; - if (mode == FLUSH_SYNC) { - flags |= SYNC_FILE_RANGE_WAIT_AFTER; - } - if (sync_file_range(_fd, offset, length, flags) < 0) { - return io_error(_filename, errno); - } -#else - if (mode == FLUSH_SYNC && fsync(_fd) < 0) { - return io_error(_filename, errno); - } -#endif - return Status::OK(); - } - - Status sync() override { return do_sync(_fd, _filename); } - - Status close() override { - if (_closed) { - return Status::OK(); - } - Status s; - if (_sync_on_close) { - s = sync(); - if (!s.ok()) { - LOG(ERROR) << "Unable to Sync " << _filename << ": " << s; - } - } - - int ret; - RETRY_ON_EINTR(ret, ::close(_fd)); - if (ret < 0) { - if (s.ok()) { - s = io_error(_filename, errno); - } - } - - _closed = true; - return s; - } - - Status size(uint64_t* size) const override { - struct stat st; - if (fstat(_fd, &st) == -1) { - return io_error(_filename, errno); - } - *size = st.st_size; - return Status::OK(); - } - - const string& filename() const override { return _filename; } - -private: - const std::string _filename; - const int _fd; - const bool _sync_on_close = false; - bool _closed = false; -}; - -// get a RandomAccessFile pointer without file cache -Status PosixEnv::new_random_access_file(const std::string& fname, - std::unique_ptr* result) { - return new_random_access_file(RandomAccessFileOptions(), fname, result); -} - -Status PosixEnv::new_random_access_file(const RandomAccessFileOptions& opts, - const std::string& fname, - std::unique_ptr* result) { - int fd; - RETRY_ON_EINTR(fd, 
open(fname.c_str(), O_RDONLY)); - if (fd < 0) { - return io_error(fname, errno); - } - result->reset(new PosixRandomAccessFile(fname, fd)); - return Status::OK(); -} - -Status PosixEnv::new_writable_file(const string& fname, std::unique_ptr* result) { - return new_writable_file(WritableFileOptions(), fname, result); -} - -Status PosixEnv::new_writable_file(const WritableFileOptions& opts, const string& fname, - std::unique_ptr* result) { - int fd; - RETURN_IF_ERROR(do_open(fname, opts.mode, &fd)); - - uint64_t file_size = 0; - if (opts.mode == MUST_EXIST) { - RETURN_IF_ERROR(get_file_size(fname, &file_size)); - } - result->reset(new PosixWritableFile(fname, fd, file_size, opts.sync_on_close)); - return Status::OK(); -} - -Status PosixEnv::new_random_rw_file(const string& fname, std::unique_ptr* result) { - return new_random_rw_file(RandomRWFileOptions(), fname, result); -} - -Status PosixEnv::new_random_rw_file(const RandomRWFileOptions& opts, const string& fname, - std::unique_ptr* result) { - int fd; - RETURN_IF_ERROR(do_open(fname, opts.mode, &fd)); - result->reset(new PosixRandomRWFile(fname, fd, opts.sync_on_close)); - return Status::OK(); -} - -Status PosixEnv::path_exists(const std::string& fname, bool is_dir) { - if (access(fname.c_str(), F_OK) != 0) { - return io_error(fname, errno); - } - return Status::OK(); -} - -Status PosixEnv::get_children(const std::string& dir, std::vector* result) { - result->clear(); - DIR* d = opendir(dir.c_str()); - if (d == nullptr) { - return io_error(dir, errno); - } - struct dirent* entry; - while ((entry = readdir(d)) != nullptr) { - result->push_back(entry->d_name); - } - closedir(d); - return Status::OK(); -} - -Status PosixEnv::iterate_dir(const std::string& dir, const std::function& cb) { - DIR* d = opendir(dir.c_str()); - if (d == nullptr) { - return io_error(dir, errno); - } - struct dirent* entry; - while ((entry = readdir(d)) != nullptr) { - // callback returning false means to terminate iteration - if 
(!cb(entry->d_name)) { - break; - } - } - closedir(d); - return Status::OK(); -} - -Status PosixEnv::delete_file(const std::string& fname) { - if (unlink(fname.c_str()) != 0) { - return io_error(fname, errno); - } - return Status::OK(); -} - -Status PosixEnv::create_dir(const std::string& name) { - if (mkdir(name.c_str(), 0755) != 0) { - return io_error(name, errno); - } - return Status::OK(); -} - -Status PosixEnv::create_dir_if_missing(const string& dirname, bool* created) { - Status s = create_dir(dirname); - if (created != nullptr) { - *created = s.ok(); - } - - // Check that dirname is actually a directory. - if (s.is()) { - bool is_dir = false; - RETURN_IF_ERROR(is_directory(dirname, &is_dir)); - if (is_dir) { - return Status::OK(); - } else { - return std::move(s.append("path already exists but not a dir")); - } - } - return s; -} - -Status PosixEnv::create_dirs(const string& dirname) { - if (dirname.empty()) { - return Status::InvalidArgument("Unknown primitive type({})", dirname); - } - - std::filesystem::path p(dirname); - - std::string partial_path; - for (std::filesystem::path::iterator it = p.begin(); it != p.end(); ++it) { - partial_path = partial_path + it->string() + "/"; - bool is_dir = false; - - Status s = is_directory(partial_path, &is_dir); - - if (s.ok()) { - if (is_dir) { - // It's a normal directory. - continue; - } - - // Maybe a file or a symlink. Let's try to follow the symlink. - std::string real_partial_path; - RETURN_IF_ERROR(canonicalize(partial_path, &real_partial_path)); - - RETURN_IF_ERROR(is_directory(real_partial_path, &is_dir)); - if (is_dir) { - // It's a symlink to a directory. - continue; - } else { - return Status::IOError("{} exists but is not a directory", partial_path); - } - } - - RETURN_IF_ERROR(create_dir_if_missing(partial_path)); - } - - return Status::OK(); -} - -// Delete the specified directory. 
-Status PosixEnv::delete_dir(const std::string& dirname) { - std::filesystem::path boost_path(dirname); - std::error_code ec; - std::filesystem::remove_all(boost_path, ec); - if (ec) { - std::stringstream ss; - ss << "remove all(" << dirname << ") failed, because: " << ec; - return Status::InternalError(ss.str()); - } - return Status::OK(); -} - -Status PosixEnv::sync_dir(const string& dirname) { - int dir_fd; - RETRY_ON_EINTR(dir_fd, open(dirname.c_str(), O_DIRECTORY | O_RDONLY)); - if (dir_fd < 0) { - return io_error(dirname, errno); - } - ScopedFdCloser fd_closer(dir_fd); - if (fsync(dir_fd) != 0) { - return io_error(dirname, errno); - } - return Status::OK(); -} - -Status PosixEnv::is_directory(const std::string& path, bool* is_dir) { - struct stat path_stat; - if (stat(path.c_str(), &path_stat) != 0) { - return io_error(path, errno); - } else { - *is_dir = S_ISDIR(path_stat.st_mode); - } - - return Status::OK(); -} - -Status PosixEnv::canonicalize(const std::string& path, std::string* result) { - // NOTE: we must use free() to release the buffer returned by realpath(), - // because the buffer is allocated by malloc(), see `man 3 realpath`. 
- std::unique_ptr r(realpath(path.c_str(), nullptr)); - if (r == nullptr) { - return io_error(strings::Substitute("Unable to canonicalize $0", path), errno); - } - *result = std::string(r.get()); - return Status::OK(); -} - -Status PosixEnv::get_file_size(const string& fname, uint64_t* size) { - struct stat sbuf; - if (stat(fname.c_str(), &sbuf) != 0) { - return io_error(fname, errno); - } else { - *size = sbuf.st_size; - } - return Status::OK(); -} - -Status PosixEnv::get_file_modified_time(const std::string& fname, uint64_t* file_mtime) { - struct stat s; - if (stat(fname.c_str(), &s) != 0) { - return io_error(fname, errno); - } - *file_mtime = static_cast(s.st_mtime); - return Status::OK(); -} - -Status PosixEnv::copy_path(const std::string& src, const std::string& target) { - try { - std::filesystem::copy(src, target, std::filesystem::copy_options::recursive); - } catch (const std::filesystem::filesystem_error& e) { - std::stringstream ss; - ss << "failed to copy_path: from " << src << " to " << target << ". 
err: " << e.what(); - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - return Status::OK(); -} - -Status PosixEnv::rename_file(const std::string& src, const std::string& target) { - if (rename(src.c_str(), target.c_str()) != 0) { - return io_error(src, errno); - } - return Status::OK(); -} - -Status PosixEnv::rename_dir(const std::string& src, const std::string& target) { - return rename_file(src, target); -} - -Status PosixEnv::link_file(const std::string& old_path, const std::string& new_path) { - if (link(old_path.c_str(), new_path.c_str()) != 0) { - return io_error(old_path, errno); - } - return Status::OK(); -} - -Status PosixEnv::get_space_info(const std::string& path, int64_t* capacity, int64_t* available) { - try { - std::filesystem::path path_name(path); - std::filesystem::space_info path_info = std::filesystem::space(path_name); - *capacity = path_info.capacity; - *available = path_info.available; - } catch (std::filesystem::filesystem_error& e) { - RETURN_NOT_OK_STATUS_WITH_WARN( - Status::IOError("get path {} available capacity failed, error={}", path, e.what()), - "std::filesystem::space failed"); - } - return Status::OK(); -} - -} // end namespace doris diff --git a/be/src/env/env_posix.h b/be/src/env/env_posix.h deleted file mode 100644 index 876e2c16f1abae..00000000000000 --- a/be/src/env/env_posix.h +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "env/env.h" - -namespace doris { - -class RandomAccessFile; -class RandomRWFile; -class WritableFile; -struct WritableFileOptions; -struct RandomAccessFileOptions; -struct RandomRWFileOptions; - -class PosixEnv : public Env { -public: - ~PosixEnv() override {} - - // get a RandomAccessFile pointer without file cache - Status new_random_access_file(const std::string& fname, - std::unique_ptr* result) override; - - Status new_random_access_file(const RandomAccessFileOptions& opts, const std::string& fname, - std::unique_ptr* result) override; - - Status new_writable_file(const std::string& fname, - std::unique_ptr* result) override; - - Status new_writable_file(const WritableFileOptions& opts, const std::string& fname, - std::unique_ptr* result) override; - - Status new_random_rw_file(const std::string& fname, - std::unique_ptr* result) override; - - Status new_random_rw_file(const RandomRWFileOptions& opts, const std::string& fname, - std::unique_ptr* result) override; - - Status path_exists(const std::string& fname, bool is_dir = false) override; - - Status get_children(const std::string& dir, std::vector* result) override; - - Status iterate_dir(const std::string& dir, const std::function& cb) override; - - Status delete_file(const std::string& fname) override; - - Status create_dir(const std::string& name) override; - - Status create_dir_if_missing(const std::string& dirname, bool* created = nullptr) override; - - Status create_dirs(const std::string& dirname) override; - - // Delete the specified directory. 
- Status delete_dir(const std::string& dirname) override; - - Status sync_dir(const std::string& dirname) override; - - Status is_directory(const std::string& path, bool* is_dir) override; - - Status canonicalize(const std::string& path, std::string* result) override; - - Status get_file_size(const std::string& fname, uint64_t* size) override; - - Status get_file_modified_time(const std::string& fname, uint64_t* file_mtime) override; - - Status copy_path(const std::string& src, const std::string& target) override; - - Status rename_file(const std::string& src, const std::string& target) override; - - Status rename_dir(const std::string& src, const std::string& target) override; - - Status link_file(const std::string& old_path, const std::string& new_path) override; - - Status get_space_info(const std::string& path, int64_t* capacity, int64_t* available) override; -}; - -} // namespace doris \ No newline at end of file diff --git a/be/src/env/env_util.cpp b/be/src/env/env_util.cpp deleted file mode 100644 index 1043231176b1ba..00000000000000 --- a/be/src/env/env_util.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "env/env_util.h" - -#include "env/env.h" - -using std::shared_ptr; -using std::string; - -namespace doris { -namespace env_util { - -Status open_file_for_write(Env* env, const string& path, shared_ptr* file) { - return open_file_for_write(WritableFileOptions(), env, path, file); -} - -Status open_file_for_write(const WritableFileOptions& opts, Env* env, const string& path, - shared_ptr* file) { - std::unique_ptr w; - RETURN_IF_ERROR(env->new_writable_file(opts, path, &w)); - file->reset(w.release()); - return Status::OK(); -} - -Status open_file_for_random(Env* env, const string& path, shared_ptr* file) { - std::unique_ptr r; - RETURN_IF_ERROR(env->new_random_access_file(path, &r)); - file->reset(r.release()); - return Status::OK(); -} - -static Status do_write_string_to_file(Env* env, const Slice& data, const std::string& fname, - bool should_sync) { - std::unique_ptr file; - Status s = env->new_writable_file(fname, &file); - if (!s.ok()) { - return s; - } - s = file->append(data); - if (s.ok() && should_sync) { - s = file->sync(); - } - if (s.ok()) { - s = file->close(); - } - file.reset(); // Will auto-close if we did not close above - if (!s.ok()) { - RETURN_NOT_OK_STATUS_WITH_WARN(env->delete_file(fname), - "Failed to delete partially-written file " + fname); - } - return s; -} - -Status write_string_to_file(Env* env, const Slice& data, const std::string& fname) { - return do_write_string_to_file(env, data, fname, false); -} - -Status write_string_to_file_sync(Env* env, const Slice& data, const std::string& fname) { - return do_write_string_to_file(env, data, fname, true); -} - -Status read_file_to_string(Env* env, const std::string& fname, std::string* data) { - data->clear(); - std::unique_ptr file; - Status s = env->new_random_access_file(fname, &file); - if (!s.ok()) { - return s; - } - s = file->read_all(data); - return s; -} - -} // namespace env_util -} // namespace doris diff --git a/be/src/env/env_util.h b/be/src/env/env_util.h deleted 
file mode 100644 index fde092c99bee09..00000000000000 --- a/be/src/env/env_util.h +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "common/status.h" -#include "env.h" - -namespace doris { - -class Env; -class RandomAccessFile; -class WritableFile; -struct WritableFileOptions; - -namespace env_util { - -Status open_file_for_write(Env* env, const std::string& path, std::shared_ptr* file); - -Status open_file_for_write(const WritableFileOptions& opts, Env* env, const std::string& path, - std::shared_ptr* file); - -Status open_file_for_random(Env* env, const std::string& path, - std::shared_ptr* file); - -// A utility routine: write "data" to the named file. -Status write_string_to_file(Env* env, const Slice& data, const std::string& fname); -// Like above but also fsyncs the new file. 
-Status write_string_to_file_sync(Env* env, const Slice& data, const std::string& fname); - -// A utility routine: read contents of named file into *data -Status read_file_to_string(Env* env, const std::string& fname, std::string* data); - -} // namespace env_util -} // namespace doris diff --git a/be/src/gen_cpp/CMakeLists.txt b/be/src/gen_cpp/CMakeLists.txt index 0b4d42da5a756f..e9bc3cd79343fb 100644 --- a/be/src/gen_cpp/CMakeLists.txt +++ b/be/src/gen_cpp/CMakeLists.txt @@ -19,13 +19,78 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/gen_cpp") set (GEN_CPP_DIR ${GENSRC_DIR}/gen_cpp) -file(GLOB SRC_FILES CONFIGURE_DEPENDS - ${GEN_CPP_DIR}/*.cpp - ${GEN_CPP_DIR}/*.cc +set(SRC_FILES + ${GEN_CPP_DIR}/AgentService_constants.cpp + ${GEN_CPP_DIR}/AgentService_types.cpp + ${GEN_CPP_DIR}/BackendService_constants.cpp + ${GEN_CPP_DIR}/BackendService.cpp + ${GEN_CPP_DIR}/BackendService_types.cpp + ${GEN_CPP_DIR}/PaloBrokerService_types.cpp + ${GEN_CPP_DIR}/TDorisExternalService.cpp + ${GEN_CPP_DIR}/DorisExternalService_types.cpp + ${GEN_CPP_DIR}/DorisExternalService_constants.cpp + ${GEN_CPP_DIR}/QueryPlanExtra_types.cpp + ${GEN_CPP_DIR}/QueryPlanExtra_constants.cpp + ${GEN_CPP_DIR}/TPaloBrokerService.cpp + ${GEN_CPP_DIR}/HeartbeatService_constants.cpp + ${GEN_CPP_DIR}/HeartbeatService.cpp + ${GEN_CPP_DIR}/HeartbeatService_types.cpp + ${GEN_CPP_DIR}/PaloInternalService_constants.cpp + ${GEN_CPP_DIR}/PaloInternalService_types.cpp + ${GEN_CPP_DIR}/FrontendService.cpp + ${GEN_CPP_DIR}/FrontendService_constants.cpp + ${GEN_CPP_DIR}/FrontendService_types.cpp + ${GEN_CPP_DIR}/PaloService_constants.cpp + ${GEN_CPP_DIR}/PaloService_types.cpp + ${GEN_CPP_DIR}/Data_constants.cpp + ${GEN_CPP_DIR}/Data_types.cpp + ${GEN_CPP_DIR}/DataSinks_constants.cpp + ${GEN_CPP_DIR}/DataSinks_types.cpp + ${GEN_CPP_DIR}/Ddl_constants.cpp + ${GEN_CPP_DIR}/Ddl_types.cpp + ${GEN_CPP_DIR}/Descriptors_constants.cpp + ${GEN_CPP_DIR}/Descriptors_types.cpp + ${GEN_CPP_DIR}/Exprs_constants.cpp + 
${GEN_CPP_DIR}/Exprs_types.cpp + ${GEN_CPP_DIR}/MasterService_constants.cpp + ${GEN_CPP_DIR}/MasterService_types.cpp + ${GEN_CPP_DIR}/MetricDefs_constants.cpp + ${GEN_CPP_DIR}/MetricDefs_types.cpp + ${GEN_CPP_DIR}/Metrics_constants.cpp + ${GEN_CPP_DIR}/Metrics_types.cpp + ${GEN_CPP_DIR}/NetworkTest_constants.cpp + ${GEN_CPP_DIR}/NetworkTest_types.cpp + ${GEN_CPP_DIR}/NetworkTestService.cpp + ${GEN_CPP_DIR}/Opcodes_constants.cpp + ${GEN_CPP_DIR}/Opcodes_types.cpp + ${GEN_CPP_DIR}/PlanNodes_constants.cpp + ${GEN_CPP_DIR}/PlanNodes_types.cpp + ${GEN_CPP_DIR}/Partitions_constants.cpp + ${GEN_CPP_DIR}/Partitions_types.cpp + ${GEN_CPP_DIR}/Planner_constants.cpp + ${GEN_CPP_DIR}/Planner_types.cpp + ${GEN_CPP_DIR}/RuntimeProfile_constants.cpp + ${GEN_CPP_DIR}/RuntimeProfile_types.cpp + ${GEN_CPP_DIR}/Status_constants.cpp + ${GEN_CPP_DIR}/Status_types.cpp + ${GEN_CPP_DIR}/Types_constants.cpp + ${GEN_CPP_DIR}/Types_types.cpp + ${GEN_CPP_DIR}/olap_common.pb.cc + ${GEN_CPP_DIR}/olap_file.pb.cc + ${GEN_CPP_DIR}/column_data_file.pb.cc + ${GEN_CPP_DIR}/data.pb.cc + ${GEN_CPP_DIR}/descriptors.pb.cc + ${GEN_CPP_DIR}/internal_service.pb.cc + ${GEN_CPP_DIR}/function_service.pb.cc + ${GEN_CPP_DIR}/types.pb.cc + ${GEN_CPP_DIR}/segment_v2.pb.cc + ${GEN_CPP_DIR}/parquet_constants.cpp + ${GEN_CPP_DIR}/parquet_types.cpp + #$${GEN_CPP_DIR}/opcode/functions.cc + #$${GEN_CPP_DIR}/opcode/vector-functions.cc + #$${GEN_CPP_DIR}/opcode/opcode-registry-init.cc ) -add_compile_options(-Wno-return-type) - # keep everything in one library, the object files reference # each other add_library(DorisGen STATIC ${SRC_FILES}) diff --git a/be/src/http/action/download_action.cpp b/be/src/http/action/download_action.cpp index c3475ab2943a53..e56b24c61fd596 100644 --- a/be/src/http/action/download_action.cpp +++ b/be/src/http/action/download_action.cpp @@ -23,16 +23,14 @@ #include #include -#include "env/env.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" #include 
"http/http_response.h" #include "http/http_status.h" #include "http/utils.h" +#include "io/fs/local_file_system.h" #include "runtime/exec_env.h" -#include "util/file_utils.h" -#include "util/filesystem_util.h" #include "util/path_util.h" namespace doris { @@ -46,15 +44,17 @@ DownloadAction::DownloadAction(ExecEnv* exec_env, const std::vector : _exec_env(exec_env), _download_type(NORMAL) { for (auto& dir : allow_dirs) { std::string p; - WARN_IF_ERROR(FileUtils::canonicalize(dir, &p), "canonicalize path " + dir + " failed"); + Status st = io::global_local_filesystem()->canonicalize(dir, &p); + if (!st.ok()) { + continue; + } _allow_paths.emplace_back(std::move(p)); } } DownloadAction::DownloadAction(ExecEnv* exec_env, const std::string& error_log_root_dir) : _exec_env(exec_env), _download_type(ERROR_LOG) { - WARN_IF_ERROR(FileUtils::canonicalize(error_log_root_dir, &_error_log_root_dir), - "canonicalize path " + error_log_root_dir + " failed"); + io::global_local_filesystem()->canonicalize(error_log_root_dir, &_error_log_root_dir); } void DownloadAction::handle_normal(HttpRequest* req, const std::string& file_param) { @@ -74,7 +74,14 @@ void DownloadAction::handle_normal(HttpRequest* req, const std::string& file_par return; } - if (FileUtils::is_dir(file_param)) { + bool is_dir = false; + status = io::global_local_filesystem()->is_directory(file_param, &is_dir); + if (!status.ok()) { + HttpChannel::send_reply(req, status.to_string()); + return; + } + + if (is_dir) { do_dir_response(file_param, req); } else { do_file_response(file_param, req); @@ -91,7 +98,13 @@ void DownloadAction::handle_error_log(HttpRequest* req, const std::string& file_ return; } - if (FileUtils::is_dir(absolute_path)) { + bool is_dir = false; + status = io::global_local_filesystem()->is_directory(absolute_path, &is_dir); + if (!status.ok()) { + HttpChannel::send_reply(req, status.to_string()); + return; + } + if (is_dir) { std::string error_msg = "error log can only be file."; 
HttpChannel::send_reply(req, error_msg); return; @@ -138,12 +151,9 @@ Status DownloadAction::check_path_is_allowed(const std::string& file_path) { DCHECK_EQ(_download_type, NORMAL); std::string canonical_file_path; - RETURN_WITH_WARN_IF_ERROR(FileUtils::canonicalize(file_path, &canonical_file_path), - Status::InternalError("file path is invalid: {}", file_path), - "file path is invalid: " + file_path); - + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(file_path, &canonical_file_path)); for (auto& allow_path : _allow_paths) { - if (FileSystemUtil::contain_path(allow_path, canonical_file_path)) { + if (io::LocalFileSystem::contain_path(allow_path, canonical_file_path)) { return Status::OK(); } } @@ -155,11 +165,8 @@ Status DownloadAction::check_log_path_is_allowed(const std::string& file_path) { DCHECK_EQ(_download_type, ERROR_LOG); std::string canonical_file_path; - RETURN_WITH_WARN_IF_ERROR(FileUtils::canonicalize(file_path, &canonical_file_path), - Status::InternalError("file path is invalid: {}", file_path), - "file path is invalid: " + file_path); - - if (FileSystemUtil::contain_path(_error_log_root_dir, canonical_file_path)) { + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(file_path, &canonical_file_path)); + if (io::LocalFileSystem::contain_path(_error_log_root_dir, canonical_file_path)) { return Status::OK(); } diff --git a/be/src/http/action/jeprofile_actions.cpp b/be/src/http/action/jeprofile_actions.cpp index 27c3d0796227d0..c014ef4eba09ee 100644 --- a/be/src/http/action/jeprofile_actions.cpp +++ b/be/src/http/action/jeprofile_actions.cpp @@ -31,7 +31,7 @@ #include "http/http_handler.h" #include "http/http_headers.h" #include "http/http_request.h" -#include "util/file_utils.h" +#include "io/fs/local_file_system.h" namespace doris { @@ -72,7 +72,7 @@ void JeHeapAction::handle(HttpRequest* req) { Status JeprofileActions::setup(doris::ExecEnv* exec_env, doris::EvHttpServer* http_server, doris::ObjectPool& pool) { if 
(!config::jeprofile_dir.empty()) { - FileUtils::create_dir(config::jeprofile_dir); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(config::jeprofile_dir)); } http_server->register_handler(HttpMethod::GET, "/jeheap/dump", pool.add(new JeHeapAction())); return Status::OK(); diff --git a/be/src/http/action/pprof_actions.cpp b/be/src/http/action/pprof_actions.cpp index c0f03f03bc48e8..0be0e2c7080e33 100644 --- a/be/src/http/action/pprof_actions.cpp +++ b/be/src/http/action/pprof_actions.cpp @@ -36,9 +36,9 @@ #include "http/http_headers.h" #include "http/http_request.h" #include "http/http_response.h" +#include "io/fs/local_file_system.h" #include "runtime/exec_env.h" #include "util/bfd_parser.h" -#include "util/file_utils.h" #include "util/pprof_utils.h" namespace doris { @@ -293,7 +293,7 @@ void SymbolAction::handle(HttpRequest* req) { Status PprofActions::setup(ExecEnv* exec_env, EvHttpServer* http_server, ObjectPool& pool) { if (!config::pprof_profile_dir.empty()) { - FileUtils::create_dir(config::pprof_profile_dir); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(config::pprof_profile_dir)); } http_server->register_handler(HttpMethod::GET, "/pprof/heap", pool.add(new HeapAction())); diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 3617284fafe96c..118008d16042d7 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -22,7 +22,6 @@ #include #include -#include "env/env.h" #include "gutil/strings/substitute.h" // for Substitute #include "http/http_channel.h" #include "http/http_headers.h" @@ -35,7 +34,6 @@ #include "olap/tablet_meta.h" #include "olap/utils.h" #include "runtime/exec_env.h" -#include "util/file_utils.h" #include "util/json_util.h" using std::filesystem::path; @@ -117,12 +115,11 @@ Status RestoreTabletAction::_reload_tablet(const std::string& key, const std::st LOG(WARNING) << "load header failed. 
status: " << res << ", signature: " << tablet_id; // remove tablet data path in data path // path: /roo_path/data/shard/tablet_id - std::string tablet_path = - strings::Substitute("$0/$1/$2", shard_path, tablet_id, schema_hash); - LOG(INFO) << "remove tablet_path:" << tablet_path; - Status s = FileUtils::remove_all(tablet_path); - if (!s.ok()) { - LOG(WARNING) << "remove invalid tablet schema hash path:" << tablet_path << " failed"; + io::Path tablet_path = fmt::format("{}/{}/{}", shard_path, tablet_id, schema_hash); + LOG(INFO) << "remove tablet_path:" << tablet_path.native(); + Status st = io::global_local_filesystem()->delete_directory(tablet_path); + if (!st.ok()) { + LOG(WARNING) << "remove invalid tablet schema hash path failed: " << st; } return Status::InternalError("command executor load header failed"); } else { @@ -170,15 +167,12 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, DataDir* store = StorageEngine::instance()->get_store(root_path); std::string restore_schema_hash_path = store->get_absolute_tablet_path( tablet_meta.shard_id(), tablet_meta.tablet_id(), tablet_meta.schema_hash()); - Status s = FileUtils::create_dir(restore_schema_hash_path); - if (!s.ok()) { - LOG(WARNING) << "create tablet path failed:" << restore_schema_hash_path; - return s; - } + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(restore_schema_hash_path)); // create hard link for files in /root_path/data/shard/tablet_id/schema_hash - s = _create_hard_link_recursive(latest_tablet_path, restore_schema_hash_path); + Status s = _create_hard_link_recursive(latest_tablet_path, restore_schema_hash_path); if (!s.ok()) { - RETURN_IF_ERROR(FileUtils::remove_all(restore_schema_hash_path)); + // do not check the status of delete_directory, return status of link operation + io::global_local_filesystem()->delete_directory(restore_schema_hash_path); return s; } std::string restore_shard_path = 
store->get_absolute_shard_path(tablet_meta.shard_id()); @@ -188,21 +182,17 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, Status RestoreTabletAction::_create_hard_link_recursive(const std::string& src, const std::string& dst) { - std::vector files; - RETURN_IF_ERROR(FileUtils::list_files(Env::Default(), src, &files)); + bool exists = true; + std::vector files; + RETURN_IF_ERROR(io::global_local_filesystem()->list(src, false, &files, &exists)); for (auto& file : files) { - std::string from = src + "/" + file; - std::string to = dst + "/" + file; - if (FileUtils::is_dir(from)) { - RETURN_IF_ERROR(FileUtils::create_dir(to)); + std::string from = src + "/" + file.file_name; + std::string to = dst + "/" + file.file_name; + if (!file.is_file) { + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(to)); RETURN_IF_ERROR(_create_hard_link_recursive(from, to)); } else { - int link_ret = link(from.c_str(), to.c_str()); - if (link_ret != 0) { - LOG(WARNING) << "link from:" << from << " to:" << to - << " failed, link ret:" << link_ret; - return Status::InternalError("create link path failed"); - } + RETURN_IF_ERROR(io::global_local_filesystem()->link_file(from, to)); } } return Status::OK(); @@ -222,8 +212,9 @@ bool RestoreTabletAction::_get_latest_tablet_path_from_trash(int64_t tablet_id, std::vector schema_hash_paths; for (auto& tablet_path : tablet_paths) { std::string schema_hash_path = tablet_path + "/" + std::to_string(schema_hash); - bool exist = FileUtils::check_exist(schema_hash_path); - if (exist) { + bool exists = true; + Status st = io::global_local_filesystem()->exists(schema_hash_path, &exists); + if (st.ok() && exists) { schema_hash_paths.emplace_back(std::move(schema_hash_path)); } } diff --git a/be/src/http/utils.cpp b/be/src/http/utils.cpp index d8d9b1b3dc1dd9..a4a934811923f6 100644 --- a/be/src/http/utils.cpp +++ b/be/src/http/utils.cpp @@ -23,12 +23,11 @@ #include "common/logging.h" #include "common/status.h" 
#include "common/utils.h" -#include "env/env.h" #include "http/http_channel.h" #include "http/http_common.h" #include "http/http_headers.h" #include "http/http_request.h" -#include "util/file_utils.h" +#include "io/fs/local_file_system.h" #include "util/path_util.h" #include "util/url_coding.h" @@ -158,18 +157,19 @@ void do_file_response(const std::string& file_path, HttpRequest* req) { } void do_dir_response(const std::string& dir_path, HttpRequest* req) { - std::vector files; - Status status = FileUtils::list_files(Env::Default(), dir_path, &files); - if (!status.ok()) { - LOG(WARNING) << "Failed to scan dir. dir=" << dir_path; + bool exists = true; + std::vector files; + Status st = io::global_local_filesystem()->list(dir_path, true, &files, &exists); + if (!st.ok()) { + LOG(WARNING) << "Failed to scan dir. " << st; HttpChannel::send_error(req, HttpStatus::INTERNAL_SERVER_ERROR); } const std::string FILE_DELIMITER_IN_DIR_RESPONSE = "\n"; std::stringstream result; - for (const std::string& file_name : files) { - result << file_name << FILE_DELIMITER_IN_DIR_RESPONSE; + for (auto& file : files) { + result << file.file_name << FILE_DELIMITER_IN_DIR_RESPONSE; } std::string result_str = result.str(); diff --git a/be/src/http/web_page_handler.cpp b/be/src/http/web_page_handler.cpp index 4a5b873a0801be..20e6a00185c1f5 100644 --- a/be/src/http/web_page_handler.cpp +++ b/be/src/http/web_page_handler.cpp @@ -20,7 +20,6 @@ #include #include "common/config.h" -#include "env/env.h" #include "gutil/stl_util.h" #include "gutil/strings/substitute.h" #include "http/ev_http_server.h" @@ -30,6 +29,7 @@ #include "http/http_response.h" #include "http/http_status.h" #include "http/utils.h" +#include "io/fs/local_file_system.h" #include "util/cpu_info.h" #include "util/debug_util.h" #include "util/disk_info.h" @@ -173,14 +173,18 @@ std::string WebPageHandler::mustache_partial_tag(const std::string& path) const bool WebPageHandler::static_pages_available() const { bool is_dir = false; - 
return Env::Default()->is_directory(_www_path, &is_dir).ok() && is_dir; + return io::global_local_filesystem()->is_directory(_www_path, &is_dir).ok() && is_dir; } bool WebPageHandler::mustache_template_available(const std::string& path) const { if (!static_pages_available()) { return false; } - return Env::Default()->path_exists(strings::Substitute("$0/$1.mustache", _www_path, path)).ok(); + bool exists; + return io::global_local_filesystem() + ->exists(strings::Substitute("$0/$1.mustache", _www_path, path), &exists) + .ok() && + exists; } void WebPageHandler::render_main_template(const std::string& content, std::stringstream* output) { diff --git a/be/src/io/CMakeLists.txt b/be/src/io/CMakeLists.txt index 1c953f1e55256e..71ffd2838d9123 100644 --- a/be/src/io/CMakeLists.txt +++ b/be/src/io/CMakeLists.txt @@ -42,6 +42,7 @@ set(IO_FILES fs/broker_file_writer.cpp fs/buffered_reader.cpp fs/stream_load_pipe.cpp + fs/err_utils.cpp fs/fs_utils.cpp cache/dummy_file_cache.cpp cache/file_cache.cpp diff --git a/be/src/io/cache/block/block_lru_file_cache.cpp b/be/src/io/cache/block/block_lru_file_cache.cpp index 22dbfe80f02a1c..c13e74664f4092 100644 --- a/be/src/io/cache/block/block_lru_file_cache.cpp +++ b/be/src/io/cache/block/block_lru_file_cache.cpp @@ -786,14 +786,15 @@ std::string LRUFileCache::read_file_cache_version() const { return "1.0"; } FileReaderSPtr version_reader; - size_t file_size = 0; + int64_t file_size = -1; fs->file_size(version_path, &file_size); char version[file_size]; fs->open_file(version_path, &version_reader); - version_reader->read_at(0, Slice(version, file_size), &file_size); + size_t bytes_read = 0; + version_reader->read_at(0, Slice(version, file_size), &bytes_read); version_reader->close(); - return std::string(version, file_size); + return std::string(version, bytes_read); } std::vector LRUFileCache::try_get_cache_paths(const Key& key, bool is_persistent) { diff --git a/be/src/io/cache/dummy_file_cache.cpp 
b/be/src/io/cache/dummy_file_cache.cpp index 4424a3a0408ae4..5150fa1db71422 100644 --- a/be/src/io/cache/dummy_file_cache.cpp +++ b/be/src/io/cache/dummy_file_cache.cpp @@ -19,7 +19,6 @@ #include "gutil/strings/util.h" #include "io/fs/local_file_system.h" -#include "util/file_utils.h" #include "util/string_util.h" namespace doris { @@ -32,10 +31,10 @@ DummyFileCache::~DummyFileCache() = default; void DummyFileCache::_add_file_cache(const Path& data_file) { Path cache_file = _cache_dir / data_file; - size_t file_size = 0; + int64_t file_size = -1; time_t m_time = 0; if (io::global_local_filesystem()->file_size(cache_file, &file_size).ok() && - FileUtils::mtime(cache_file.native(), &m_time).ok()) { + io::global_local_filesystem()->mtime(cache_file, &m_time).ok()) { _gc_lru_queue.push({cache_file, m_time}); _cache_file_size += file_size; } else { diff --git a/be/src/io/cache/file_cache.cpp b/be/src/io/cache/file_cache.cpp index 4dc08b52c9c836..91379a2f1d707f 100644 --- a/be/src/io/cache/file_cache.cpp +++ b/be/src/io/cache/file_cache.cpp @@ -92,7 +92,7 @@ Status FileCache::_remove_file(const Path& file, size_t* cleaned_size) { bool cache_file_exist = false; RETURN_NOT_OK_STATUS_WITH_WARN(io::global_local_filesystem()->exists(file, &cache_file_exist), "Check local cache file exist failed."); - size_t file_size = 0; + int64_t file_size = -1; if (cache_file_exist) { RETURN_NOT_OK_STATUS_WITH_WARN( io::global_local_filesystem()->file_size(file, &file_size), diff --git a/be/src/io/cache/file_cache_manager.cpp b/be/src/io/cache/file_cache_manager.cpp index d3bf4942de1aa3..b14a02c1cba15b 100644 --- a/be/src/io/cache/file_cache_manager.cpp +++ b/be/src/io/cache/file_cache_manager.cpp @@ -24,7 +24,6 @@ #include "io/fs/local_file_system.h" #include "olap/rowset/beta_rowset.h" #include "olap/storage_engine.h" -#include "util/file_utils.h" #include "util/string_util.h" namespace doris { diff --git a/be/src/io/cache/sub_file_cache.cpp b/be/src/io/cache/sub_file_cache.cpp index 
f23e122132d32f..3b3cb4110af6f0 100644 --- a/be/src/io/cache/sub_file_cache.cpp +++ b/be/src/io/cache/sub_file_cache.cpp @@ -30,7 +30,6 @@ #include "io/fs/local_file_system.h" #include "olap/iterators.h" #include "util/async_io.h" -#include "util/file_utils.h" #include "util/string_util.h" namespace doris { @@ -305,7 +304,7 @@ Status SubFileCache::_init() { auto str_vec = split(file.native(), "_"); size_t offset = std::strtoul(str_vec[str_vec.size() - 1].c_str(), nullptr, 10); - size_t file_size = 0; + int64_t file_size = -1; auto path = _cache_dir / file; RETURN_IF_ERROR(io::global_local_filesystem()->file_size(path, &file_size)); if (expect_file_size_map.find(offset) == expect_file_size_map.end() || diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp index 09ee20d8c65d29..64bbcfd1d9ee64 100644 --- a/be/src/io/file_factory.cpp +++ b/be/src/io/file_factory.cpp @@ -49,7 +49,7 @@ Status FileFactory::create_file_writer(TFileType::type type, ExecEnv* env, } case TFileType::FILE_BROKER: { std::shared_ptr fs; - RETURN_IF_ERROR(io::BrokerFileSystem::create(broker_addresses[0], properties, 0, &fs)); + RETURN_IF_ERROR(io::BrokerFileSystem::create(broker_addresses[0], properties, &fs)); RETURN_IF_ERROR(fs->create_file(path, &file_writer)); break; } @@ -90,6 +90,7 @@ Status FileFactory::create_file_reader(RuntimeProfile* /*profile*/, } io::FileBlockCachePathPolicy file_block_cache; io::FileReaderOptions reader_options(cache_policy, file_block_cache); + reader_options.file_size = file_description.file_size; switch (type) { case TFileType::FILE_LOCAL: { RETURN_IF_ERROR(io::global_local_filesystem()->open_file(file_description.path, @@ -162,8 +163,7 @@ Status FileFactory::create_broker_reader(const TNetworkAddress& broker_addr, io::FileReaderSPtr* reader, const io::FileReaderOptions& reader_options) { std::shared_ptr fs; - RETURN_IF_ERROR( - io::BrokerFileSystem::create(broker_addr, prop, file_description.file_size, &fs)); + 
RETURN_IF_ERROR(io::BrokerFileSystem::create(broker_addr, prop, &fs)); RETURN_IF_ERROR(fs->open_file(file_description.path, reader_options, reader)); *broker_file_system = std::move(fs); return Status::OK(); diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h index 2a7a2fe9b4f1ac..978241f8d6ff7b 100644 --- a/be/src/io/file_factory.h +++ b/be/src/io/file_factory.h @@ -40,7 +40,7 @@ struct FileSystemProperties { struct FileDescription { std::string path; int64_t start_offset; - size_t file_size; + int64_t file_size; }; class FileFactory { diff --git a/be/src/io/fs/broker_file_reader.cpp b/be/src/io/fs/broker_file_reader.cpp index 40ddfe6b4407a5..fc1334eed91564 100644 --- a/be/src/io/fs/broker_file_reader.cpp +++ b/be/src/io/fs/broker_file_reader.cpp @@ -35,6 +35,7 @@ BrokerFileReader::BrokerFileReader(const TNetworkAddress& broker_addr, const Pat _fd(fd), _fs(std::move(fs)) { _fs->get_client(&_client); + // LOG(INFO) << "yy debug broker reader size: " << _file_size; DorisMetrics::instance()->broker_file_open_reading->increment(1); DorisMetrics::instance()->broker_file_reader_total->increment(1); } diff --git a/be/src/io/fs/broker_file_reader.h b/be/src/io/fs/broker_file_reader.h index 288c5d25d481c4..e16e6e336f4a06 100644 --- a/be/src/io/fs/broker_file_reader.h +++ b/be/src/io/fs/broker_file_reader.h @@ -52,10 +52,10 @@ class BrokerFileReader : public FileReader { const IOContext* io_ctx) override; private: - const Path& _path; + const Path _path; size_t _file_size; - const TNetworkAddress& _broker_addr; + const TNetworkAddress _broker_addr; TBrokerFD _fd; std::shared_ptr _fs; diff --git a/be/src/io/fs/broker_file_system.cpp b/be/src/io/fs/broker_file_system.cpp index f962cdb8102027..d7e87e9930e399 100644 --- a/be/src/io/fs/broker_file_system.cpp +++ b/be/src/io/fs/broker_file_system.cpp @@ -59,18 +59,16 @@ inline const std::string& client_id(const TNetworkAddress& addr) { Status BrokerFileSystem::create(const TNetworkAddress& broker_addr, const 
std::map& broker_prop, - size_t file_size, std::shared_ptr* fs) { - (*fs).reset(new BrokerFileSystem(broker_addr, broker_prop, file_size)); + std::shared_ptr* fs) { + (*fs).reset(new BrokerFileSystem(broker_addr, broker_prop)); return (*fs)->connect(); } BrokerFileSystem::BrokerFileSystem(const TNetworkAddress& broker_addr, - const std::map& broker_prop, - size_t file_size) + const std::map& broker_prop) : RemoteFileSystem("", "", FileSystemType::BROKER), _broker_addr(broker_addr), - _broker_prop(broker_prop), - _file_size(file_size) {} + _broker_prop(broker_prop) {} Status BrokerFileSystem::connect_impl() { Status status = Status::OK(); @@ -85,7 +83,13 @@ Status BrokerFileSystem::create_file_impl(const Path& path, FileWriterPtr* write return Status::OK(); } -Status BrokerFileSystem::open_file_internal(const Path& file, FileReaderSPtr* reader) { +Status BrokerFileSystem::open_file_internal(const Path& file, int64_t file_size, + FileReaderSPtr* reader) { + int64_t fsize = file_size; + if (fsize < 0) { + RETURN_IF_ERROR(file_size_impl(file, &fsize)); + } + CHECK_BROKER_CLIENT(_client); TBrokerOpenReaderRequest request; request.__set_version(TBrokerVersion::VERSION_ONE); @@ -113,12 +117,12 @@ Status BrokerFileSystem::open_file_internal(const Path& file, FileReaderSPtr* re error_msg(response->opStatus.message)); } *reader = std::make_shared( - _broker_addr, file, _file_size, response->fd, + _broker_addr, file, fsize, response->fd, std::static_pointer_cast(shared_from_this())); return Status::OK(); } -Status BrokerFileSystem::create_directory_impl(const Path& /*path*/) { +Status BrokerFileSystem::create_directory_impl(const Path& /*path*/, bool /*failed_if_exists*/) { return Status::NotSupported("create directory not implemented!"); } @@ -195,9 +199,36 @@ Status BrokerFileSystem::exists_impl(const Path& path, bool* res) const { } } -Status BrokerFileSystem::file_size_impl(const Path& path, size_t* file_size) const { - *file_size = _file_size; - return Status::OK(); 
+Status BrokerFileSystem::file_size_impl(const Path& path, int64_t* file_size) const { + CHECK_BROKER_CLIENT(_client); + try { + TBrokerFileSizeRequest req; + req.__set_version(TBrokerVersion::VERSION_ONE); + req.__set_path(path); + req.__set_properties(_broker_prop); + + TBrokerFileSizeResponse resp; + try { + (*_client)->fileSize(resp, req); + } catch (apache::thrift::transport::TTransportException& e) { + RETURN_IF_ERROR((*_client).reopen()); + (*_client)->fileSize(resp, req); + } + + if (resp.opStatus.statusCode != TBrokerOperationStatusCode::OK) { + return Status::IOError("failed to get file size of path {}: {}", path.native(), + error_msg(resp.opStatus.message)); + } + if (resp.fileSize < 0) { + return Status::IOError("failed to get file size of path {}: size is negtive: {}", + path.native(), resp.fileSize); + } + *file_size = resp.fileSize; + return Status::OK(); + } catch (apache::thrift::TException& e) { + return Status::RpcError("failed to get file size of path {}: {}", path.native(), + error_msg(e.what())); + } } Status BrokerFileSystem::list_impl(const Path& dir, bool only_file, std::vector* files, @@ -298,7 +329,7 @@ Status BrokerFileSystem::upload_impl(const Path& local_file, const Path& remote_ FileReaderSPtr local_reader = nullptr; RETURN_IF_ERROR(local_fs->open_file(local_file, &local_reader)); - size_t file_len = local_reader->size(); + int64_t file_len = local_reader->size(); if (file_len == -1) { return Status::IOError("failed to get length of file: {}: {}", local_file.native(), error_msg("")); @@ -357,7 +388,7 @@ Status BrokerFileSystem::upload_with_checksum_impl(const Path& local_file, const Status BrokerFileSystem::download_impl(const Path& remote_file, const Path& local_file) { // 1. open remote file for read FileReaderSPtr broker_reader = nullptr; - RETURN_IF_ERROR(open_file_internal(remote_file, &broker_reader)); + RETURN_IF_ERROR(open_file_internal(remote_file, -1, &broker_reader)); // 2. 
remove the existing local file if exist if (std::filesystem::remove(local_file)) { @@ -394,7 +425,7 @@ Status BrokerFileSystem::download_impl(const Path& remote_file, const Path& loca Status BrokerFileSystem::direct_download_impl(const Path& remote_impl, std::string* content) { // 1. open remote file for read FileReaderSPtr broker_reader = nullptr; - RETURN_IF_ERROR(open_file_internal(remote_impl, &broker_reader)); + RETURN_IF_ERROR(open_file_internal(remote_impl, -1, &broker_reader)); constexpr size_t buf_sz = 1024 * 1024; std::unique_ptr read_buf(new char[buf_sz]); diff --git a/be/src/io/fs/broker_file_system.h b/be/src/io/fs/broker_file_system.h index 0328add3ca5250..fa3996487873fa 100644 --- a/be/src/io/fs/broker_file_system.h +++ b/be/src/io/fs/broker_file_system.h @@ -25,7 +25,7 @@ namespace io { class BrokerFileSystem final : public RemoteFileSystem { public: static Status create(const TNetworkAddress& broker_addr, - const std::map& broker_prop, size_t file_size, + const std::map& broker_prop, std::shared_ptr* fs); ~BrokerFileSystem() override = default; @@ -35,13 +35,13 @@ class BrokerFileSystem final : public RemoteFileSystem { protected: Status connect_impl() override; Status create_file_impl(const Path& file, FileWriterPtr* writer) override; - Status open_file_internal(const Path& file, FileReaderSPtr* reader) override; - Status create_directory_impl(const Path& dir) override; + Status open_file_internal(const Path& file, int64_t file_size, FileReaderSPtr* reader) override; + Status create_directory_impl(const Path& dir, bool failed_if_exists = false) override; Status delete_file_impl(const Path& file) override; Status delete_directory_impl(const Path& dir) override; Status batch_delete_impl(const std::vector& files) override; Status exists_impl(const Path& path, bool* res) const override; - Status file_size_impl(const Path& file, size_t* file_size) const override; + Status file_size_impl(const Path& file, int64_t* file_size) const override; Status 
list_impl(const Path& dir, bool only_file, std::vector* files, bool* exists) override; Status rename_impl(const Path& orig_name, const Path& new_name) override; @@ -58,12 +58,11 @@ class BrokerFileSystem final : public RemoteFileSystem { private: BrokerFileSystem(const TNetworkAddress& broker_addr, - const std::map& broker_prop, size_t file_size); + const std::map& broker_prop); std::string error_msg(const std::string& err) const; const TNetworkAddress& _broker_addr; const std::map& _broker_prop; - size_t _file_size; std::shared_ptr _client; }; diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index af88fb4b8995ea..552b9b1a8337a3 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -67,8 +67,8 @@ Status BufferedFileStreamReader::read_bytes(const uint8_t** buf, uint64_t offset SCOPED_RAW_TIMER(&_statistics.read_time); while (has_read < to_read) { size_t loop_read = 0; - Slice resutl(_buf.get() + buf_remaining + has_read, to_read - has_read); - RETURN_IF_ERROR(_file->read_at(_buf_end_offset + has_read, resutl, &loop_read)); + Slice result(_buf.get() + buf_remaining + has_read, to_read - has_read); + RETURN_IF_ERROR(_file->read_at(_buf_end_offset + has_read, result, &loop_read)); _statistics.read_calls++; if (loop_read == 0) { break; diff --git a/be/src/io/fs/err_utils.cpp b/be/src/io/fs/err_utils.cpp new file mode 100644 index 00000000000000..d01c7e748829f6 --- /dev/null +++ b/be/src/io/fs/err_utils.cpp @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "io/fs/err_utils.h" + +#include + +#include + +#include "io/fs/hdfs.h" + +namespace doris { +namespace io { + +std::string errno_to_str() { + char buf[1024]; + return fmt::format("({}), {}", errno, strerror_r(errno, buf, 1024)); +} + +std::string errcode_to_str(const std::error_code& ec) { + return fmt::format("({}), {}", ec.value(), ec.message()); +} + +std::string hdfs_error() { + std::stringstream ss; + char buf[1024]; + ss << "(" << errno << "), " << strerror_r(errno, buf, 1024) << ")"; +#ifdef USE_HADOOP_HDFS + char* root_cause = hdfsGetLastExceptionRootCause(); + if (root_cause != nullptr) { + ss << ", reason: " << root_cause; + } +#else + ss << ", reason: " << hdfsGetLastError(); +#endif + return ss.str(); +} + +} // namespace io +} // namespace doris diff --git a/be/src/env/env.cpp b/be/src/io/fs/err_utils.h similarity index 79% rename from be/src/env/env.cpp rename to be/src/io/fs/err_utils.h index 3e0b963ac83c48..31ca702c32625d 100644 --- a/be/src/env/env.cpp +++ b/be/src/io/fs/err_utils.h @@ -15,17 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-#include "env/env.h" +#pragma once -#include "env/env_posix.h" +#include +#include namespace doris { +namespace io { -std::shared_ptr Env::_posix_env(new PosixEnv()); +std::string errno_to_str(); +std::string errcode_to_str(const std::error_code& ec); +std::string hdfs_error(); -// Default Posix Env -Env* Env::Default() { - return _posix_env.get(); -} - -} // end namespace doris \ No newline at end of file +} // namespace io +} // namespace doris diff --git a/be/src/io/fs/file_reader.cpp b/be/src/io/fs/file_reader.cpp index 05de1ba4994235..fba531630f35cb 100644 --- a/be/src/io/fs/file_reader.cpp +++ b/be/src/io/fs/file_reader.cpp @@ -26,13 +26,17 @@ namespace io { Status FileReader::read_at(size_t offset, Slice result, size_t* bytes_read, const IOContext* io_ctx) { + Status st; if (bthread_self() == 0) { - return read_at_impl(offset, result, bytes_read, io_ctx); + st = read_at_impl(offset, result, bytes_read, io_ctx); + } else { + auto task = [&] { st = read_at_impl(offset, result, bytes_read, io_ctx); }; + AsyncIO::run_task(task, fs()->type()); } - Status s; - auto task = [&] { s = read_at_impl(offset, result, bytes_read, io_ctx); }; - AsyncIO::run_task(task, fs()->type()); - return s; + if (!st) { + LOG(WARNING) << st; + } + return st; } } // namespace io diff --git a/be/src/io/fs/file_reader_options.h b/be/src/io/fs/file_reader_options.h index cef4a54ea36958..c12ce727bff7a2 100644 --- a/be/src/io/fs/file_reader_options.h +++ b/be/src/io/fs/file_reader_options.h @@ -72,6 +72,10 @@ class FileReaderOptions { FileCachePolicy cache_type; const CachePathPolicy& path_policy; + // length of the file in bytes. + // -1 means unset. + // If the file length is not set, the file length will be fetched from the file system. 
+ int64_t file_size = -1; static FileReaderOptions DEFAULT; }; diff --git a/be/src/io/fs/file_system.cpp b/be/src/io/fs/file_system.cpp index 212020437a3996..1f34500035d69d 100644 --- a/be/src/io/fs/file_system.cpp +++ b/be/src/io/fs/file_system.cpp @@ -17,6 +17,7 @@ #include "io/fs/file_system.h" +#include "olap/olap_define.h" #include "util/async_io.h" namespace doris { @@ -24,58 +25,28 @@ namespace io { Status FileSystem::create_file(const Path& file, FileWriterPtr* writer) { auto path = absolute_path(file); - if (bthread_self() == 0) { - return create_file_impl(path, writer); - } - Status s; - auto task = [&] { s = create_file_impl(path, writer); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(create_file_impl(path, writer)); } Status FileSystem::open_file(const Path& file, const FileReaderOptions& reader_options, FileReaderSPtr* reader) { auto path = absolute_path(file); - if (bthread_self() == 0) { - return open_file_impl(path, reader_options, reader); - } - Status s; - auto task = [&] { s = open_file_impl(path, reader_options, reader); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(open_file_impl(path, reader_options, reader)); } -Status FileSystem::create_directory(const Path& dir) { +Status FileSystem::create_directory(const Path& dir, bool failed_if_exists) { auto path = absolute_path(dir); - if (bthread_self() == 0) { - return create_directory_impl(path); - } - Status s; - auto task = [&] { s = create_directory_impl(path); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(create_directory_impl(path, failed_if_exists)); } Status FileSystem::delete_file(const Path& file) { auto path = absolute_path(file); - if (bthread_self() == 0) { - return delete_file_impl(path); - } - Status s; - auto task = [&] { s = delete_file_impl(path); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(delete_file_impl(path)); } Status FileSystem::delete_directory(const Path& dir) { auto path = absolute_path(dir); - if 
(bthread_self() == 0) { - return delete_directory_impl(path); - } - Status s; - auto task = [&] { s = delete_directory_impl(path); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(delete_directory_impl(path)); } Status FileSystem::batch_delete(const std::vector& files) { @@ -83,71 +54,35 @@ Status FileSystem::batch_delete(const std::vector& files) { for (auto& file : files) { abs_files.push_back(absolute_path(file)); } - if (bthread_self() == 0) { - return batch_delete_impl(abs_files); - } - Status s; - auto task = [&] { s = batch_delete_impl(abs_files); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(batch_delete_impl(abs_files)); } Status FileSystem::exists(const Path& path, bool* res) const { auto fs_path = absolute_path(path); - if (bthread_self() == 0) { - return exists_impl(fs_path, res); - } - Status s; - auto task = [&] { s = exists_impl(fs_path, res); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(exists_impl(fs_path, res)); } -Status FileSystem::file_size(const Path& file, size_t* file_size) const { +Status FileSystem::file_size(const Path& file, int64_t* file_size) const { auto path = absolute_path(file); - if (bthread_self() == 0) { - return file_size_impl(path, file_size); - } - Status s; - auto task = [&] { s = file_size_impl(path, file_size); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(file_size_impl(path, file_size)); } Status FileSystem::list(const Path& dir, bool only_file, std::vector* files, bool* exists) { auto path = absolute_path(dir); - if (bthread_self() == 0) { - return list_impl(path, only_file, files, exists); - } - Status s; - auto task = [&] { s = list_impl(path, only_file, files, exists); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(list_impl(path, only_file, files, exists)); } Status FileSystem::rename(const Path& orig_name, const Path& new_name) { auto orig_path = absolute_path(orig_name); auto new_path = absolute_path(new_name); - if 
(bthread_self() == 0) { - return rename_impl(orig_path, new_path); - } - Status s; - auto task = [&] { s = rename_impl(orig_path, new_path); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(rename_impl(orig_path, new_path)); } Status FileSystem::rename_dir(const Path& orig_name, const Path& new_name) { auto orig_path = absolute_path(orig_name); auto new_path = absolute_path(new_name); - if (bthread_self() == 0) { - return rename_dir_impl(orig_path, new_path); - } - Status s; - auto task = [&] { s = rename_dir_impl(orig_path, new_path); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(rename_dir_impl(orig_path, new_path)); } } // namespace io diff --git a/be/src/io/fs/file_system.h b/be/src/io/fs/file_system.h index 05c58b39a7a971..3dbf741a05a284 100644 --- a/be/src/io/fs/file_system.h +++ b/be/src/io/fs/file_system.h @@ -29,6 +29,23 @@ namespace doris { namespace io { +#ifndef FILESYSTEM_M +#define FILESYSTEM_M(stmt) \ + do { \ + Status _s; \ + if (bthread_self() == 0) { \ + _s = (stmt); \ + } else { \ + auto task = [&] { _s = (stmt); }; \ + AsyncIO::run_task(task, _type); \ + } \ + if (!_s) { \ + LOG(WARNING) << _s; \ + } \ + return _s; \ + } while (0); +#endif + enum class FileSystemType : uint8_t { LOCAL, S3, @@ -39,7 +56,7 @@ enum class FileSystemType : uint8_t { struct FileInfo { // only file name, no path std::string file_name; - size_t file_size; + int64_t file_size; bool is_file; }; @@ -53,12 +70,12 @@ class FileSystem : public std::enable_shared_from_this { } Status open_file(const Path& file, const FileReaderOptions& reader_options, FileReaderSPtr* reader); - Status create_directory(const Path& dir); + Status create_directory(const Path& dir, bool failed_if_exists = false); Status delete_file(const Path& file); Status delete_directory(const Path& dir); Status batch_delete(const std::vector& files); Status exists(const Path& path, bool* res) const; - Status file_size(const Path& file, size_t* file_size) const; + Status 
file_size(const Path& file, int64_t* file_size) const; Status list(const Path& dir, bool only_file, std::vector* files, bool* exists); Status rename(const Path& orig_name, const Path& new_name); Status rename_dir(const Path& orig_name, const Path& new_name); @@ -90,7 +107,7 @@ class FileSystem : public std::enable_shared_from_this { FileReaderSPtr* reader) = 0; /// create directory recursively - virtual Status create_directory_impl(const Path& dir) = 0; + virtual Status create_directory_impl(const Path& dir, bool failed_if_exists = false) = 0; /// delete file. /// return OK if file does not exist @@ -112,7 +129,7 @@ class FileSystem : public std::enable_shared_from_this { /// return OK and get size of given file, save in "file_size". /// return ERR otherwise - virtual Status file_size_impl(const Path& file, size_t* file_size) const = 0; + virtual Status file_size_impl(const Path& file, int64_t* file_size) const = 0; /// return OK and list all objects in "dir", save in "files" /// return ERR otherwise diff --git a/be/src/io/fs/fs_utils.cpp b/be/src/io/fs/fs_utils.cpp index fbd6a89fc6187a..066dbaa63aefbf 100644 --- a/be/src/io/fs/fs_utils.cpp +++ b/be/src/io/fs/fs_utils.cpp @@ -17,29 +17,24 @@ #include "io/fs/fs_utils.h" -#include -#include - -#include +#include "io/fs/file_reader.h" +#include "io/fs/file_system.h" namespace doris { namespace io { -std::string errno_to_str() { - char buf[1024]; - return fmt::format("({}), {}", errno, strerror_r(errno, buf, 1024)); -} - -std::string errcode_to_str(const std::error_code& ec) { - return fmt::format("({}), {}", ec.value(), ec.message()); -} - -std::string hdfs_error() { - std::stringstream ss; - char buf[1024]; - ss << "(" << errno << "), " << strerror_r(errno, buf, 1024); - ss << ", reason: " << hdfsGetLastError(); - return ss.str(); +Status read_file_to_string(FileSystemSPtr fs, const Path& file, std::string* content) { + FileReaderSPtr file_reader; + RETURN_IF_ERROR(fs->open_file(file, &file_reader)); + size_t 
file_size = file_reader->size(); + content->resize(file_size); + size_t bytes_read = 0; + RETURN_IF_ERROR(file_reader->read_at(0, {*content}, &bytes_read)); + if (bytes_read != file_size) { + return Status::IOError("failed to read file {} to string. bytes read: {}, file size: {}", + file.native(), bytes_read, file_size); + } + return file_reader->close(); } } // namespace io diff --git a/be/src/io/fs/fs_utils.h b/be/src/io/fs/fs_utils.h index 31ca702c32625d..866b3cfaeab1af 100644 --- a/be/src/io/fs/fs_utils.h +++ b/be/src/io/fs/fs_utils.h @@ -17,15 +17,103 @@ #pragma once -#include -#include +#include "common/status.h" +#include "io/fs/file_system.h" namespace doris { namespace io { -std::string errno_to_str(); -std::string errcode_to_str(const std::error_code& ec); -std::string hdfs_error(); +struct FilePathDesc { + FilePathDesc(const std::string& path) { filepath = path; } + FilePathDesc() = default; + TStorageMedium::type storage_medium = TStorageMedium::HDD; + std::string filepath; + std::string remote_path; + std::string storage_name; + // default to nullptr so a freshly constructed desc never carries an indeterminate pointer + io::FileSystem* file_system = nullptr; + + std::string debug_string() const { + std::stringstream ss; + ss << "storage_medium: " << to_string(storage_medium) << ", local_path: " << filepath; + if (!remote_path.empty()) { + ss << ", storage_name: " << storage_name << ", remote_path: " << remote_path; + } + return ss.str(); + } + // REMOTE_CACHE is the local cache path for remote path, if a data_dir is REMOTE_CACHE, + // it means the tablet in it will be set as a remote path. 
+ static bool is_remote(TStorageMedium::type checked_storage_medium) { + return checked_storage_medium == TStorageMedium::S3 || + checked_storage_medium == TStorageMedium::REMOTE_CACHE; + } + bool is_remote() const { return is_remote(storage_medium); } +}; + +class FilePathDescStream { +public: + FilePathDescStream& operator<<(const FilePathDesc& val) { + _filepath_stream << val.filepath; + _storage_medium = val.storage_medium; + _storage_name = val.storage_name; + if (FilePathDesc::is_remote(_storage_medium)) { + _remote_path_stream << val.remote_path; + } + return *this; + } + FilePathDescStream& operator<<(const std::string& val) { + _filepath_stream << val; + if (FilePathDesc::is_remote(_storage_medium)) { + _remote_path_stream << val; + } + return *this; + } + FilePathDescStream& operator<<(uint64_t val) { + _filepath_stream << val; + if (FilePathDesc::is_remote(_storage_medium)) { + _remote_path_stream << val; + } + return *this; + } + FilePathDescStream& operator<<(int64_t val) { + _filepath_stream << val; + if (FilePathDesc::is_remote(_storage_medium)) { + _remote_path_stream << val; + } + return *this; + } + FilePathDescStream& operator<<(uint32_t val) { + _filepath_stream << val; + if (FilePathDesc::is_remote(_storage_medium)) { + _remote_path_stream << val; + } + return *this; + } + FilePathDescStream& operator<<(int32_t val) { + _filepath_stream << val; + if (FilePathDesc::is_remote(_storage_medium)) { + _remote_path_stream << val; + } + return *this; + } + FilePathDesc path_desc() { + FilePathDesc path_desc(_filepath_stream.str()); + path_desc.storage_medium = _storage_medium; + if (FilePathDesc::is_remote(_storage_medium)) { + path_desc.remote_path = _remote_path_stream.str(); + } + path_desc.storage_name = _storage_name; + return path_desc; + } + +private: + TStorageMedium::type _storage_medium = TStorageMedium::HDD; + std::stringstream _filepath_stream; + std::stringstream _remote_path_stream; + std::string _storage_name; +}; + +// read all data 
from file to string +Status read_file_to_string(FileSystemSPtr fs, const Path& file, std::string* content); } // namespace io } // namespace doris diff --git a/be/src/io/fs/hdfs.h b/be/src/io/fs/hdfs.h new file mode 100644 index 00000000000000..5e288e8dbdd2bc --- /dev/null +++ b/be/src/io/fs/hdfs.h @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#if defined(__x86_64__) +#include +#else +#include +#endif diff --git a/be/src/io/fs/hdfs_file_reader.cpp b/be/src/io/fs/hdfs_file_reader.cpp index 219410ac187fc4..77e15d7beea764 100644 --- a/be/src/io/fs/hdfs_file_reader.cpp +++ b/be/src/io/fs/hdfs_file_reader.cpp @@ -17,9 +17,11 @@ #include "io/fs/hdfs_file_reader.h" +#include "io/fs/err_utils.h" #include "io/fs/hdfs_file_system.h" #include "service/backend_options.h" #include "util/doris_metrics.h" + namespace doris { namespace io { HdfsFileReader::HdfsFileReader(Path path, size_t file_size, const std::string& name_node, @@ -66,7 +68,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r int res = hdfsSeek(handle->hdfs_fs, _hdfs_file, offset); if (res != 0) { return Status::InternalError("Seek to offset failed. 
(BE: {}) offset={}, err: {}", - BackendOptions::get_localhost(), offset, hdfsGetLastError()); + BackendOptions::get_localhost(), offset, hdfs_error()); } size_t bytes_req = result.size; @@ -85,7 +87,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r return Status::InternalError( "Read hdfs file failed. (BE: {}) namenode:{}, path:{}, err: {}", BackendOptions::get_localhost(), _name_node, _path.string(), - hdfsGetLastError()); + hdfs_error()); } if (loop_read == 0) { break; diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp index e893249eb2028a..3eb5bea4c45a21 100644 --- a/be/src/io/fs/hdfs_file_system.cpp +++ b/be/src/io/fs/hdfs_file_system.cpp @@ -19,7 +19,7 @@ #include "gutil/hash/hash.h" #include "io/cache/block/cached_remote_file_reader.h" -#include "io/fs/fs_utils.h" +#include "io/fs/err_utils.h" #include "io/fs/hdfs_file_reader.h" #include "io/fs/hdfs_file_writer.h" #include "io/fs/local_file_system.h" @@ -102,10 +102,13 @@ Status HdfsFileSystem::create_file_impl(const Path& file, FileWriterPtr* writer) return Status::OK(); } -Status HdfsFileSystem::open_file_internal(const Path& file, FileReaderSPtr* reader) { +Status HdfsFileSystem::open_file_internal(const Path& file, int64_t file_size, + FileReaderSPtr* reader) { CHECK_HDFS_HANDLE(_fs_handle); - size_t file_len = 0; - RETURN_IF_ERROR(file_size_impl(file, &file_len)); + int64_t fsize = file_size; + if (fsize < 0) { + RETURN_IF_ERROR(file_size_impl(file, &fsize)); + } Path real_path = convert_path(file, _namenode); auto hdfs_file = @@ -126,12 +129,12 @@ Status HdfsFileSystem::open_file_internal(const Path& file, FileReaderSPtr* read } } *reader = std::make_shared( - file, file_len, _namenode, hdfs_file, + file, fsize, _namenode, hdfs_file, std::static_pointer_cast(shared_from_this())); return Status::OK(); } -Status HdfsFileSystem::create_directory_impl(const Path& dir) { +Status HdfsFileSystem::create_directory_impl(const Path& dir, bool 
failed_if_exists) { CHECK_HDFS_HANDLE(_fs_handle); Path real_path = convert_path(dir, _namenode); int res = hdfsCreateDirectory(_fs_handle->hdfs_fs, real_path.string().c_str()); @@ -179,7 +182,7 @@ Status HdfsFileSystem::exists_impl(const Path& path, bool* res) const { return Status::OK(); } -Status HdfsFileSystem::file_size_impl(const Path& path, size_t* file_size) const { +Status HdfsFileSystem::file_size_impl(const Path& path, int64_t* file_size) const { CHECK_HDFS_HANDLE(_fs_handle); Path real_path = convert_path(path, _namenode); hdfsFileInfo* file_info = hdfsGetPathInfo(_fs_handle->hdfs_fs, real_path.string().c_str()); @@ -246,7 +249,7 @@ Status HdfsFileSystem::upload_impl(const Path& local_file, const Path& remote_fi FileSystemSPtr local_fs = global_local_filesystem(); FileReaderSPtr local_reader = nullptr; RETURN_IF_ERROR(local_fs->open_file(local_file, &local_reader)); - size_t file_len = local_reader->size(); + int64_t file_len = local_reader->size(); if (file_len == -1) { return Status::IOError("failed to get size of file: {}", local_file.string()); } @@ -300,7 +303,7 @@ Status HdfsFileSystem::upload_with_checksum_impl(const Path& local, const Path& Status HdfsFileSystem::download_impl(const Path& remote_file, const Path& local_file) { // 1. open remote file for read FileReaderSPtr hdfs_reader = nullptr; - RETURN_IF_ERROR(open_file_internal(remote_file, &hdfs_reader)); + RETURN_IF_ERROR(open_file_internal(remote_file, -1, &hdfs_reader)); // 2. remove the existing local file if exist if (std::filesystem::remove(local_file)) { @@ -337,7 +340,7 @@ Status HdfsFileSystem::download_impl(const Path& remote_file, const Path& local_ Status HdfsFileSystem::direct_download_impl(const Path& remote_file, std::string* content) { // 1. 
open remote file for read FileReaderSPtr hdfs_reader = nullptr; - RETURN_IF_ERROR(open_file_internal(remote_file, &hdfs_reader)); + RETURN_IF_ERROR(open_file_internal(remote_file, -1, &hdfs_reader)); constexpr size_t buf_sz = 1024 * 1024; std::unique_ptr read_buf(new char[buf_sz]); diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h index 41ec7b7243a817..9cec56b86cd9d7 100644 --- a/be/src/io/fs/hdfs_file_system.h +++ b/be/src/io/fs/hdfs_file_system.h @@ -18,13 +18,13 @@ #pragma once #include -#include #include +#include "io/fs/hdfs.h" #include "io/fs/remote_file_system.h" -namespace doris { +namespace doris { namespace io { class HdfsFileSystemHandle { @@ -91,13 +91,13 @@ class HdfsFileSystem final : public RemoteFileSystem { protected: Status connect_impl() override; Status create_file_impl(const Path& file, FileWriterPtr* writer) override; - Status open_file_internal(const Path& file, FileReaderSPtr* reader) override; - Status create_directory_impl(const Path& dir) override; + Status open_file_internal(const Path& file, int64_t file_size, FileReaderSPtr* reader) override; + Status create_directory_impl(const Path& dir, bool failed_if_exists = false) override; Status delete_file_impl(const Path& file) override; Status delete_directory_impl(const Path& dir) override; Status batch_delete_impl(const std::vector& files) override; Status exists_impl(const Path& path, bool* res) const override; - Status file_size_impl(const Path& file, size_t* file_size) const override; + Status file_size_impl(const Path& file, int64_t* file_size) const override; Status list_impl(const Path& dir, bool only_file, std::vector* files, bool* exists) override; Status rename_impl(const Path& orig_name, const Path& new_name) override; diff --git a/be/src/io/fs/hdfs_file_writer.cpp b/be/src/io/fs/hdfs_file_writer.cpp index 71b3de77e5eb28..ffb56e39e0ce45 100644 --- a/be/src/io/fs/hdfs_file_writer.cpp +++ b/be/src/io/fs/hdfs_file_writer.cpp @@ -20,6 +20,7 @@ #include 
#include "common/logging.h" +#include "io/fs/err_utils.h" #include "io/fs/hdfs_file_system.h" #include "service/backend_options.h" #include "util/hdfs_util.h" @@ -52,7 +53,7 @@ Status HdfsFileWriter::close() { ss << "failed to flush hdfs file. " << "(BE: " << BackendOptions::get_localhost() << ")" << "namenode:" << _hdfs_fs->_namenode << " path:" << _path - << ", err: " << hdfsGetLastError(); + << ", err: " << hdfs_error(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -83,7 +84,7 @@ Status HdfsFileWriter::appendv(const Slice* data, size_t data_cnt) { if (written_bytes < 0) { return Status::InternalError("write hdfs failed. namenode: {}, path: {}, error: {}", _hdfs_fs->_namenode, _path.native(), - hdfsGetLastError()); + hdfs_error()); } left_bytes -= written_bytes; p += written_bytes; @@ -115,7 +116,7 @@ Status HdfsFileWriter::_open() { ss << "create dir failed. " << "(BE: " << BackendOptions::get_localhost() << ")" << " namenode: " << _hdfs_fs->_namenode << " path: " << hdfs_dir - << ", err: " << hdfsGetLastError(); + << ", err: " << hdfs_error(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -127,7 +128,7 @@ Status HdfsFileWriter::_open() { ss << "open file failed. 
" << "(BE: " << BackendOptions::get_localhost() << ")" << " namenode:" << _hdfs_fs->_namenode << " path:" << _path - << ", err: " << hdfsGetLastError(); + << ", err: " << hdfs_error(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } diff --git a/be/src/io/fs/local_file_reader.cpp b/be/src/io/fs/local_file_reader.cpp index 7139950a09ae36..c62633e8cb6a07 100644 --- a/be/src/io/fs/local_file_reader.cpp +++ b/be/src/io/fs/local_file_reader.cpp @@ -19,6 +19,7 @@ #include +#include "io/fs/err_utils.h" #include "util/async_io.h" #include "util/doris_metrics.h" @@ -48,7 +49,9 @@ Status LocalFileReader::close() { AsyncIO::run_task(task, io::FileSystemType::LOCAL); } if (-1 == res) { - return Status::IOError("failed to close {}: {}", _path.native(), std::strerror(errno)); + std::string err = errno_to_str(); + LOG(WARNING) << fmt::format("failed to close {}: {}", _path.native(), err); + return Status::IOError("failed to close {}: {}", _path.native(), err); } _fd = -1; } diff --git a/be/src/io/fs/local_file_system.cpp b/be/src/io/fs/local_file_system.cpp index b88f8fbc4765e4..c82295ae277337 100644 --- a/be/src/io/fs/local_file_system.cpp +++ b/be/src/io/fs/local_file_system.cpp @@ -17,10 +17,14 @@ #include "io/fs/local_file_system.h" +#include +#include + +#include "io/fs/err_utils.h" #include "io/fs/file_system.h" -#include "io/fs/fs_utils.h" #include "io/fs/local_file_reader.h" #include "io/fs/local_file_writer.h" +#include "runtime/thread_context.h" #include "util/async_io.h" namespace doris { @@ -48,7 +52,7 @@ Status LocalFileSystem::create_file_impl(const Path& file, FileWriterPtr* writer Status LocalFileSystem::open_file_impl(const Path& file, const FileReaderOptions& /*reader_options*/, FileReaderSPtr* reader) { - size_t fsize = 0; + int64_t fsize = 0; RETURN_IF_ERROR(file_size_impl(file, &fsize)); int fd = -1; RETRY_ON_EINTR(fd, open(file.c_str(), O_RDONLY)); @@ -61,9 +65,13 @@ Status LocalFileSystem::open_file_impl(const Path& file, return 
Status::OK(); } -Status LocalFileSystem::create_directory_impl(const Path& dir) { - if (std::filesystem::exists(dir)) { - return Status::IOError("failed to create {}, already exists", dir.native()); +Status LocalFileSystem::create_directory_impl(const Path& dir, bool failed_if_exists) { + if (failed_if_exists) { + bool exists = true; + RETURN_IF_ERROR(exists_impl(dir, &exists)); + if (exists) { + return Status::IOError("failed to create {}, already exists", dir.native()); + } } std::error_code ec; std::filesystem::create_directories(dir, ec); @@ -74,7 +82,9 @@ Status LocalFileSystem::create_directory_impl(const Path& dir) { } Status LocalFileSystem::delete_file_impl(const Path& file) { - if (!std::filesystem::exists(file)) { + bool exists = true; + RETURN_IF_ERROR(exists_impl(file, &exists)); + if (!exists) { return Status::OK(); } if (!std::filesystem::is_regular_file(file)) { @@ -89,7 +99,9 @@ Status LocalFileSystem::delete_file_impl(const Path& file) { } Status LocalFileSystem::delete_directory_impl(const Path& dir) { - if (!std::filesystem::exists(dir)) { + bool exists = true; + RETURN_IF_ERROR(exists_impl(dir, &exists)); + if (!exists) { return Status::OK(); } if (!std::filesystem::is_directory(dir)) { @@ -111,11 +123,15 @@ Status LocalFileSystem::batch_delete_impl(const std::vector& files) { } Status LocalFileSystem::exists_impl(const Path& path, bool* res) const { - *res = std::filesystem::exists(path); + std::error_code ec; + *res = std::filesystem::exists(path, ec); + if (ec) { + return Status::IOError("failed to check exists {}: {}", path.native(), errcode_to_str(ec)); + } return Status::OK(); } -Status LocalFileSystem::file_size_impl(const Path& file, size_t* file_size) const { +Status LocalFileSystem::file_size_impl(const Path& file, int64_t* file_size) const { std::error_code ec; *file_size = std::filesystem::file_size(file, ec); if (ec) { @@ -126,8 +142,8 @@ Status LocalFileSystem::file_size_impl(const Path& file, size_t* file_size) cons Status 
LocalFileSystem::list_impl(const Path& dir, bool only_file, std::vector* files, bool* exists) { - if (!std::filesystem::exists(dir)) { - *exists = false; + RETURN_IF_ERROR(exists_impl(dir, exists)); + if (!exists) { return Status::OK(); } std::error_code ec; @@ -137,8 +153,16 @@ Status LocalFileSystem::list_impl(const Path& dir, bool only_file, std::vectorpush_back(std::move(file_info)); } if (ec) { @@ -164,13 +188,7 @@ Status LocalFileSystem::rename_dir_impl(const Path& orig_name, const Path& new_n Status LocalFileSystem::link_file(const Path& src, const Path& dest) { auto src_file = absolute_path(src); auto dest_file = absolute_path(dest); - if (bthread_self() == 0) { - return link_file_impl(src_file, dest_file); - } - Status s; - auto task = [&] { s = link_file_impl(src_file, dest_file); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(link_file_impl(src_file, dest_file)); } Status LocalFileSystem::link_file_impl(const Path& src, const Path& dest) { @@ -181,6 +199,195 @@ Status LocalFileSystem::link_file_impl(const Path& src, const Path& dest) { return Status::OK(); } +Status LocalFileSystem::canonicalize(const Path& path, std::string* real_path) { + std::error_code ec; + Path res = std::filesystem::canonical(path, ec); + if (ec) { + return Status::IOError("failed to canonicalize path {}: {}", path.native(), + errcode_to_str(ec)); + } + *real_path = res.string(); + return Status::OK(); +} + +Status LocalFileSystem::is_directory(const Path& path, bool* res) { + auto tmp_path = absolute_path(path); + std::error_code ec; + *res = std::filesystem::is_directory(tmp_path, ec); + if (ec) { + LOG(WARNING) << fmt::format("failed to check is dir {}: {}", tmp_path.native(), + errcode_to_str(ec)); + return Status::IOError("failed to check is dir {}: {}", tmp_path.native(), + errcode_to_str(ec)); + } + return Status::OK(); +} + +Status LocalFileSystem::md5sum(const Path& file, std::string* md5sum) { + auto path = absolute_path(file); + 
FILESYSTEM_M(md5sum_impl(path, md5sum)); +} + +Status LocalFileSystem::md5sum_impl(const Path& file, std::string* md5sum) { + int fd = open(file.c_str(), O_RDONLY); + if (fd < 0) { + return Status::IOError("failed to open file for md5sum {}: {}", file.native(), + errno_to_str()); + } + + struct stat statbuf; + if (fstat(fd, &statbuf) < 0) { + std::string err = errno_to_str(); + close(fd); + return Status::InternalError("failed to stat file {}: {}", file.native(), err); + } + size_t file_len = statbuf.st_size; + CONSUME_THREAD_MEM_TRACKER(file_len); + void* buf = mmap(nullptr, file_len, PROT_READ, MAP_SHARED, fd, 0); + + unsigned char result[MD5_DIGEST_LENGTH]; + MD5((unsigned char*)buf, file_len, result); + munmap(buf, file_len); + RELEASE_THREAD_MEM_TRACKER(file_len); + + std::stringstream ss; + for (int32_t i = 0; i < MD5_DIGEST_LENGTH; i++) { + ss << std::setfill('0') << std::setw(2) << std::hex << (int)result[i]; + } + ss >> *md5sum; + + close(fd); + return Status::OK(); +} + +Status LocalFileSystem::iterate_directory(const std::string& dir, + const std::function& cb) { + auto path = absolute_path(dir); + FILESYSTEM_M(iterate_directory_impl(dir, cb)); +} + +Status LocalFileSystem::iterate_directory_impl( + const std::string& dir, const std::function& cb) { + bool exists = true; + std::vector files; + RETURN_IF_ERROR(list_impl(dir, false, &files, &exists)); + for (auto& file : files) { + if (!cb(file)) { + break; + } + } + return Status::OK(); +} + +Status LocalFileSystem::mtime(const Path& file, time_t* m_time) { + auto path = absolute_path(file); + FILESYSTEM_M(mtime_impl(path, m_time)); +} + +Status LocalFileSystem::mtime_impl(const Path& file, time_t* m_time) { + int fd = open(file.c_str(), O_RDONLY); + if (fd < 0) { + return Status::IOError("failed to get mtime for file {}: {}", file.native(), + errno_to_str()); + } + + Defer defer {[&]() { close(fd); }}; + struct stat statbuf; + if (fstat(fd, &statbuf) < 0) { + return Status::IOError("failed to stat file 
{}: {}", file.native(), errno_to_str()); + } + *m_time = statbuf.st_mtime; + return Status::OK(); +} + +Status LocalFileSystem::delete_and_create_directory(const Path& dir) { + auto path = absolute_path(dir); + FILESYSTEM_M(delete_and_create_directory_impl(path)); +} + +Status LocalFileSystem::delete_and_create_directory_impl(const Path& dir) { + RETURN_IF_ERROR(delete_directory_impl(dir)); + return create_directory_impl(dir); +} + +Status LocalFileSystem::get_space_info(const Path& dir, size_t* capacity, size_t* available) { + auto path = absolute_path(dir); + FILESYSTEM_M(get_space_info_impl(path, capacity, available)); +} + +Status LocalFileSystem::get_space_info_impl(const Path& path, size_t* capacity, size_t* available) { + std::error_code ec; + std::filesystem::space_info info = std::filesystem::space(path, ec); + if (ec) { + return Status::IOError("failed to get available space for path {}: {}", path.native(), + errcode_to_str(ec)); + } + *capacity = info.capacity; + *available = info.available; + return Status::OK(); +} + +Status LocalFileSystem::resize_file(const Path& file, size_t new_size) { + auto path = absolute_path(file); + FILESYSTEM_M(resize_file_impl(path, new_size)); +} + +Status LocalFileSystem::resize_file_impl(const Path& file, size_t new_size) { + std::error_code ec; + std::filesystem::resize_file(file, new_size, ec); + if (ec) { + return Status::IOError("failed to resize file {}: {}", file.native(), errcode_to_str(ec)); + } + return Status::OK(); +} + +Status LocalFileSystem::copy_dirs(const Path& src, const Path& dest) { + auto src_path = absolute_path(src); + auto dest_path = absolute_path(dest); + // dispatch to the _impl variant; calling copy_dirs() here would recurse without end + FILESYSTEM_M(copy_dirs_impl(src_path, dest_path)); +} + +Status LocalFileSystem::copy_dirs_impl(const Path& src, const Path& dest) { + std::error_code ec; + std::filesystem::copy(src, dest, std::filesystem::copy_options::recursive, ec); + if (ec) { + return Status::IOError("failed to copy from {} to {}: {}", src.native(), dest.native(), + 
errcode_to_str(ec)); + } + return Status::OK(); +} + +bool LocalFileSystem::contain_path(const Path& parent_, const Path& sub_) { + Path parent = parent_.lexically_normal(); + Path sub = sub_.lexically_normal(); + if (parent == sub) { + return true; + } + + if (parent.filename() == ".") { + parent.remove_filename(); + } + + // We're also not interested in the file's name. + if (sub.has_filename()) { + sub.remove_filename(); + } + // If dir has more components than file, then file can't possibly reside in dir. + auto dir_len = std::distance(parent.begin(), parent.end()); + auto file_len = std::distance(sub.begin(), sub.end()); + if (dir_len > file_len) { + return false; + } + auto p_it = parent.begin(); + auto s_it = sub.begin(); + for (; p_it != parent.end() && !p_it->string().empty(); ++p_it, ++s_it) { + if (!(*p_it == *s_it)) { + return false; + } + } + return true; +} + static std::shared_ptr local_fs = io::LocalFileSystem::create(""); const std::shared_ptr& global_local_filesystem() { diff --git a/be/src/io/fs/local_file_system.h b/be/src/io/fs/local_file_system.h index 44202c8995898c..308cd5e1fcd9e2 100644 --- a/be/src/io/fs/local_file_system.h +++ b/be/src/io/fs/local_file_system.h @@ -30,21 +30,56 @@ class LocalFileSystem final : public FileSystem { /// hard link dest file to src file Status link_file(const Path& src, const Path& dest); + // Canonicalize 'path' by applying the following conversions: + // - Converts a relative path into an absolute one using the cwd. + // - Converts '.' and '..' references. + // - Resolves all symbolic links. + // + // All directory entries in 'path' must exist on the filesystem. 
+ Status canonicalize(const Path& path, std::string* real_path); + // Check if the given path is a directory + Status is_directory(const Path& path, bool* res); + // Calc md5sum of given file + Status md5sum(const Path& file, std::string* md5sum); + // iterate the given dir and execute cb on each entry, stopping once cb returns false + Status iterate_directory(const std::string& dir, + const std::function& cb); + // Return the mtime of given file + Status mtime(const Path& file, time_t* m_time); + // remove dir if exists and create a new one + Status delete_and_create_directory(const Path& dir); + // return the capacity and available space of the disk where the given path is. + Status get_space_info(const Path& path, size_t* capacity, size_t* available); + // changes the size of the regular file + Status resize_file(const Path& file, size_t new_size); + // copy src dir to dest dir, recursively + Status copy_dirs(const Path& src, const Path& dest); + // return true if parent path contains sub path + static bool contain_path(const Path& parent, const Path& sub); + protected: Status create_file_impl(const Path& file, FileWriterPtr* writer) override; Status open_file_impl(const Path& file, const FileReaderOptions& reader_options, FileReaderSPtr* reader) override; - Status create_directory_impl(const Path& dir) override; + Status create_directory_impl(const Path& dir, bool failed_if_exists = false) override; Status delete_file_impl(const Path& file) override; Status delete_directory_impl(const Path& dir) override; Status batch_delete_impl(const std::vector& files) override; Status exists_impl(const Path& path, bool* res) const override; - Status file_size_impl(const Path& file, size_t* file_size) const override; + Status file_size_impl(const Path& file, int64_t* file_size) const override; Status list_impl(const Path& dir, bool only_file, std::vector* files, bool* exists) override; Status rename_impl(const Path& orig_name, const Path& new_name) override; Status rename_dir_impl(const Path& orig_name, const Path& new_name) 
override; Status link_file_impl(const Path& src, const Path& dest); + Status md5sum_impl(const Path& file, std::string* md5sum); + Status iterate_directory_impl(const std::string& dir, + const std::function& cb); + Status mtime_impl(const Path& file, time_t* m_time); + Status delete_and_create_directory_impl(const Path& dir); + Status get_space_info_impl(const Path& path, size_t* capacity, size_t* available); + Status resize_file_impl(const Path& file, size_t new_size); + Status copy_dirs_impl(const Path& src, const Path& dest); private: LocalFileSystem(Path&& root_path, std::string&& id = ""); diff --git a/be/src/io/fs/remote_file_system.cpp b/be/src/io/fs/remote_file_system.cpp index 755de9a263290f..9e4e777639479b 100644 --- a/be/src/io/fs/remote_file_system.cpp +++ b/be/src/io/fs/remote_file_system.cpp @@ -28,13 +28,7 @@ namespace io { Status RemoteFileSystem::upload(const Path& local_file, const Path& dest_file) { auto dest_path = absolute_path(dest_file); - if (bthread_self() == 0) { - return upload_impl(local_file, dest_path); - } - Status s; - auto task = [&] { s = upload_impl(local_file, dest_path); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(upload_impl(local_file, dest_path)); } Status RemoteFileSystem::batch_upload(const std::vector& local_files, @@ -43,74 +37,38 @@ Status RemoteFileSystem::batch_upload(const std::vector& local_files, for (auto& path : remote_files) { remote_paths.push_back(absolute_path(path)); } - if (bthread_self() == 0) { - return batch_upload_impl(local_files, remote_paths); - } - Status s; - auto task = [&] { s = batch_upload_impl(local_files, remote_paths); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(batch_upload_impl(local_files, remote_paths)); } Status RemoteFileSystem::direct_upload(const Path& remote_file, const std::string& content) { auto remote_path = absolute_path(remote_file); - if (bthread_self() == 0) { - return direct_upload_impl(remote_path, content); - } - Status s; - auto 
task = [&] { s = direct_upload_impl(remote_path, content); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(direct_upload_impl(remote_path, content)); } Status RemoteFileSystem::upload_with_checksum(const Path& local_file, const Path& remote, const std::string& checksum) { auto remote_path = absolute_path(remote); - if (bthread_self() == 0) { - return upload_with_checksum_impl(local_file, remote_path, checksum); - } - Status s; - auto task = [&] { s = upload_with_checksum_impl(local_file, remote_path, checksum); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(upload_with_checksum_impl(local_file, remote_path, checksum)); } Status RemoteFileSystem::download(const Path& remote_file, const Path& local) { auto remote_path = absolute_path(remote_file); - if (bthread_self() == 0) { - return download_impl(remote_path, local); - } - Status s; - auto task = [&] { s = download_impl(remote_path, local); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(download_impl(remote_path, local)); } Status RemoteFileSystem::direct_download(const Path& remote_file, std::string* content) { auto remote_path = absolute_path(remote_file); - if (bthread_self() == 0) { - return direct_download_impl(remote_path, content); - } - Status s; - auto task = [&] { s = direct_download_impl(remote_path, content); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(direct_download_impl(remote_path, content)); } Status RemoteFileSystem::connect() { - if (bthread_self() == 0) { - return connect_impl(); - } - Status s; - auto task = [&] { s = connect_impl(); }; - AsyncIO::run_task(task, _type); - return s; + FILESYSTEM_M(connect_impl()); } Status RemoteFileSystem::open_file_impl(const Path& path, const FileReaderOptions& reader_options, FileReaderSPtr* reader) { FileReaderSPtr raw_reader; - RETURN_IF_ERROR(open_file_internal(path, &raw_reader)); + RETURN_IF_ERROR(open_file_internal(path, reader_options.file_size, &raw_reader)); switch 
(reader_options.cache_type) { case io::FileCachePolicy::NO_CACHE: { *reader = raw_reader; diff --git a/be/src/io/fs/remote_file_system.h b/be/src/io/fs/remote_file_system.h index a62746afd52655..8626d0840a8055 100644 --- a/be/src/io/fs/remote_file_system.h +++ b/be/src/io/fs/remote_file_system.h @@ -71,7 +71,9 @@ class RemoteFileSystem : public FileSystem { virtual Status direct_download_impl(const Path& remote_file, std::string* content) = 0; // The derived class should implement this method. - virtual Status open_file_internal(const Path& file, FileReaderSPtr* reader) = 0; + // if file_size < 0, the file size should be fetched from file system + virtual Status open_file_internal(const Path& file, int64_t file_size, + FileReaderSPtr* reader) = 0; }; using RemoteFileSystemSPtr = std::shared_ptr; diff --git a/be/src/io/fs/s3_file_system.cpp b/be/src/io/fs/s3_file_system.cpp index d970c9dc4e6467..3236e3f8265a2d 100644 --- a/be/src/io/fs/s3_file_system.cpp +++ b/be/src/io/fs/s3_file_system.cpp @@ -70,9 +70,7 @@ Status S3FileSystem::create(S3Conf s3_conf, std::string id, std::shared_ptr( @@ -115,7 +116,7 @@ Status S3FileSystem::open_file_internal(const Path& file, FileReaderSPtr* reader return Status::OK(); } -Status S3FileSystem::create_directory_impl(const Path& dir) { +Status S3FileSystem::create_directory_impl(const Path& dir, bool failed_if_exists) { return Status::OK(); } @@ -244,7 +245,7 @@ Status S3FileSystem::exists_impl(const Path& path, bool* res) const { return Status::OK(); } -Status S3FileSystem::file_size_impl(const Path& file, size_t* file_size) const { +Status S3FileSystem::file_size_impl(const Path& file, int64_t* file_size) const { auto client = get_client(); CHECK_S3_CLIENT(client); diff --git a/be/src/io/fs/s3_file_system.h b/be/src/io/fs/s3_file_system.h index f97e9137ccd15c..0ae687e44dbbb2 100644 --- a/be/src/io/fs/s3_file_system.h +++ b/be/src/io/fs/s3_file_system.h @@ -32,6 +32,16 @@ class PooledThreadExecutor; namespace doris { namespace io { 
+// File system for S3 compatible object storage +// When creating S3FileSystem, all required info should be set in S3Conf, +// such as ak, sk, region, endpoint, bucket. +// And the root_path of S3FileSystem is s3_conf.prefix. +// When using S3FileSystem, it accepts 2 kinds of path: +// 1. Full path: s3://bucket/path/to/file.txt +// In this case, the root_path is not used. +// 2. only key: path/to/file.txt +// In this case, the final key will be "prefix + path/to/file.txt" +// // This class is thread-safe.(Except `set_xxx` method) class S3FileSystem final : public RemoteFileSystem { public: @@ -48,13 +58,13 @@ class S3FileSystem final : public RemoteFileSystem { protected: Status connect_impl() override; Status create_file_impl(const Path& file, FileWriterPtr* writer) override; - Status open_file_internal(const Path& file, FileReaderSPtr* reader) override; - Status create_directory_impl(const Path& dir) override; + Status open_file_internal(const Path& file, int64_t file_size, FileReaderSPtr* reader) override; + Status create_directory_impl(const Path& dir, bool failed_if_exists = false) override; Status delete_file_impl(const Path& file) override; Status delete_directory_impl(const Path& dir) override; Status batch_delete_impl(const std::vector& files) override; Status exists_impl(const Path& path, bool* res) const override; - Status file_size_impl(const Path& file, size_t* file_size) const override; + Status file_size_impl(const Path& file, int64_t* file_size) const override; Status list_impl(const Path& dir, bool only_file, std::vector* files, bool* exists) override; Status rename_impl(const Path& orig_name, const Path& new_name) override; @@ -70,8 +80,15 @@ class S3FileSystem final : public RemoteFileSystem { Status direct_download_impl(const Path& remote_file, std::string* content) override; Path absolute_path(const Path& path) const override { - // do nothing - return path; + if (path.string().find("://") != std::string::npos) { + // the path is with schema, 
which means this is a full path like: + // s3://bucket/path/to/file.txt + // so no need to concat with prefix + return path; + } else { + // path with no schema + return _root_path / path; + } } private: diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp index b08b973860b462..8f3b765dbbd5fe 100644 --- a/be/src/io/hdfs_builder.cpp +++ b/be/src/io/hdfs_builder.cpp @@ -35,6 +35,7 @@ Status HDFSCommonBuilder::init_hdfs_builder() { return Status::InternalError( "failed to init HDFSCommonBuilder, please check check be/conf/hdfs-site.xml"); } + hdfsBuilderSetForceNewInstance(hdfs_builder); return Status::OK(); } @@ -53,7 +54,10 @@ Status HDFSCommonBuilder::run_kinit() { if (!rc) { return Status::InternalError("Kinit failed, errMsg: " + msg); } +#ifdef USE_LIBHDFS3 + hdfsBuilderSetPrincipal(hdfs_builder, hdfs_kerberos_principal.c_str()); hdfsBuilderSetKerbTicketCachePath(hdfs_builder, ticket_path.c_str()); +#endif return Status::OK(); } @@ -100,7 +104,6 @@ Status createHDFSBuilder(const THdfsParams& hdfsParams, HDFSCommonBuilder* build if (hdfsParams.__isset.hdfs_kerberos_principal) { builder->need_kinit = true; builder->hdfs_kerberos_principal = hdfsParams.hdfs_kerberos_principal; - hdfsBuilderSetPrincipal(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str()); } if (hdfsParams.__isset.hdfs_kerberos_keytab) { builder->need_kinit = true; diff --git a/be/src/io/hdfs_builder.h b/be/src/io/hdfs_builder.h index ecc08d5a71fe66..b04b94a7e29077 100644 --- a/be/src/io/hdfs_builder.h +++ b/be/src/io/hdfs_builder.h @@ -17,9 +17,8 @@ #pragma once -#include - #include "common/status.h" +#include "io/fs/hdfs.h" #include "gen_cpp/PlanNodes_types.h" namespace doris { @@ -38,9 +37,12 @@ class HDFSCommonBuilder { public: HDFSCommonBuilder() {} ~HDFSCommonBuilder() { +#ifdef USE_LIBHDFS3 + // for hadoop hdfs, the hdfs_builder will be freed in hdfsConnect if (hdfs_builder != nullptr) { hdfsFreeBuilder(hdfs_builder); } +#endif } // Must call this to init hdfs_builder 
first. @@ -51,7 +53,7 @@ class HDFSCommonBuilder { Status run_kinit(); private: - hdfsBuilder* hdfs_builder; + hdfsBuilder* hdfs_builder = nullptr; bool need_kinit {false}; std::string hdfs_kerberos_keytab; std::string hdfs_kerberos_principal; diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 8fd7d357ef4683..3fe2dcd7cc6dd0 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -40,8 +40,8 @@ #include #include -#include "env/env_util.h" #include "gutil/strings/substitute.h" +#include "io/fs/fs_utils.h" #include "io/fs/local_file_system.h" #include "io/fs/path.h" #include "olap/olap_define.h" @@ -52,7 +52,6 @@ #include "olap/utils.h" // for check_dir_existed #include "service/backend_options.h" #include "util/errno.h" -#include "util/file_utils.h" #include "util/string_util.h" using strings::Substitute; @@ -75,7 +74,6 @@ DataDir::DataDir(const std::string& path, int64_t capacity_bytes, TxnManager* txn_manager) : _path(path), _fs(io::LocalFileSystem::create(path)), - _capacity_bytes(capacity_bytes), _available_bytes(0), _disk_capacity_bytes(0), _storage_medium(storage_medium), @@ -105,14 +103,17 @@ DataDir::~DataDir() { } Status DataDir::init() { - if (!Env::Default()->path_exists(_path).ok()) { + bool exists = false; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(_path, &exists)); + if (!exists) { RETURN_NOT_OK_STATUS_WITH_WARN(Status::IOError("opendir failed, path={}", _path), "check file exist failed"); } RETURN_NOT_OK_STATUS_WITH_WARN(update_capacity(), "update_capacity failed"); RETURN_NOT_OK_STATUS_WITH_WARN(_init_cluster_id(), "_init_cluster_id failed"); - RETURN_NOT_OK_STATUS_WITH_WARN(_init_capacity(), "_init_capacity failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_init_capacity_and_create_shards(), + "_init_capacity_and_create_shards failed"); RETURN_NOT_OK_STATUS_WITH_WARN(_init_meta(), "_init_meta failed"); _is_used = true; @@ -127,61 +128,46 @@ void DataDir::stop_bg_worker() { Status DataDir::_init_cluster_id() { 
auto cluster_id_path = fmt::format("{}/{}", _path, CLUSTER_ID_PREFIX); - RETURN_IF_ERROR(read_cluster_id(Env::Default(), cluster_id_path, &_cluster_id)); + RETURN_IF_ERROR(read_cluster_id(cluster_id_path, &_cluster_id)); if (_cluster_id == -1) { _cluster_id_incomplete = true; } return Status::OK(); } -Status DataDir::read_cluster_id(Env* env, const std::string& cluster_id_path, int32_t* cluster_id) { - std::unique_ptr input_file; - Status exist_status = env->path_exists(cluster_id_path); - if (exist_status.ok()) { - Status status = env->new_random_access_file(cluster_id_path, &input_file); - RETURN_NOT_OK_STATUS_WITH_WARN( - status, strings::Substitute("open file failed: $0, err=$1", cluster_id_path, - status.to_string())); +Status DataDir::read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id) { + bool exists = false; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(cluster_id_path, &exists)); + if (exists) { std::string content; - RETURN_IF_ERROR(input_file->read_all(&content)); + RETURN_IF_ERROR( + io::read_file_to_string(io::global_local_filesystem(), cluster_id_path, &content)); if (content.size() > 0) { *cluster_id = std::stoi(content); } else { *cluster_id = -1; } - } else if (exist_status.is()) { - *cluster_id = -1; } else { - RETURN_NOT_OK_STATUS_WITH_WARN( - exist_status, strings::Substitute("check exist failed: $0, err=$1", cluster_id_path, - exist_status.to_string())); + *cluster_id = -1; } return Status::OK(); } -Status DataDir::_init_capacity() { - int64_t disk_capacity = -1; - int64_t available = -1; - RETURN_NOT_OK_STATUS_WITH_WARN( - Env::Default()->get_space_info(_path, &disk_capacity, &available), - strings::Substitute("get_space_info failed: $0", _path)); - if (_capacity_bytes == -1) { - _capacity_bytes = disk_capacity; - } else if (_capacity_bytes > disk_capacity) { - RETURN_NOT_OK_STATUS_WITH_WARN( - Status::InvalidArgument( - "root path {}'s capacity {} should not larger than disk capacity {}", _path, - 
_capacity_bytes, disk_capacity), - "init capacity failed"); - } - +Status DataDir::_init_capacity_and_create_shards() { + RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, &_disk_capacity_bytes, + &_available_bytes)); auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); - Status exist_status = Env::Default()->path_exists(data_path); - if (!exist_status.ok() && - (!exist_status.is() || !Env::Default()->create_dirs(data_path).ok())) { - RETURN_NOT_OK_STATUS_WITH_WARN( - Status::IOError("failed to create data root path {}", data_path), - "create_dirs failed"); + bool exists = false; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(data_path, &exists)); + if (!exists) { + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(data_path)); + } + for (int i = 0; i < MAX_SHARD_NUM; ++i) { + auto shard_path = fmt::format("{}/{}", data_path, i); + RETURN_IF_ERROR(io::global_local_filesystem()->exists(shard_path, &exists)); + if (!exists) { + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(shard_path)); + } } return Status::OK(); @@ -223,10 +209,13 @@ Status DataDir::set_cluster_id(int32_t cluster_id) { Status DataDir::_write_cluster_id_to_path(const std::string& path, int32_t cluster_id) { std::stringstream cluster_id_ss; cluster_id_ss << cluster_id; - std::unique_ptr wfile; - if (!Env::Default()->path_exists(path).ok()) { - RETURN_IF_ERROR(env_util::write_string_to_file_sync(Env::Default(), - Slice(cluster_id_ss.str()), path)); + bool exists = false; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &exists)); + if (!exists) { + io::FileWriterPtr file_writer; + RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &file_writer)); + RETURN_IF_ERROR(file_writer->append(cluster_id_ss.str())); + RETURN_IF_ERROR(file_writer->close()); } return Status::OK(); } @@ -257,11 +246,6 @@ Status DataDir::get_shard(uint64_t* shard) { next_shard = _current_shard; _current_shard = (_current_shard + 1) % 
MAX_SHARD_NUM; } - auto shard_path = fmt::format("{}/{}/{}", _path, DATA_PREFIX, next_shard); - RETURN_WITH_WARN_IF_ERROR(Env::Default()->create_dirs(shard_path), - Status::Error(), - "fail to create path. path=" + shard_path); - *shard = next_shard; return Status::OK(); } @@ -299,17 +283,22 @@ std::string DataDir::get_absolute_tablet_path(int64_t shard_id, int64_t tablet_i void DataDir::find_tablet_in_trash(int64_t tablet_id, std::vector* paths) { // path: /root_path/trash/time_label/tablet_id/schema_hash auto trash_path = fmt::format("{}/{}", _path, TRASH_PREFIX); - std::vector sub_dirs; - FileUtils::list_files(Env::Default(), trash_path, &sub_dirs); + bool exists = true; + std::vector sub_dirs; + Status st = io::global_local_filesystem()->list(trash_path, false, &sub_dirs, &exists); + if (!st) { + return; + } + for (auto& sub_dir : sub_dirs) { // sub dir is time_label - auto sub_path = fmt::format("{}/{}", trash_path, sub_dir); - if (!FileUtils::is_dir(sub_path, Env::Default())) { + if (sub_dir.is_file) { continue; } + auto sub_path = fmt::format("{}/{}", trash_path, sub_dir.file_name); auto tablet_path = fmt::format("{}/{}", sub_path, tablet_id); - Status exist_status = Env::Default()->path_exists(tablet_path); - if (exist_status.ok()) { + st = io::global_local_filesystem()->exists(tablet_path, &exists); + if (st && exists) { paths->emplace_back(std::move(tablet_path)); } } @@ -641,27 +630,26 @@ void DataDir::perform_path_gc_by_rowsetid() { } // path producer -void DataDir::perform_path_scan() { +Status DataDir::perform_path_scan() { std::unique_lock lck(_check_path_mutex); if (!_all_check_paths.empty()) { LOG(INFO) << "_all_check_paths is not empty when path scan."; - return; + return Status::OK(); } LOG(INFO) << "start to scan data dir path:" << _path; - std::set shards; auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); + std::vector shards; + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->list(data_path, false, &shards, 
&exists)); - Status ret = FileUtils::list_dirs_files(data_path, &shards, nullptr, Env::Default()); - if (!ret.ok()) { - LOG(WARNING) << "fail to walk dir. path=[" << data_path << "] error[" << ret.to_string() - << "]"; - return; - } - + Status ret; for (const auto& shard : shards) { - auto shard_path = fmt::format("{}/{}", data_path, shard); - std::set tablet_ids; - ret = FileUtils::list_dirs_files(shard_path, &tablet_ids, nullptr, Env::Default()); + if (shard.is_file) { + continue; + } + auto shard_path = fmt::format("{}/{}", data_path, shard.file_name); + std::vector tablet_ids; + ret = io::global_local_filesystem()->list(shard_path, false, &tablet_ids, &exists); if (!ret.ok()) { LOG(WARNING) << "fail to walk dir. [path=" << shard_path << "] error[" << ret.to_string() << "]"; @@ -671,11 +659,14 @@ void DataDir::perform_path_scan() { if (_stop_bg_worker) { break; } + if (tablet_id.is_file) { + continue; + } - auto tablet_id_path = fmt::format("{}/{}", shard_path, tablet_id); - std::set schema_hashes; - ret = FileUtils::list_dirs_files(tablet_id_path, &schema_hashes, nullptr, - Env::Default()); + auto tablet_id_path = fmt::format("{}/{}", shard_path, tablet_id.file_name); + std::vector schema_hashes; + ret = io::global_local_filesystem()->list(tablet_id_path, false, &schema_hashes, + &exists); if (!ret.ok()) { LOG(WARNING) << "fail to walk dir. 
[path=" << tablet_id_path << "]" << " error[" << ret.to_string() << "]"; @@ -683,6 +674,9 @@ void DataDir::perform_path_scan() { } for (const auto& schema_hash : schema_hashes) { + if (schema_hash.is_file) { + continue; + } int32_t interval_ms = config::path_scan_step_interval_ms; if (_stop_bg_worker) { break; @@ -690,20 +684,24 @@ void DataDir::perform_path_scan() { if (interval_ms > 0) { std::this_thread::sleep_for(std::chrono::milliseconds(interval_ms)); } - auto tablet_schema_hash_path = fmt::format("{}/{}", tablet_id_path, schema_hash); + auto tablet_schema_hash_path = + fmt::format("{}/{}", tablet_id_path, schema_hash.file_name); _all_tablet_schemahash_paths.insert(tablet_schema_hash_path); - std::set rowset_files; - ret = FileUtils::list_dirs_files(tablet_schema_hash_path, nullptr, &rowset_files, - Env::Default()); + std::vector rowset_files; + ret = io::global_local_filesystem()->list(tablet_schema_hash_path, true, + &rowset_files, &exists); if (!ret.ok()) { LOG(WARNING) << "fail to walk dir. [path=" << tablet_schema_hash_path << "] error[" << ret.to_string() << "]"; continue; } for (const auto& rowset_file : rowset_files) { + if (!rowset_file.is_file) { + continue; + } auto rowset_file_path = - fmt::format("{}/{}", tablet_schema_hash_path, rowset_file); + fmt::format("{}/{}", tablet_schema_hash_path, rowset_file.file_name); _all_check_paths.insert(rowset_file_path); } } @@ -712,15 +710,22 @@ void DataDir::perform_path_scan() { LOG(INFO) << "scan data dir path: " << _path << " finished. path size: " << _all_check_paths.size() + _all_tablet_schemahash_paths.size(); _check_path_cv.notify_one(); + return Status::OK(); } // This function is called for rowset_id path, only local rowset_id_path can be garbage. // remote path is uploaded, moved or deleted by tablet_id, // if local path has no remote path params, remote path doesn't exist. 
void DataDir::_process_garbage_path(const std::string& path) { - if (Env::Default()->path_exists(path).ok()) { + bool exists = false; + Status st = io::global_local_filesystem()->exists(path, &exists); + if (!st) { + return; + } + if (exists) { LOG(INFO) << "collect garbage dir path: " << path; - WARN_IF_ERROR(FileUtils::remove_all(path), "remove garbage dir failed. path: " + path); + WARN_IF_ERROR(io::global_local_filesystem()->delete_directory(path), + "remove garbage dir failed"); } } @@ -730,16 +735,8 @@ bool DataDir::_check_pending_ids(const std::string& id) { } Status DataDir::update_capacity() { - RETURN_NOT_OK_STATUS_WITH_WARN( - Env::Default()->get_space_info(_path, &_disk_capacity_bytes, &_available_bytes), - strings::Substitute("get_space_info failed: $0", _path)); - if (_disk_capacity_bytes < 0) { - _disk_capacity_bytes = _capacity_bytes; - } - if (_available_bytes < 0) { - _available_bytes = _capacity_bytes; - } - + RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, &_disk_capacity_bytes, + &_available_bytes)); disks_total_capacity->set_value(_disk_capacity_bytes); disks_avail_capacity->set_value(_available_bytes); LOG(INFO) << "path: " << _path << " total capacity: " << _disk_capacity_bytes @@ -804,11 +801,11 @@ Status DataDir::move_to_trash(const std::string& tablet_path) { // 3. create target dir, or the rename() function will fail. auto trash_tablet_parent = trash_tablet_path.parent_path(); - if (!FileUtils::check_exist(trash_tablet_parent) && - !FileUtils::create_dir(trash_tablet_parent).ok()) { - LOG(WARNING) << "delete file failed. due to mkdir failed. [file=" << tablet_path - << " new_dir=" << trash_tablet_parent << "]"; - return Status::Error(); + // create dir if not exists + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(trash_tablet_parent, &exists)); + if (!exists) { + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(trash_tablet_parent)); } // 4. 
move tablet to trash @@ -821,16 +818,13 @@ Status DataDir::move_to_trash(const std::string& tablet_path) { // 5. check parent dir of source file, delete it when empty std::string source_parent_dir = fs_tablet_path.parent_path(); // tablet_id level - std::set sub_dirs, sub_files; - - RETURN_WITH_WARN_IF_ERROR( - FileUtils::list_dirs_files(source_parent_dir, &sub_dirs, &sub_files, Env::Default()), - Status::OK(), "access dir failed. [dir=" + source_parent_dir); - - if (sub_dirs.empty() && sub_files.empty()) { + std::vector sub_files; + RETURN_IF_ERROR( + io::global_local_filesystem()->list(source_parent_dir, false, &sub_files, &exists)); + if (sub_files.empty()) { LOG(INFO) << "remove empty dir " << source_parent_dir; // no need to exam return status - Env::Default()->delete_dir(source_parent_dir); + io::global_local_filesystem()->delete_directory(source_parent_dir); } return Status::OK(); diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h index baa4bba93e90cc..f9aa644b911a46 100644 --- a/be/src/olap/data_dir.h +++ b/be/src/olap/data_dir.h @@ -25,10 +25,10 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/Types_types.h" #include "gen_cpp/olap_file.pb.h" #include "io/fs/file_system.h" +#include "io/fs/fs_utils.h" #include "olap/olap_common.h" #include "olap/rowset/rowset_id_generator.h" #include "util/metrics.h" @@ -84,7 +84,7 @@ class DataDir { bool is_ssd_disk() const { return _storage_medium == TStorageMedium::SSD; } - bool is_remote() const { return FilePathDesc::is_remote(_storage_medium); } + bool is_remote() const { return io::FilePathDesc::is_remote(_storage_medium); } TStorageMedium::type storage_medium() const { return _storage_medium; } @@ -109,7 +109,7 @@ class DataDir { // this function scans the paths in data dir to collect the paths to check // this is a producer function. 
After scan, it will notify the perform_path_gc function to gc - void perform_path_scan(); + Status perform_path_scan(); void perform_path_gc_by_rowsetid(); @@ -144,12 +144,12 @@ class DataDir { private: Status _init_cluster_id(); - Status _init_capacity(); + Status _init_capacity_and_create_shards(); Status _init_meta(); Status _check_disk(); Status _read_and_write_test_file(); - Status read_cluster_id(Env* env, const std::string& cluster_id_path, int32_t* cluster_id); + Status read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id); Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id); // Check whether has old format (hdr_ start) in olap. When doris updating to current version, // it may lead to data missing. When conf::storage_strict_check_incompatible_old_format is true, @@ -169,14 +169,10 @@ class DataDir { size_t _path_hash; io::FileSystemSPtr _fs; - // user specified capacity - int64_t _capacity_bytes; // the actual available capacity of the disk of this data dir - // NOTICE that _available_bytes may be larger than _capacity_bytes, if capacity is set - // by user, not the disk's actual capacity - int64_t _available_bytes; + size_t _available_bytes; // the actual capacity of the disk of this data dir - int64_t _disk_capacity_bytes; + size_t _disk_capacity_bytes; TStorageMedium::type _storage_medium; bool _is_used; diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 3893525efc02be..4e7ad41a48752a 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -31,7 +31,6 @@ #include #include -#include "env/env.h" #include "gen_cpp/Types_types.h" #include "io/io_common.h" #include "olap/olap_define.h" diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 69e4eae02d319c..55b6ef737bcf14 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -36,7 +36,6 @@ #include "olap/rowset/beta_rowset_writer.h" #include "olap/storage_engine.h" #include 
"service/point_query_executor.h" -#include "util/file_utils.h" #include "util/time.h" using std::string; @@ -338,7 +337,10 @@ void StorageEngine::_path_scan_thread_callback(DataDir* data_dir) { int32_t interval = config::path_scan_interval_second; do { LOG(INFO) << "try to perform path scan!"; - data_dir->perform_path_scan(); + Status st = data_dir->perform_path_scan(); + if (!st) { + LOG(WARNING) << "path scan failed: " << st; + } interval = config::path_scan_interval_second; if (interval <= 0) { diff --git a/be/src/olap/options.cpp b/be/src/olap/options.cpp index bf8ddb10da8f0f..1226ecd415b46d 100644 --- a/be/src/olap/options.cpp +++ b/be/src/olap/options.cpp @@ -24,9 +24,9 @@ #include "common/config.h" #include "common/logging.h" #include "common/status.h" -#include "env/env.h" #include "gutil/strings/split.h" #include "gutil/strings/substitute.h" +#include "io/fs/local_file_system.h" #include "olap/utils.h" #include "util/path_util.h" @@ -71,11 +71,7 @@ Status parse_root_path(const string& root_path, StorePath* path) { } string canonicalized_path; - Status status = Env::Default()->canonicalize(tmp_vec[0], &canonicalized_path); - if (!status.ok()) { - LOG(WARNING) << "path can not be canonicalized. may be not exist. 
path=" << tmp_vec[0]; - return Status::Error(); - } + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(tmp_vec[0], &canonicalized_path)); path->path = tmp_vec[0]; // parse root path capacity and storage medium diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index a8cfb8e0224c33..513a04a8d1c798 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -21,11 +21,11 @@ #include #include // for remove() #include // for link() -#include #include "common/status.h" #include "gutil/strings/substitute.h" #include "io/cache/file_cache_manager.h" +#include "io/fs/fs_utils.h" #include "io/fs/s3_file_system.h" #include "olap/olap_define.h" #include "olap/rowset/beta_rowset_reader.h" @@ -117,7 +117,7 @@ Status BetaRowset::get_segments_size(std::vector* segments_size) { } for (int seg_id = 0; seg_id < num_segments(); ++seg_id) { auto seg_path = segment_file_path(seg_id); - size_t file_size; + int64_t file_size; RETURN_IF_ERROR(fs->file_size(seg_path, &file_size)); segments_size->push_back(file_size); } @@ -223,7 +223,6 @@ Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, io::LocalFileSystem* local_fs = (io::LocalFileSystem*)fs.get(); for (int i = 0; i < num_segments(); ++i) { auto dst_path = segment_file_path(dir, new_rowset_id, i + new_rowset_start_seg_id); - // TODO(lingbin): use Env API? or EnvUtil? 
bool dst_path_exist = false; if (!fs->exists(dst_path, &dst_path_exist).ok() || dst_path_exist) { LOG(WARNING) << "failed to create hard link, file already exist: " << dst_path; @@ -267,23 +266,16 @@ Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) { DCHECK(is_local()); + bool exists = false; for (int i = 0; i < num_segments(); ++i) { auto dst_path = segment_file_path(dir, new_rowset_id, i); - Status status = Env::Default()->path_exists(dst_path); - if (status.ok()) { + RETURN_IF_ERROR(io::global_local_filesystem()->exists(dst_path, &exists)); + if (exists) { LOG(WARNING) << "file already exist: " << dst_path; return Status::Error(); } - if (!status.is()) { - LOG(WARNING) << "file check exist error: " << dst_path; - return Status::Error(); - } auto src_path = segment_file_path(i); - if (!Env::Default()->copy_path(src_path, dst_path).ok()) { - LOG(WARNING) << "fail to copy file. from=" << src_path << ", to=" << dst_path - << ", errno=" << Errno::no(); - return Status::Error(); - } + RETURN_IF_ERROR(io::global_local_filesystem()->copy_dirs(src_path, dst_path)); for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { const TabletIndex* index_meta = _schema->get_inverted_index(column.unique_id()); @@ -294,14 +286,8 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row std::string inverted_index_dst_file_path = InvertedIndexDescriptor::get_index_file_name(dst_path, index_meta->index_id()); - if (!Env::Default() - ->copy_path(inverted_index_src_file_path, inverted_index_dst_file_path) - .ok()) { - LOG(WARNING) << "fail to copy file. 
from=" << inverted_index_src_file_path - << ", to=" << inverted_index_dst_file_path - << ", errno=" << Errno::no(); - return Status::Error(); - } + RETURN_IF_ERROR(io::global_local_filesystem()->copy_dirs( + inverted_index_src_file_path, inverted_index_dst_file_path)); LOG(INFO) << "success to copy file. from=" << inverted_index_src_file_path << ", " << "to=" << inverted_index_dst_file_path; } diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index ad3e7815734d20..d4138acb21e509 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -23,7 +23,6 @@ #include "common/config.h" #include "common/logging.h" -#include "env/env.h" #include "gutil/strings/substitute.h" #include "io/fs/file_writer.h" #include "olap/memtable.h" diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index ea7fcf55e18f86..c2ae5f1b055be9 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -22,7 +22,6 @@ #include #include -#include "env/env.h" #include "gen_cpp/olap_file.pb.h" #include "gutil/macros.h" #include "io/fs/remote_file_system.h" diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index 18f7121d111d50..cc151afdac1802 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -27,7 +27,6 @@ #include "beta_rowset_writer.h" #include "common/config.h" #include "common/logging.h" -#include "env/env.h" #include "gutil/strings/substitute.h" #include "io/fs/file_writer.h" #include "olap/memtable.h" diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp index 7e37593b606fbd..dc819d84668e8e 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp @@ -20,7 +20,6 @@ #include #include -#include "env/env.h" #include "olap/rowset/segment_v2/common.h" 
#include "olap/rowset/segment_v2/encoding_info.h" #include "olap/rowset/segment_v2/indexed_column_writer.h" diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 62601a1e6e65ad..e0e7bc4a6f980f 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -39,7 +39,6 @@ #include "olap/rowset/segment_v2/row_ranges.h" // for RowRanges #include "olap/rowset/segment_v2/zone_map_index.h" #include "olap/tablet_schema.h" -#include "util/file_cache.h" #include "util/once.h" #include "vec/columns/column_array.h" // ColumnArray diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 89daa5a1f75225..2ffd3ee6b0bede 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -20,7 +20,6 @@ #include #include "common/logging.h" -#include "env/env.h" #include "gutil/strings/substitute.h" #include "io/fs/file_writer.h" #include "olap/rowset/segment_v2/bitmap_index_writer.h" diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index 79de411d6c77e9..10b87050aea9c6 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -21,7 +21,6 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/segment_v2.pb.h" #include "io/fs/file_reader.h" #include "io/fs/file_system.h" diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp index 9f7202e8c89c81..f4dd245f67a60c 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp @@ -20,7 +20,6 @@ #include #include "common/logging.h" -#include "env/env.h" #include "olap/key_coder.h" #include 
"olap/rowset/segment_v2/encoding_info.h" #include "olap/rowset/segment_v2/index_page.h" diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp index 26b509ff7f0c52..c3b474dfe1ab6e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp @@ -411,7 +411,7 @@ void DorisCompoundDirectory::FSIndexOutput::close() { int64_t DorisCompoundDirectory::FSIndexOutput::length() const { CND_PRECONDITION(writer != nullptr, "file is not open"); - size_t ret; + int64_t ret; if (!writer->fs()->file_size(writer->path(), &ret).ok()) { return -1; } @@ -599,7 +599,7 @@ int64_t DorisCompoundDirectory::fileLength(const char* name) const { CND_PRECONDITION(directory[0] != 0, "directory is not open"); char buffer[CL_MAX_DIR]; priv_getFN(buffer, name); - size_t size = 0; + int64_t size = -1; RETURN_IF_ERROR(fs->file_size(buffer, &size)); return size; } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 583c092c18a1f6..81fc38aeb154f8 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -24,7 +24,6 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/segment_v2.pb.h" #include "gutil/macros.h" #include "io/fs/file_system.h" diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp index 438a4849bf61f2..7e4914ccb84a44 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -18,7 +18,6 @@ #include "olap/rowset/segment_v2/ordinal_page_index.h" #include "common/logging.h" -#include "env/env.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" #include "olap/key_coder.h" diff --git 
a/be/src/olap/rowset/segment_v2/ordinal_page_index.h b/be/src/olap/rowset/segment_v2/ordinal_page_index.h index f24cd6e1037a8e..d399b8db23dfea 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h @@ -22,7 +22,6 @@ #include #include "common/status.h" -#include "env/env.h" #include "gutil/macros.h" #include "io/fs/file_reader.h" #include "olap/rowset/segment_v2/common.h" diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 64386f7b000a3d..6b27a3a875801a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -31,7 +31,6 @@ #include "olap/rowset/segment_v2/row_ranges.h" #include "olap/rowset/segment_v2/segment.h" #include "olap/schema.h" -#include "util/file_cache.h" #include "vec/exprs/vexpr.h" namespace doris { diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index a0f4c169a05693..71b2939f18d005 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -19,7 +19,6 @@ #include "common/consts.h" #include "common/logging.h" // LOG -#include "env/env.h" // Env #include "io/fs/file_writer.h" #include "olap/data_dir.h" #include "olap/primary_key_index.h" diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h index 5dc0acfb0ce23a..32005a0bd52a5e 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.h +++ b/be/src/olap/rowset/segment_v2/zone_map_index.h @@ -22,7 +22,6 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/segment_v2.pb.h" #include "io/fs/file_reader.h" #include "olap/field.h" diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index df65038cf9d165..687f76a0ec97d0 100644 --- a/be/src/olap/snapshot_manager.cpp +++ 
b/be/src/olap/snapshot_manager.cpp @@ -29,8 +29,8 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/Types_constants.h" +#include "io/fs/local_file_system.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" #include "olap/rowset/rowset_writer.h" @@ -98,16 +98,12 @@ Status SnapshotManager::release_snapshot(const string& snapshot_path) { auto stores = StorageEngine::instance()->get_stores(); for (auto store : stores) { std::string abs_path; - RETURN_WITH_WARN_IF_ERROR(Env::Default()->canonicalize(store->path(), &abs_path), - Status::Error(), - "canonical path " + store->path() + "failed"); - + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(store->path(), &abs_path)); if (snapshot_path.compare(0, abs_path.size(), abs_path) == 0 && snapshot_path.compare(abs_path.size() + 1, SNAPSHOT_PREFIX.size(), SNAPSHOT_PREFIX) == 0) { - Env::Default()->delete_dir(snapshot_path); + RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(snapshot_path)); LOG(INFO) << "success to release snapshot path. [path='" << snapshot_path << "']"; - return Status::OK(); } } @@ -121,7 +117,9 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); Status res = Status::OK(); // check clone dir existed - if (!FileUtils::check_exist(clone_dir)) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(clone_dir, &exists)); + if (!exists) { res = Status::Error(); LOG(WARNING) << "clone dir not existed when convert rowsetids. 
clone_dir=" << clone_dir; return res; @@ -367,19 +365,16 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet auto header_path = _get_header_full_path(ref_tablet, schema_full_path); // /schema_full_path/tablet_id.hdr.json auto json_header_path = _get_json_header_full_path(ref_tablet, schema_full_path); - if (FileUtils::check_exist(schema_full_path)) { - VLOG_TRACE << "remove the old schema_full_path."; - FileUtils::remove_all(schema_full_path); + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(schema_full_path, &exists)); + if (exists) { + VLOG_TRACE << "remove the old schema_full_path." << schema_full_path; + RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(schema_full_path)); } - RETURN_WITH_WARN_IF_ERROR(FileUtils::create_dir(schema_full_path), - Status::Error(), - "create path " + schema_full_path + " failed"); - + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(schema_full_path)); string snapshot_id; - RETURN_WITH_WARN_IF_ERROR(FileUtils::canonicalize(snapshot_id_path, &snapshot_id), - Status::Error(), - "canonicalize path " + snapshot_id_path + " failed"); + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(snapshot_id_path, &snapshot_id)); do { TabletMetaSharedPtr new_tablet_meta(new (nothrow) TabletMeta()); @@ -542,9 +537,11 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet LOG(WARNING) << "fail to make snapshot, try to delete the snapshot path. path=" << snapshot_id_path.c_str(); - if (FileUtils::check_exist(snapshot_id_path)) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(snapshot_id_path, &exists)); + if (exists) { VLOG_NOTICE << "remove snapshot path. 
[path=" << snapshot_id_path << "]"; - FileUtils::remove_all(snapshot_id_path); + RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(snapshot_id_path)); } } else { *snapshot_path = snapshot_id; diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h index a7857c3ffb023b..982bf363758794 100644 --- a/be/src/olap/snapshot_manager.h +++ b/be/src/olap/snapshot_manager.h @@ -36,7 +36,6 @@ #include "olap/tablet.h" #include "olap/tablet_meta_manager.h" #include "util/doris_metrics.h" -#include "util/file_utils.h" namespace doris { diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 8de3f181488ff0..154145d8234442 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -35,8 +35,7 @@ #include #include "agent/task_worker_pool.h" -#include "env/env.h" -#include "env/env_util.h" +#include "io/fs/local_file_system.h" #include "olap/base_compaction.h" #include "olap/cumulative_compaction.h" #include "olap/data_dir.h" @@ -52,7 +51,6 @@ #include "olap/tablet_meta_manager.h" #include "olap/utils.h" #include "util/doris_metrics.h" -#include "util/file_utils.h" #include "util/pretty_printer.h" #include "util/scoped_cleanup.h" #include "util/time.h" @@ -775,7 +773,9 @@ void StorageEngine::_clean_unused_txns() { Status StorageEngine::_do_sweep(const std::string& scan_root, const time_t& local_now, const int32_t expire) { Status res = Status::OK(); - if (!FileUtils::check_exist(scan_root)) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(scan_root, &exists)); + if (!exists) { // dir not existed. no need to sweep trash. return res; } @@ -808,11 +808,8 @@ Status StorageEngine::_do_sweep(const std::string& scan_root, const time_t& loca string path_name = sorted_path.string(); if (difftime(local_now, mktime(&local_tm_create)) >= actual_expire) { - Status ret = FileUtils::remove_all(path_name); - if (!ret.ok()) { - LOG(WARNING) << "fail to remove file or directory. 
path_desc: " << scan_root - << ", error=" << ret.to_string(); - res = Status::Error(); + res = io::global_local_filesystem()->delete_directory(path_name); + if (!res.ok()) { continue; } } else { diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index e501e17f39a33b..0d46ae5f8d59a8 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -25,7 +25,6 @@ #include #include -#include "env/env.h" #include "gutil/strings/strcat.h" #include "olap/base_compaction.h" #include "olap/cumulative_compaction.h" @@ -40,7 +39,6 @@ #include "runtime/thread_context.h" #include "service/backend_options.h" #include "util/doris_metrics.h" -#include "util/file_utils.h" #include "util/histogram.h" #include "util/path_util.h" #include "util/scoped_cleanup.h" @@ -399,15 +397,18 @@ TabletSharedPtr TabletManager::_create_tablet_meta_and_dir_unlocked( // Because the tablet is removed asynchronously, so that the dir may still exist when BE // receive create-tablet request again, For example retried schema-change request - if (FileUtils::check_exist(schema_hash_dir)) { + bool exists = true; + res = io::global_local_filesystem()->exists(schema_hash_dir, &exists); + if (!res.ok()) { + continue; + } + if (exists) { LOG(WARNING) << "skip this dir because tablet path exist, path=" << schema_hash_dir; continue; } else { data_dir->add_pending_ids(pending_id); - Status st = FileUtils::create_dir(schema_hash_dir); + Status st = io::global_local_filesystem()->create_directory(schema_hash_dir); if (!st.ok()) { - LOG(WARNING) << "create dir fail. path=" << schema_hash_dir - << " error=" << st.to_string(); continue; } } @@ -770,10 +771,14 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_ // For case 2, If a tablet has just been copied to local BE, // it may be cleared by gc-thread(see perform_path_gc_by_tablet) because the tablet meta may not be loaded to memory. 
// So clone task should check path and then failed and retry in this case. - if (check_path && !Env::Default()->path_exists(tablet->tablet_path()).ok()) { - LOG(WARNING) << "tablet path not exists, create tablet failed, path=" - << tablet->tablet_path(); - return Status::Error(); + if (check_path) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(tablet->tablet_path(), &exists)); + if (!exists) { + LOG(WARNING) << "tablet path not exists, create tablet failed, path=" + << tablet->tablet_path(); + return Status::Error(); + } } if (tablet_meta->tablet_state() == TABLET_SHUTDOWN) { @@ -824,9 +829,10 @@ Status TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id, TabletMeta::reset_tablet_uid(header_path), strings::Substitute("failed to set tablet uid when copied meta file. header_path=%0", header_path)); - ; - if (!Env::Default()->path_exists(header_path).ok()) { + bool exists = false; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(header_path, &exists)); + if (!exists) { LOG(WARNING) << "fail to find header file. 
[header_path=" << header_path << "]"; return Status::Error(); } @@ -965,7 +971,12 @@ Status TabletManager::start_trash_sweep() { } // move data to trash const auto& tablet_path = (*it)->tablet_path(); - if (Env::Default()->path_exists(tablet_path).ok()) { + bool exists = false; + Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists); + if (!exists_st) { + continue; + } + if (exists) { // take snapshot of tablet meta auto meta_file_path = fmt::format("{}/{}.hdr", tablet_path, (*it)->tablet_id()); (*it)->tablet_meta()->save(meta_file_path); @@ -989,7 +1000,12 @@ Status TabletManager::start_trash_sweep() { } else { // if could not find tablet info in meta store, then check if dir existed const auto& tablet_path = (*it)->tablet_path(); - if (Env::Default()->path_exists(tablet_path).ok()) { + bool exists = false; + Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists); + if (!exists_st) { + continue; + } + if (exists) { LOG(WARNING) << "errors while load meta from store, skip this tablet. " << "tablet_id=" << (*it)->tablet_id() << ", schema_hash=" << (*it)->schema_hash(); @@ -1049,7 +1065,9 @@ void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t } // TODO(ygl): may do other checks in the future - if (Env::Default()->path_exists(schema_hash_path).ok()) { + bool exists = false; + Status exists_st = io::global_local_filesystem()->exists(schema_hash_path, &exists); + if (exists_st && exists) { LOG(INFO) << "start to move tablet to trash. 
tablet_path = " << schema_hash_path; Status rm_st = data_dir->move_to_trash(schema_hash_path); if (!rm_st.ok()) { diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 1b4156f2388ad0..1689d545127e4a 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -21,7 +21,6 @@ #include #include -#include "env/env.h" #include "gen_cpp/BackendService.h" #include "gen_cpp/Types_constants.h" #include "gutil/strings/split.h" @@ -164,17 +163,11 @@ Status EngineCloneTask::_do_clone() { << " src_file_path: " << src_file_path; string header_path = TabletMeta::construct_header_file_path(tablet_dir, _clone_req.tablet_id); - status = TabletMeta::reset_tablet_uid(header_path); - if (!status.ok()) { - return status; - } - status = StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( - store, _clone_req.tablet_id, _clone_req.schema_hash, tablet_dir, false); - if (!status.ok()) { - return status; - } + RETURN_IF_ERROR(TabletMeta::reset_tablet_uid(header_path)); + RETURN_IF_ERROR(StorageEngine::instance()->tablet_manager()->load_tablet_from_dir( + store, _clone_req.tablet_id, _clone_req.schema_hash, tablet_dir, false)); // clone success, delete .hdr file because tablet meta is stored in rocksdb - FileUtils::remove(header_path); + RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(header_path)); } return _set_tablet_info(is_new_tablet); } @@ -366,8 +359,8 @@ Status EngineCloneTask::_download_files(DataDir* data_dir, const std::string& re // for example, BE clone from BE 1 to download file 1 with version (2,2), but clone from BE 1 failed // then it will try to clone from BE 2, but it will find the file 1 already exist, but file 1 with same // name may have different versions. 
- RETURN_IF_ERROR(FileUtils::remove_all(local_path)); - RETURN_IF_ERROR(FileUtils::create_dir(local_path)); + RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(local_path)); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(local_path)); // Get remote dir file list string file_list_str; @@ -477,7 +470,9 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const std::string& clone_d Defer remove_clone_dir {[&]() { std::filesystem::remove_all(clone_dir); }}; // check clone dir existed - if (!FileUtils::check_exist(clone_dir)) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(clone_dir, &exists)); + if (!exists) { return Status::InternalError("clone dir not existed. clone_dir={}", clone_dir); } @@ -489,28 +484,40 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const std::string& clone_d RETURN_IF_ERROR(cloned_tablet_meta->create_from_file(cloned_tablet_meta_file)); // remove the cloned meta file - FileUtils::remove(cloned_tablet_meta_file); + RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(cloned_tablet_meta_file)); // check all files in /clone and /tablet - set clone_files; - RETURN_IF_ERROR(FileUtils::list_dirs_files(clone_dir, nullptr, &clone_files, Env::Default())); + std::vector clone_files; + RETURN_IF_ERROR(io::global_local_filesystem()->list(clone_dir, true, &clone_files, &exists)); + std::unordered_set clone_file_names; + for (auto& file : clone_files) { + clone_file_names.insert(file.file_name); + } - set local_files; + std::vector local_files; const auto& tablet_dir = tablet->tablet_path(); - RETURN_IF_ERROR(FileUtils::list_dirs_files(tablet_dir, nullptr, &local_files, Env::Default())); + RETURN_IF_ERROR(io::global_local_filesystem()->list(tablet_dir, true, &local_files, &exists)); + std::unordered_set local_file_names; + for (auto& file : local_files) { + local_file_names.insert(file.file_name); + } Status status; std::vector linked_success_files; Defer remove_linked_files 
{[&]() { // clear linked files if errors happen if (!status.ok()) { - FileUtils::remove_paths(linked_success_files); + std::vector paths; + for (auto& file : linked_success_files) { + paths.emplace_back(file); + } + io::global_local_filesystem()->batch_delete(paths); } }}; /// Traverse all downloaded clone files in CLONE dir. /// If it does not exist in local tablet dir, link the file to local tablet dir /// And save all linked files in linked_success_files. - for (const string& clone_file : clone_files) { - if (local_files.find(clone_file) != local_files.end()) { + for (const string& clone_file : clone_file_names) { + if (local_file_names.find(clone_file) != local_file_names.end()) { VLOG_NOTICE << "find same file when clone, skip it. " << "tablet=" << tablet->full_name() << ", clone_file=" << clone_file; continue; @@ -518,10 +525,7 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const std::string& clone_d auto from = fmt::format("{}/{}", clone_dir, clone_file); auto to = fmt::format("{}/{}", tablet_dir, clone_file); - if (link(from.c_str(), to.c_str()) != 0) { - status = Status::InternalError("failed to create hard link. from={}, to={}", from, to); - return status; - } + RETURN_IF_ERROR(io::global_local_filesystem()->link_file(from, to)); linked_success_files.emplace_back(std::move(to)); } diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 43c6b4263d985b..43c593b509d607 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -200,12 +200,13 @@ Status EngineStorageMigrationTask::_migrate() { full_path = SnapshotManager::get_schema_hash_full_path(_tablet, shard_path); // if dir already exist then return err, it should not happen. // should not remove the dir directly, for safety reason. 
- if (FileUtils::check_exist(full_path)) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(full_path, &exists)); + if (exists) { return Status::AlreadyExist("schema hash path {} already exist, skip this path", full_path); } - - RETURN_IF_ERROR(FileUtils::create_dir(full_path)); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(full_path)); } std::vector temp_consistent_rowsets(consistent_rowsets); @@ -269,7 +270,7 @@ Status EngineStorageMigrationTask::_migrate() { if (!res.ok()) { // we should remove the dir directly for avoid disk full of junk data, and it's safe to remove - FileUtils::remove_all(full_path); + io::global_local_filesystem()->delete_directory(full_path); } return res; } diff --git a/be/src/olap/utils.cpp b/be/src/olap/utils.cpp index 755f73c08ec799..1f19881aeed2a9 100644 --- a/be/src/olap/utils.cpp +++ b/be/src/olap/utils.cpp @@ -33,8 +33,6 @@ #include #include -#include "util/file_utils.h" - #ifdef DORIS_WITH_LZO #include #include @@ -44,7 +42,6 @@ #include "common/logging.h" #include "common/status.h" -#include "env/env.h" #include "gutil/strings/substitute.h" #include "io/fs/file_reader.h" #include "io/fs/file_writer.h" @@ -492,19 +489,14 @@ Status read_write_test_file(const string& test_file_path) { return io::global_local_filesystem()->delete_file(test_file_path); } -bool check_datapath_rw(const string& path) { - if (!FileUtils::check_exist(path)) return false; - string file_path = path + "/.read_write_test_file"; - try { - Status res = read_write_test_file(file_path); - return res.ok(); - } catch (...) { - // do nothing +Status check_datapath_rw(const string& path) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &exists)); + if (!exists) { + return Status::IOError("path does not exist: {}", path); } - LOG(WARNING) << "error when try to read and write temp file under the data path and return " - "false. 
[path=" - << path << "]"; - return false; + string file_path = path + "/.read_write_test_file"; + return read_write_test_file(file_path); } __thread char Errno::_buf[BUF_SIZE]; ///< buffer instance diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 5fb39e65c94d63..1ccaec98ee4c74 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -154,7 +154,7 @@ int operator-(const BinarySearchIterator& left, const BinarySearchIterator& righ // 不用sse4指令的crc32c的计算函数 unsigned int crc32c_lut(char const* b, unsigned int off, unsigned int len, unsigned int crc); -bool check_datapath_rw(const std::string& path); +Status check_datapath_rw(const std::string& path); Status read_write_test_file(const std::string& test_file_path); diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 335d7becd6a3b6..c855e23f3f7a39 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -50,7 +50,6 @@ set(RUNTIME_FILES fragment_mgr.cpp load_path_mgr.cpp types.cpp - tmp_file_mgr.cc load_channel_mgr.cpp load_channel.cpp tablets_channel.cpp diff --git a/be/src/runtime/block_spill_manager.cpp b/be/src/runtime/block_spill_manager.cpp index 029e6b4e0d0dec..b627b5543f3ca4 100644 --- a/be/src/runtime/block_spill_manager.cpp +++ b/be/src/runtime/block_spill_manager.cpp @@ -21,8 +21,7 @@ #include #include -#include "env/env_posix.h" -#include "util/file_utils.h" +#include "io/fs/local_file_system.h" #include "util/time.h" #include "vec/core/block_spill_reader.h" #include "vec/core/block_spill_writer.h" @@ -35,19 +34,21 @@ BlockSpillManager::BlockSpillManager(const std::vector& paths) : _sto Status BlockSpillManager::init() { for (const auto& path : _store_paths) { auto dir = fmt::format("{}/{}", path.path, BLOCK_SPILL_GC_DIR); - if (!FileUtils::check_exist(dir)) { - RETURN_IF_ERROR(FileUtils::create_dir(dir)); + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(dir, &exists)); + if (!exists) { + 
RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(dir)); } dir = fmt::format("{}/{}", path.path, BLOCK_SPILL_DIR); - if (!FileUtils::check_exist(dir)) { - RETURN_IF_ERROR(FileUtils::create_dir(dir)); + RETURN_IF_ERROR(io::global_local_filesystem()->exists(dir, &exists)); + if (!exists) { + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(dir)); } else { auto suffix = ToStringFromUnixMillis(UnixMillis()); auto gc_dir = fmt::format("{}/{}/{}", path.path, BLOCK_SPILL_GC_DIR, suffix); - if (Env::Default()->rename_dir(dir, gc_dir).ok()) { - RETURN_IF_ERROR(FileUtils::create_dir(dir)); - } + RETURN_IF_ERROR(io::global_local_filesystem()->rename_dir(dir, gc_dir)); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(dir)); } } @@ -58,33 +59,36 @@ void BlockSpillManager::gc(int64_t max_file_count) { if (max_file_count < 1) { return; } + bool exists = true; int64_t count = 0; for (const auto& path : _store_paths) { std::string gc_root_dir = fmt::format("{}/{}", path.path, BLOCK_SPILL_GC_DIR); - std::set dirs; - auto st = FileUtils::list_dirs_files(gc_root_dir, &dirs, nullptr, Env::Default()); + std::vector dirs; + auto st = io::global_local_filesystem()->list(gc_root_dir, false, &dirs, &exists); if (!st.ok()) { continue; } for (const auto& dir : dirs) { - std::string abs_dir = fmt::format("{}/{}", gc_root_dir, dir); - - std::set files; - st = FileUtils::list_dirs_files(abs_dir, nullptr, &files, Env::Default()); + if (dir.is_file) { + continue; + } + std::string abs_dir = fmt::format("{}/{}", gc_root_dir, dir.file_name); + std::vector files; + st = io::global_local_filesystem()->list(abs_dir, true, &files, &exists); if (!st.ok()) { continue; } if (files.empty()) { - FileUtils::remove(abs_dir); + io::global_local_filesystem()->delete_directory(abs_dir); if (count++ == max_file_count) { return; } continue; } for (const auto& file : files) { - auto abs_file_path = fmt::format("{}/{}", abs_dir, file); - FileUtils::remove(abs_file_path); 
+ auto abs_file_path = fmt::format("{}/{}", abs_dir, file.file_name); + io::global_local_filesystem()->delete_file(abs_file_path); if (count++ == max_file_count) { return; } @@ -129,4 +133,4 @@ void BlockSpillManager::remove(int64_t stream_id) { std::lock_guard l(lock_); id_to_file_paths_.erase(stream_id); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 226312f486dae1..be7ce3dce93db6 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -56,7 +56,6 @@ class ResultBufferMgr; class ResultQueueMgr; class TMasterInfo; class LoadChannelMgr; -class TmpFileMgr; class WebPageHandler; class StreamLoadExecutor; class RoutineLoadTaskExecutor; @@ -146,7 +145,6 @@ class ExecEnv { ResultCache* result_cache() { return _result_cache; } TMasterInfo* master_info() { return _master_info; } LoadPathMgr* load_path_mgr() { return _load_path_mgr; } - TmpFileMgr* tmp_file_mgr() { return _tmp_file_mgr; } BfdParser* bfd_parser() const { return _bfd_parser; } BrokerMgr* broker_mgr() const { return _broker_mgr; } BrpcClientCache* brpc_internal_client_cache() const { @@ -227,7 +225,6 @@ class ExecEnv { ResultCache* _result_cache = nullptr; TMasterInfo* _master_info = nullptr; LoadPathMgr* _load_path_mgr = nullptr; - TmpFileMgr* _tmp_file_mgr = nullptr; BfdParser* _bfd_parser = nullptr; BrokerMgr* _broker_mgr = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 6570852fa663c6..344d3280bbbd06 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -43,7 +43,6 @@ #include "runtime/small_file_mgr.h" #include "runtime/stream_load/new_load_stream_mgr.h" #include "runtime/stream_load/stream_load_executor.h" -#include "runtime/tmp_file_mgr.h" #include "service/point_query_executor.h" #include "util/bfd_parser.h" #include "util/brpc_client_cache.h" @@ -123,7 +122,6 @@ Status ExecEnv::_init(const std::vector& 
store_paths) { config::query_cache_elasticity_size_mb); _master_info = new TMasterInfo(); _load_path_mgr = new LoadPathMgr(this); - _tmp_file_mgr = new TmpFileMgr(this); _bfd_parser = BfdParser::create(); _broker_mgr = new BrokerMgr(this); _load_channel_mgr = new LoadChannelMgr(); @@ -258,7 +256,6 @@ Status ExecEnv::_init_mem_env() { << ", origin config value: " << config::inverted_index_query_cache_limit; // 4. init other managers - RETURN_IF_ERROR(_tmp_file_mgr->init()); RETURN_IF_ERROR(_block_spill_mgr->init()); // 5. init chunk allocator @@ -338,7 +335,6 @@ void ExecEnv::_destroy() { SAFE_DELETE(_load_channel_mgr); SAFE_DELETE(_broker_mgr); SAFE_DELETE(_bfd_parser); - SAFE_DELETE(_tmp_file_mgr); SAFE_DELETE(_load_path_mgr); SAFE_DELETE(_master_info); SAFE_DELETE(_pipeline_task_scheduler); diff --git a/be/src/runtime/load_path_mgr.cpp b/be/src/runtime/load_path_mgr.cpp index 510512f2f5b21d..239aaffcd94f59 100644 --- a/be/src/runtime/load_path_mgr.cpp +++ b/be/src/runtime/load_path_mgr.cpp @@ -24,12 +24,11 @@ #include #include -#include "env/env.h" #include "gen_cpp/Types_types.h" +#include "io/fs/local_file_system.h" #include "olap/olap_define.h" #include "olap/storage_engine.h" #include "runtime/exec_env.h" -#include "util/file_utils.h" namespace doris { using namespace ErrorCode; @@ -61,7 +60,7 @@ Status LoadPathMgr::init() { // error log is saved in first root path _error_log_dir = _exec_env->store_paths()[0].path + "/" + ERROR_LOG_PREFIX; // check and make dir - RETURN_IF_ERROR(FileUtils::create_dir(_error_log_dir)); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(_error_log_dir)); _idx = 0; _reserved_hours = std::max(config::load_data_reserve_hours, 1L); @@ -94,12 +93,10 @@ Status LoadPathMgr::allocate_dir(const std::string& db, const std::string& label path = _path_vec[_idx] + "/" + db + "/" + shard + "/" + label; _idx = (_idx + 1) % size; } - status = FileUtils::create_dir(path); + status = 
io::global_local_filesystem()->create_directory(path); if (LIKELY(status.ok())) { *prefix = path; return Status::OK(); - } else { - LOG(WARNING) << "create dir failed:" << path << ", error msg:" << status; } } @@ -141,10 +138,7 @@ Status LoadPathMgr::get_load_error_file_name(const std::string& db, const std::s } std::string shard_path = _error_log_dir + "/" + shard; // check and create shard path - Status status = FileUtils::create_dir(shard_path); - if (!status.ok()) { - LOG(WARNING) << "create error sub path failed. path=" << shard_path; - } + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(shard_path)); // add shard sub dir to file path ss << shard << "/" << ERROR_FILE_NAME << "_" << db << "_" << label << "_" << std::hex << fragment_instance_id.hi << "_" << fragment_instance_id.lo; @@ -165,7 +159,7 @@ void LoadPathMgr::process_path(time_t now, const std::string& path, int64_t rese return; } LOG(INFO) << "Going to remove path. path=" << path; - Status status = FileUtils::remove_all(path); + Status status = io::global_local_filesystem()->delete_directory(path); if (status.ok()) { LOG(INFO) << "Remove path success. path=" << path; } else { @@ -174,40 +168,44 @@ void LoadPathMgr::process_path(time_t now, const std::string& path, int64_t rese } void LoadPathMgr::clean_one_path(const std::string& path) { - Env* env = Env::Default(); - - std::vector dbs; - Status status = FileUtils::list_files(env, path, &dbs); - // path may not exist - if (!status.ok() && !status.is()) { - LOG(WARNING) << "scan one path to delete directory failed. 
path=" << path; + bool exists = true; + std::vector dbs; + Status st = io::global_local_filesystem()->list(path, false, &dbs, &exists); + if (!st) { return; } + Status status; time_t now = time(nullptr); for (auto& db : dbs) { - std::string db_dir = path + "/" + db; - std::vector sub_dirs; - status = FileUtils::list_files(env, db_dir, &sub_dirs); + if (db.is_file) { + continue; + } + std::string db_dir = path + "/" + db.file_name; + std::vector sub_dirs; + status = io::global_local_filesystem()->list(db_dir, false, &sub_dirs, &exists); if (!status.ok()) { - LOG(WARNING) << "scan db of trash dir failed, continue. dir=" << db_dir; + LOG(WARNING) << "scan db of trash dir failed: " << status; continue; } // delete this file for (auto& sub_dir : sub_dirs) { - std::string sub_path = db_dir + "/" + sub_dir; + if (sub_dir.is_file) { + continue; + } + std::string sub_path = db_dir + "/" + sub_dir.file_name; // for compatible - if (sub_dir.find(SHARD_PREFIX) == 0) { + if (sub_dir.file_name.find(SHARD_PREFIX) == 0) { // sub_dir starts with SHARD_PREFIX // process shard sub dir - std::vector labels; - Status status = FileUtils::list_files(env, sub_path, &labels); + std::vector labels; + status = io::global_local_filesystem()->list(sub_path, false, &labels, &exists); if (!status.ok()) { - LOG(WARNING) << "scan one path to delete directory failed. 
path=" << sub_path; + LOG(WARNING) << "scan one path to delete directory failed: " << status; continue; } for (auto& label : labels) { - std::string label_dir = sub_path + "/" + label; + std::string label_dir = sub_path + "/" + label.file_name; process_path(now, label_dir, config::load_data_reserve_hours); } } else { @@ -226,30 +224,33 @@ void LoadPathMgr::clean() { } void LoadPathMgr::clean_error_log() { - Env* env = Env::Default(); - time_t now = time(nullptr); - std::vector sub_dirs; - Status status = FileUtils::list_files(env, _error_log_dir, &sub_dirs); + bool exists = true; + std::vector sub_dirs; + Status status = io::global_local_filesystem()->list(_error_log_dir, false, &sub_dirs, &exists); if (!status.ok()) { - LOG(WARNING) << "scan error_log dir failed. dir=" << _error_log_dir; + LOG(WARNING) << "scan error_log dir failed: " << status; return; } for (auto& sub_dir : sub_dirs) { - std::string sub_path = _error_log_dir + "/" + sub_dir; + if (sub_dir.is_file) { + continue; + } + std::string sub_path = _error_log_dir + "/" + sub_dir.file_name; // for compatible - if (sub_dir.find(SHARD_PREFIX) == 0) { + if (sub_dir.file_name.find(SHARD_PREFIX) == 0) { // sub_dir starts with SHARD_PREFIX // process shard sub dir - std::vector error_log_files; - Status status = FileUtils::list_files(env, sub_path, &error_log_files); + std::vector error_log_files; + Status status = + io::global_local_filesystem()->list(sub_path, false, &error_log_files, &exists); if (!status.ok()) { - LOG(WARNING) << "scan one path to delete directory failed. 
path=" << sub_path; + LOG(WARNING) << "scan one path to delete directory failed: " << status; continue; } for (auto& error_log : error_log_files) { - std::string error_log_path = sub_path + "/" + error_log; + std::string error_log_path = sub_path + "/" + error_log.file_name; process_path(now, error_log_path, config::load_error_log_reserve_hours); } } else { diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index dcd06518e6ca71..9cad1a1f68ebbe 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -34,7 +34,6 @@ #include "runtime/load_path_mgr.h" #include "runtime/memory/mem_tracker.h" #include "runtime/runtime_filter_mgr.h" -#include "util/file_utils.h" #include "util/pretty_printer.h" #include "util/timezone_utils.h" #include "util/uid_util.h" diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index f5e118e3814eab..2496249d24ba50 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -41,7 +41,6 @@ class DateTimeValue; class MemTracker; class DataStreamRecvr; class ResultBufferMgr; -class TmpFileMgr; class BufferedBlockMgr; class RowDescriptor; class RuntimeFilterMgr; diff --git a/be/src/runtime/small_file_mgr.cpp b/be/src/runtime/small_file_mgr.cpp index 883cdbc5c818ea..cfa367bbc4d16e 100644 --- a/be/src/runtime/small_file_mgr.cpp +++ b/be/src/runtime/small_file_mgr.cpp @@ -23,13 +23,12 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/HeartbeatService.h" #include "gutil/strings/split.h" #include "http/http_client.h" +#include "io/fs/local_file_system.h" #include "runtime/exec_env.h" #include "util/doris_metrics.h" -#include "util/file_utils.h" #include "util/md5.h" #include "util/string_util.h" @@ -55,20 +54,20 @@ Status SmallFileMgr::init() { } Status SmallFileMgr::_load_local_files() { - RETURN_IF_ERROR(FileUtils::create_dir(_local_path)); + 
RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(_local_path)); - auto scan_cb = [this](const char* file) { - if (is_dot_or_dotdot(file)) { + auto scan_cb = [this](const io::FileInfo& file) { + if (!file.is_file) { return true; } - auto st = _load_single_file(_local_path, file); + auto st = _load_single_file(_local_path, file.file_name); if (!st.ok()) { LOG(WARNING) << "load small file failed: " << st; } return true; }; - RETURN_IF_ERROR(Env::Default()->iterate_dir(_local_path, scan_cb)); + RETURN_IF_ERROR(io::global_local_filesystem()->iterate_directory(_local_path, scan_cb)); return Status::OK(); } @@ -87,7 +86,7 @@ Status SmallFileMgr::_load_single_file(const std::string& path, const std::strin } std::string file_md5; - RETURN_IF_ERROR(FileUtils::md5sum(path + "/" + file_name, &file_md5)); + RETURN_IF_ERROR(io::global_local_filesystem()->md5sum(path + "/" + file_name, &file_md5)); if (file_md5 != md5) { return Status::InternalError("Invalid md5 of file: {}", file_name); } @@ -129,11 +128,13 @@ Status SmallFileMgr::get_file(int64_t file_id, const std::string& md5, std::stri } Status SmallFileMgr::_check_file(const CacheEntry& entry, const std::string& md5) { - if (!FileUtils::check_exist(entry.path)) { - return Status::InternalError("file not exist"); + bool exists; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(entry.path, &exists)); + if (!exists) { + return Status::InternalError("file not exist: {}", entry.path); } if (!iequal(md5, entry.md5)) { - return Status::InternalError("invalid MD5"); + return Status::InternalError("invalid MD5 of file: {}", entry.path); } return Status::OK(); } diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index 846ce688e273cb..50e0be587eff18 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -20,7 +20,6 @@ #include #include "common/logging.h" -#include "env/env.h" #include "gen_cpp/FrontendService.h" #include 
"gen_cpp/FrontendService_types.h" #include "gen_cpp/HeartbeatService_types.h" @@ -28,6 +27,7 @@ #include "gen_cpp/TPaloBrokerService.h" #include "io/fs/broker_file_system.h" #include "io/fs/hdfs_file_system.h" +#include "io/fs/local_file_system.h" #include "io/fs/s3_file_system.h" #include "io/hdfs_builder.h" #include "olap/snapshot_manager.h" @@ -35,7 +35,6 @@ #include "olap/tablet.h" #include "runtime/broker_mgr.h" #include "runtime/exec_env.h" -#include "util/file_utils.h" #include "util/s3_uri.h" #include "util/thrift_rpc_helper.h" @@ -70,7 +69,7 @@ Status SnapshotLoader::init(TStorageBackendType::type type, const std::string& l _remote_fs = std::move(fs); } else if (TStorageBackendType::type::BROKER == type) { std::shared_ptr fs; - RETURN_IF_ERROR(io::BrokerFileSystem::create(_broker_addr, _prop, 0, &fs)); + RETURN_IF_ERROR(io::BrokerFileSystem::create(_broker_addr, _prop, &fs)); _remote_fs = std::move(fs); } else { return Status::InternalError("Unknown storage tpye: {}", type); @@ -132,13 +131,8 @@ Status SnapshotLoader::upload(const std::map& src_to_d const std::string& local_file = *it; // calc md5sum of localfile std::string md5sum; - status = FileUtils::md5sum(src_path + "/" + local_file, &md5sum); - if (!status.ok()) { - std::stringstream ss; - ss << "failed to get md5sum of file: " << local_file << ": " << status; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } + RETURN_IF_ERROR( + io::global_local_filesystem()->md5sum(src_path + "/" + local_file, &md5sum)); VLOG_CRITICAL << "get file checksum: " << local_file << ": " << md5sum; local_files_with_checksum.push_back(local_file + "." 
+ md5sum); @@ -263,7 +257,8 @@ Status SnapshotLoader::download(const std::map& src_to } else { // check checksum std::string local_md5sum; - Status st = FileUtils::md5sum(local_path + "/" + remote_file, &local_md5sum); + Status st = io::global_local_filesystem()->md5sum( + local_path + "/" + remote_file, &local_md5sum); if (!st.ok()) { LOG(WARNING) << "failed to get md5sum of local file: " << remote_file << ". msg: " << st << ". download it"; @@ -305,13 +300,8 @@ Status SnapshotLoader::download(const std::map& src_to // 3. check md5 of the downloaded file std::string downloaded_md5sum; - status = FileUtils::md5sum(full_local_file, &downloaded_md5sum); - if (!status.ok()) { - std::stringstream ss; - ss << "failed to get md5sum of file: " << full_local_file << ", err: " << status; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } + RETURN_IF_ERROR( + io::global_local_filesystem()->md5sum(full_local_file, &downloaded_md5sum)); VLOG_CRITICAL << "get downloaded file checksum: " << full_local_file << ": " << downloaded_md5sum; if (downloaded_md5sum != file_stat.md5) { @@ -531,6 +521,7 @@ Status SnapshotLoader::_get_tablet_id_and_schema_hash_from_file_path(const std:: Status SnapshotLoader::_check_local_snapshot_paths( const std::map& src_to_dest_path, bool check_src) { + bool res = true; for (const auto& pair : src_to_dest_path) { std::string path; if (check_src) { @@ -538,7 +529,9 @@ Status SnapshotLoader::_check_local_snapshot_paths( } else { path = pair.second; } - if (!FileUtils::is_dir(path)) { + + RETURN_IF_ERROR(io::global_local_filesystem()->is_directory(path, &res)); + if (!res) { std::stringstream ss; ss << "snapshot path is not directory or does not exist: " << path; LOG(WARNING) << ss.str(); @@ -551,12 +544,11 @@ Status SnapshotLoader::_check_local_snapshot_paths( Status SnapshotLoader::_get_existing_files_from_local(const std::string& local_path, std::vector* local_files) { - Status status = FileUtils::list_files(Env::Default(), 
local_path, local_files); - if (!status.ok()) { - std::stringstream ss; - ss << "failed to list files in local path: " << local_path << ", msg: " << status; - LOG(WARNING) << ss.str(); - return status; + bool exists = true; + std::vector files; + RETURN_IF_ERROR(io::global_local_filesystem()->list(local_path, true, &files, &exists)); + for (auto& file : files) { + local_files->push_back(file.file_name); } LOG(INFO) << "finished to list files in local path: " << local_path << ", file num: " << local_files->size(); diff --git a/be/src/runtime/snapshot_loader.h b/be/src/runtime/snapshot_loader.h index 2451c075182725..3a5ea5925bfade 100644 --- a/be/src/runtime/snapshot_loader.h +++ b/be/src/runtime/snapshot_loader.h @@ -34,7 +34,7 @@ namespace doris { struct FileStat { std::string name; std::string md5; - size_t size; + int64_t size; }; class ExecEnv; class StorageBackend; diff --git a/be/src/runtime/tmp_file_mgr.cc b/be/src/runtime/tmp_file_mgr.cc deleted file mode 100644 index 2a38b1663e050d..00000000000000 --- a/be/src/runtime/tmp_file_mgr.cc +++ /dev/null @@ -1,258 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "runtime/tmp_file_mgr.h" - -#include -#include -#include -#include -#include - -#include "olap/storage_engine.h" -#include "runtime/exec_env.h" -#include "util/debug_util.h" -#include "util/disk_info.h" -#include "util/filesystem_util.h" -#include "util/uid_util.h" - -using boost::algorithm::is_any_of; -using boost::algorithm::join; -using boost::algorithm::split; -using boost::algorithm::token_compress_on; -using std::filesystem::absolute; -using std::filesystem::path; -using boost::uuids::random_generator; - -using std::string; -using std::vector; - -namespace doris { - -DEFINE_GAUGE_METRIC_PROTOTYPE_3ARG(active_scratch_dirs, MetricUnit::NOUNIT, - "Metric to track active scratch directories"); - -const std::string _s_tmp_sub_dir_name = "doris-scratch"; -const uint64_t _s_available_space_threshold_mb = 1024; - -TmpFileMgr::TmpFileMgr(ExecEnv* exec_env) - : _exec_env(exec_env), _initialized(false), _dir_status_lock(), _tmp_dirs() { - INT_GAUGE_METRIC_REGISTER(DorisMetrics::instance()->server_entity(), active_scratch_dirs); -} - -TmpFileMgr::TmpFileMgr() { - INT_GAUGE_METRIC_REGISTER(DorisMetrics::instance()->server_entity(), active_scratch_dirs); -} - -TmpFileMgr::~TmpFileMgr() { - METRIC_DEREGISTER(DorisMetrics::instance()->server_entity(), active_scratch_dirs); -} - -Status TmpFileMgr::init() { - vector all_tmp_dirs; - for (auto& path : _exec_env->store_paths()) { - all_tmp_dirs.emplace_back(path.path); - } - return init_custom(all_tmp_dirs, true); -} - -Status TmpFileMgr::init_custom(const vector& tmp_dirs, bool one_dir_per_device) { - DCHECK(!_initialized); - if (tmp_dirs.empty()) { - LOG(WARNING) << "Running without spill to disk: no scratch directories provided."; - } - - vector is_tmp_dir_on_disk(DiskInfo::num_disks(), false); - // For each tmp directory, find the disk it is on, - // so additional tmp directories on the same disk can be skipped. 
- for (int i = 0; i < tmp_dirs.size(); ++i) { - std::filesystem::path tmp_path = - std::string_view(boost::trim_right_copy_if(tmp_dirs[i], is_any_of("/"))); - tmp_path = std::filesystem::absolute(tmp_path); - path scratch_subdir_path(tmp_path / _s_tmp_sub_dir_name); - // tmp_path must be a writable directory. - Status status = FileSystemUtil::verify_is_directory(tmp_path.string()); - if (!status.ok()) { - LOG(WARNING) << "Cannot use directory " << tmp_path.string() - << " for scratch: " << status; - continue; - } - // Find the disk id of tmp_path. Add the scratch directory if there isn't another - // directory on the same disk (or if we don't know which disk it is on). - int disk_id = DiskInfo::disk_id(tmp_path.c_str()); - if (!one_dir_per_device || disk_id < 0 || !is_tmp_dir_on_disk[disk_id]) { - uint64_t available_space; - RETURN_IF_ERROR( - FileSystemUtil::get_space_available(tmp_path.string(), &available_space)); - if (available_space < _s_available_space_threshold_mb * 1024 * 1024) { - LOG(WARNING) << "Filesystem containing scratch directory " << tmp_path - << " has less than " << _s_available_space_threshold_mb - << "MB available."; - } - // Create the directory, destroying if already present. If this succeeds, we will - // have an empty writable scratch directory. - status = FileSystemUtil::create_directory(scratch_subdir_path.string()); - if (status.ok()) { - if (disk_id >= 0) { - is_tmp_dir_on_disk[disk_id] = true; - } - LOG(INFO) << "Using scratch directory " << scratch_subdir_path.string() - << " on disk " << disk_id; - _tmp_dirs.push_back(Dir(scratch_subdir_path.string(), false)); - } else { - LOG(WARNING) << "Could not remove and recreate directory " - << scratch_subdir_path.string() << ": cannot use it for scratch. 
" - << "Error was: " << status; - } - } - } - - active_scratch_dirs->set_value(_tmp_dirs.size()); - - _initialized = true; - - if (_tmp_dirs.empty() && !tmp_dirs.empty()) { - LOG(ERROR) << "Running without spill to disk: could not use any scratch " - << "directories in list: " << join(tmp_dirs, ",") - << ". See previous warnings for information on causes."; - } - return Status::OK(); -} - -Status TmpFileMgr::get_file(const DeviceId& device_id, const TUniqueId& query_id, File** new_file) { - DCHECK(_initialized); - DCHECK_GE(device_id, 0); - DCHECK_LT(device_id, _tmp_dirs.size()); - if (is_blacklisted(device_id)) { - return Status::InternalError("path is blacklist. path: {}", _tmp_dirs[device_id].path()); - } - - // Generate the full file path. - string unique_name = boost::uuids::to_string(boost::uuids::random_generator()()); - std::stringstream file_name; - file_name << print_id(query_id) << "_" << unique_name; - path new_file_path(_tmp_dirs[device_id].path()); - new_file_path /= file_name.str(); - - *new_file = new File(this, device_id, new_file_path.string()); - return Status::OK(); -} - -string TmpFileMgr::get_tmp_dir_path(DeviceId device_id) const { - DCHECK(_initialized); - DCHECK_GE(device_id, 0); - DCHECK_LT(device_id, _tmp_dirs.size()); - return _tmp_dirs[device_id].path(); -} - -std::string TmpFileMgr::get_tmp_dir_path() { - std::vector devices = active_tmp_devices(); - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(devices.begin(), devices.end(), g); - return get_tmp_dir_path(devices.front()); -} - -void TmpFileMgr::blacklist_device(DeviceId device_id) { - DCHECK(_initialized); - DCHECK(device_id >= 0 && device_id < _tmp_dirs.size()); - bool added = true; - { - std::lock_guard l(_dir_status_lock); - added = _tmp_dirs[device_id].blacklist(); - } - if (added) { - active_scratch_dirs->increment(-1); - } -} - -bool TmpFileMgr::is_blacklisted(DeviceId device_id) { - DCHECK(_initialized); - DCHECK(device_id >= 0 && device_id < _tmp_dirs.size()); 
- std::lock_guard l(_dir_status_lock); - return _tmp_dirs[device_id].is_blacklisted(); -} - -int TmpFileMgr::num_active_tmp_devices() { - DCHECK(_initialized); - std::lock_guard l(_dir_status_lock); - int num_active = 0; - for (int device_id = 0; device_id < _tmp_dirs.size(); ++device_id) { - if (!_tmp_dirs[device_id].is_blacklisted()) { - ++num_active; - } - } - return num_active; -} - -vector TmpFileMgr::active_tmp_devices() { - vector devices; - // Allocate vector before we grab lock - devices.reserve(_tmp_dirs.size()); - { - std::lock_guard l(_dir_status_lock); - for (DeviceId device_id = 0; device_id < _tmp_dirs.size(); ++device_id) { - if (!_tmp_dirs[device_id].is_blacklisted()) { - devices.push_back(device_id); - } - } - } - return devices; -} - -TmpFileMgr::File::File(TmpFileMgr* mgr, DeviceId device_id, const string& path) - : _mgr(mgr), _path(path), _device_id(device_id), _current_size(0), _blacklisted(false) {} - -Status TmpFileMgr::File::allocate_space(int64_t write_size, int64_t* offset) { - DCHECK_GT(write_size, 0); - Status status; - if (_mgr->is_blacklisted(_device_id)) { - _blacklisted = true; - return Status::InternalError("path is blacklist. path: {}", _path); - } - if (_current_size == 0) { - // First call to AllocateSpace. Create the file. 
- status = FileSystemUtil::create_file(_path); - if (!status.ok()) { - report_io_error(status.to_string()); - return status; - } - _disk_id = DiskInfo::disk_id(_path.c_str()); - } - int64_t new_size = _current_size + write_size; - status = FileSystemUtil::resize_file(_path, new_size); - if (!status.ok()) { - report_io_error(status.to_string()); - return status; - } - *offset = _current_size; - _current_size = new_size; - return Status::OK(); -} - -void TmpFileMgr::File::report_io_error(const std::string& error_msg) { - LOG(ERROR) << "Error for temporary file '" << _path << "': " << error_msg; -} - -Status TmpFileMgr::File::remove() { - if (_current_size > 0) { - FileSystemUtil::remove_paths(vector(1, _path)); - } - return Status::OK(); -} - -} //namespace doris diff --git a/be/src/runtime/tmp_file_mgr.h b/be/src/runtime/tmp_file_mgr.h deleted file mode 100644 index 355f5e19ca5517..00000000000000 --- a/be/src/runtime/tmp_file_mgr.h +++ /dev/null @@ -1,183 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include "common/status.h" -#include "gen_cpp/Types_types.h" // for TUniqueId -#include "util/metrics.h" -#include "util/spinlock.h" - -namespace doris { - -class ExecEnv; - -// TmpFileMgr creates and manages temporary files and directories on the local -// filesystem. It can manage multiple temporary directories across multiple devices. -// TmpFileMgr ensures that at most one directory per device is used unless overridden -// for testing. GetFile() returns a File handle with a unique filename on a device. The -// client owns the File handle and can use it to expand the file. -// TODO: we could notify block managers about the failure so they can more take -// proactive action to avoid using the device. -class TmpFileMgr { -public: - // DeviceId is a unique identifier for a temporary device managed by TmpFileMgr. - // It is used as a handle for external classes to identify devices. - typedef int DeviceId; - - // File is a handle to a physical file in a temporary directory. Clients - // can allocate file space and remove files using AllocateSpace() and Remove(). - // Creation of the file is deferred until the first call to AllocateSpace(). - class File { - public: - ~File() { - // do nothing - } - - // Allocates 'write_size' bytes in this file for a new block of data. - // The file size is increased by a call to truncate() if necessary. - // The physical file is created on the first call to AllocateSpace(). - // Returns Status::OK()() and sets offset on success. - // Returns an error status if an unexpected error occurs. - // If an error status is returned, the caller can try a different temporary file. - Status allocate_space(int64_t write_size, int64_t* offset); - - // Called to notify TmpFileMgr that an IO error was encountered for this file - void report_io_error(const std::string& error_msg); - - // Delete the physical file on disk, if one was created. - // It is not valid to read or write to a file after calling Remove(). 
- Status remove(); - - const std::string& path() const { return _path; } - int disk_id() const { return _disk_id; } - bool is_blacklisted() const { return _blacklisted; } - - private: - friend class TmpFileMgr; - - // The name of the sub-directory that Impala created within each configured scratch - // directory. - const static std::string _s_tmp_sub_dir_name; - - // Space (in MB) that must ideally be available for writing on a scratch - // directory. A warning is issued if available space is less than this threshold. - const static uint64_t _s_available_space_threshold_mb; - - File(TmpFileMgr* mgr, DeviceId device_id, const std::string& path); - - // TmpFileMgr this belongs to. - TmpFileMgr* _mgr; - - // Path of the physical file in the filesystem. - std::string _path; - - // The temporary device this file is stored on. - DeviceId _device_id; - - // The id of the disk on which the physical file lies. - int _disk_id; - - // Current file size. Modified by AllocateSpace(). Size is 0 before file creation. - int64_t _current_size; - - // Set to true to indicate that file can't be expanded. This is useful to keep here - // even though it is redundant with the global per-device blacklisting in TmpFileMgr - // because it can be checked without acquiring a global lock. If a file is - // blacklisted, the corresponding device will always be blacklisted. - bool _blacklisted; - }; - - TmpFileMgr(ExecEnv* exec_env); - TmpFileMgr(); - - ~TmpFileMgr(); - - // Creates the configured tmp directories. If multiple directories are specified per - // disk, only one is created and used. Must be called after DiskInfo::Init(). - Status init(); - - // Custom initialization - initializes with the provided list of directories. - // If one_dir_per_device is true, only use one temporary directory per device. - // This interface is intended for testing purposes. 
- Status init_custom(const std::vector& tmp_dirs, bool one_dir_per_device); - - // Return a new File handle with a unique path for a query instance. The file path - // is within the (single) tmp directory on the specified device id. The caller owns - // the returned handle and is responsible for deleting it. The file is not created - - // creation is deferred until the first call to File::AllocateSpace(). - Status get_file(const DeviceId& device_id, const TUniqueId& query_id, File** new_file); - - // Return the scratch directory path for the device. - std::string get_tmp_dir_path(DeviceId device_id) const; - - // Return a random scratch directory path from the devices. - std::string get_tmp_dir_path(); - - // Total number of devices with tmp directories that are active. There is one tmp - // directory per device. - int num_active_tmp_devices(); - - // Return vector with device ids of all tmp devices being actively used. - // I.e. those that haven't been blacklisted. - std::vector active_tmp_devices(); - -private: - // Dir stores information about a temporary directory. - class Dir { - public: - const std::string& path() const { return _path; } - - // Return true if it was newly added to blacklist. - bool blacklist() { - bool was_blacklisted = _blacklisted; - _blacklisted = true; - return !was_blacklisted; - } - bool is_blacklisted() const { return _blacklisted; } - - private: - friend class TmpFileMgr; - - // path should be a absolute path to a writable scratch directory. - Dir(const std::string& path, bool blacklisted) : _path(path), _blacklisted(blacklisted) {} - - std::string _path; - - bool _blacklisted; - }; - - // Remove a device from the rotation. Subsequent attempts to allocate a file on that - // device will fail and the device will not be included in active tmp devices. - void blacklist_device(DeviceId device_id); - - bool is_blacklisted(DeviceId device_id); - - ExecEnv* _exec_env; - bool _initialized = false; - - // Protects the status of tmp dirs (i.e. 
whether they're blacklisted). - SpinLock _dir_status_lock; - - // The created tmp directories. - std::vector _tmp_dirs; - - // Metric to track active scratch directories. - IntGauge* active_scratch_dirs; -}; - -} // end namespace doris diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index 40381c0e5caa93..312e9ca1f2557f 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -23,11 +23,10 @@ #include "common/config.h" #include "common/status.h" -#include "env/env.h" #include "gutil/strings/split.h" #include "http/http_client.h" +#include "io/fs/local_file_system.h" #include "util/dynamic_util.h" -#include "util/file_utils.h" #include "util/jni-util.h" #include "util/md5.h" #include "util/spinlock.h" @@ -165,23 +164,24 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std Status UserFunctionCache::_load_cached_lib() { // create library directory if not exist - RETURN_IF_ERROR(FileUtils::create_dir(_lib_dir)); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(_lib_dir)); for (int i = 0; i < kLibShardNum; ++i) { std::string sub_dir = _lib_dir + "/" + std::to_string(i); - RETURN_IF_ERROR(FileUtils::create_dir(sub_dir)); + RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(sub_dir)); - auto scan_cb = [this, &sub_dir](const char* file) { - if (is_dot_or_dotdot(file)) { + auto scan_cb = [this, &sub_dir](const io::FileInfo& file) { + if (!file.is_file) { return true; } - auto st = _load_entry_from_lib(sub_dir, file); + auto st = _load_entry_from_lib(sub_dir, file.file_name); if (!st.ok()) { - LOG(WARNING) << "load a library failed, dir=" << sub_dir << ", file=" << file; + LOG(WARNING) << "load a library failed, dir=" << sub_dir + << ", file=" << file.file_name; } return true; }; - RETURN_IF_ERROR(Env::Default()->iterate_dir(sub_dir, scan_cb)); + RETURN_IF_ERROR(io::global_local_filesystem()->iterate_directory(sub_dir, scan_cb)); 
} return Status::OK(); } diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 7ddf9d57ba1859..b3fd6e07ac728c 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -46,7 +46,6 @@ #include "common/signal_handler.h" #include "common/status.h" #include "common/utils.h" -#include "env/env.h" #include "io/cache/block/block_file_cache_factory.h" #include "olap/options.h" #include "olap/storage_engine.h" diff --git a/be/src/service/single_replica_load_download_service.cpp b/be/src/service/single_replica_load_download_service.cpp index c557ce5b8d01f3..1cee6c677a7f67 100644 --- a/be/src/service/single_replica_load_download_service.cpp +++ b/be/src/service/single_replica_load_download_service.cpp @@ -19,6 +19,7 @@ #include "http/action/download_action.h" #include "http/ev_http_server.h" +#include "io/fs/fs_utils.h" #include "runtime/exec_env.h" namespace doris { @@ -33,7 +34,7 @@ Status SingleReplicaLoadDownloadService::start() { // register download action std::vector allow_paths; for (auto& path : _env->store_paths()) { - if (FilePathDesc::is_remote(path.storage_medium)) { + if (io::FilePathDesc::is_remote(path.storage_medium)) { continue; } allow_paths.emplace_back(path.path); diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 1da3ef478755d2..377f2e85876b59 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -25,12 +25,13 @@ #include #include "common/status.h" -#include "env/env.h" #include "gen_cpp/olap_file.pb.h" #include "gen_cpp/segment_v2.pb.h" #include "gutil/strings/numbers.h" #include "gutil/strings/split.h" #include "gutil/strings/substitute.h" +#include "io/fs/file_reader.h" +#include "io/fs/local_file_system.h" #include "json2pb/pb_to_json.h" #include "olap/data_dir.h" #include "olap/olap_define.h" @@ -42,7 +43,6 @@ #include "olap/utils.h" #include "util/coding.h" #include "util/crc32c.h" -#include "util/file_utils.h" using std::filesystem::path; using 
doris::DataDir; @@ -50,9 +50,7 @@ using doris::OlapMeta; using doris::Status; using doris::TabletMeta; using doris::TabletMetaManager; -using doris::FileUtils; using doris::Slice; -using doris::RandomAccessFile; using strings::Substitute; using doris::segment_v2::SegmentFooterPB; using doris::segment_v2::ColumnReader; @@ -61,6 +59,7 @@ using doris::segment_v2::PagePointer; using doris::segment_v2::ColumnReaderOptions; using doris::segment_v2::ColumnIteratorOptions; using doris::segment_v2::PageFooterPB; +using doris::io::FileReaderSPtr; const std::string HEADER_PREFIX = "tabletmeta_"; @@ -143,12 +142,7 @@ void delete_meta(DataDir* data_dir) { Status init_data_dir(const std::string& dir, std::unique_ptr* ret) { std::string root_path; - Status st = FileUtils::canonicalize(dir, &root_path); - if (!st.ok()) { - std::cout << "invalid root path:" << FLAGS_root_path << ", error: " << st.to_string() - << std::endl; - return Status::InternalError("invalid root path"); - } + RETURN_IF_ERROR(doris::io::global_local_filesystem()->canonicalize(dir, &root_path)); doris::StorePath path; auto res = parse_root_path(root_path, &path); if (!res.ok()) { @@ -162,8 +156,8 @@ Status init_data_dir(const std::string& dir, std::unique_ptr* ret) { std::cout << "new data dir failed" << std::endl; return Status::InternalError("new data dir failed"); } - st = p->init(); - if (!st.ok()) { + res = p->init(); + if (!res.ok()) { std::cout << "data_dir load failed" << std::endl; return Status::InternalError("data_dir load failed"); } @@ -194,7 +188,7 @@ void batch_delete_meta(const std::string& tablet_file) { } // 1. 
get dir std::string dir; - Status st = FileUtils::canonicalize(v[0], &dir); + Status st = doris::io::global_local_filesystem()->canonicalize(v[0], &dir); if (!st.ok()) { std::cout << "invalid root dir in tablet_file: " << line << std::endl; err_num++; @@ -251,19 +245,18 @@ void batch_delete_meta(const std::string& tablet_file) { return; } -Status get_segment_footer(RandomAccessFile* input_file, SegmentFooterPB* footer) { +Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* footer) { // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) - std::string file_name = input_file->file_name(); - uint64_t file_size; - RETURN_IF_ERROR(input_file->size(&file_size)); - + std::string file_name = file_reader->path(); + uint64_t file_size = file_reader->size(); if (file_size < 12) { return Status::Corruption("Bad segment file {}: file size {} < 12", file_name, file_size); } + size_t bytes_read = 0; uint8_t fixed_buf[12]; Slice slice(fixed_buf, 12); - RETURN_IF_ERROR(input_file->read_at(file_size - 12, &slice)); + RETURN_IF_ERROR(file_reader->read_at(file_size - 12, slice, &bytes_read)); // validate magic number const char* k_segment_magic = "D0R1"; @@ -281,7 +274,7 @@ Status get_segment_footer(RandomAccessFile* input_file, SegmentFooterPB* footer) std::string footer_buf; footer_buf.resize(footer_length); Slice slice2(footer_buf); - RETURN_IF_ERROR(input_file->read_at(file_size - 12 - footer_length, &slice2)); + RETURN_IF_ERROR(file_reader->read_at(file_size - 12 - footer_length, slice2, &bytes_read)); // validate footer PB's checksum uint32_t expect_checksum = doris::decode_fixed32_le(fixed_buf + 4); @@ -301,14 +294,14 @@ Status get_segment_footer(RandomAccessFile* input_file, SegmentFooterPB* footer) } void show_segment_footer(const std::string& file_name) { - std::unique_ptr input_file; - Status status = doris::Env::Default()->new_random_access_file(file_name, &input_file); + doris::io::FileReaderSPtr file_reader; + Status 
status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader); if (!status.ok()) { - std::cout << "open file failed: " << status.to_string() << std::endl; + std::cout << "open file failed: " << status << std::endl; return; } SegmentFooterPB footer; - status = get_segment_footer(input_file.get(), &footer); + status = get_segment_footer(file_reader.get(), &footer); if (!status.ok()) { std::cout << "get footer failed: " << status.to_string() << std::endl; return; @@ -334,7 +327,7 @@ int main(int argc, char** argv) { show_meta(); } else if (FLAGS_operation == "batch_delete_meta") { std::string tablet_file; - Status st = FileUtils::canonicalize(FLAGS_tablet_file, &tablet_file); + Status st = doris::io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file); if (!st.ok()) { std::cout << "invalid tablet file: " << FLAGS_tablet_file << ", error: " << st.to_string() << std::endl; diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 524d5d372ef5dd..251cf325e25a8c 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -58,10 +58,8 @@ set(UTIL_FILES system_metrics.cpp url_parser.cpp url_coding.cpp - file_utils.cpp mysql_row_buffer.cpp error_util.cc - filesystem_util.cc time.cpp os_info.cpp os_util.cpp @@ -80,7 +78,6 @@ set(UTIL_FILES utf8_check.cpp cgroup_util.cpp path_util.cpp - file_cache.cpp thread.cpp threadpool.cpp trace.cpp diff --git a/be/src/util/cgroup_util.cpp b/be/src/util/cgroup_util.cpp index 0e3be6cde5f7db..15e5e8791cdff8 100644 --- a/be/src/util/cgroup_util.cpp +++ b/be/src/util/cgroup_util.cpp @@ -27,8 +27,8 @@ #include "gutil/strings/escaping.h" #include "gutil/strings/split.h" #include "gutil/strings/substitute.h" +#include "io/fs/local_file_system.h" #include "util/error_util.h" -#include "util/file_utils.h" #include "util/string_parser.hpp" using strings::CUnescape; @@ -224,7 +224,9 @@ std::string CGroupUtil::debug_string() { } bool CGroupUtil::enable() { - return 
FileUtils::check_exist("/proc/cgroups"); + bool exists = true; + Status st = io::global_local_filesystem()->exists("/proc/cgroups", &exists); + return st.ok() && exists; } } // namespace doris diff --git a/be/src/util/disk_info.cpp b/be/src/util/disk_info.cpp index 1dd016c3c5d9a1..b3c70706bbfd97 100644 --- a/be/src/util/disk_info.cpp +++ b/be/src/util/disk_info.cpp @@ -30,7 +30,7 @@ #include #include "gutil/strings/split.h" -#include "util/file_utils.h" +#include "io/fs/local_file_system.h" namespace doris { @@ -162,8 +162,11 @@ Status DiskInfo::get_disk_devices(const std::vector& paths, std::vector real_paths; for (auto& path : paths) { std::string p; - WARN_IF_ERROR(FileUtils::canonicalize(path, &p), - "canonicalize path " + path + " failed, skip disk monitoring of this path"); + Status st = io::global_local_filesystem()->canonicalize(path, &p); + if (!st.ok()) { + LOG(WARNING) << "skip disk monitoring of path. " << st; + continue; + } real_paths.emplace_back(std::move(p)); } diff --git a/be/src/util/disk_info_mac.cpp b/be/src/util/disk_info_mac.cpp index 0cbbfb40c06d44..1caa7cd8281272 100644 --- a/be/src/util/disk_info_mac.cpp +++ b/be/src/util/disk_info_mac.cpp @@ -29,7 +29,7 @@ #include #include "disk_info.h" -#include "file_utils.h" +#include "io/fs/local_file_system.h" namespace doris { @@ -134,8 +134,11 @@ Status DiskInfo::get_disk_devices(const std::vector& paths, std::vector real_paths; for (const auto& path : paths) { std::string p; - WARN_IF_ERROR(FileUtils::canonicalize(path, &p), - "canonicalize path " + path + " failed, skip disk monitoring of this path"); + Status st = io::global_local_filesystem()->canonicalize(path, &p); + if (!st.ok()) { + LOG(WARNING) << "skip disk monitoring of path. 
" << st; + continue; + } real_paths.emplace_back(std::move(p)); } diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp index 2e36eed1242814..9c655aeed86a12 100644 --- a/be/src/util/doris_metrics.cpp +++ b/be/src/util/doris_metrics.cpp @@ -20,9 +20,8 @@ #include #include -#include "env/env.h" +#include "io/fs/local_file_system.h" #include "util/debug_util.h" -#include "util/file_utils.h" #include "util/system_metrics.h" namespace doris { @@ -345,9 +344,13 @@ void DorisMetrics::_update_process_thread_num() { ss << "/proc/" << pid << "/task/"; int64_t count = 0; - Status st = FileUtils::get_children_count(Env::Default(), ss.str(), &count); + auto cb = [&count](const io::FileInfo& file) -> bool { + count += 1; + return true; + }; + Status st = io::global_local_filesystem()->iterate_directory(ss.str(), cb); if (!st.ok()) { - LOG(WARNING) << "failed to count thread num from: " << ss.str(); + LOG(WARNING) << "failed to count thread num: " << st; process_thread_num->set_value(0); return; } @@ -363,9 +366,13 @@ void DorisMetrics::_update_process_fd_num() { std::stringstream ss; ss << "/proc/" << pid << "/fd/"; int64_t count = 0; - Status st = FileUtils::get_children_count(Env::Default(), ss.str(), &count); + auto cb = [&count](const io::FileInfo& file) -> bool { + count += 1; + return true; + }; + Status st = io::global_local_filesystem()->iterate_directory(ss.str(), cb); if (!st.ok()) { - LOG(WARNING) << "failed to count fd from: " << ss.str(); + LOG(WARNING) << "failed to count fd: " << st; process_fd_num_used->set_value(0); return; } diff --git a/be/src/util/file_cache.cpp b/be/src/util/file_cache.cpp deleted file mode 100644 index 0ffc51406be714..00000000000000 --- a/be/src/util/file_cache.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "util/file_cache.h" - -#include "env/env.h" -#include "gutil/strings/substitute.h" - -namespace doris { - -template -FileCache::FileCache(const std::string& cache_name, int max_open_files) - : _cache_name(cache_name), - _cache(new_lru_cache(std::string("FileBlockManagerCache:") + cache_name, max_open_files)), - _is_cache_own(true) {} - -template -FileCache::FileCache(const std::string& cache_name, std::shared_ptr cache) - : _cache_name(cache_name), _cache(cache), _is_cache_own(false) {} - -template -bool FileCache::lookup(const std::string& file_name, - OpenedFileHandle* file_handle) { - DCHECK(_cache != nullptr); - CacheKey key(file_name); - auto lru_handle = _cache->lookup(key); - if (lru_handle == nullptr) { - return false; - } - *file_handle = OpenedFileHandle(_cache.get(), lru_handle); - return true; -} - -template -void FileCache::insert(const std::string& file_name, FileType* file, - OpenedFileHandle* file_handle, - void (*deleter)(const CacheKey&, void*)) { - DCHECK(_cache != nullptr); - if (!deleter) { - deleter = [](const CacheKey& key, void* value) { delete (FileType*)value; }; - } - CacheKey key(file_name); - auto lru_handle = _cache->insert(key, file, 1, deleter); - *file_handle = OpenedFileHandle(_cache.get(), lru_handle); -} - -// Explicit specialization for callers outside this compilation unit. 
-template class FileCache; -template class FileCache; - -} // namespace doris diff --git a/be/src/util/file_cache.h b/be/src/util/file_cache.h deleted file mode 100644 index 4ebe9139ef3891..00000000000000 --- a/be/src/util/file_cache.h +++ /dev/null @@ -1,152 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "olap/lru_cache.h" - -namespace doris { - -class Env; - -// A "smart" retrieved LRU cache handle. -// -// The cache handle is released when this object goes out of scope, possibly -// closing the opened file if it is no longer in the cache. 
-template -class OpenedFileHandle { -public: - OpenedFileHandle() : _cache(nullptr), _handle(nullptr) {} - - // A opened file handle - explicit OpenedFileHandle(Cache* cache, Cache::Handle* handle) - : _cache(cache), _handle(handle) {} - - // release cache handle - ~OpenedFileHandle() { - if (_handle != nullptr) { - _cache->release(_handle); - } - } - - OpenedFileHandle(OpenedFileHandle&& other) noexcept { - std::swap(_cache, other._cache); - std::swap(_handle, other._handle); - } - - OpenedFileHandle& operator=(OpenedFileHandle&& other) noexcept { - std::swap(_cache, other._cache); - std::swap(_handle, other._handle); - return *this; - } - - FileType* file() const { - DCHECK(_handle != nullptr); - return reinterpret_cast(_cache->value(_handle)); - } - -private: - Cache* _cache; - Cache::Handle* _handle; -}; - -// Cache of open files. -// -// The purpose of this cache is to enforce an upper bound on the maximum number -// of files open at a time. Files opened through the cache may be closed at any -// time, only to be reopened upon next use. -// -// The file cache can be viewed as having two logical parts: the client-facing -// File handle and the LRU cache. -// -// Client-facing API -// ----------------- -// The core of the client-facing API is the cache descriptor. A descriptor - -// LRU cache -// --------- -// The lower half of the file cache is a standard LRU cache whose keys are file -// names and whose values are pointers to opened file objects allocated on the -// heap. Unlike the descriptor map, this cache has an upper bound on capacity, -// and handles are evicted (and closed) according to an LRU algorithm. -// -// Whenever a descriptor is used by a client in file I/O, its file name is used -// in an LRU cache lookup. If found, the underlying file is still open and the -// file access is performed. 
Otherwise, the file must have been evicted and -// closed, so it is reopened and reinserted (possibly evicting a different open -// file) before the file access is performed. -// -// Every public method in the file cache is thread safe. -template -class FileCache { -public: - // Creates a new file cache. - // - // The 'cache_name' is used to disambiguate amongst other file cache - // instances. The cache will use 'max_open_files' as a soft upper bound on - // the number of files open at any given time. - // for this constructor, _is_cache_own is set to true, indicating that _cache - // is only owned by this. - FileCache(const std::string& cache_name, int max_open_files); - - // Creates a new file cache with given cache. - // - // The 'cache_name' is used to disambiguate amongst other file cache - // instances. Please use this constructor only you want to share _cache - // with other. - // for this constructor, _is_cache_own is set to false, indicating that _cache - // is sharing with other (In most case, sharing _cache with storage engine). - FileCache(const std::string& cache_name, std::shared_ptr cache); - - // Destroys the file cache. - ~FileCache() { - // If _cache is only owned by this, reset the shared_ptr of _cache. - if (_is_cache_own) { - _cache.reset(); - } - } - - // find whether the file has been cached - // if cached, return true and set the file_handle - // else return false - bool lookup(const std::string& file_name, OpenedFileHandle* file_handle); - - // insert new FileType* into lru cache - // and return file_handle - void insert(const std::string& file_name, FileType* file, - OpenedFileHandle* file_handle, - void (*deleter)(const CacheKey&, void*) = nullptr); - -private: - // Name of the cache. - std::string _cache_name; - - // Underlying cache instance. Caches opened files. 
- std::shared_ptr _cache; - - // Indicates weather _cache is only owned by this, - // generally, _cache can be shared by other, in - // this case, _is_cache_own is set to false. - bool _is_cache_own = false; - - DISALLOW_COPY_AND_ASSIGN(FileCache); -}; - -} // namespace doris diff --git a/be/src/util/file_utils.cpp b/be/src/util/file_utils.cpp deleted file mode 100644 index d5c95caadd2f0e..00000000000000 --- a/be/src/util/file_utils.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "util/file_utils.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "env/env.h" -#include "gutil/strings/split.h" -#include "gutil/strings/strip.h" -#include "gutil/strings/substitute.h" -#include "runtime/thread_context.h" -#include "util/defer_op.h" - -namespace doris { - -using strings::Substitute; - -Status FileUtils::create_dir(const std::string& path, Env* env) { - return env->create_dirs(path); -} - -Status FileUtils::create_dir(const std::string& dir_path) { - return create_dir(dir_path, Env::Default()); -} - -Status FileUtils::remove_all(const std::string& file_path) { - return Env::Default()->delete_dir(file_path); -} - -Status FileUtils::remove(const std::string& path) { - if (!Env::Default()->path_exists(path).ok()) { - LOG(WARNING) << "path does exist: " << path; - return Status::OK(); - } - bool is_dir; - RETURN_IF_ERROR(Env::Default()->is_directory(path, &is_dir)); - - if (is_dir) { - return Env::Default()->delete_dir(path); - } else { - return Env::Default()->delete_file(path); - } -} - -Status FileUtils::remove_paths(const std::vector& paths) { - for (const std::string& p : paths) { - RETURN_IF_ERROR(remove(p)); - } - return Status::OK(); -} - -Status FileUtils::list_files(Env* env, const std::string& dir, std::vector* files) { - auto cb = [files](const char* name) -> bool { - if (!is_dot_or_dotdot(name)) { - files->push_back(name); - } - return true; - }; - return env->iterate_dir(dir, cb); -} - -Status FileUtils::list_dirs_files(const std::string& path, std::set* dirs, - std::set* files, Env* env) { - auto cb = [path, dirs, files, env](const char* name) -> bool { - if (is_dot_or_dotdot(name)) { - return true; - } - - std::string temp_path = path + "/" + name; - bool is_dir; - - auto st = env->is_directory(temp_path, &is_dir); - if (st.ok()) { - if (is_dir) { - if (dirs != nullptr) { - dirs->insert(name); - } - } else if (files != nullptr) { - files->insert(name); - } 
- } else { - LOG(WARNING) << "check path " << path << "is directory error: " << st.to_string(); - } - - return true; - }; - - return env->iterate_dir(path, cb); -} - -Status FileUtils::get_children_count(Env* env, const std::string& dir, int64_t* count) { - auto cb = [count](const char* name) -> bool { - if (!is_dot_or_dotdot(name)) { - *count += 1; - } - return true; - }; - return env->iterate_dir(dir, cb); -} - -bool FileUtils::is_dir(const std::string& file_path, Env* env) { - bool ret; - if (env->is_directory(file_path, &ret).ok()) { - return ret; - } - - return false; -} - -bool FileUtils::is_dir(const std::string& path) { - return is_dir(path, Env::Default()); -} - -// Through proc filesystem -std::string FileUtils::path_of_fd(int fd) { - const int PATH_SIZE = 256; - char proc_path[PATH_SIZE]; - snprintf(proc_path, PATH_SIZE, "/proc/self/fd/%d", fd); - char path[PATH_SIZE]; - if (readlink(proc_path, path, PATH_SIZE) < 0) { - path[0] = '\0'; - } - return path; -} - -Status FileUtils::split_paths(const char* path, std::vector* path_vec) { - path_vec->clear(); - *path_vec = strings::Split(path, ";", strings::SkipWhitespace()); - - for (std::vector::iterator it = path_vec->begin(); it != path_vec->end();) { - StripWhiteSpace(&(*it)); - - it->erase(it->find_last_not_of("/") + 1); - if (it->size() == 0) { - it = path_vec->erase(it); - } else { - ++it; - } - } - - // Check if - std::sort(path_vec->begin(), path_vec->end()); - if (std::unique(path_vec->begin(), path_vec->end()) != path_vec->end()) { - return Status::InternalError("Same path in path.[path={}]", path); - } - - if (path_vec->size() == 0) { - return Status::InternalError("Size of vector after split is zero.[path={}]", path); - } - - return Status::OK(); -} - -Status FileUtils::copy_file(const std::string& src_path, const std::string& dest_path) { - return Env::Default()->copy_path(src_path, dest_path); -} - -Status FileUtils::md5sum(const std::string& file, std::string* md5sum) { - int fd = 
open(file.c_str(), O_RDONLY); - if (fd < 0) { - return Status::InternalError("failed to open file"); - } - - struct stat statbuf; - if (fstat(fd, &statbuf) < 0) { - close(fd); - return Status::InternalError("failed to stat file"); - } - size_t file_len = statbuf.st_size; - CONSUME_THREAD_MEM_TRACKER(file_len); - void* buf = mmap(0, file_len, PROT_READ, MAP_SHARED, fd, 0); - - unsigned char result[MD5_DIGEST_LENGTH]; - MD5((unsigned char*)buf, file_len, result); - munmap(buf, file_len); - RELEASE_THREAD_MEM_TRACKER(file_len); - - std::stringstream ss; - for (int32_t i = 0; i < MD5_DIGEST_LENGTH; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << (int)result[i]; - } - ss >> *md5sum; - - close(fd); - return Status::OK(); -} - -Status FileUtils::mtime(const std::string& file, time_t* m_time) { - int fd = open(file.c_str(), O_RDONLY); - if (fd < 0) { - return Status::InternalError("failed to open file"); - } - - Defer defer {[&]() { close(fd); }}; - struct stat statbuf; - if (fstat(fd, &statbuf) < 0) { - return Status::InternalError("failed to stat file"); - } - *m_time = statbuf.st_mtime; - return Status::OK(); -} - -bool FileUtils::check_exist(const std::string& path) { - return Env::Default()->path_exists(path).ok(); -} - -Status FileUtils::canonicalize(const std::string& path, std::string* real_path) { - return Env::Default()->canonicalize(path, real_path); -} - -} // namespace doris diff --git a/be/src/util/file_utils.h b/be/src/util/file_utils.h deleted file mode 100644 index a6419ff39b90b9..00000000000000 --- a/be/src/util/file_utils.h +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include "common/status.h" -#include "gen_cpp/Types_types.h" - -namespace doris { - -class Env; - -// Return true if file is '.' or '..' -inline bool is_dot_or_dotdot(const char* name) { - return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); -} - -class FileUtils { -public: - // Create directory of dir_path with default Env, - // This function will create directory recursively, - // if dir's parent directory doesn't exist - // - // RETURNS: - // Status::OK() if create directory success or directory already exists - static Status create_dir(const std::string& dir_path); - - // Create directory of dir_path, - // This function will create directory recursively, - // if dir's parent directory doesn't exist - // - // RETURNS: - // Status::OK() if create directory success or directory already exists - static Status create_dir(const std::string& dir_path, Env* env); - - // Delete file recursively. - static Status remove_all(const std::string& dir_path); - - static Status remove(const std::string& path); - - static Status remove_paths(const std::vector& paths); - - // List all files in the specified directory without '.' and '..'. - // If you want retrieve all files, you can use Env::iterate_dir. - // All valid files will be stored in given *files. 
- static Status list_files(Env* env, const std::string& dir, std::vector* files); - - // List all dirs and files in the specified directory - static Status list_dirs_files(const std::string& path, std::set* dirs, - std::set* files, Env* env); - - // Get the number of children belong to the specified directory, this - // function also exclude '.' and '..'. - // Return OK with *count is set to the count, if execute successful. - static Status get_children_count(Env* env, const std::string& dir, int64_t* count); - - // Check the file_path is not exist with default env, or is not a dir, return false. - static bool is_dir(const std::string& file_path, Env* env); - - // If the file_path is not exist, or is not a dir, return false. - static bool is_dir(const std::string& file_path); - - // Get file path from fd - // Return - // file path of this fd referenced - // "" if this fd is invalid - static std::string path_of_fd(int fd); - - // split paths in configure file to path - // for example - // "/home/disk1/;/home/disk2" - // will split to ['/home/disk1', '/home/disk2'] - static Status split_paths(const char* path, std::vector* path_vec); - - // copy the file from src path to dest path, it will overwrite the existing files - static Status copy_file(const std::string& src_path, const std::string& dest_path); - - // calc md5sum of a local file - static Status md5sum(const std::string& file, std::string* md5sum); - - static Status mtime(const std::string& file, time_t* m_time); - - // check path(file or directory) exist with default env - static bool check_exist(const std::string& path); - - // Canonicalize 'path' by applying the following conversions: - // - Converts a relative path into an absolute one using the cwd. - // - Converts '.' and '..' references. - // - Resolves all symbolic links. - // - // All directory entries in 'path' must exist on the filesystem. 
- static Status canonicalize(const std::string& path, std::string* real_path); -}; - -} // namespace doris diff --git a/be/src/util/filesystem_util.cc b/be/src/util/filesystem_util.cc deleted file mode 100644 index 22c7df4e97c4bd..00000000000000 --- a/be/src/util/filesystem_util.cc +++ /dev/null @@ -1,191 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/filesystem-util.cc -// and modified by Doris - -#include "util/filesystem_util.h" - -#include -#include -#include - -#include - -#include "util/error_util.h" - -using std::error_code; -using std::exception; -using std::string; -using std::vector; - -// std::filesystem functions must be given an errcode parameter to avoid the variants -// of those functions that throw exceptions. -namespace doris { - -Status FileSystemUtil::create_directory(const string& directory) { - error_code errcode; - bool exists = std::filesystem::exists(directory, errcode); - // Need to check for no_such_file_or_directory error case - Boost's exists() sometimes - // returns an error when it should simply return false. 
- if (errcode && errcode != std::errc::no_such_file_or_directory) { - return Status::InternalError("Encountered error checking existence of directory {}:{}", - directory, errcode.message()); - } - if (exists) { - // Attempt to remove the directory and its contents so that we can create a fresh - // empty directory that we will have permissions for. - std::filesystem::remove_all(directory, errcode); - if (errcode) { - return Status::InternalError("Encountered error removing directory {}:{}", directory, - errcode.message()); - } - } - std::filesystem::create_directories(directory, errcode); - if (errcode) { - return Status::InternalError("Encountered error creating directory {}:{}", directory, - errcode.message()); - } - return Status::OK(); -} - -Status FileSystemUtil::remove_paths(const vector& directories) { - for (int i = 0; i < directories.size(); ++i) { - error_code errcode; - std::filesystem::remove_all(directories[i], errcode); - if (errcode) { - return Status::InternalError("Encountered error removing directory {}:{}", - directories[i], errcode.message()); - } - } - - return Status::OK(); -} - -Status FileSystemUtil::create_file(const string& file_path) { - int fd = creat(file_path.c_str(), S_IRUSR | S_IWUSR); - - if (fd < 0) { - std::stringstream error_msg; - error_msg << "Create file " << file_path.c_str() << " failed with errno=" << errno - << "description=" << get_str_err_msg(); - return Status::InternalError(error_msg.str()); - } - - int success = close(fd); - if (success < 0) { - std::stringstream error_msg; - error_msg << "Close file " << file_path.c_str() << " failed with errno=" << errno - << " description=" << get_str_err_msg(); - return Status::InternalError(error_msg.str()); - } - - return Status::OK(); -} - -Status FileSystemUtil::resize_file(const string& file_path, int64_t trunc_len) { - int success = truncate(file_path.c_str(), trunc_len); - if (success != 0) { - std::stringstream error_msg; - error_msg << "Truncate file " << file_path << " 
to length " << trunc_len << " failed with " - << errno << " (" << get_str_err_msg() << ")"; - return Status::InternalError(error_msg.str()); - } - - return Status::OK(); -} - -Status FileSystemUtil::verify_is_directory(const string& directory_path) { - error_code errcode; - bool exists = std::filesystem::exists(directory_path, errcode); - if (errcode) { - return Status::InternalError( - "Encountered exception while verifying existence of directory path {}: {}", - directory_path, errcode.message()); - } - if (!exists) { - return Status::InternalError("Directory path {} does not exist.", directory_path); - } - bool is_dir = std::filesystem::is_directory(directory_path, errcode); - if (errcode) { - return Status::InternalError( - "Encountered exception while verifying existence of directory path {}: {}", - directory_path, errcode.message()); - } - if (!is_dir) { - return Status::InternalError("Path {} is not a directory", directory_path); - } - return Status::OK(); -} - -Status FileSystemUtil::get_space_available(const string& directory_path, - uint64_t* available_bytes) { - error_code errcode; - std::filesystem::space_info info = std::filesystem::space(directory_path, errcode); - if (errcode) { - return Status::InternalError( - "Encountered exception while checking available space for path {}: {}", - directory_path, errcode.message()); - } - *available_bytes = info.available; - return Status::OK(); -} - -uint64_t FileSystemUtil::max_num_file_handles() { - struct rlimit data; - if (getrlimit(RLIMIT_NOFILE, &data) == 0) { - return static_cast(data.rlim_cur); - } - return 0ul; -} - -// NOTE: the parent_path and sub_path can either dir or file. 
-// return true if patent_path == sub_path -bool FileSystemUtil::contain_path(const std::string& parent_path, const std::string& sub_path) { - std::filesystem::path parent(parent_path); - std::filesystem::path sub(sub_path); - parent = parent.lexically_normal(); - sub = sub.lexically_normal(); - if (parent == sub) { - return true; - } - - if (parent.filename() == ".") { - parent.remove_filename(); - } - - // We're also not interested in the file's name. - if (sub.has_filename()) { - sub.remove_filename(); - } - // If dir has more components than file, then file can't possibly reside in dir. - auto dir_len = std::distance(parent.begin(), parent.end()); - auto file_len = std::distance(sub.begin(), sub.end()); - if (dir_len > file_len) { - return false; - } - auto p_it = parent.begin(); - auto s_it = sub.begin(); - for (; p_it != parent.end() && !p_it->string().empty(); ++p_it, ++s_it) { - if (!(*p_it == *s_it)) { - return false; - } - } - return true; -} - -} // end namespace doris diff --git a/be/src/util/filesystem_util.h b/be/src/util/filesystem_util.h deleted file mode 100644 index ac29295e3eab41..00000000000000 --- a/be/src/util/filesystem_util.h +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/filesystem-util.h -// and modified by Doris - -#pragma once - -#include "common/status.h" - -namespace doris { - -// Utility class for common local file system operations such as file creation and -// deletion. This class should NOT be used to read or write data -// Errors are indicated by the status code RUNTIME_ERROR, and are not -// handled via exceptions. -class FileSystemUtil { -public: - // Create the specified directory and any ancestor directories that do not exist yet. - // The directory and its contents are destroyed if it already exists. - // Returns Status::OK() if successful, or a runtime error with a message otherwise. - static Status create_directory(const std::string& directory); - - // Create a file at the specified path. - static Status create_file(const std::string& file_path); - - // Resize a file to a specified length - uses unistd truncate(). - static Status resize_file(const std::string& file_path, int64_t trunc_len); - - // Remove the specified paths and their enclosing files/directories. - static Status remove_paths(const std::vector& directories); - - // Verify that the specified path is an existing directory. - // Returns Status::OK() if it is, or a runtime error with a message otherwise. - static Status verify_is_directory(const std::string& directory_path); - - // Returns the space available on the file system containing 'directory_path' - // in 'available_bytes' - static Status get_space_available(const std::string& directory_path, uint64_t* available_bytes); - - // Returns the currently allowed maximum of possible file descriptors. In case of an - // error returns 0. 
- static uint64_t max_num_file_handles(); - - static bool contain_path(const std::string& parent_path, const std::string& sub_path); -}; - -} // namespace doris diff --git a/be/src/util/hdfs_util.cpp b/be/src/util/hdfs_util.cpp index a5ad09ec01416b..d8be4b96c69ce9 100644 --- a/be/src/util/hdfs_util.cpp +++ b/be/src/util/hdfs_util.cpp @@ -21,6 +21,7 @@ #include "common/config.h" #include "common/logging.h" +#include "io/fs/err_utils.h" namespace doris { namespace io { @@ -34,7 +35,7 @@ hdfsFS HDFSHandle::create_hdfs_fs(HDFSCommonBuilder& hdfs_builder) { hdfsFS hdfs_fs = hdfsBuilderConnect(hdfs_builder.get()); if (hdfs_fs == nullptr) { LOG(WARNING) << "connect to hdfs failed." - << ", error: " << hdfsGetLastError(); + << ", error: " << hdfs_error(); return nullptr; } return hdfs_fs; diff --git a/be/src/util/hdfs_util.h b/be/src/util/hdfs_util.h index f98bdd5ab3687d..2e56181df7932e 100644 --- a/be/src/util/hdfs_util.h +++ b/be/src/util/hdfs_util.h @@ -17,13 +17,12 @@ #pragma once -#include - #include #include #include #include "common/status.h" +#include "io/fs/hdfs.h" #include "io/fs/path.h" #include "io/hdfs_builder.h" diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp index 811738144e4ae9..b274ae08e813da 100644 --- a/be/src/util/jni-util.cpp +++ b/be/src/util/jni-util.cpp @@ -37,10 +37,10 @@ namespace doris { namespace { JavaVM* g_vm; -std::once_flag g_vm_once; +[[maybe_unused]] std::once_flag g_vm_once; const std::string GetDorisJNIClasspath() { - const auto* classpath = getenv("DORIS_JNI_CLASSPATH_PARAMETER"); + const auto* classpath = getenv("DORIS_CLASSPATH"); if (classpath) { return classpath; } else { @@ -66,12 +66,13 @@ const std::string GetDorisJNIClasspath() { } } -void FindOrCreateJavaVM() { +// Only used on non-x86 platform +[[maybe_unused]] void FindOrCreateJavaVM() { int num_vms; int rv = LibJVMLoader::JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms); if (rv == 0) { auto classpath = GetDorisJNIClasspath(); - std::string heap_size = 
fmt::format("-Xmx{}", config::jvm_max_heap_size); + std::string heap_size = fmt::format("-Xmx{}", "1024m"); std::string log_path = fmt::format("-DlogPath={}/log/udf-jdbc.log", getenv("DORIS_HOME")); std::string jvm_name = fmt::format("-Dsun.java.command={}", "DorisBE"); @@ -152,6 +153,7 @@ Status JniLocalFrame::push(JNIEnv* env, int max_local_ref) { Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) { DCHECK(!tls_env_) << "Call GetJNIEnv() fast path"; +#ifdef USE_LIBHDFS3 std::call_once(g_vm_once, FindOrCreateJavaVM); int rc = g_vm->GetEnv(reinterpret_cast(&tls_env_), JNI_VERSION_1_8); if (rc == JNI_EDETACHED) { @@ -160,6 +162,10 @@ Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) { if (rc != 0 || tls_env_ == nullptr) { return Status::InternalError("Unable to get JVM: {}", rc); } +#else + // the hadoop libhdfs will do all the stuff + tls_env_ = getJNIEnv(); +#endif *env = tls_env_; return Status::OK(); } @@ -219,7 +225,9 @@ Status JniUtil::LocalToGlobalRef(JNIEnv* env, jobject local_ref, jobject* global } Status JniUtil::Init() { +#ifdef USE_LIBHDFS3 RETURN_IF_ERROR(LibJVMLoader::instance().load()); +#endif // Get the JNIEnv* corresponding to current thread. 
JNIEnv* env; diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h index 5aa8be9a1fc547..ec5f6abf6e142d 100644 --- a/be/src/util/jni-util.h +++ b/be/src/util/jni-util.h @@ -23,6 +23,11 @@ #include "gutil/macros.h" #include "util/thrift_util.h" +#ifdef USE_HADOOP_HDFS +// defined in hadoop_hdfs/hdfs.h +extern "C" JNIEnv* getJNIEnv(void); +#endif + namespace doris { #define RETURN_ERROR_IF_EXC(env) \ diff --git a/be/src/util/os_util.cpp b/be/src/util/os_util.cpp index 1c5215e59abfcc..b59b12bee9802f 100644 --- a/be/src/util/os_util.cpp +++ b/be/src/util/os_util.cpp @@ -31,13 +31,14 @@ #include #include -#include "env/env_util.h" #include "gutil/macros.h" #include "gutil/strings/numbers.h" #include "gutil/strings/split.h" #include "gutil/strings/stringpiece.h" #include "gutil/strings/substitute.h" #include "gutil/strings/util.h" +#include "io/fs/fs_utils.h" +#include "io/fs/local_file_system.h" #include "util/faststring.h" using std::string; @@ -108,9 +109,9 @@ Status get_thread_stats(int64_t tid, ThreadStats* stats) { return Status::NotSupported("ThreadStats not supported"); } std::string buf; - RETURN_IF_ERROR(env_util::read_file_to_string( - Env::Default(), strings::Substitute("/proc/self/task/$0/stat", tid), &buf)); - + RETURN_IF_ERROR(io::read_file_to_string(io::global_local_filesystem(), + strings::Substitute("/proc/self/task/$0/stat", tid), + &buf)); return parse_stat(buf, nullptr, stats); } void disable_core_dumps() { @@ -134,34 +135,4 @@ void disable_core_dumps() { } } -bool is_being_debugged() { -#ifndef __linux__ - return false; -#else - // Look for the TracerPid line in /proc/self/status. - // If this is non-zero, we are being ptraced, which is indicative of gdb or strace - // being attached. 
- std::string buf; - Status s = env_util::read_file_to_string(Env::Default(), "/proc/self/status", &buf); - if (!s.ok()) { - LOG(WARNING) << "could not read /proc/self/status: " << s.to_string(); - return false; - } - StringPiece buf_sp(buf.data(), buf.size()); - std::vector lines = Split(buf_sp, "\n"); - for (const auto& l : lines) { - if (!HasPrefixString(l, "TracerPid:")) continue; - std::pair key_val = Split(l, "\t"); - int64_t tracer_pid = -1; - if (!safe_strto64(key_val.second.data(), key_val.second.size(), &tracer_pid)) { - LOG(WARNING) << "Invalid line in /proc/self/status: " << l; - return false; - } - return tracer_pid != 0; - } - LOG(WARNING) << "Could not find TracerPid line in /proc/self/status"; - return false; -#endif // __linux__ -} - -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/util/os_util.h b/be/src/util/os_util.h index 850fb292ec8f80..42b3d61572c125 100644 --- a/be/src/util/os_util.h +++ b/be/src/util/os_util.h @@ -25,7 +25,6 @@ #include #include "common/status.h" -#include "env/env.h" namespace doris { @@ -60,8 +59,4 @@ Status get_thread_stats(int64_t tid, ThreadStats* stats); // want to generate a core dump from an "expected" crash. void disable_core_dumps(); -// Return true if this process appears to be running under a debugger or strace. -// -// This may return false on unsupported (non-Linux) platforms. -bool is_being_debugged(); } // namespace doris diff --git a/be/src/util/path_util.h b/be/src/util/path_util.h index c7389d04f0e4ba..1a1049fcbeb2e9 100644 --- a/be/src/util/path_util.h +++ b/be/src/util/path_util.h @@ -21,11 +21,13 @@ #include #include -#include "env/env.h" +#include "io/fs/fs_utils.h" namespace doris { namespace path_util { +using doris::io::FilePathDesc; + // NOTE: The methods here are only related to path processing, do not involve // any file and IO operations. 
diff --git a/be/src/util/pprof_utils.cpp b/be/src/util/pprof_utils.cpp index da908882974ff6..2025a0a4a7c796 100644 --- a/be/src/util/pprof_utils.cpp +++ b/be/src/util/pprof_utils.cpp @@ -21,7 +21,7 @@ #include "agent/utils.h" #include "gutil/strings/substitute.h" -#include "util/file_utils.h" +#include "io/fs/local_file_system.h" namespace doris { namespace config { @@ -112,7 +112,7 @@ Status PprofUtils::get_readable_profile(const std::string& file_or_content, bool bool rc = util.exec_cmd(final_cmd, &cmd_output, false); // delete raw file - FileUtils::remove(file_or_content); + io::global_local_filesystem()->delete_file(file_or_content); if (!rc) { return Status::InternalError("Failed to execute command: {}", cmd_output); @@ -134,7 +134,10 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds, // check stackcollapse-perf.pl and flamegraph.pl exist std::string stackcollapse_perf_pl = flame_graph_tool_dir + "/stackcollapse-perf.pl"; std::string flamegraph_pl = flame_graph_tool_dir + "/flamegraph.pl"; - if (!FileUtils::check_exist(stackcollapse_perf_pl) || !FileUtils::check_exist(flamegraph_pl)) { + bool exists = false; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(stackcollapse_perf_pl, &exists)); + RETURN_IF_ERROR(io::global_local_filesystem()->exists(flamegraph_pl, &exists)); + if (!exists) { return Status::InternalError( "Missing stackcollapse-perf.pl or flamegraph.pl in FlameGraph"); } @@ -152,7 +155,7 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds, std::string cmd_output; bool rc = util.exec_cmd(cmd.str(), &cmd_output); if (!rc) { - FileUtils::remove(tmp_file.str()); + io::global_local_filesystem()->delete_file(tmp_file.str()); return Status::InternalError("Failed to execute perf command: {}", cmd_output); } @@ -168,8 +171,8 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds, << " | " << flamegraph_pl << " > " << graph_file.str(); rc = util.exec_cmd(gen_cmd.str(), &res_content); if (!rc) { - 
FileUtils::remove(tmp_file.str()); - FileUtils::remove(graph_file.str()); + io::global_local_filesystem()->delete_file(tmp_file.str()); + io::global_local_filesystem()->delete_file(graph_file.str()); return Status::InternalError("Failed to execute perf script command: {}", res_content); } *svg_file_or_content = graph_file.str(); @@ -179,7 +182,7 @@ Status PprofUtils::generate_flamegraph(int32_t sample_seconds, << " | " << flamegraph_pl; rc = util.exec_cmd(gen_cmd.str(), &res_content, false); if (!rc) { - FileUtils::remove(tmp_file.str()); + io::global_local_filesystem()->delete_file(tmp_file.str()); return Status::InternalError("Failed to execute perf script command: {}", res_content); } *svg_file_or_content = res_content; diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp index bbc664237c8580..c8f5350d12a6ec 100644 --- a/be/src/util/s3_uri.cpp +++ b/be/src/util/s3_uri.cpp @@ -29,8 +29,6 @@ const std::string S3URI::_SCHEME_DELIM = "://"; const std::string S3URI::_PATH_DELIM = "/"; const std::string S3URI::_QUERY_DELIM = "?"; const std::string S3URI::_FRAGMENT_DELIM = "#"; -const StringCaseSet S3URI::_VALID_SCHEMES = {"http", "https", "s3", "s3a", "s3n", - "bos", "oss", "cos", "obs"}; /// eg: /// s3://bucket1/path/to/file.txt @@ -42,24 +40,28 @@ Status S3URI::parse() { return Status::InvalidArgument("location is empty"); } std::vector scheme_split = strings::Split(_location, _SCHEME_DELIM); - if (scheme_split.size() != 2) { + std::string rest; + if (scheme_split.size() == 2) { + // has scheme, eg: s3://bucket1/path/to/file.txt + rest = scheme_split[1]; + std::vector authority_split = + strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1)); + if (authority_split.size() != 2) { + return Status::InvalidArgument("Invalid S3 URI: {}", _location); + } + _bucket = authority_split[0]; + _key = authority_split[1]; + } else if (scheme_split.size() == 1) { + // no scheme, eg: path/to/file.txt + _bucket = ""; // unknown + _key = _location; + } else { return 
Status::InvalidArgument("Invalid S3 URI: {}", _location); } - _scheme = scheme_split[0]; - if (_VALID_SCHEMES.find(_scheme) == _VALID_SCHEMES.end()) { - return Status::InvalidArgument("Invalid scheme: {}", _scheme); - } - std::vector authority_split = - strings::Split(scheme_split[1], strings::delimiter::Limit(_PATH_DELIM, 1)); - if (authority_split.size() != 2) { - return Status::InvalidArgument("Invalid S3 URI: {}", _location); - } - _key = authority_split[1]; StripWhiteSpace(&_key); if (_key.empty()) { return Status::InvalidArgument("Invalid S3 key: {}", _location); } - _bucket = authority_split[0]; // Strip query and fragment if they exist std::vector _query_split = strings::Split(_key, _QUERY_DELIM); std::vector _fragment_split = strings::Split(_query_split[0], _FRAGMENT_DELIM); @@ -68,10 +70,7 @@ Status S3URI::parse() { } std::string S3URI::to_string() const { - std::stringstream ss; - ss << "location: " << _location << ", bucket: " << _bucket << ", key: " << _key - << ", schema: " << _scheme; - return ss.str(); + return _location; } } // end namespace doris diff --git a/be/src/util/s3_uri.h b/be/src/util/s3_uri.h index de4bd05547549b..76ffea58247125 100644 --- a/be/src/util/s3_uri.h +++ b/be/src/util/s3_uri.h @@ -24,6 +24,13 @@ namespace doris { +// S3URI can handle following input: +// 1. s3://bucket_name/path/to/file.txt +// bucket: bucket_num +// key: path/to/file.txt +// 2. 
path/to/file.txt +// bucket: "" +// key: path/to/file.txt class S3URI { public: S3URI(const std::string& location) : _location(location) {} @@ -31,7 +38,6 @@ class S3URI { const std::string& get_bucket() const { return _bucket; } const std::string& get_key() const { return _key; } const std::string& get_location() const { return _location; } - const std::string& get_scheme() const { return _scheme; } std::string to_string() const; private: @@ -44,6 +50,5 @@ class S3URI { std::string _location; std::string _bucket; std::string _key; - std::string _scheme; }; } // end namespace doris diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index c797dcb2030812..1c01297548e22f 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -109,45 +109,6 @@ bool S3ClientFactory::is_s3_conf_valid(const std::map& return true; } -std::shared_ptr S3ClientFactory::create( - const std::map& prop) { - if (!is_s3_conf_valid(prop)) { - return nullptr; - } - StringCaseMap properties(prop.begin(), prop.end()); - Aws::Auth::AWSCredentials aws_cred(properties.find(S3_AK)->second, - properties.find(S3_SK)->second); - DCHECK(!aws_cred.IsExpiredOrEmpty()); - if (properties.find(S3_TOKEN) != properties.end()) { - aws_cred.SetSessionToken(properties.find(S3_TOKEN)->second); - } - - Aws::Client::ClientConfiguration aws_config; - aws_config.endpointOverride = properties.find(S3_ENDPOINT)->second; - aws_config.region = properties.find(S3_REGION)->second; - if (properties.find(S3_MAX_CONN_SIZE) != properties.end()) { - aws_config.maxConnections = std::atoi(properties.find(S3_MAX_CONN_SIZE)->second.c_str()); - } - if (properties.find(S3_REQUEST_TIMEOUT_MS) != properties.end()) { - aws_config.requestTimeoutMs = - std::atoi(properties.find(S3_REQUEST_TIMEOUT_MS)->second.c_str()); - } - if (properties.find(S3_CONN_TIMEOUT_MS) != properties.end()) { - aws_config.connectTimeoutMs = - std::atoi(properties.find(S3_CONN_TIMEOUT_MS)->second.c_str()); - } - - aws_config.verifySSL = false; 
- // See https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_s3_client.html - bool use_virtual_addressing = true; - if (properties.find(USE_PATH_STYLE) != properties.end()) { - use_virtual_addressing = properties.find(USE_PATH_STYLE)->second == "true" ? false : true; - } - return std::make_shared( - std::move(aws_cred), std::move(aws_config), - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, use_virtual_addressing); -} - bool S3ClientFactory::is_s3_conf_valid(const S3Conf& s3_conf) { return !s3_conf.ak.empty() && !s3_conf.sk.empty() && !s3_conf.endpoint.empty(); } @@ -156,6 +117,16 @@ std::shared_ptr S3ClientFactory::create(const S3Conf& s3_conf if (!is_s3_conf_valid(s3_conf)) { return nullptr; } + + uint64_t hash = s3_conf.get_hash(); + { + std::lock_guard l(_lock); + auto it = _cache.find(hash); + if (it != _cache.end()) { + return it->second; + } + } + Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk); DCHECK(!aws_cred.IsExpiredOrEmpty()); @@ -171,10 +142,17 @@ std::shared_ptr S3ClientFactory::create(const S3Conf& s3_conf if (s3_conf.connect_timeout_ms > 0) { aws_config.connectTimeoutMs = s3_conf.connect_timeout_ms; } - return std::make_shared( + + std::shared_ptr new_client = std::make_shared( std::move(aws_cred), std::move(aws_config), Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, s3_conf.use_virtual_addressing); + + { + std::lock_guard l(_lock); + _cache[hash] = new_client; + } + return new_client; } Status S3ClientFactory::convert_properties_to_s3_conf( @@ -199,6 +177,10 @@ Status S3ClientFactory::convert_properties_to_s3_conf( s3_conf->connect_timeout_ms = std::atoi(properties.find(S3_CONN_TIMEOUT_MS)->second.c_str()); } + if (s3_uri.get_bucket() == "") { + return Status::InvalidArgument("Invalid S3 URI {}, bucket is not specified", + s3_uri.to_string()); + } s3_conf->bucket = s3_uri.get_bucket(); s3_conf->prefix = ""; diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h index 7ef5ca00c56107..382a203d5b9909 
100644 --- a/be/src/util/s3_util.h +++ b/be/src/util/s3_util.h @@ -24,6 +24,7 @@ #include #include "common/status.h" +#include "gutil/hash/hash.h" namespace Aws { namespace S3 { @@ -63,6 +64,21 @@ struct S3Conf { ak, endpoint, region, bucket, prefix, max_connections, request_timeout_ms, connect_timeout_ms, use_virtual_addressing); } + + uint64_t get_hash() const { + uint64_t hash_code = 0; + hash_code += Fingerprint(ak); + hash_code += Fingerprint(sk); + hash_code += Fingerprint(endpoint); + hash_code += Fingerprint(region); + hash_code += Fingerprint(bucket); + hash_code += Fingerprint(prefix); + hash_code += Fingerprint(max_connections); + hash_code += Fingerprint(request_timeout_ms); + hash_code += Fingerprint(connect_timeout_ms); + hash_code += Fingerprint(use_virtual_addressing); + return hash_code; + } }; class S3ClientFactory { @@ -71,8 +87,6 @@ class S3ClientFactory { static S3ClientFactory& instance(); - std::shared_ptr create(const std::map& prop); - std::shared_ptr create(const S3Conf& s3_conf); static bool is_s3_conf_valid(const std::map& prop); @@ -86,6 +100,8 @@ class S3ClientFactory { S3ClientFactory(); Aws::SDKOptions _aws_options; + std::mutex _lock; + std::unordered_map> _cache; }; } // end namespace doris diff --git a/be/src/vec/core/block_spill_reader.cpp b/be/src/vec/core/block_spill_reader.cpp index c7871f30a50785..be14bc4d55c84b 100644 --- a/be/src/vec/core/block_spill_reader.cpp +++ b/be/src/vec/core/block_spill_reader.cpp @@ -19,9 +19,9 @@ #include "io/file_factory.h" #include "io/fs/file_system.h" +#include "io/fs/local_file_system.h" #include "olap/iterators.h" #include "runtime/block_spill_manager.h" -#include "util/file_utils.h" namespace doris { namespace vectorized { @@ -123,7 +123,7 @@ Status BlockSpillReader::close() { ExecEnv::GetInstance()->block_spill_mgr()->remove(stream_id_); file_reader_.reset(); if (delete_after_read_) { - FileUtils::remove(file_path_); + io::global_local_filesystem()->delete_file(file_path_); } return 
Status::OK(); } diff --git a/be/src/vec/core/block_spill_writer.cpp b/be/src/vec/core/block_spill_writer.cpp index 81dd0ff2c7d7eb..b1cca3a851b92f 100644 --- a/be/src/vec/core/block_spill_writer.cpp +++ b/be/src/vec/core/block_spill_writer.cpp @@ -20,7 +20,6 @@ #include "agent/be_exec_version_manager.h" #include "io/file_factory.h" #include "runtime/runtime_state.h" -#include "util/file_utils.h" namespace doris { namespace vectorized { diff --git a/be/src/vec/exec/format/parquet/parquet_thrift_util.h b/be/src/vec/exec/format/parquet/parquet_thrift_util.h index cccbe0f9c2258a..9c540d67692c0e 100644 --- a/be/src/vec/exec/format/parquet/parquet_thrift_util.h +++ b/be/src/vec/exec/format/parquet/parquet_thrift_util.h @@ -63,6 +63,7 @@ static Status parse_thrift_footer(io::FileReaderSPtr file, FileMetaData** file_m RETURN_IF_ERROR( file->read_at(file_size - PARQUET_FOOTER_SIZE - metadata_size, res, &bytes_read)); DCHECK_EQ(bytes_read, metadata_size); + LOG(INFO) << "yy debug bytes_read: " << bytes_read << ", metadata_size: " << metadata_size; RETURN_IF_ERROR(deserialize_thrift_msg(meta_buff.get(), &metadata_size, true, &t_metadata)); *file_metadata = new FileMetaData(t_metadata); RETURN_IF_ERROR((*file_metadata)->init_schema()); diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 50f01b6b86f90e..8babca9108c952 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -167,21 +167,30 @@ void ParquetReader::close() { } Status ParquetReader::_open_file() { + LOG(INFO) << "yy debug 1"; if (_file_reader == nullptr) { + LOG(INFO) << "yy debug 2"; SCOPED_RAW_TIMER(&_statistics.open_file_time); ++_statistics.open_file_num; RETURN_IF_ERROR(FileFactory::create_file_reader( _profile, _system_properties, _file_description, &_file_system, &_file_reader)); + LOG(INFO) << "yy debug 3"; } + LOG(INFO) << "yy debug 4"; if (_file_metadata == nullptr) { + 
// LOG(INFO) << "yy debug 5: file size: " << _file_reader->size(); SCOPED_RAW_TIMER(&_statistics.parse_footer_time); if (_file_reader->size() == 0) { + LOG(INFO) << "yy debug 6"; return Status::EndOfFile("open file failed, empty parquet file: " + _scan_range.path); } + LOG(INFO) << "yy debug 7"; if (_kv_cache == nullptr) { + LOG(INFO) << "yy debug 8"; _is_file_metadata_owned = true; RETURN_IF_ERROR(parse_thrift_footer(_file_reader, &_file_metadata)); } else { + LOG(INFO) << "yy debug 9"; _is_file_metadata_owned = false; _file_metadata = _kv_cache->get( _meta_cache_key(_file_reader->path()), [&]() -> FileMetaData* { @@ -194,9 +203,12 @@ Status ParquetReader::_open_file() { } return meta; }); + LOG(INFO) << "yy debug 10"; } + LOG(INFO) << "yy debug 11"; if (_file_metadata == nullptr) { + LOG(INFO) << "yy debug 12"; return Status::InternalError("failed to get file meta data: {}", _file_description.path); } @@ -230,6 +242,7 @@ void ParquetReader::_init_file_description() { _file_description.path = _scan_range.path; _file_description.start_offset = _scan_range.start_offset; _file_description.file_size = _scan_range.__isset.file_size ? _scan_range.file_size : 0; + LOG(INFO) << "yy debug _file_description.file_size: " << _scan_range.file_size; } Status ParquetReader::init_reader( diff --git a/be/src/vec/exec/scan/scanner_scheduler.h b/be/src/vec/exec/scan/scanner_scheduler.h index 33627fe5381dac..6b78d64b6ae792 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.h +++ b/be/src/vec/exec/scan/scanner_scheduler.h @@ -41,7 +41,6 @@ namespace doris::vectorized { // Each Scanner will act as a producer, read a group of blocks and put them into // the corresponding block queue. // The corresponding ScanNode will act as a consumer to consume blocks from the block queue. 
-class Env; class ScannerScheduler { public: ScannerScheduler(); diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp index 5a109fa7115003..13eb1ab61627a2 100644 --- a/be/src/vec/runtime/vfile_result_writer.cpp +++ b/be/src/vec/runtime/vfile_result_writer.cpp @@ -23,12 +23,12 @@ #include "gutil/strings/substitute.h" #include "io/file_factory.h" #include "io/fs/file_writer.h" +#include "io/fs/local_file_system.h" #include "runtime/buffer_control_block.h" #include "runtime/descriptors.h" #include "runtime/large_int_value.h" #include "runtime/runtime_state.h" #include "service/backend_options.h" -#include "util/file_utils.h" #include "util/mysql_global.h" #include "util/mysql_row_buffer.h" #include "vec/common/string_ref.h" @@ -97,9 +97,10 @@ Status VFileResultWriter::_get_success_file_name(std::string* file_name) { // Because the file path is currently arbitrarily specified by the user, // Doris is not responsible for ensuring the correctness of the path. // This is just to prevent overwriting the existing file. - if (FileUtils::check_exist(*file_name)) { - return Status::InternalError("File already exists: " + *file_name + - ". Host: " + BackendOptions::get_localhost()); + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(*file_name, &exists)); + if (exists) { + return Status::InternalError("File already exists: {}", *file_name); } } @@ -155,9 +156,10 @@ Status VFileResultWriter::_get_next_file_name(std::string* file_name) { // Because the file path is currently arbitrarily specified by the user, // Doris is not responsible for ensuring the correctness of the path. // This is just to prevent overwriting the existing file. - if (FileUtils::check_exist(*file_name)) { - return Status::InternalError("File already exists: " + *file_name + - ". 
Host: " + BackendOptions::get_localhost()); + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(*file_name, &exists)); + if (exists) { + return Status::InternalError("File already exists: {}", *file_name); } } diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index f8f12d024937a3..a3721ae2cd4bac 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -32,9 +32,6 @@ set(COMMON_TEST_FILES common/config_test.cpp common/exception_test.cpp ) -set(ENV_TEST_FILES - env/env_posix_test.cpp -) set(EXEC_TEST_FILES vec/exec/parquet/parquet_thrift_test.cpp @@ -71,7 +68,8 @@ set(HTTP_TEST_FILES set(IO_TEST_FILES io/cache/remote_file_cache_test.cpp io/cache/file_block_cache_test.cpp - io/fs/file_system_test.cpp + io/fs/local_file_system_test.cpp + io/fs/remote_file_system_test.cpp ) set(OLAP_TEST_FILES olap/engine_storage_migration_task_test.cpp @@ -83,7 +81,7 @@ set(OLAP_TEST_FILES olap/bloom_filter_test.cpp olap/itoken_extractor_test.cpp olap/file_header_test.cpp - olap/file_utils_test.cpp + #olap/file_utils_test.cpp olap/cumulative_compaction_policy_test.cpp #olap/row_cursor_test.cpp olap/skiplist_test.cpp @@ -136,7 +134,6 @@ set(RUNTIME_TEST_FILES # runtime/result_buffer_mgr_test.cpp # runtime/parallel_executor_test.cpp # runtime/dpp_sink_test.cpp - # runtime/tmp_file_mgr_test.cpp # runtime/export_task_mgr_test.cpp runtime/mem_pool_test.cpp runtime/decimalv2_value_test.cpp @@ -171,7 +168,6 @@ set(UTIL_TEST_FILES util/coding_test.cpp util/crc32c_test.cpp util/lru_cache_util_test.cpp - util/filesystem_util_test.cpp util/cidr_test.cpp util/metrics_test.cpp util/doris_metrics_test.cpp @@ -196,7 +192,6 @@ set(UTIL_TEST_FILES util/utf8_check_test.cpp util/cgroup_util_test.cpp util/path_util_test.cpp - util/file_cache_test.cpp util/parse_util_test.cpp util/countdown_latch_test.cpp util/scoped_cleanup_test.cpp @@ -272,7 +267,6 @@ set(VEC_TEST_FILES add_executable(doris_be_test ${AGENT_TEST_FILES} ${COMMON_TEST_FILES} - 
${ENV_TEST_FILES} ${EXEC_TEST_FILES} ${EXPRS_TEST_FILES} ${GEO_TEST_FILES} diff --git a/be/test/env/env_posix_test.cpp b/be/test/env/env_posix_test.cpp deleted file mode 100644 index fd900cb3989945..00000000000000 --- a/be/test/env/env_posix_test.cpp +++ /dev/null @@ -1,224 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include - -#include - -#include "common/logging.h" -#include "env/env.h" -#include "util/file_utils.h" - -namespace doris { -using namespace ErrorCode; - -class EnvPosixTest : public testing::Test { -public: - EnvPosixTest() {} - virtual ~EnvPosixTest() {} - void SetUp() override { - auto st = FileUtils::create_dir("./ut_dir/env_posix"); - EXPECT_TRUE(st.ok()); - } - void TearDown() override {} -}; - -TEST_F(EnvPosixTest, file_path_desc) { - FilePathDesc path_desc("/local"); - path_desc.storage_medium = TStorageMedium::S3; - path_desc.remote_path = "/remote"; - FilePathDescStream path_desc_stream; - path_desc_stream << path_desc << "/test" - << "/" << 1; - FilePathDesc dest_path_desc = path_desc_stream.path_desc(); - EXPECT_EQ("/local/test/1", dest_path_desc.filepath); - EXPECT_EQ("/remote/test/1", dest_path_desc.remote_path); -} - -TEST_F(EnvPosixTest, random_access) { - std::string fname = "./ut_dir/env_posix/random_access"; - std::unique_ptr wfile; - auto env = Env::Default(); - auto st = env->new_writable_file(fname, &wfile); - EXPECT_TRUE(st.ok()); - st = wfile->pre_allocate(1024); -#ifndef __APPLE__ - EXPECT_TRUE(st.ok()); -#else - EXPECT_FALSE(st.ok()); -#endif - // write data - Slice field1("123456789"); - st = wfile->append(field1); - EXPECT_TRUE(st.ok()); - std::string buf; - for (int i = 0; i < 100; ++i) { - buf.push_back((char)i); - } - st = wfile->append(buf); - EXPECT_TRUE(st.ok()); - Slice abc("abc"); - Slice bcd("bcd"); - Slice slices[2] {abc, bcd}; - st = wfile->appendv(slices, 2); - EXPECT_TRUE(st.ok()); - st = wfile->flush(WritableFile::FLUSH_ASYNC); - EXPECT_TRUE(st.ok()); - st = wfile->sync(); - EXPECT_TRUE(st.ok()); - st = wfile->close(); - EXPECT_TRUE(st.ok()); - - EXPECT_EQ(115, wfile->size()); - - uint64_t size; - st = env->get_file_size(fname, &size); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(115, size); - { - char mem[1024]; - std::unique_ptr rfile; - st = env->new_random_access_file(fname, &rfile); - EXPECT_TRUE(st.ok()); - - Slice 
slice1(mem, 9); - Slice slice2(mem + 9, 100); - Slice slice3(mem + 9 + 100, 3); - - Slice read_slices[3] {slice1, slice2, slice3}; - st = rfile->readv_at(0, read_slices, 3); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("123456789", std::string(slice1.data, slice1.size).c_str()); - EXPECT_STREQ("abc", std::string(slice3.data, slice3.size).c_str()); - - Slice slice4(mem, 3); - st = rfile->read_at(112, &slice4); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bcd", std::string(slice4.data, slice4.size).c_str()); - - // end of file - st = rfile->read_at(114, &slice4); - EXPECT_EQ(END_OF_FILE, st.code()); - LOG(INFO) << "st=" << st.to_string(); - } -} - -TEST_F(EnvPosixTest, random_rw) { - std::string fname = "./ut_dir/env_posix/random_rw"; - std::unique_ptr wfile; - auto env = Env::Default(); - auto st = env->new_random_rw_file(fname, &wfile); - EXPECT_TRUE(st.ok()); - // write data - Slice field1("123456789"); - st = wfile->write_at(0, field1); - EXPECT_TRUE(st.ok()); - std::string buf; - for (int i = 0; i < 100; ++i) { - buf.push_back((char)i); - } - st = wfile->write_at(9, buf); - EXPECT_TRUE(st.ok()); - Slice abc("abc"); - Slice bcd("bcd"); - Slice slices[2] {abc, bcd}; - st = wfile->writev_at(0, slices, 2); - EXPECT_TRUE(st.ok()); - - uint64_t size; - st = wfile->size(&size); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(109, size); - - st = wfile->flush(RandomRWFile::FLUSH_ASYNC, 0, 0); - EXPECT_TRUE(st.ok()); - st = wfile->sync(); - EXPECT_TRUE(st.ok()); - st = wfile->close(); - EXPECT_TRUE(st.ok()); - - st = env->get_file_size(fname, &size); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(109, size); - { - char mem[1024]; - std::unique_ptr rfile; - RandomRWFileOptions opts; - opts.mode = Env::MUST_EXIST; - st = env->new_random_rw_file(opts, fname, &rfile); - EXPECT_TRUE(st.ok()); - - Slice slice1(mem, 3); - Slice slice2(mem + 3, 3); - Slice slice3(mem + 6, 3); - - Slice read_slices[3] {slice1, slice2, slice3}; - st = rfile->readv_at(0, read_slices, 3); - LOG(INFO) << st.to_string(); - 
EXPECT_TRUE(st.ok()); - EXPECT_STREQ("abc", std::string(slice1.data, slice1.size).c_str()); - EXPECT_STREQ("bcd", std::string(slice2.data, slice2.size).c_str()); - EXPECT_STREQ("789", std::string(slice3.data, slice3.size).c_str()); - - Slice slice4(mem, 100); - st = rfile->read_at(9, slice4); - EXPECT_TRUE(st.ok()); - - // end of file - st = rfile->read_at(102, slice4); - EXPECT_EQ(END_OF_FILE, st.code()); - LOG(INFO) << "st=" << st.to_string(); - } -} - -TEST_F(EnvPosixTest, iterate_dir) { - std::string dir_path = "./ut_dir/env_posix/iterate_dir"; - FileUtils::remove_all(dir_path); - auto st = Env::Default()->create_dir_if_missing(dir_path); - EXPECT_TRUE(st.ok()); - - st = Env::Default()->create_dir_if_missing(dir_path + "/abc"); - EXPECT_TRUE(st.ok()); - - st = Env::Default()->create_dir_if_missing(dir_path + "/123"); - EXPECT_TRUE(st.ok()); - - { - std::vector children; - st = Env::Default()->get_children(dir_path, &children); - EXPECT_EQ(4, children.size()); - std::sort(children.begin(), children.end()); - - EXPECT_STREQ(".", children[0].c_str()); - EXPECT_STREQ("..", children[1].c_str()); - EXPECT_STREQ("123", children[2].c_str()); - EXPECT_STREQ("abc", children[3].c_str()); - } - { - std::vector children; - st = FileUtils::list_files(Env::Default(), dir_path, &children); - EXPECT_EQ(2, children.size()); - std::sort(children.begin(), children.end()); - - EXPECT_STREQ("123", children[0].c_str()); - EXPECT_STREQ("abc", children[1].c_str()); - } - - FileUtils::remove_all(dir_path); -} - -} // namespace doris diff --git a/be/test/io/cache/remote_file_cache_test.cpp b/be/test/io/cache/remote_file_cache_test.cpp index 121c8b05aeef46..e38b10bafabff9 100644 --- a/be/test/io/cache/remote_file_cache_test.cpp +++ b/be/test/io/cache/remote_file_cache_test.cpp @@ -25,6 +25,7 @@ #include "gen_cpp/olap_file.pb.h" #include "gtest/gtest.h" +#include "io/fs/local_file_system.h" #include "io/fs/s3_common.h" #include "io/fs/s3_file_system.h" #include 
"olap/comparison_predicate.h" @@ -44,7 +45,6 @@ #include "runtime/exec_env.h" #include "runtime/mem_pool.h" #include "runtime/memory/mem_tracker.h" -#include "util/file_utils.h" #include "util/slice.h" namespace doris { @@ -67,10 +67,7 @@ static std::string resource_id = "10000"; class RemoteFileCacheTest : public ::testing::Test { protected: static void SetUpTestSuite() { - if (FileUtils::check_exist(kSegmentDir)) { - EXPECT_TRUE(FileUtils::remove_all(kSegmentDir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(kSegmentDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(kSegmentDir).ok()); doris::ExecEnv::GetInstance()->init_download_cache_required_components(); @@ -80,9 +77,7 @@ class RemoteFileCacheTest : public ::testing::Test { } static void TearDownTestSuite() { - if (FileUtils::check_exist(kSegmentDir)) { - EXPECT_TRUE(FileUtils::remove_all(kSegmentDir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kSegmentDir).ok()); if (k_engine != nullptr) { k_engine->stop(); delete k_engine; diff --git a/be/test/io/fs/local_file_system_test.cpp b/be/test/io/fs/local_file_system_test.cpp new file mode 100644 index 00000000000000..90cd5d7391a028 --- /dev/null +++ b/be/test/io/fs/local_file_system_test.cpp @@ -0,0 +1,460 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "io/fs/local_file_system.h" + +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "io/fs/file_reader.h" +#include "io/fs/file_writer.h" + +namespace doris { + +class LocalFileSystemTest : public testing::Test { +public: + virtual void SetUp() { + EXPECT_TRUE( + io::global_local_filesystem()->delete_and_create_directory(_s_test_data_path).ok()); + } + + Status save_string_file(const std::filesystem::path& filename, const std::string& content) { + io::FileWriterPtr file_writer; + RETURN_IF_ERROR(io::global_local_filesystem()->create_file(filename, &file_writer)); + RETURN_IF_ERROR(file_writer->append(content)); + return file_writer->close(); + } + + bool check_exists(const std::string& file) { + bool exists = true; + EXPECT_TRUE(io::global_local_filesystem()->exists(file, &exists).ok()); + return exists; + } + + bool is_dir(const std::string& path) { + bool is_dir = true; + EXPECT_TRUE(io::global_local_filesystem()->is_directory(path, &is_dir).ok()); + return is_dir; + } + + Status list_dirs_files(const std::string& path, std::vector* dirs, + std::vector* files) { + bool only_file = true; + if (dirs != nullptr) { + only_file = false; + } + std::vector file_infos; + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->list(path, only_file, &file_infos, &exists)); + for (auto& file_info : file_infos) { + if (file_info.is_file && files != nullptr) { + files->push_back(file_info.file_name); + } + if (!file_info.is_file && dirs != nullptr) { + dirs->push_back(file_info.file_name); + } + } + return Status::OK(); + } + + Status delete_file_paths(const std::vector& ps) { + for (auto& p : ps) { + bool exists = true; + RETURN_IF_ERROR(io::global_local_filesystem()->exists(p, &exists)); + if (!exists) { + continue; + } + bool is_dir = true; + 
RETURN_IF_ERROR(io::global_local_filesystem()->is_directory(p, &is_dir)); + if (is_dir) { + RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(p)); + } else { + RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(p)); + } + } + return Status::OK(); + } + + // delete the mock cgroup folder + virtual void TearDown() { + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_s_test_data_path).ok()); + } + + static std::string _s_test_data_path; +}; + +std::string LocalFileSystemTest::_s_test_data_path = "./file_utils_testxxxx123"; + +TEST_F(LocalFileSystemTest, TestRemove) { + // remove_all + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + EXPECT_FALSE(check_exists("./file_test")); + + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/123/456/789").ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/abc/def/zxc").ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/abc/123").ok()); + + save_string_file("./file_test/s1", "123"); + save_string_file("./file_test/123/s2", "123"); + + EXPECT_TRUE(check_exists("./file_test")); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + EXPECT_FALSE(check_exists("./file_test")); + + // remove + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/abc/123").ok()); + save_string_file("./file_test/abc/123/s2", "123"); + + EXPECT_TRUE(check_exists("./file_test/abc/123/s2")); + EXPECT_TRUE(io::global_local_filesystem()->delete_file("./file_test/abc/123/s2").ok()); + EXPECT_FALSE(check_exists("./file_test/abc/123/s2")); + + EXPECT_TRUE(check_exists("./file_test/abc/123")); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test/abc/123").ok()); + EXPECT_FALSE(check_exists("./file_test/abc/123")); + + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + EXPECT_FALSE(check_exists("./file_test")); 
+ + // remove paths + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/123/456/789").ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/abc/def/zxc").ok()); + save_string_file("./file_test/s1", "123"); + save_string_file("./file_test/s2", "123"); + + std::vector ps; + ps.push_back("./file_test/123/456/789"); + ps.push_back("./file_test/123/456"); + ps.push_back("./file_test/123"); + + EXPECT_TRUE(check_exists("./file_test/123")); + EXPECT_TRUE(delete_file_paths(ps).ok()); + EXPECT_FALSE(check_exists("./file_test/123")); + + ps.clear(); + ps.push_back("./file_test/s1"); + ps.push_back("./file_test/abc/def"); + + EXPECT_TRUE(delete_file_paths(ps).ok()); + EXPECT_FALSE(check_exists("./file_test/s1")); + EXPECT_FALSE(check_exists("./file_test/abc/def/")); + + ps.clear(); + ps.push_back("./file_test/abc/def/zxc"); + ps.push_back("./file_test/s2"); + ps.push_back("./file_test/abc/def"); + ps.push_back("./file_test/abc"); + + EXPECT_TRUE(delete_file_paths(ps).ok()); + EXPECT_FALSE(check_exists("./file_test/s2")); + EXPECT_FALSE(check_exists("./file_test/abc")); + + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); +} + +TEST_F(LocalFileSystemTest, TestCreateDir) { + // normal + std::string path = "./file_test/123/456/789"; + io::global_local_filesystem()->delete_directory("./file_test"); + EXPECT_FALSE(check_exists(path)); + + EXPECT_TRUE(io::global_local_filesystem()->create_directory(path).ok()); + + EXPECT_TRUE(check_exists(path)); + EXPECT_TRUE(is_dir("./file_test")); + EXPECT_TRUE(is_dir("./file_test/123")); + EXPECT_TRUE(is_dir("./file_test/123/456")); + EXPECT_TRUE(is_dir("./file_test/123/456/789")); + + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + + // normal + path = "./file_test/123/456/789/"; + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + EXPECT_FALSE(check_exists(path)); + + 
EXPECT_TRUE(io::global_local_filesystem()->create_directory(path).ok()); + + EXPECT_TRUE(check_exists(path)); + EXPECT_TRUE(is_dir("./file_test")); + EXPECT_TRUE(is_dir("./file_test/123")); + EXPECT_TRUE(is_dir("./file_test/123/456")); + EXPECT_TRUE(is_dir("./file_test/123/456/789")); + + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + + // absolute path; + std::string real_path; + EXPECT_TRUE(io::global_local_filesystem()->canonicalize(".", &real_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(real_path + "/file_test/absolute/path/123/asdf") + .ok()); + EXPECT_TRUE(is_dir("./file_test/absolute/path/123/asdf")); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory("./file_test").ok()); + + char filename[] = "temp-XXXXXX"; + // Setup a temporary directory with one subdir + std::string dir_name = mkdtemp(filename); + io::Path dir {dir_name}; + io::Path subdir1 = dir / "path1"; + io::Path subdir2 = dir / "path2"; + io::Path subdir3 = dir / "a" / "longer" / "path"; + EXPECT_TRUE(io::global_local_filesystem()->create_directory(subdir1).ok()); + // Test error cases by removing write permissions on root dir to prevent + // creation/deletion of subdirs + chmod(dir.string().c_str(), 0); + if (getuid() == 0) { // User root + EXPECT_TRUE(io::global_local_filesystem()->create_directory(subdir1).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(subdir2).ok()); + } else { // User other + EXPECT_FALSE(io::global_local_filesystem()->create_directory(subdir1).ok()); + EXPECT_FALSE(io::global_local_filesystem()->create_directory(subdir2).ok()); + } + // Test success cases by adding write permissions back + chmod(dir.string().c_str(), S_IRWXU); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(subdir1).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(subdir2).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(subdir1).ok()); + 
EXPECT_TRUE(io::global_local_filesystem()->create_directory(subdir2).ok()); + // Check that directories were created + bool is_dir = false; + EXPECT_TRUE(io::global_local_filesystem()->is_directory(subdir1, &is_dir).ok()); + EXPECT_TRUE(is_dir); + EXPECT_TRUE(io::global_local_filesystem()->is_directory(subdir2, &is_dir).ok()); + EXPECT_TRUE(is_dir); + EXPECT_FALSE(io::global_local_filesystem()->is_directory(subdir3, &is_dir).ok()); + // Check that nested directories can be created + EXPECT_TRUE(io::global_local_filesystem()->create_directory(subdir3).ok()); + EXPECT_TRUE(io::global_local_filesystem()->is_directory(subdir3, &is_dir).ok()); + EXPECT_TRUE(is_dir); + // Cleanup + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(dir).ok()); +} + +TEST_F(LocalFileSystemTest, TestContainPath) { + { + std::string parent("/a/b"); + std::string sub("/a/b/c"); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, sub)); + EXPECT_FALSE(io::global_local_filesystem()->contain_path(sub, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(sub, sub)); + } + + { + std::string parent("/a/b/"); + std::string sub("/a/b/c/"); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, sub)); + EXPECT_FALSE(io::global_local_filesystem()->contain_path(sub, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(sub, sub)); + } + + { + std::string parent("/a///./././/./././b/"); // "/a/b/." 
+ std::string sub("/a/b/../././b/c/"); // "/a/b/c/" + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, sub)); + EXPECT_FALSE(io::global_local_filesystem()->contain_path(sub, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(sub, sub)); + } + + { + // relative path + std::string parent("a/b/"); // "a/b/" + std::string sub("a/b/c/"); // "a/b/c/" + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, sub)); + EXPECT_FALSE(io::global_local_filesystem()->contain_path(sub, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(sub, sub)); + } + { + // relative path + std::string parent("a////./././b/"); // "a/b/" + std::string sub("a/b/../././b/c/"); // "a/b/c/" + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, sub)); + EXPECT_FALSE(io::global_local_filesystem()->contain_path(sub, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(sub, sub)); + } + { + // absolute path and relative path + std::string parent("/a////./././b/"); // "/a/b/" + std::string sub("a/b/../././b/c/"); // "a/b/c/" + EXPECT_FALSE(io::global_local_filesystem()->contain_path(parent, sub)); + EXPECT_FALSE(io::global_local_filesystem()->contain_path(sub, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(parent, parent)); + EXPECT_TRUE(io::global_local_filesystem()->contain_path(sub, sub)); + } +} + +TEST_F(LocalFileSystemTest, TestListDirsFiles) { + std::string path = "./file_test/"; + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(path).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/1").ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/2").ok()); + 
EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/3").ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/4").ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./file_test/5").ok()); + + std::vector dirs; + std::vector files; + + EXPECT_TRUE(list_dirs_files("./file_test", &dirs, &files).ok()); + EXPECT_EQ(5, dirs.size()); + EXPECT_EQ(0, files.size()); + + dirs.clear(); + files.clear(); + + EXPECT_TRUE(list_dirs_files("./file_test", &dirs, nullptr).ok()); + EXPECT_EQ(5, dirs.size()); + EXPECT_EQ(0, files.size()); + + save_string_file("./file_test/f1", "just test"); + save_string_file("./file_test/f2", "just test"); + save_string_file("./file_test/f3", "just test"); + + dirs.clear(); + files.clear(); + + EXPECT_TRUE(list_dirs_files("./file_test", &dirs, &files).ok()); + EXPECT_EQ(5, dirs.size()); + EXPECT_EQ(3, files.size()); + + dirs.clear(); + files.clear(); + + EXPECT_TRUE(list_dirs_files("./file_test", nullptr, &files).ok()); + EXPECT_EQ(0, dirs.size()); + EXPECT_EQ(3, files.size()); + + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(path).ok()); +} + +TEST_F(LocalFileSystemTest, TestRandomAccess) { + std::string fname = "./ut_dir/local_filesystem/random_access"; + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./ut_dir/local_filesystem/").ok()); + io::FileWriterPtr file_writer; + EXPECT_TRUE(io::global_local_filesystem()->create_file(fname, &file_writer).ok()); + Slice field1("123456789"); + EXPECT_TRUE(file_writer->append(field1).ok()); + + std::string buf; + for (int i = 0; i < 100; ++i) { + buf.push_back((char)i); + } + EXPECT_TRUE(file_writer->append(buf).ok()); + Slice abc("abc"); + Slice bcd("bcd"); + Slice slices[2] {abc, bcd}; + EXPECT_TRUE(file_writer->appendv(slices, 2).ok()); + EXPECT_TRUE(file_writer->close().ok()); + + int64_t size; + EXPECT_TRUE(io::global_local_filesystem()->file_size(fname, &size).ok()); + EXPECT_EQ(115, size); + { + 
io::FileReaderSPtr file_reader; + EXPECT_TRUE(io::global_local_filesystem()->open_file(fname, &file_reader).ok()); + + char mem[1024]; + Slice slice1(mem, 9); + Slice slice2(mem + 9, 100); + Slice slice3(mem + 9 + 100, 3); + Slice slice4(mem + 9 + 100 + 3, 3); + size_t bytes_read = 0; + EXPECT_TRUE(file_reader->read_at(0, slice1, &bytes_read).ok()); + EXPECT_STREQ("123456789", std::string(slice1.data, slice1.size).c_str()); + EXPECT_EQ(9, bytes_read); + + EXPECT_TRUE(file_reader->read_at(9, slice2, &bytes_read).ok()); + EXPECT_EQ(100, bytes_read); + + EXPECT_TRUE(file_reader->read_at(109, slice3, &bytes_read).ok()); + EXPECT_STREQ("abc", std::string(slice3.data, slice3.size).c_str()); + EXPECT_EQ(3, bytes_read); + + EXPECT_TRUE(file_reader->read_at(112, slice4, &bytes_read).ok()); + EXPECT_STREQ("bcd", std::string(slice4.data, slice4.size).c_str()); + EXPECT_EQ(3, bytes_read); + + EXPECT_TRUE(file_reader->close().ok()); + } +} + +TEST_F(LocalFileSystemTest, TestRandomWrite) { + std::string fname = "./ut_dir/env_posix/random_rw"; + EXPECT_TRUE(io::global_local_filesystem()->create_directory("./ut_dir/env_posix").ok()); + + io::FileWriterPtr file_writer; + EXPECT_TRUE(io::global_local_filesystem()->create_file(fname, &file_writer).ok()); + + // write data + Slice field1("123456789"); + EXPECT_TRUE(file_writer->write_at(0, field1).ok()); + std::string buf; + for (int i = 0; i < 100; ++i) { + buf.push_back((char)i); + } + EXPECT_TRUE(file_writer->write_at(9, buf).ok()); + Slice abc("abc"); + Slice bcd("bcd"); + Slice slices[2] {abc, bcd}; + EXPECT_TRUE(file_writer->write_at(0, slices[0]).ok()); + EXPECT_TRUE(file_writer->write_at(3, slices[1]).ok()); + EXPECT_TRUE(file_writer->close().ok()); + + int64_t size = 0; + EXPECT_TRUE(io::global_local_filesystem()->file_size(fname, &size).ok()); + EXPECT_EQ(109, size); + { + io::FileReaderSPtr file_reader; + EXPECT_TRUE(io::global_local_filesystem()->open_file(fname, &file_reader).ok()); + + char mem[1024]; + Slice slice1(mem, 
3); + Slice slice2(mem + 3, 3); + Slice slice3(mem + 6, 3); + + size_t bytes_read = 0; + EXPECT_TRUE(file_reader->read_at(0, slice1, &bytes_read).ok()); + EXPECT_STREQ("abc", std::string(slice1.data, slice1.size).c_str()); + EXPECT_EQ(3, bytes_read); + + EXPECT_TRUE(file_reader->read_at(3, slice2, &bytes_read).ok()); + EXPECT_STREQ("bcd", std::string(slice2.data, slice2.size).c_str()); + EXPECT_EQ(3, bytes_read); + + EXPECT_TRUE(file_reader->read_at(6, slice3, &bytes_read).ok()); + EXPECT_STREQ("789", std::string(slice3.data, slice3.size).c_str()); + EXPECT_EQ(3, bytes_read); + + Slice slice4(mem, 100); + EXPECT_TRUE(file_reader->read_at(9, slice4, &bytes_read).ok()); + EXPECT_EQ(100, bytes_read); + + EXPECT_TRUE(file_reader->close().ok()); + } +} +} // namespace doris diff --git a/be/test/io/fs/file_system_test.cpp b/be/test/io/fs/remote_file_system_test.cpp similarity index 84% rename from be/test/io/fs/file_system_test.cpp rename to be/test/io/fs/remote_file_system_test.cpp index 0c5ac73f0f497f..774c1539042e7b 100644 --- a/be/test/io/fs/file_system_test.cpp +++ b/be/test/io/fs/remote_file_system_test.cpp @@ -65,7 +65,7 @@ static std::string broker_location = "hdfs://my_nameservice/user/doris"; #define TestS3FileSystem DISABLED_TestS3FileSystem #define TestBrokerFileSystem DISABLED_TestBrokerFileSystem -class FileSystemTest : public testing::Test { +class RemoteFileSystemTest : public testing::Test { public: virtual void SetUp() { s3_prop.emplace("AWS_ACCESS_KEY", ak); @@ -94,9 +94,9 @@ class FileSystemTest : public testing::Test { TNetworkAddress broker_addr; }; -TEST_F(FileSystemTest, TestBrokerFileSystem) { +TEST_F(RemoteFileSystemTest, TestBrokerFileSystem) { std::shared_ptr fs; - CHECK_STATUS_OK(io::BrokerFileSystem::create(broker_addr, hdfs_prop, 0, &fs)); + CHECK_STATUS_OK(io::BrokerFileSystem::create(broker_addr, hdfs_prop, &fs)); // delete directory io::Path delete_path = broker_location + "/tmp1"; @@ -131,7 +131,7 @@ TEST_F(FileSystemTest, 
TestBrokerFileSystem) { ASSERT_TRUE(exists); // file size - size_t file_size = 0; + int64_t file_size = 0; CHECK_STATUS_OK(fs->file_size(file1, &file_size)); // file size is not implemented ASSERT_EQ(0, file_size); @@ -243,7 +243,7 @@ TEST_F(FileSystemTest, TestBrokerFileSystem) { ASSERT_EQ("abc", download_content); } -TEST_F(FileSystemTest, TestHdfsFileSystem) { +TEST_F(RemoteFileSystemTest, TestHdfsFileSystem) { THdfsParams hdfs_params = parse_properties(hdfs_prop); std::shared_ptr fs; CHECK_STATUS_OK(io::HdfsFileSystem::create(hdfs_params, hdfs_location, &fs)); @@ -281,7 +281,7 @@ TEST_F(FileSystemTest, TestHdfsFileSystem) { ASSERT_TRUE(exists); // file size - size_t file_size = 0; + int64_t file_size = 0; CHECK_STATUS_OK(fs->file_size(file1, &file_size)); ASSERT_EQ(7, file_size); @@ -391,105 +391,7 @@ TEST_F(FileSystemTest, TestHdfsFileSystem) { ASSERT_EQ("abc", download_content); } -TEST_F(FileSystemTest, TestLocalFileSystem) { - std::shared_ptr fs = io::LocalFileSystem::create("./"); - // delete directory - io::Path delete_path = "tmp1"; - CHECK_STATUS_OK(fs->delete_directory(delete_path)); - io::Path delete_path2 = "tmp2"; - CHECK_STATUS_OK(fs->delete_directory(delete_path2)); - // create directory - io::Path create_path = delete_path; - CHECK_STATUS_OK(fs->create_directory(create_path)); - // write file - std::string file1 = "tmp1/file1.txt"; - io::FileWriterPtr writer; - CHECK_STATUS_OK(fs->create_file(file1, &writer)); - CHECK_STATUS_OK(writer->append({"content"})); - CHECK_STATUS_OK(writer->close()); - // read file - io::FileReaderSPtr reader; - CHECK_STATUS_OK(fs->open_file(file1, &reader)); - char read_buf[10]; - size_t bytes_read = 0; - CHECK_STATUS_OK(reader->read_at(0, {read_buf, 10}, &bytes_read)); - ASSERT_EQ(7, bytes_read); - - // exist - bool exists = false; - CHECK_STATUS_OK(fs->exists(file1, &exists)); - ASSERT_TRUE(exists); - std::string file_non_exist = "non-exist"; - CHECK_STATUS_OK(fs->exists(file_non_exist, &exists)); - 
ASSERT_FALSE(exists); - CHECK_STATUS_OK(fs->exists(delete_path, &exists)); - ASSERT_TRUE(exists); - - // file size - size_t file_size = 0; - CHECK_STATUS_OK(fs->file_size(file1, &file_size)); - ASSERT_EQ(7, file_size); - - // write more files - for (int i = 0; i < 10; i++) { - std::string tmp_file = fmt::format("tmp1/tmp_file_{}", i); - io::FileWriterPtr writer; - CHECK_STATUS_OK(fs->create_file(tmp_file, &writer)); - CHECK_STATUS_OK(writer->append({"content"})); - CHECK_STATUS_OK(writer->close()); - } - - // list files - std::vector files; - CHECK_STATUS_OK(fs->list(delete_path, true, &files, &exists)); - ASSERT_TRUE(exists); - ASSERT_EQ(11, files.size()); - for (auto& file_info : files) { - std::cout << "file name: " << file_info.file_name << std::endl; - ASSERT_EQ(7, file_info.file_size); - ASSERT_TRUE(file_info.is_file); - } - std::string non_exist_path = "non_exist/"; - files.clear(); - CHECK_STATUS_OK(fs->list(non_exist_path, true, &files, &exists)); - ASSERT_FALSE(exists); - ASSERT_EQ(0, files.size()); - - // rename - std::string src_name = file1; - std::string dst_name = "tmp1/new_file1.txt"; - CHECK_STATUS_OK(fs->rename(src_name, dst_name)); - CHECK_STATUS_OK(fs->exists(src_name, &exists)); - ASSERT_FALSE(exists); - CHECK_STATUS_OK(fs->exists(dst_name, &exists)); - ASSERT_TRUE(exists); - - // rename dir - std::string src_dir = delete_path; - std::string dst_dir = "tmp2"; - CHECK_STATUS_OK(fs->rename_dir(src_dir, dst_dir)); - CHECK_STATUS_OK(fs->exists(dst_name, &exists)); - ASSERT_FALSE(exists); - std::string new_dst_name = "tmp2/new_file1.txt"; - CHECK_STATUS_OK(fs->exists(new_dst_name, &exists)); - ASSERT_TRUE(exists); - - // batch delete - std::vector delete_files; - for (int i = 0; i < 10; i++) { - std::string tmp_file = fmt::format("tmp2/tmp_file_{}", i); - delete_files.emplace_back(tmp_file); - CHECK_STATUS_OK(fs->batch_delete(delete_files)); - } - - // list to check - files.clear(); - CHECK_STATUS_OK(fs->list(dst_dir, true, &files, &exists)); - 
ASSERT_TRUE(exists); - ASSERT_EQ(1, files.size()); -} - -TEST_F(FileSystemTest, TestS3FileSystem) { +TEST_F(RemoteFileSystemTest, TestS3FileSystem) { S3Conf s3_conf; S3URI s3_uri(s3_location); CHECK_STATUS_OK(s3_uri.parse()); @@ -530,7 +432,7 @@ TEST_F(FileSystemTest, TestS3FileSystem) { ASSERT_FALSE(exists); // file size - size_t file_size = 0; + int64_t file_size = 0; CHECK_STATUS_OK(fs->file_size(file1, &file_size)); ASSERT_EQ(7, file_size); diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index eab780df865d6f..88669e1e83996a 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -27,6 +27,7 @@ #include #include +#include "io/fs/local_file_system.h" #include "olap/olap_define.h" #include "olap/options.h" #include "olap/push_handler.h" @@ -34,7 +35,6 @@ #include "olap/storage_engine.h" #include "olap/utils.h" #include "util/cpu_info.h" -#include "util/file_utils.h" using namespace std; using namespace doris; @@ -50,9 +50,13 @@ static void set_up() { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = string(buffer) + "/data_test"; - FileUtils::remove_all(config::storage_root_path); - FileUtils::remove_all(string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX); - FileUtils::create_dir(config::storage_root_path); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_directory(string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX) + .ok()); + std::vector paths; paths.emplace_back(config::storage_root_path, -1); config::min_file_descriptor_number = 1000; @@ -70,8 +74,10 @@ static void tear_down() { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = string(buffer) + "/data_test"; - FileUtils::remove_all(config::storage_root_path); - 
FileUtils::remove_all(string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(config::storage_root_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_directory(string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX) + .ok()); if (k_engine != nullptr) { k_engine->stop(); delete k_engine; @@ -253,8 +259,9 @@ class TestDeleteConditionHandler : public testing::Test { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = string(buffer) + "/data_delete_condition"; - FileUtils::remove_all(config::storage_root_path); - EXPECT_TRUE(FileUtils::create_dir(config::storage_root_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); // 1. Prepare for query split key. // create base tablet @@ -280,7 +287,8 @@ class TestDeleteConditionHandler : public testing::Test { dup_tablet.reset(); StorageEngine::instance()->tablet_manager()->drop_tablet(_create_tablet.tablet_id, _create_tablet.replica_id, false); - EXPECT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); + EXPECT_TRUE( + io::global_local_filesystem()->delete_directory(config::storage_root_path).ok()); } std::string _tablet_path; @@ -427,8 +435,9 @@ class TestDeleteConditionHandler2 : public testing::Test { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = string(buffer) + "/data_delete_condition"; - FileUtils::remove_all(config::storage_root_path); - EXPECT_TRUE(FileUtils::create_dir(config::storage_root_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); // 1. Prepare for query split key. 
// create base tablet @@ -446,7 +455,8 @@ class TestDeleteConditionHandler2 : public testing::Test { tablet.reset(); k_engine->tablet_manager()->drop_tablet(_create_tablet.tablet_id, _create_tablet.replica_id, false); - EXPECT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); + EXPECT_TRUE( + io::global_local_filesystem()->delete_directory(config::storage_root_path).ok()); } std::string _tablet_path; @@ -799,8 +809,9 @@ class TestDeleteHandler : public testing::Test { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = string(buffer) + "/data_delete_condition"; - FileUtils::remove_all(config::storage_root_path); - EXPECT_TRUE(FileUtils::create_dir(config::storage_root_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); // 1. Prepare for query split key. // create base tablet @@ -842,7 +853,8 @@ class TestDeleteHandler : public testing::Test { _delete_handler.finalize(); StorageEngine::instance()->tablet_manager()->drop_tablet(_create_tablet.tablet_id, _create_tablet.replica_id, false); - EXPECT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); + EXPECT_TRUE( + io::global_local_filesystem()->delete_directory(config::storage_root_path).ok()); } void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) { diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp index c4206a5120dc00..768b79975ce95b 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -27,6 +27,7 @@ #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/Types_types.h" #include "gen_cpp/internal_service.pb.h" +#include "io/fs/local_file_system.h" #include "olap/field.h" #include "olap/options.h" #include "olap/rowset/beta_rowset.h" @@ -37,7 +38,6 @@ #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" #include "runtime/mem_pool.h" -#include 
"util/file_utils.h" namespace doris { @@ -52,8 +52,7 @@ static void set_up() { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = std::string(buffer) + "/data_test"; - FileUtils::remove_all(config::storage_root_path); - FileUtils::create_dir(config::storage_root_path); + io::global_local_filesystem()->delete_and_create_directory(config::storage_root_path); std::vector paths; paths.emplace_back(config::storage_root_path, -1); @@ -74,7 +73,8 @@ static void tear_down() { k_engine = nullptr; } EXPECT_EQ(system("rm -rf ./data_test"), 0); - FileUtils::remove_all(std::string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX); + io::global_local_filesystem()->delete_directory(std::string(getenv("DORIS_HOME")) + "/" + + UNUSED_PREFIX); } static void create_tablet_request(int64_t tablet_id, int32_t schema_hash, diff --git a/be/test/olap/engine_storage_migration_task_test.cpp b/be/test/olap/engine_storage_migration_task_test.cpp index 5d0e91eb6436b7..f862957a242a2b 100644 --- a/be/test/olap/engine_storage_migration_task_test.cpp +++ b/be/test/olap/engine_storage_migration_task_test.cpp @@ -27,6 +27,7 @@ #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/Types_types.h" #include "gen_cpp/internal_service.pb.h" +#include "io/fs/local_file_system.h" #include "olap/delta_writer.h" #include "olap/field.h" #include "olap/options.h" @@ -37,7 +38,6 @@ #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" #include "runtime/mem_pool.h" -#include "util/file_utils.h" namespace doris { @@ -54,11 +54,8 @@ static void set_up() { path2 = std::string(buffer) + "/data_test_2"; config::storage_root_path = path1 + ";" + path2; config::min_file_descriptor_number = 1000; - FileUtils::remove_all(path1); - FileUtils::create_dir(path1); - - FileUtils::remove_all(path2); - FileUtils::create_dir(path2); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(path1).ok()); + 
EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(path2).ok()); std::vector paths; paths.emplace_back(path1, -1); paths.emplace_back(path2, -1); @@ -80,7 +77,9 @@ static void tear_down() { } EXPECT_EQ(system("rm -rf ./data_test_1"), 0); EXPECT_EQ(system("rm -rf ./data_test_2"), 0); - FileUtils::remove_all(std::string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_directory(std::string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX) + .ok()); } static void create_tablet_request_with_sequence_col(int64_t tablet_id, int32_t schema_hash, diff --git a/be/test/olap/file_utils_test.cpp b/be/test/olap/file_utils_test.cpp deleted file mode 100644 index 85634182d944f8..00000000000000 --- a/be/test/olap/file_utils_test.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "util/file_utils.h" - -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "env/env.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "olap/file_header.h" - -using ::testing::_; -using ::testing::Return; -using ::testing::SetArgPointee; -using std::string; - -namespace doris { - -class FileUtilsTest : public testing::Test { -public: - // create a mock cgroup folder - virtual void SetUp() { - EXPECT_FALSE(std::filesystem::exists(_s_test_data_path)); - // create a mock cgroup path - EXPECT_TRUE(std::filesystem::create_directory(_s_test_data_path)); - } - void save_string_file(const std::filesystem::path& filename, const std::string& content) { - std::ofstream file; - file.exceptions(std::ofstream::failbit | std::ofstream::badbit); - file.open(filename, std::ios_base::binary); - file.write(content.c_str(), content.size()); - } - - // delete the mock cgroup folder - virtual void TearDown() { EXPECT_TRUE(std::filesystem::remove_all(_s_test_data_path)); } - - static std::string _s_test_data_path; -}; - -std::string FileUtilsTest::_s_test_data_path = "./file_utils_testxxxx123"; - -TEST_F(FileUtilsTest, TestCopyFile) { - std::string src_file_name = _s_test_data_path + "/abcd12345.txt"; - // create a file using open - std::shared_ptr fs = io::global_local_filesystem(); - io::FileWriterPtr file_writer; - EXPECT_TRUE(fs->create_file(src_file_name, &file_writer).ok()); - - char large_bytes2[(1 << 12)]; - memset(&large_bytes2, 0, sizeof(large_bytes2)); - int i = 0; - while (i < 1 << 10) { - EXPECT_TRUE(file_writer->append({large_bytes2, sizeof(large_bytes2)}).ok()); - ++i; - } - EXPECT_TRUE(file_writer->append({large_bytes2, 13}).ok()); - EXPECT_TRUE(file_writer->close().ok()); - - std::string dst_file_name = _s_test_data_path + "/abcd123456.txt"; - FileUtils::copy_file(src_file_name, dst_file_name); - - io::FileReaderSPtr file_reader; - EXPECT_TRUE(fs->open_file(dst_file_name, &file_reader).ok()); - 
EXPECT_EQ(4194317, file_reader->size()); -} - -TEST_F(FileUtilsTest, TestRemove) { - // remove_all - EXPECT_TRUE(FileUtils::remove_all("./file_test").ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test")); - - EXPECT_TRUE(FileUtils::create_dir("./file_test/123/456/789").ok()); - EXPECT_TRUE(FileUtils::create_dir("./file_test/abc/def/zxc").ok()); - EXPECT_TRUE(FileUtils::create_dir("./file_test/abc/123").ok()); - - save_string_file("./file_test/s1", "123"); - save_string_file("./file_test/123/s2", "123"); - - EXPECT_TRUE(FileUtils::check_exist("./file_test")); - EXPECT_TRUE(FileUtils::remove_all("./file_test").ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test")); - - // remove - EXPECT_TRUE(FileUtils::create_dir("./file_test/abc/123").ok()); - save_string_file("./file_test/abc/123/s2", "123"); - - EXPECT_TRUE(FileUtils::check_exist("./file_test/abc/123/s2")); - EXPECT_TRUE(FileUtils::remove("./file_test/abc/123/s2").ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test/abc/123/s2")); - - EXPECT_TRUE(FileUtils::check_exist("./file_test/abc/123")); - EXPECT_TRUE(FileUtils::remove("./file_test/abc/123/").ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test/abc/123")); - - EXPECT_TRUE(FileUtils::remove_all("./file_test").ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test")); - - // remove paths - EXPECT_TRUE(FileUtils::create_dir("./file_test/123/456/789").ok()); - EXPECT_TRUE(FileUtils::create_dir("./file_test/abc/def/zxc").ok()); - save_string_file("./file_test/s1", "123"); - save_string_file("./file_test/s2", "123"); - - std::vector ps; - ps.push_back("./file_test/123/456/789"); - ps.push_back("./file_test/123/456"); - ps.push_back("./file_test/123"); - - EXPECT_TRUE(FileUtils::check_exist("./file_test/123")); - EXPECT_TRUE(FileUtils::remove_paths(ps).ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test/123")); - - ps.clear(); - ps.push_back("./file_test/s1"); - ps.push_back("./file_test/abc/def"); - - 
EXPECT_TRUE(FileUtils::remove_paths(ps).ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test/s1")); - EXPECT_FALSE(FileUtils::check_exist("./file_test/abc/def/")); - - ps.clear(); - ps.push_back("./file_test/abc/def/zxc"); - ps.push_back("./file_test/s2"); - ps.push_back("./file_test/abc/def"); - ps.push_back("./file_test/abc"); - - EXPECT_TRUE(FileUtils::remove_paths(ps).ok()); - EXPECT_FALSE(FileUtils::check_exist("./file_test/s2")); - EXPECT_FALSE(FileUtils::check_exist("./file_test/abc")); - - EXPECT_TRUE(FileUtils::remove_all("./file_test").ok()); -} - -TEST_F(FileUtilsTest, TestCreateDir) { - // normal - std::string path = "./file_test/123/456/789"; - FileUtils::remove_all("./file_test"); - EXPECT_FALSE(FileUtils::check_exist(path)); - - EXPECT_TRUE(FileUtils::create_dir(path).ok()); - - EXPECT_TRUE(FileUtils::check_exist(path)); - EXPECT_TRUE(FileUtils::is_dir("./file_test")); - EXPECT_TRUE(FileUtils::is_dir("./file_test/123")); - EXPECT_TRUE(FileUtils::is_dir("./file_test/123/456")); - EXPECT_TRUE(FileUtils::is_dir("./file_test/123/456/789")); - - FileUtils::remove_all("./file_test"); - - // normal - path = "./file_test/123/456/789/"; - FileUtils::remove_all("./file_test"); - EXPECT_FALSE(FileUtils::check_exist(path)); - - EXPECT_TRUE(FileUtils::create_dir(path).ok()); - - EXPECT_TRUE(FileUtils::check_exist(path)); - EXPECT_TRUE(FileUtils::is_dir("./file_test")); - EXPECT_TRUE(FileUtils::is_dir("./file_test/123")); - EXPECT_TRUE(FileUtils::is_dir("./file_test/123/456")); - EXPECT_TRUE(FileUtils::is_dir("./file_test/123/456/789")); - - FileUtils::remove_all("./file_test"); - - // absolute path; - std::string real_path; - Env::Default()->canonicalize(".", &real_path); - EXPECT_TRUE(FileUtils::create_dir(real_path + "/file_test/absolute/path/123/asdf").ok()); - EXPECT_TRUE(FileUtils::is_dir("./file_test/absolute/path/123/asdf")); - FileUtils::remove_all("./file_test"); -} - -TEST_F(FileUtilsTest, TestListDirsFiles) { - std::string path = "./file_test/"; - 
FileUtils::remove_all(path); - FileUtils::create_dir("./file_test/1"); - FileUtils::create_dir("./file_test/2"); - FileUtils::create_dir("./file_test/3"); - FileUtils::create_dir("./file_test/4"); - FileUtils::create_dir("./file_test/5"); - - std::set dirs; - std::set files; - - EXPECT_TRUE(FileUtils::list_dirs_files("./file_test", &dirs, &files, Env::Default()).ok()); - EXPECT_EQ(5, dirs.size()); - EXPECT_EQ(0, files.size()); - - dirs.clear(); - files.clear(); - - EXPECT_TRUE(FileUtils::list_dirs_files("./file_test", &dirs, nullptr, Env::Default()).ok()); - EXPECT_EQ(5, dirs.size()); - EXPECT_EQ(0, files.size()); - - save_string_file("./file_test/f1", "just test"); - save_string_file("./file_test/f2", "just test"); - save_string_file("./file_test/f3", "just test"); - - dirs.clear(); - files.clear(); - - EXPECT_TRUE(FileUtils::list_dirs_files("./file_test", &dirs, &files, Env::Default()).ok()); - EXPECT_EQ(5, dirs.size()); - EXPECT_EQ(3, files.size()); - - dirs.clear(); - files.clear(); - - EXPECT_TRUE(FileUtils::list_dirs_files("./file_test", nullptr, &files, Env::Default()).ok()); - EXPECT_EQ(0, dirs.size()); - EXPECT_EQ(3, files.size()); - - FileUtils::remove_all(path); -} -} // namespace doris diff --git a/be/test/olap/memtable_flush_executor_test.cpp b/be/test/olap/memtable_flush_executor_test.cpp index d8244f507831f6..4eed9c2c7737de 100644 --- a/be/test/olap/memtable_flush_executor_test.cpp +++ b/be/test/olap/memtable_flush_executor_test.cpp @@ -25,6 +25,7 @@ #include "gen_cpp/Descriptors_types.h" #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/Types_types.h" +#include "io/fs/local_file_system.h" #include "olap/delta_writer.h" #include "olap/field.h" #include "olap/memtable.h" @@ -36,7 +37,6 @@ #include "olap/utils.h" #include "runtime/descriptor_helper.h" #include "runtime/exec_env.h" -#include "util/file_utils.h" namespace doris { @@ -47,8 +47,9 @@ void set_up() { char buffer[1024]; getcwd(buffer, 1024); config::storage_root_path = 
std::string(buffer) + "/flush_test"; - FileUtils::remove_all(config::storage_root_path); - FileUtils::create_dir(config::storage_root_path); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); std::vector paths; paths.emplace_back(config::storage_root_path, -1); @@ -67,7 +68,9 @@ void tear_down() { delete k_engine; k_engine = nullptr; system("rm -rf ./flush_test"); - FileUtils::remove_all(std::string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_directory(std::string(getenv("DORIS_HOME")) + "/" + UNUSED_PREFIX) + .ok()); } Schema create_schema() { diff --git a/be/test/olap/olap_meta_test.cpp b/be/test/olap/olap_meta_test.cpp index 5d5339c0dff9c8..e668501dd59813 100644 --- a/be/test/olap/olap_meta_test.cpp +++ b/be/test/olap/olap_meta_test.cpp @@ -23,8 +23,8 @@ #include #include +#include "io/fs/local_file_system.h" #include "olap/olap_define.h" -#include "util/file_utils.h" using std::string; @@ -35,8 +35,7 @@ class OlapMetaTest : public testing::Test { public: virtual void SetUp() { _root_path = "./ut_dir/olap_meta_test"; - FileUtils::remove_all(_root_path); - FileUtils::create_dir(_root_path); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(_root_path).ok()); _meta = new OlapMeta(_root_path); Status s = _meta->init(); @@ -46,7 +45,7 @@ class OlapMetaTest : public testing::Test { virtual void TearDown() { delete _meta; - FileUtils::remove_all(_root_path); + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_root_path).ok()); } private: diff --git a/be/test/olap/ordered_data_compaction_test.cpp b/be/test/olap/ordered_data_compaction_test.cpp index 02ad39e2fbe2ab..df5bf117c1b179 100644 --- a/be/test/olap/ordered_data_compaction_test.cpp +++ b/be/test/olap/ordered_data_compaction_test.cpp @@ -32,7 +32,6 @@ #include "olap/schema.h" #include "olap/tablet_schema.h" #include "olap/tablet_schema_helper.h" -#include 
"util/file_utils.h" #include "vec/olap/vertical_block_reader.h" #include "vec/olap/vertical_merge_iterator.h" @@ -49,12 +48,11 @@ class OrderedDataCompactionTest : public ::testing::Test { char buffer[MAX_PATH_LEN]; EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); absolute_dir = std::string(buffer) + kTestDir; + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(absolute_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(absolute_dir + "/tablet_path") + .ok()); - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(absolute_dir).ok()); - EXPECT_TRUE(FileUtils::create_dir(absolute_dir + "/tablet_path").ok()); _data_dir = std::make_unique(absolute_dir); _data_dir->update_capacity(); doris::EngineOptions options; @@ -65,9 +63,7 @@ class OrderedDataCompactionTest : public ::testing::Test { config::ordered_data_compaction_min_segment_size = 10; } void TearDown() override { - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok()); if (k_engine != nullptr) { k_engine->stop(); delete k_engine; @@ -389,7 +385,7 @@ TEST_F(OrderedDataCompactionTest, test_01) { TabletSchemaSPtr tablet_schema = create_schema(); TabletSharedPtr tablet = create_tablet(*tablet_schema, false, 10000, false); - EXPECT_TRUE(FileUtils::create_dir(tablet->tablet_path()).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok()); // create input rowset vector input_rowsets; SegmentsOverlapPB new_overlap = NONOVERLAPPING; @@ -457,4 +453,4 @@ TEST_F(OrderedDataCompactionTest, test_01) { } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp index 
a739dfcaf7e7d2..88ef7d284b2db6 100644 --- a/be/test/olap/primary_key_index_test.cpp +++ b/be/test/olap/primary_key_index_test.cpp @@ -20,8 +20,8 @@ #include #include "io/fs/file_writer.h" +#include "io/fs/fs_utils.h" #include "io/fs/local_file_system.h" -#include "util/file_utils.h" #include "vec/data_types/data_type_factory.hpp" namespace doris { @@ -30,15 +30,10 @@ using namespace ErrorCode; class PrimaryKeyIndexTest : public testing::Test { public: void SetUp() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(kTestDir).ok()); } void TearDown() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); } private: @@ -67,7 +62,7 @@ TEST_F(PrimaryKeyIndexTest, builder) { EXPECT_TRUE(file_writer->close().ok()); EXPECT_EQ(num_rows, builder.num_rows()); - FilePathDesc path_desc(filename); + io::FilePathDesc path_desc(filename); PrimaryKeyIndexReader index_reader; io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); diff --git a/be/test/olap/remote_rowset_gc_test.cpp b/be/test/olap/remote_rowset_gc_test.cpp index 6db84ba17112f8..3ff05f01512b1d 100644 --- a/be/test/olap/remote_rowset_gc_test.cpp +++ b/be/test/olap/remote_rowset_gc_test.cpp @@ -30,7 +30,6 @@ #include "olap/storage_policy.h" #include "olap/tablet.h" #include "runtime/descriptor_helper.h" -#include "util/file_utils.h" #include "util/s3_util.h" namespace doris { @@ -70,8 +69,9 @@ class RemoteRowsetGcTest : public testing::Test { config::storage_root_path = std::string(buffer) + "/" + kTestDir; config::min_file_descriptor_number = 1000; - FileUtils::remove_all(config::storage_root_path); - FileUtils::create_dir(config::storage_root_path); + 
EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); std::vector paths {{config::storage_root_path, -1}}; diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/rowid_conversion_test.cpp index 175bdec94bee45..9c615371864a21 100644 --- a/be/test/olap/rowid_conversion_test.cpp +++ b/be/test/olap/rowid_conversion_test.cpp @@ -19,6 +19,7 @@ #include +#include "io/fs/local_file_system.h" #include "olap/data_dir.h" #include "olap/delete_handler.h" #include "olap/merger.h" @@ -31,7 +32,6 @@ #include "olap/rowset/rowset_writer_context.h" #include "olap/storage_engine.h" #include "olap/tablet_schema.h" -#include "util/file_utils.h" namespace doris { using namespace ErrorCode; @@ -45,21 +45,17 @@ class TestRowIdConversion : public testing::TestWithParamdelete_and_create_directory(absolute_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(absolute_dir + "/tablet_path") + .ok()); doris::EngineOptions options; k_engine = new StorageEngine(options); StorageEngine::_s_instance = k_engine; } void TearDown() override { - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok()); if (k_engine != nullptr) { k_engine->stop(); delete k_engine; diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp index 91b2b069543246..703986494a5b9a 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -23,6 +23,7 @@ #include "gen_cpp/olap_file.pb.h" #include "gtest/gtest.h" +#include "io/fs/local_file_system.h" #include "io/fs/s3_file_system.h" #include "olap/comparison_predicate.h" #include "olap/data_dir.h" @@ -38,7 +39,6 @@ #include "runtime/exec_env.h" #include "runtime/mem_pool.h" #include "runtime/memory/mem_tracker.h" -#include "util/file_utils.h" #include "util/slice.h" using 
std::string; @@ -65,8 +65,9 @@ class BetaRowsetTest : public testing::Test { EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = std::string(buffer) + "/data_test"; - EXPECT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); - EXPECT_TRUE(FileUtils::create_dir(config::storage_root_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); std::vector paths; paths.emplace_back(config::storage_root_path, -1); @@ -79,7 +80,7 @@ class BetaRowsetTest : public testing::Test { ExecEnv* exec_env = doris::ExecEnv::GetInstance(); exec_env->set_storage_engine(k_engine); - EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(kTestDir).ok()); } static void TearDownTestSuite() { diff --git a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp index fc1e9cd62f25f7..b9548b49571d51 100644 --- a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp @@ -21,7 +21,6 @@ #include #include "common/logging.h" -#include "env/env.h" #include "io/fs/file_reader.h" #include "io/fs/file_system.h" #include "io/fs/file_writer.h" @@ -32,7 +31,6 @@ #include "olap/rowset/segment_v2/bitmap_index_writer.h" #include "olap/types.h" #include "testutil/test_util.h" -#include "util/file_utils.h" namespace doris { @@ -46,15 +44,10 @@ class BitmapIndexTest : public testing::Test { const std::string kTestDir = "./ut_dir/bitmap_index_test"; void SetUp() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(kTestDir).ok()); } void TearDown() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } + 
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); } }; diff --git a/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp b/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp index e48612253035c5..62651949116633 100644 --- a/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp +++ b/be/test/olap/rowset/segment_v2/block_bloom_filter_test.cpp @@ -20,6 +20,7 @@ #include #include "olap/rowset/segment_v2/bloom_filter.h" +#include "util/slice.h" namespace doris { namespace segment_v2 { diff --git a/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp index 661eb497eba4df..c66b8e91f99a5d 100644 --- a/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp @@ -18,7 +18,6 @@ #include #include "common/logging.h" -#include "env/env.h" #include "io/fs/file_system.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" @@ -28,7 +27,6 @@ #include "olap/rowset/segment_v2/bloom_filter_index_reader.h" #include "olap/rowset/segment_v2/bloom_filter_index_writer.h" #include "olap/types.h" -#include "util/file_utils.h" namespace doris { namespace segment_v2 { @@ -38,15 +36,10 @@ const std::string dname = "./ut_dir/bloom_filter_index_reader_writer_test"; class BloomFilterIndexReaderWriterTest : public testing::Test { public: void SetUp() override { - if (FileUtils::check_exist(dname)) { - EXPECT_TRUE(FileUtils::remove_all(dname).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(dname).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(dname).ok()); } void TearDown() override { - if (FileUtils::check_exist(dname)) { - EXPECT_TRUE(FileUtils::remove_all(dname).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(dname).ok()); } }; diff --git 
a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp index f3c4f395e43aa6..5b4ad295408958 100644 --- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp @@ -19,7 +19,6 @@ #include -#include "env/env.h" #include "io/fs/file_system.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" @@ -32,7 +31,6 @@ #include "olap/types.h" #include "runtime/mem_pool.h" #include "testutil/test_util.h" -#include "util/file_utils.h" #include "vec/core/types.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" @@ -56,16 +54,11 @@ class ColumnReaderWriterTest : public testing::Test { protected: void SetUp() override { config::disable_storage_page_cache = true; - if (FileUtils::check_exist(TEST_DIR)) { - EXPECT_TRUE(FileUtils::remove_all(TEST_DIR).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(TEST_DIR).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(TEST_DIR).ok()); } void TearDown() override { - if (FileUtils::check_exist(TEST_DIR)) { - EXPECT_TRUE(FileUtils::remove_all(TEST_DIR).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(TEST_DIR).ok()); } private: diff --git a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp index 87ebd8066459b4..efd0bc96caa2f5 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp @@ -19,7 +19,6 @@ #include "io/fs/local_file_system.h" #include "olap/rowset/segment_v2/inverted_index_cache.h" -#include "util/file_utils.h" #include "util/time.h" namespace doris { @@ -36,15 +35,10 @@ class InvertedIndexSearcherCacheTest : public testing::Test { const std::string kTestDir = 
"./ut_dir/invertedInvertedIndexSearcherCache::instance()_test"; void SetUp() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(kTestDir).ok()); } void TearDown() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); } }; @@ -365,4 +359,4 @@ TEST_F(InvertedIndexSearcherCacheTest, remove_element_only_in_table) { } } // namespace segment_v2 -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp index cb7d7309730452..0ee758bd15a161 100644 --- a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp @@ -24,13 +24,11 @@ #include #include "common/logging.h" -#include "env/env.h" #include "io/fs/file_reader.h" #include "io/fs/file_system.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" #include "olap/page_cache.h" -#include "util/file_utils.h" namespace doris { namespace segment_v2 { @@ -40,15 +38,10 @@ class OrdinalPageIndexTest : public testing::Test { const std::string kTestDir = "./ut_dir/ordinal_page_index_test"; void SetUp() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(kTestDir).ok()); } void TearDown() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); } }; diff --git 
a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp index f7607d8ca9c0b5..99a7fbdb7fbb09 100644 --- a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp @@ -23,13 +23,11 @@ #include #include "common/config.h" -#include "env/env.h" #include "io/fs/file_system.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" #include "olap/page_cache.h" #include "olap/tablet_schema_helper.h" -#include "util/file_utils.h" namespace doris { namespace segment_v2 { @@ -39,15 +37,10 @@ class ColumnZoneMapTest : public testing::Test { const std::string kTestDir = "./ut_dir/zone_map_index_test"; void SetUp() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(kTestDir).ok()); } void TearDown() override { - if (FileUtils::check_exist(kTestDir)) { - EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok()); } void test_string(std::string testname, Field* field) { diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp index 4b8e67bca47003..ee834315269a09 100644 --- a/be/test/olap/segcompaction_test.cpp +++ b/be/test/olap/segcompaction_test.cpp @@ -26,6 +26,7 @@ #include "env/env_posix.h" #include "gen_cpp/AgentService_types.h" #include "gen_cpp/olap_file.pb.h" +#include "io/fs/local_file_system.h" #include "olap/data_dir.h" #include "olap/row_cursor.h" #include "olap/rowset/beta_rowset_reader.h" @@ -41,7 +42,6 @@ #include "runtime/exec_env.h" #include "runtime/mem_pool.h" #include "runtime/memory/mem_tracker.h" -#include "util/file_utils.h" #include "util/slice.h" namespace doris { @@ -67,8 +67,9 @@ class SegCompactionTest : public testing::Test { 
EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); config::storage_root_path = std::string(buffer) + "/data_test"; - EXPECT_TRUE(FileUtils::remove_all(config::storage_root_path).ok()); - EXPECT_TRUE(FileUtils::create_dir(config::storage_root_path).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); std::vector paths; paths.emplace_back(config::storage_root_path, -1); @@ -81,7 +82,7 @@ class SegCompactionTest : public testing::Test { ExecEnv* exec_env = doris::ExecEnv::GetInstance(); exec_env->set_storage_engine(l_engine); - EXPECT_TRUE(FileUtils::create_dir(lTestDir).ok()); + EXPECT_TRUE(io::global_local_filesystem()->create_directory(lTestDir).ok()); l_engine->start_bg_threads(); } diff --git a/be/test/olap/tablet_cooldown_test.cpp b/be/test/olap/tablet_cooldown_test.cpp index d43e56164dd8ae..d4a07c8dfd5341 100644 --- a/be/test/olap/tablet_cooldown_test.cpp +++ b/be/test/olap/tablet_cooldown_test.cpp @@ -34,7 +34,6 @@ #include "olap/storage_policy.h" #include "olap/tablet.h" #include "runtime/descriptor_helper.h" -#include "util/file_utils.h" #include "util/s3_util.h" namespace doris { @@ -109,7 +108,7 @@ class RemoteFileSystemMock : public io::RemoteFileSystem { return Status::OK(); } - Status create_directory_impl(const Path& path) override { + Status create_directory_impl(const Path& path, bool failed_if_exists) override { return _local_fs->create_directory(get_remote_path(path)); } @@ -132,7 +131,7 @@ class RemoteFileSystemMock : public io::RemoteFileSystem { return _local_fs->exists(get_remote_path(path), res); } - Status file_size_impl(const Path& path, size_t* file_size) const override { + Status file_size_impl(const Path& path, int64_t* file_size) const override { return _local_fs->file_size(get_remote_path(path), file_size); } @@ -174,7 +173,8 @@ class RemoteFileSystemMock : public io::RemoteFileSystem { return Status::OK(); } - Status open_file_internal(const Path& file, 
io::FileReaderSPtr* reader) override { + Status open_file_internal(const Path& file, int64_t file_size, + io::FileReaderSPtr* reader) override { return _local_fs->open_file(get_remote_path(file), io::FileReaderOptions::DEFAULT, reader); } @@ -211,10 +211,15 @@ class TabletCooldownTest : public testing::Test { config::storage_root_path = std::string(buffer) + "/" + kTestDir; config::min_file_descriptor_number = 1000; - FileUtils::remove_all(config::storage_root_path); - FileUtils::create_dir(config::storage_root_path); - FileUtils::create_dir(get_remote_path(fmt::format("data/{}", kTabletId))); - FileUtils::create_dir(get_remote_path(fmt::format("data/{}", kTabletId2))); + EXPECT_TRUE(io::global_local_filesystem() + ->delete_and_create_directory(config::storage_root_path) + .ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(get_remote_path(fmt::format("data/{}", kTabletId))) + .ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(get_remote_path(fmt::format("data/{}", kTabletId2))) + .ok()); std::vector paths {{config::storage_root_path, -1}}; diff --git a/be/test/olap/tablet_mgr_test.cpp b/be/test/olap/tablet_mgr_test.cpp index d7297fc3b8e7b6..22d277ebe3a72a 100644 --- a/be/test/olap/tablet_mgr_test.cpp +++ b/be/test/olap/tablet_mgr_test.cpp @@ -22,10 +22,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "io/fs/local_file_system.h" #include "olap/storage_engine.h" #include "olap/tablet_meta_manager.h" #include "olap/txn_manager.h" -#include "util/file_utils.h" using ::testing::_; using ::testing::Return; @@ -38,9 +38,10 @@ class TabletMgrTest : public testing::Test { public: virtual void SetUp() { _engine_data_path = "./be/test/olap/test_data/converter_test_data/tmp"; - std::filesystem::remove_all(_engine_data_path); - FileUtils::create_dir(_engine_data_path); - FileUtils::create_dir(_engine_data_path + "/meta"); + EXPECT_TRUE( + 
io::global_local_filesystem()->delete_and_create_directory(_engine_data_path).ok()); + EXPECT_TRUE( + io::global_local_filesystem()->create_directory(_engine_data_path + "/meta").ok()); config::tablet_map_shard_size = 1; config::txn_map_shard_size = 1; @@ -56,9 +57,7 @@ class TabletMgrTest : public testing::Test { virtual void TearDown() { SAFE_DELETE(_data_dir); - if (std::filesystem::exists(_engine_data_path)) { - EXPECT_TRUE(std::filesystem::remove_all(_engine_data_path)); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_engine_data_path).ok()); if (k_engine != nullptr) { k_engine->stop(); } @@ -99,7 +98,8 @@ TEST_F(TabletMgrTest, CreateTablet) { TabletSharedPtr tablet = _tablet_mgr->get_tablet(111); EXPECT_TRUE(tablet != nullptr); // check dir exist - bool dir_exist = FileUtils::check_exist(tablet->tablet_path()); + bool dir_exist = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet->tablet_path(), &dir_exist).ok()); EXPECT_TRUE(dir_exist); // check meta has this tablet TabletMetaSharedPtr new_tablet_meta(new TabletMeta()); @@ -158,8 +158,9 @@ TEST_F(TabletMgrTest, CreateTabletWithSequence) { TabletSharedPtr tablet = _tablet_mgr->get_tablet(111); EXPECT_TRUE(tablet != nullptr); // check dir exist - bool dir_exist = FileUtils::check_exist(tablet->tablet_path()); - EXPECT_TRUE(dir_exist) << tablet->tablet_path(); + bool dir_exist = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet->tablet_path(), &dir_exist).ok()); + EXPECT_TRUE(dir_exist); // check meta has this tablet TabletMetaSharedPtr new_tablet_meta(new TabletMeta()); Status check_meta_st = TabletMetaManager::get_meta(_data_dir, 111, 3333, new_tablet_meta); @@ -214,7 +215,8 @@ TEST_F(TabletMgrTest, DropTablet) { // check dir exist std::string tablet_path = tablet->tablet_path(); - bool dir_exist = FileUtils::check_exist(tablet_path); + bool dir_exist = false; + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &dir_exist).ok()); 
EXPECT_TRUE(dir_exist); // do trash sweep, tablet will not be garbage collected @@ -223,7 +225,7 @@ TEST_F(TabletMgrTest, DropTablet) { EXPECT_TRUE(trash_st == Status::OK()); tablet = _tablet_mgr->get_tablet(111, true); EXPECT_TRUE(tablet != nullptr); - dir_exist = FileUtils::check_exist(tablet_path); + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &dir_exist).ok()); EXPECT_TRUE(dir_exist); // reset tablet ptr @@ -232,8 +234,8 @@ TEST_F(TabletMgrTest, DropTablet) { EXPECT_TRUE(trash_st == Status::OK()); tablet = _tablet_mgr->get_tablet(111, true); EXPECT_TRUE(tablet == nullptr); - dir_exist = FileUtils::check_exist(tablet_path); - EXPECT_TRUE(!dir_exist); + EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &dir_exist).ok()); + EXPECT_FALSE(dir_exist); } TEST_F(TabletMgrTest, GetRowsetId) { diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp index b916bd0d6f4733..5a6113576a5374 100644 --- a/be/test/olap/tablet_test.cpp +++ b/be/test/olap/tablet_test.cpp @@ -22,6 +22,7 @@ #include #include "http/action/pad_rowset_action.h" +#include "io/fs/local_file_system.h" #include "olap/olap_define.h" #include "olap/rowset/beta_rowset.h" #include "olap/storage_engine.h" @@ -29,7 +30,6 @@ #include "olap/tablet_meta.h" #include "olap/tablet_schema_cache.h" #include "testutil/mock_rowset.h" -#include "util/file_utils.h" #include "util/time.h" using namespace std; @@ -73,11 +73,10 @@ class TestTablet : public testing::Test { EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); absolute_dir = std::string(buffer) + kTestDir; - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(absolute_dir).ok()); - EXPECT_TRUE(FileUtils::create_dir(absolute_dir + "/tablet_path").ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(absolute_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(absolute_dir + 
"/tablet_path") + .ok()); _data_dir = std::make_unique(absolute_dir); _data_dir->update_capacity(); @@ -87,9 +86,7 @@ class TestTablet : public testing::Test { } void TearDown() override { - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok()); if (k_engine != nullptr) { k_engine->stop(); delete k_engine; diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc index 0f3a440e7bd6a7..90cba77566a7ba 100644 --- a/be/test/runtime/test_env.cc +++ b/be/test/runtime/test_env.cc @@ -35,16 +35,6 @@ TestEnv::TestEnv() { // TODO may need rpc support, etc. } -void TestEnv::init_tmp_file_mgr(const std::vector& tmp_dirs, bool one_dir_per_device) { - _tmp_file_mgr = std::make_shared(); - _exec_env->_tmp_file_mgr = _tmp_file_mgr.get(); - - DiskInfo::init(); - // will use DiskInfo::num_disks(), DiskInfo should be initialized before - auto st = _tmp_file_mgr->init_custom(tmp_dirs, one_dir_per_device); - EXPECT_TRUE(st.ok()); -} - TestEnv::~TestEnv() { SAFE_DELETE(_exec_env->_result_queue_mgr); diff --git a/be/test/runtime/test_env.h b/be/test/runtime/test_env.h index a6baae9d272a06..06993c899ea354 100644 --- a/be/test/runtime/test_env.h +++ b/be/test/runtime/test_env.h @@ -20,7 +20,6 @@ #include "runtime/exec_env.h" #include "runtime/runtime_state.h" -#include "runtime/tmp_file_mgr.h" namespace doris { @@ -32,10 +31,6 @@ class TestEnv { TestEnv(); ~TestEnv(); - // Reinitialize tmp_file_mgr with custom configuration. Only valid to call before - // query states have been created. - void init_tmp_file_mgr(const std::vector& tmp_dirs, bool one_dir_per_device); - // If don't need to open, paths can be empty. 
void init_storage_engine(bool need_open, const std::vector& paths = {}); @@ -47,14 +42,12 @@ class TestEnv { static int64_t calculate_mem_tracker(int max_buffers, int block_size); ExecEnv* exec_env() { return _exec_env; } - TmpFileMgr* tmp_file_mgr() { return _tmp_file_mgr.get(); } private: // Create a new RuntimeState sharing global environment. RuntimeState* create_runtime_state(int64_t query_id); ExecEnv* _exec_env; - std::shared_ptr _tmp_file_mgr; // Per-query states with associated block managers. std::vector > _query_states; diff --git a/be/test/runtime/tmp_file_mgr_test.cpp b/be/test/runtime/tmp_file_mgr_test.cpp deleted file mode 100644 index 07837bb636e285..00000000000000 --- a/be/test/runtime/tmp_file_mgr_test.cpp +++ /dev/null @@ -1,216 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "runtime/tmp_file_mgr.h" - -#include - -#include -#include - -#include "gen_cpp/Types_types.h" // for TUniqueId -#include "util/disk_info.h" -#include "util/filesystem_util.h" -#include "util/metrics.h" - -using std::filesystem::path; -using std::string; -using std::vector; -using std::set; - -namespace doris { - -class TmpFileMgrTest : public ::testing::Test { -protected: - // Check that metric values are consistent with TmpFileMgr state. - void check_metrics(TmpFileMgr* tmp_file_mgr) { - std::vector active = tmp_file_mgr->active_tmp_devices(); - int64_t active_metric = DorisMetrics::instance() - ->metric_registry() - ->get_entity("server") - ->get_metric("active_scratch_dirs") - .value(); - EXPECT_EQ(active.size(), active_metric); - } -}; - -// Regression test for IMPALA-2160. Verify that temporary file manager allocates blocks -// at the expected file offsets and expands the temporary file to the correct size. -TEST_F(TmpFileMgrTest, TestFileAllocation) { - TmpFileMgr tmp_file_mgr; - EXPECT_TRUE(tmp_file_mgr.init().ok()); - // Default configuration should give us one temporary device. - EXPECT_EQ(1, tmp_file_mgr.num_active_tmp_devices()); - std::vector tmp_devices = tmp_file_mgr.active_tmp_devices(); - EXPECT_EQ(1, tmp_devices.size()); - TUniqueId id; - TmpFileMgr::File* file; - Status status = tmp_file_mgr.get_file(tmp_devices[0], id, &file); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(file != nullptr); - // Apply writes of variable sizes and check space was allocated correctly. 
- int64_t write_sizes[] = {1, 10, 1024, 4, 1024 * 1024 * 8, 1024 * 1024 * 8, 16, 10}; - int num_write_sizes = sizeof(write_sizes) / sizeof(write_sizes[0]); - int64_t next_offset = 0; - for (int i = 0; i < num_write_sizes; ++i) { - int64_t offset; - status = file->allocate_space(write_sizes[i], &offset); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(next_offset, offset); - next_offset = offset + write_sizes[i]; - EXPECT_EQ(next_offset, std::filesystem::file_size(file->path())); - } - // Check that cleanup is correct. - status = file->remove(); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(std::filesystem::exists(file->path())); - // check_metrics(&tmp_file_mgr); -} -// Test that we can do initialization with two directories on same device and -// that validations prevents duplication of directories. -TEST_F(TmpFileMgrTest, TestOneDirPerDevice) { - std::vector tmp_dirs; - tmp_dirs.push_back("/tmp/tmp-file-mgr-test.1"); - tmp_dirs.push_back("/tmp/tmp-file-mgr-test.2"); - for (int i = 0; i < tmp_dirs.size(); ++i) { - EXPECT_TRUE(FileSystemUtil::create_directory(tmp_dirs[i]).ok()); - } - TmpFileMgr tmp_file_mgr; - tmp_file_mgr.init_custom(tmp_dirs, true); - - // Only the first directory should be used. - EXPECT_EQ(1, tmp_file_mgr.num_active_tmp_devices()); - std::vector devices = tmp_file_mgr.active_tmp_devices(); - EXPECT_EQ(1, devices.size()); - TUniqueId id; - TmpFileMgr::File* file; - EXPECT_TRUE(tmp_file_mgr.get_file(devices[0], id, &file).ok()); - // Check the prefix is the expected temporary directory. - EXPECT_EQ(0, file->path().find(tmp_dirs[0])); - FileSystemUtil::remove_paths(tmp_dirs); - // check_metrics(&tmp_file_mgr); -} - -// Test that we can do custom initialization with two dirs on same device. 
-TEST_F(TmpFileMgrTest, TestMultiDirsPerDevice) { - std::vector tmp_dirs; - tmp_dirs.push_back("/tmp/tmp-file-mgr-test.1"); - tmp_dirs.push_back("/tmp/tmp-file-mgr-test.2"); - for (int i = 0; i < tmp_dirs.size(); ++i) { - EXPECT_TRUE(FileSystemUtil::create_directory(tmp_dirs[i]).ok()); - } - TmpFileMgr tmp_file_mgr; - tmp_file_mgr.init_custom(tmp_dirs, false); - - // Both directories should be used. - EXPECT_EQ(2, tmp_file_mgr.num_active_tmp_devices()); - std::vector devices = tmp_file_mgr.active_tmp_devices(); - EXPECT_EQ(2, devices.size()); - for (int i = 0; i < tmp_dirs.size(); ++i) { - EXPECT_EQ(0, tmp_file_mgr.get_tmp_dir_path(devices[i]).find(tmp_dirs[i])); - TUniqueId id; - TmpFileMgr::File* file; - EXPECT_TRUE(tmp_file_mgr.get_file(devices[i], id, &file).ok()); - // Check the prefix is the expected temporary directory. - EXPECT_EQ(0, file->path().find(tmp_dirs[i])); - } - FileSystemUtil::remove_paths(tmp_dirs); - // check_metrics(&tmp_file_mgr); -} - -// Test that reporting a write error is possible but does not result in -// blacklisting, which is disabled. -TEST_F(TmpFileMgrTest, TestReportError) { - std::vector tmp_dirs; - tmp_dirs.push_back("/tmp/tmp-file-mgr-test.1"); - tmp_dirs.push_back("/tmp/tmp-file-mgr-test.2"); - for (int i = 0; i < tmp_dirs.size(); ++i) { - EXPECT_TRUE(FileSystemUtil::create_directory(tmp_dirs[i]).ok()); - } - TmpFileMgr tmp_file_mgr; - tmp_file_mgr.init_custom(tmp_dirs, false); - - // Both directories should be used. - std::vector devices = tmp_file_mgr.active_tmp_devices(); - EXPECT_EQ(2, devices.size()); - // check_metrics(&tmp_file_mgr); - - // Inject an error on one device so that we can validate it is handled correctly. 
- TUniqueId id; - int good_device = 0; - int bad_device = 1; - TmpFileMgr::File* bad_file; - EXPECT_TRUE(tmp_file_mgr.get_file(devices[bad_device], id, &bad_file).ok()); - // ErrorMsg errmsg(TErrorCode::GENERAL, "A fake error"); - // bad_file->ReportIOError(errmsg); - bad_file->report_io_error("A fake error"); - - // Blacklisting is disabled. - EXPECT_FALSE(bad_file->is_blacklisted()); - // The second device should still be active. - EXPECT_EQ(2, tmp_file_mgr.num_active_tmp_devices()); - std::vector devices_after = tmp_file_mgr.active_tmp_devices(); - EXPECT_EQ(2, devices_after.size()); - // check_metrics(&tmp_file_mgr); - - // Attempts to expand bad file should succeed. - int64_t offset; - EXPECT_TRUE(bad_file->allocate_space(128, &offset).ok()); - EXPECT_TRUE(bad_file->remove().ok()); - // The good device should still be usable. - TmpFileMgr::File* good_file; - EXPECT_TRUE(tmp_file_mgr.get_file(devices[good_device], id, &good_file).ok()); - EXPECT_TRUE(good_file != nullptr); - EXPECT_TRUE(good_file->allocate_space(128, &offset).ok()); - // Attempts to allocate new files on bad device should succeed. 
- EXPECT_TRUE(tmp_file_mgr.get_file(devices[bad_device], id, &bad_file).ok()); - FileSystemUtil::remove_paths(tmp_dirs); - // check_metrics(&tmp_file_mgr); -} - -TEST_F(TmpFileMgrTest, TestAllocateFails) { - string tmp_dir("/tmp/tmp-file-mgr-test.1"); - string scratch_subdir = tmp_dir + "/doris-scratch"; - std::vector tmp_dirs(1, tmp_dir); - EXPECT_TRUE(FileSystemUtil::create_directory(tmp_dir).ok()); - TmpFileMgr tmp_file_mgr; - tmp_file_mgr.init_custom(tmp_dirs, false); - - TUniqueId id; - TmpFileMgr::File* allocated_file1; - TmpFileMgr::File* allocated_file2; - int64_t offset; - EXPECT_TRUE(tmp_file_mgr.get_file(0, id, &allocated_file1).ok()); - EXPECT_TRUE(tmp_file_mgr.get_file(0, id, &allocated_file2).ok()); - EXPECT_TRUE(allocated_file1->allocate_space(1, &offset).ok()); - - // Make scratch non-writable and test for allocation errors at different stages: - // new file creation, files with no allocated blocks. files with allocated space. - chmod(scratch_subdir.c_str(), 0); - // allocated_file1 already has space allocated. - EXPECT_FALSE(allocated_file1->allocate_space(1, &offset).ok()); - // allocated_file2 has no space allocated. - EXPECT_FALSE(allocated_file2->allocate_space(1, &offset).ok()); - // Creating a new File object can succeed because it is not immediately created on disk. 
- TmpFileMgr::File* unallocated_file; - EXPECT_TRUE(tmp_file_mgr.get_file(0, id, &unallocated_file).ok()); - - chmod(scratch_subdir.c_str(), S_IRWXU); - FileSystemUtil::remove_paths(tmp_dirs); -} - -} // end namespace doris diff --git a/be/test/runtime/user_function_cache_test.cpp b/be/test/runtime/user_function_cache_test.cpp index 0142f0ef5358e3..91a3c317de0367 100644 --- a/be/test/runtime/user_function_cache_test.cpp +++ b/be/test/runtime/user_function_cache_test.cpp @@ -27,5 +27,4 @@ #include "http/http_channel.h" #include "http/http_handler.h" #include "http/http_request.h" -#include "util/file_utils.h" #include "util/md5.h" diff --git a/be/test/tools/benchmark_tool.cpp b/be/test/tools/benchmark_tool.cpp index 80f9e755a6eed1..75ff41e0cddb80 100644 --- a/be/test/tools/benchmark_tool.cpp +++ b/be/test/tools/benchmark_tool.cpp @@ -53,7 +53,6 @@ #include "runtime/mem_pool.h" #include "testutil/test_util.h" #include "util/debug_util.h" -#include "util/file_utils.h" DEFINE_string(operation, "Custom", "valid operation: Custom, BinaryDictPageEncode, BinaryDictPageDecode, SegmentScan, " @@ -237,24 +236,15 @@ class SegmentBenchmark : public BaseBenchmark { public: SegmentBenchmark(const std::string& name, int iterations, const std::string& column_type) : BaseBenchmark(name, iterations), _pool() { - if (FileUtils::check_exist(kSegmentDir)) { - FileUtils::remove_all(kSegmentDir); - } - FileUtils::create_dir(kSegmentDir); - + io::global_local_filesystem()->delete_and_create_directory(kSegmentDir); init_schema(column_type); } SegmentBenchmark(const std::string& name, int iterations) : BaseBenchmark(name, iterations), _pool() { - if (FileUtils::check_exist(kSegmentDir)) { - FileUtils::remove_all(kSegmentDir); - } - FileUtils::create_dir(kSegmentDir); + io::global_local_filesystem()->delete_and_create_directory(kSegmentDir); } virtual ~SegmentBenchmark() override { - if (FileUtils::check_exist(kSegmentDir)) { - FileUtils::remove_all(kSegmentDir); - } + 
io::global_local_filesystem()->delete_directory(kSegmentDir); } const Schema& get_schema() { return *_schema; } diff --git a/be/test/util/file_cache_test.cpp b/be/test/util/file_cache_test.cpp deleted file mode 100644 index 4b97015dae58c9..00000000000000 --- a/be/test/util/file_cache_test.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "util/file_cache.h" - -#include - -#include "env/env.h" - -namespace doris { - -class FileCacheTest : public testing::Test { -public: - FileCacheTest() {} - - void SetUp() override { - _file_cache.reset(new FileCache("test_cache", 10000)); - _file_exist = "file_exist"; - std::unique_ptr file; - auto st = Env::Default()->new_writable_file(_file_exist, &file); - EXPECT_TRUE(st.ok()); - st = file->close(); - EXPECT_TRUE(st.ok()); - } - - void TearDown() override { - _file_cache.reset(nullptr); - auto st = Env::Default()->delete_file(_file_exist); - EXPECT_TRUE(st.ok()); - } - -private: - std::unique_ptr> _file_cache; - std::string _file_exist; -}; - -TEST_F(FileCacheTest, normal) { - OpenedFileHandle file_handle; - auto found = _file_cache->lookup(_file_exist, &file_handle); - EXPECT_FALSE(found); - std::unique_ptr file; - auto st = Env::Default()->new_random_access_file(_file_exist, &file); - EXPECT_TRUE(st.ok()); - RandomAccessFile* tmp_file = file.release(); - _file_cache->insert(_file_exist, tmp_file, &file_handle); - EXPECT_EQ(tmp_file, file_handle.file()); - OpenedFileHandle file_handle2; - found = _file_cache->lookup(_file_exist, &file_handle2); - EXPECT_EQ(file_handle.file(), file_handle2.file()); -} - -} // namespace doris diff --git a/be/test/util/filesystem_util_test.cpp b/be/test/util/filesystem_util_test.cpp deleted file mode 100644 index 67f3891cb78c58..00000000000000 --- a/be/test/util/filesystem_util_test.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "util/filesystem_util.h" - -#include -#include - -#include - -#include "common/configbase.h" - -namespace doris { - -namespace filesystem = std::filesystem; -using filesystem::path; - -TEST(FileSystemUtil, rlimit) { - EXPECT_LT(0ul, FileSystemUtil::max_num_file_handles()); -} - -TEST(FileSystemUtil, CreateDirectory) { - char filename[] = "temp-XXXXXX"; - // Setup a temporary directory with one subdir - std::string dir_name = mkdtemp(filename); - path dir {dir_name}; - path subdir1 = dir / "path1"; - path subdir2 = dir / "path2"; - path subdir3 = dir / "a" / "longer" / "path"; - filesystem::create_directories(subdir1); - // Test error cases by removing write permissions on root dir to prevent - // creation/deletion of subdirs - chmod(dir.string().c_str(), 0); - if (getuid() == 0) { // User root - EXPECT_TRUE(FileSystemUtil::create_directory(subdir1.string()).ok()); - EXPECT_TRUE(FileSystemUtil::create_directory(subdir2.string()).ok()); - } else { // User other - EXPECT_FALSE(FileSystemUtil::create_directory(subdir1.string()).ok()); - EXPECT_FALSE(FileSystemUtil::create_directory(subdir2.string()).ok()); - } - // Test success cases by adding write permissions back - chmod(dir.string().c_str(), S_IRWXU); - EXPECT_TRUE(FileSystemUtil::create_directory(subdir1.string()).ok()); - EXPECT_TRUE(FileSystemUtil::create_directory(subdir2.string()).ok()); - // Check that directories were created - EXPECT_TRUE(filesystem::exists(subdir1) && filesystem::is_directory(subdir1)); - EXPECT_TRUE(filesystem::exists(subdir2) && 
filesystem::is_directory(subdir2)); - // Exercise VerifyIsDirectory - EXPECT_TRUE(FileSystemUtil::verify_is_directory(subdir1.string()).ok()); - EXPECT_TRUE(FileSystemUtil::verify_is_directory(subdir2.string()).ok()); - EXPECT_FALSE(FileSystemUtil::verify_is_directory(subdir3.string()).ok()); - // Check that nested directories can be created - EXPECT_TRUE(FileSystemUtil::create_directory(subdir3.string()).ok()); - EXPECT_TRUE(filesystem::exists(subdir3) && filesystem::is_directory(subdir3)); - // Cleanup - filesystem::remove_all(dir); -} - -TEST(FilesystemUtil, contain_path) { - { - std::string parent("/a/b"); - std::string sub("/a/b/c"); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, sub)); - EXPECT_FALSE(FileSystemUtil::contain_path(sub, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(sub, sub)); - } - - { - std::string parent("/a/b/"); - std::string sub("/a/b/c/"); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, sub)); - EXPECT_FALSE(FileSystemUtil::contain_path(sub, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(sub, sub)); - } - - { - std::string parent("/a///./././/./././b/"); // "/a/b/." 
- std::string sub("/a/b/../././b/c/"); // "/a/b/c/" - EXPECT_TRUE(FileSystemUtil::contain_path(parent, sub)); - EXPECT_FALSE(FileSystemUtil::contain_path(sub, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(sub, sub)); - } - - { - // relative path - std::string parent("a/b/"); // "a/b/" - std::string sub("a/b/c/"); // "a/b/c/" - EXPECT_TRUE(FileSystemUtil::contain_path(parent, sub)); - EXPECT_FALSE(FileSystemUtil::contain_path(sub, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(sub, sub)); - } - { - // relative path - std::string parent("a////./././b/"); // "a/b/" - std::string sub("a/b/../././b/c/"); // "a/b/c/" - EXPECT_TRUE(FileSystemUtil::contain_path(parent, sub)); - EXPECT_FALSE(FileSystemUtil::contain_path(sub, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(sub, sub)); - } - { - // absolute path and relative path - std::string parent("/a////./././b/"); // "/a/b/" - std::string sub("a/b/../././b/c/"); // "a/b/c/" - EXPECT_FALSE(FileSystemUtil::contain_path(parent, sub)); - EXPECT_FALSE(FileSystemUtil::contain_path(sub, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(parent, parent)); - EXPECT_TRUE(FileSystemUtil::contain_path(sub, sub)); - } -} - -} // end namespace doris diff --git a/be/test/util/mysql_row_buffer_test.cpp b/be/test/util/mysql_row_buffer_test.cpp index fdfc11f18675c3..4697b612151d77 100644 --- a/be/test/util/mysql_row_buffer_test.cpp +++ b/be/test/util/mysql_row_buffer_test.cpp @@ -23,7 +23,6 @@ #include -#include "env/env.h" #include "gutil/strings/util.h" namespace doris { diff --git a/be/test/util/s3_uri_test.cpp b/be/test/util/s3_uri_test.cpp index f166cf0e95dadb..70439a07c218cc 100644 --- a/be/test/util/s3_uri_test.cpp +++ b/be/test/util/s3_uri_test.cpp @@ -65,13 +65,8 @@ TEST_F(S3URITest, MissingKey) { TEST_F(S3URITest, 
RelativePathing) { std::string p1 = "/path/to/file"; S3URI uri1(p1); - EXPECT_FALSE(uri1.parse()); -} - -TEST_F(S3URITest, InvalidScheme) { - std::string p1 = "ftp://bucket/"; - S3URI uri1(p1); - EXPECT_FALSE(uri1.parse()); + EXPECT_TRUE(uri1.parse()); + EXPECT_EQ("/path/to/file", uri1.get_key()); } TEST_F(S3URITest, QueryAndFragment) { diff --git a/be/test/vec/core/block_spill_test.cpp b/be/test/vec/core/block_spill_test.cpp index 9cefafe9340339..94e12931f27ad5 100644 --- a/be/test/vec/core/block_spill_test.cpp +++ b/be/test/vec/core/block_spill_test.cpp @@ -17,9 +17,9 @@ #include +#include "io/fs/local_file_system.h" #include "runtime/block_spill_manager.h" #include "runtime/runtime_state.h" -#include "util/file_utils.h" #include "vec/columns/column_array.h" #include "vec/columns/column_decimal.h" #include "vec/columns/column_nullable.h" @@ -55,8 +55,7 @@ class TestBlockSpill : public testing::Test { EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); test_data_dir = std::string(buffer) + "/" + TMP_DATA_DIR; std::cout << "test data dir: " << test_data_dir << "\n"; - FileUtils::remove_all(test_data_dir); - FileUtils::create_dir(test_data_dir); + io::global_local_filesystem()->delete_and_create_directory(test_data_dir); std::vector paths; paths.emplace_back(test_data_dir, -1); @@ -64,7 +63,9 @@ class TestBlockSpill : public testing::Test { block_spill_manager->init(); } - static void TearDownTestSuite() { FileUtils::remove_all(test_data_dir); } + static void TearDownTestSuite() { + io::global_local_filesystem()->delete_directory(test_data_dir); + } protected: void SetUp() { diff --git a/be/test/vec/olap/vertical_compaction_test.cpp b/be/test/vec/olap/vertical_compaction_test.cpp index 88f959c776541b..a2268b3f1a47c5 100644 --- a/be/test/vec/olap/vertical_compaction_test.cpp +++ b/be/test/vec/olap/vertical_compaction_test.cpp @@ -20,6 +20,7 @@ #include +#include "io/fs/local_file_system.h" #include "olap/merger.h" #include "olap/rowset/beta_rowset.h" #include 
"olap/rowset/rowset.h" @@ -32,7 +33,6 @@ #include "olap/storage_engine.h" #include "olap/tablet_schema.h" #include "olap/tablet_schema_helper.h" -#include "util/file_utils.h" #include "vec/olap/vertical_block_reader.h" #include "vec/olap/vertical_merge_iterator.h" @@ -50,19 +50,17 @@ class VerticalCompactionTest : public ::testing::Test { EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); absolute_dir = std::string(buffer) + kTestDir; - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(absolute_dir).ok()); - EXPECT_TRUE(FileUtils::create_dir(absolute_dir + "/tablet_path").ok()); + EXPECT_TRUE(io::global_local_filesystem()->delete_and_create_directory(absolute_dir).ok()); + EXPECT_TRUE(io::global_local_filesystem() + ->create_directory(absolute_dir + "/tablet_path") + .ok()); + doris::EngineOptions options; k_engine = new StorageEngine(options); StorageEngine::_s_instance = k_engine; } void TearDown() override { - if (FileUtils::check_exist(absolute_dir)) { - EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok()); - } + EXPECT_TRUE(io::global_local_filesystem()->delete_directory(absolute_dir).ok()); if (k_engine != nullptr) { k_engine->stop(); delete k_engine; @@ -824,4 +822,4 @@ TEST_F(VerticalCompactionTest, TestAggKeyVerticalMerge) { } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/bin/start_be.sh b/bin/start_be.sh index 199990a4b2a585..166b2f1e59ecd6 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -70,16 +70,34 @@ if [[ "$(uname -s)" != 'Darwin' ]]; then fi fi -# add libs to CLASSPATH +# add java libs for f in "${DORIS_HOME}/lib"/*.jar; do - if [[ -z "${DORIS_JNI_CLASSPATH_PARAMETER}" ]]; then - export DORIS_JNI_CLASSPATH_PARAMETER="${f}" + if [[ -z "${DORIS_CLASSPATH}" ]]; then + export DORIS_CLASSPATH="${f}" else - export DORIS_JNI_CLASSPATH_PARAMETER="${f}:${DORIS_JNI_CLASSPATH_PARAMETER}" + export 
DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}" fi done -# DORIS_JNI_CLASSPATH_PARAMETER is used to configure additional jar path to jvm. e.g. -Djava.class.path=$DORIS_HOME/lib/java-udf.jar -export DORIS_JNI_CLASSPATH_PARAMETER="-Djava.class.path=${DORIS_JNI_CLASSPATH_PARAMETER}" + +if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then + # add hadoop libs + for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do + DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}" + done + for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do + DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}" + done + for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do + DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}" + done + for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do + DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}" + done +fi + +# the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs +# and conf/ dir so that hadoop libhdfs can read .xml config file in conf/ +export CLASSPATH="${DORIS_HOME}/conf/:$DORIS_CLASSPATH" jdk_version() { local java_cmd="${1}" @@ -230,11 +248,28 @@ set_tcmalloc_heap_limit() { # set_tcmalloc_heap_limit || exit 1 -## set hdfs conf +## set hdfs3 conf if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml" fi +# set jvm library for hadoop libhdfs +if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then + MACHINE_ARCH=$(uname -m) + if [[ "${MACHINE_ARCH}" == "x86_64" ]]; then + # TODO: for now, only support hadoop libs on x86_64 + jvm_arch=amd64 + export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/$jvm_arch/server:$JAVA_HOME/jre/lib/$jvm_arch:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$DORIS_HOME/lib/hadoop_hdfs/native:$LD_LIBRARY_PATH + export LIBHDFS_OPTS="${JAVA_OPTS}" + fi +fi + +# FIXME: for debug +echo "CLASSPATH: ${CLASSPATH}\n" +echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}\n" +echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}\n" + # see 
https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof:true,prof_prefix:jeprof.out" diff --git a/build.sh b/build.sh index ca4bda90c126ce..f48ae181757f03 100755 --- a/build.sh +++ b/build.sh @@ -552,6 +552,10 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/ cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/ + if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then + cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/" + fi + if [[ "${DISABLE_JAVA_UDF_IN_CONF}" -eq 1 ]]; then echo -e "\033[33;1mWARNNING: \033[37;1mDisable Java UDF support in be.conf due to the BE was built without Java UDF.\033[0m" cat >>"${DORIS_OUTPUT}/be/conf/be.conf" < iface) { - return false; - } - - @Override - public T unwrap(Class iface) { - throw new UnsupportedOperationException("Unimplemented method 'unwrap'"); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/ThriftServerEventProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/common/ThriftServerEventProcessor.java index 33228cd1a14a23..253eaf5ee52708 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/ThriftServerEventProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/ThriftServerEventProcessor.java @@ -25,9 +25,9 @@ import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.server.ServerContext; import org.apache.thrift.server.TServerEventHandler; +import org.apache.thrift.transport.TFramedTransport; import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.layered.TFramedTransport; import java.net.InetSocketAddress; import java.net.SocketAddress; diff 
--git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/TableQueryPlanAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/TableQueryPlanAction.java index 6d42f4a338b625..fa051b983f5003 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/TableQueryPlanAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/TableQueryPlanAction.java @@ -236,10 +236,9 @@ private void handleQuery(ConnectContext context, String requestDb, String reques tQueryPlanInfo.tablet_info = tabletInfo; // serialize TQueryPlanInfo and encode plan with Base64 to string in order to translate by json format - TSerializer serializer; + TSerializer serializer = new TSerializer(); String opaquedQueryPlan; try { - serializer = new TSerializer(); byte[] queryPlanStream = serializer.serialize(tQueryPlanInfo); opaquedQueryPlan = Base64.getEncoder().encodeToString(queryPlanStream); } catch (TException e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java index d9c69c212a6c5a..e22025800d7935 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalFileScanNode.java @@ -69,11 +69,15 @@ import org.apache.doris.thrift.TUniqueId; import com.google.common.base.Preconditions; +import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Set; @@ -732,22 +736,47 @@ public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { if (detailLevel == TExplainLevel.VERBOSE) { 
output.append(prefix).append("backends:").append("\n"); + Multimap scanRangeLocationsMap = ArrayListMultimap.create(); + // 1. group by backend id for (TScanRangeLocations locations : scanRangeLocations) { - output.append(prefix).append(" ").append(locations.getLocations().get(0).backend_id).append("\n"); - List files = locations.getScanRange().getExtScanRange().getFileScanRange().getRanges(); - for (int i = 0; i < 3; i++) { - if (i >= files.size()) { - break; + scanRangeLocationsMap.putAll(locations.getLocations().get(0).backend_id, + locations.getScanRange().getExtScanRange().getFileScanRange().getRanges()); + } + for (long beId : scanRangeLocationsMap.keySet()) { + output.append(prefix).append(" ").append(beId).append("\n"); + List fileRangeDescs = Lists.newArrayList(scanRangeLocationsMap.get(beId)); + // 2. sort by file start offset + Collections.sort(fileRangeDescs, new Comparator() { + @Override + public int compare(TFileRangeDesc o1, TFileRangeDesc o2) { + return Long.compare(o1.getStartOffset(), o2.getStartOffset()); + } + }); + // 3. if size <= 4, print all. if size > 4, print first 3 and last 1 + int size = fileRangeDescs.size(); + if (size <= 4) { + for (TFileRangeDesc file : fileRangeDescs) { + output.append(prefix).append(" ").append(file.getPath()) + .append(" start: ").append(file.getStartOffset()) + .append(" length: ").append(file.getFileSize()) + .append("\n"); } - TFileRangeDesc file = files.get(i); + } else { + for (int i = 0; i < 3; i++) { + TFileRangeDesc file = fileRangeDescs.get(i); + output.append(prefix).append(" ").append(file.getPath()) + .append(" start: ").append(file.getStartOffset()) + .append(" length: ").append(file.getFileSize()) + .append("\n"); + } + int other = size - 4; + output.append(prefix).append(" ... 
other ").append(other).append(" files ...\n"); + TFileRangeDesc file = fileRangeDescs.get(size - 1); output.append(prefix).append(" ").append(file.getPath()) .append(" start: ").append(file.getStartOffset()) .append(" length: ").append(file.getFileSize()) .append("\n"); } - if (files.size() > 3) { - output.append(prefix).append(" ...other ").append(files.size() - 3).append(" files\n"); - } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileSplit.java index a4e7bfae2f9213..dedbc9f4693b1b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileSplit.java @@ -26,15 +26,19 @@ public class FileSplit extends Split { protected Path path; protected long start; + // length of this split, in bytes protected long length; + // length of the file this split belongs to, in bytes + // -1 means unset. + // If the file length is not set, the file length will be fetched from the file system. 
+ protected long fileLength; protected TableFormatType tableFormatType; - public FileSplit() {} - - public FileSplit(Path path, long start, long length, String[] hosts) { + public FileSplit(Path path, long start, long length, long fileLength, String[] hosts) { this.path = path; this.start = start; this.length = length; + this.fileLength = fileLength; this.hosts = hosts; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java index dd2d3c54d638dc..f7f09b6da6f66a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java @@ -147,10 +147,9 @@ private void getFileSplitByPartitions(HiveMetaStoreCache cache, List { FileSplit fs = (FileSplit) file; - org.apache.doris.planner.external.FileSplit split = new org.apache.doris.planner.external.FileSplit(); - split.setPath(fs.getPath()); - split.setStart(fs.getStart()); - split.setLength(fs.getLength()); + org.apache.doris.planner.external.FileSplit split = new org.apache.doris.planner.external.FileSplit( + fs.getPath(), fs.getStart(), fs.getLength(), -1, null + ); return split; }).collect(Collectors.toList())); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/IcebergSplitter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/IcebergSplitter.java index b595e95cc3766e..15f19bf7a5afff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/IcebergSplitter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/IcebergSplitter.java @@ -92,10 +92,11 @@ public List getSplits(List exprs) throws UserException { List splits = new ArrayList<>(); int formatVersion = ((BaseTable) table).operations().current().formatVersion(); for (FileScanTask task : scan.planFiles()) { + long fileSize = task.file().fileSizeInBytes(); for (FileScanTask splitTask 
: task.split(128 * 1024 * 1024)) { String dataFilePath = splitTask.file().path().toString(); IcebergSplit split = new IcebergSplit(new Path(dataFilePath), splitTask.start(), - splitTask.length(), new String[0]); + splitTask.length(), fileSize, new String[0]); split.setFormatVersion(formatVersion); if (formatVersion >= IcebergScanProvider.MIN_DELETE_FILE_SUPPORT_VERSION) { split.setDeleteFileFilters(getDeleteFileFilters(splitTask)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java index 6636642ed50692..5e1eee54ffc531 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java @@ -183,7 +183,7 @@ private TFileRangeDesc createFileRangeDesc(FileSplit fileSplit, List col rangeDesc.setSize(fileSplit.getLength()); // fileSize only be used when format is orc or parquet and TFileType is broker // When TFileType is other type, it is not necessary - rangeDesc.setFileSize(fileSplit.getLength()); + rangeDesc.setFileSize(fileSplit.getFileLength()); rangeDesc.setColumnsFromPath(columnsFromPath); rangeDesc.setColumnsFromPathKeys(columnsFromPathKeys); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFSplitter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFSplitter.java index efb68c1a3178f3..5929d545ecee45 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFSplitter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TVFSplitter.java @@ -47,15 +47,16 @@ public List getSplits(List exprs) throws UserException { List splits = Lists.newArrayList(); List fileStatuses = tableValuedFunction.getFileStatuses(); for (TBrokerFileStatus fileStatus : fileStatuses) { + long fileLength = fileStatus.getSize(); Path path = new Path(fileStatus.getPath()); if 
(fileStatus.isSplitable) { long splitSize = Config.file_split_size; if (splitSize <= 0) { splitSize = fileStatus.getBlockSize() > 0 ? fileStatus.getBlockSize() : DEFAULT_SPLIT_SIZE; } - addFileSplits(path, fileStatus.getSize(), splitSize, splits); + addFileSplits(path, fileLength, splitSize, splits); } else { - Split split = new FileSplit(path, 0, fileStatus.getSize(), new String[0]); + Split split = new FileSplit(path, 0, fileLength, fileLength, new String[0]); splits.add(split); } } @@ -66,10 +67,10 @@ private void addFileSplits(Path path, long fileSize, long splitSize, List long bytesRemaining; for (bytesRemaining = fileSize; (double) bytesRemaining / (double) splitSize > 1.1D; bytesRemaining -= splitSize) { - splits.add(new FileSplit(path, fileSize - bytesRemaining, splitSize, new String[0])); + splits.add(new FileSplit(path, fileSize - bytesRemaining, splitSize, fileSize, new String[0])); } if (bytesRemaining != 0L) { - splits.add(new FileSplit(path, fileSize - bytesRemaining, bytesRemaining, new String[0])); + splits.add(new FileSplit(path, fileSize - bytesRemaining, bytesRemaining, fileSize, new String[0])); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java index 431652b8943575..1a0f63d6f54ded 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java @@ -27,8 +27,8 @@ @Data public class IcebergSplit extends FileSplit { - public IcebergSplit(Path file, long start, long length, String[] hosts) { - super(file, start, length, hosts); + public IcebergSplit(Path file, long start, long length, long fileLength, String[] hosts) { + super(file, start, length, fileLength, hosts); } private Analyzer analyzer; diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index e9f420343633ad..78e93ae6b4176c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -414,6 +414,7 @@ private PFetchTableSchemaRequest getFetchTableStructureRequest() throws Analysis fileRangeDesc.setPath(firstFile.getPath()); fileRangeDesc.setStartOffset(0); fileRangeDesc.setSize(firstFile.getSize()); + fileRangeDesc.setFileSize(firstFile.getSize()); // set TFileScanRange TFileScanRange fileScanRange = new TFileScanRange(); fileScanRange.addToRanges(fileRangeDesc); diff --git a/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index cafa639f22484d..4a5acc4a2674eb 100644 --- a/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -222,10 +222,10 @@ import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TCompactProtocol; import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TFramedTransport; import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; -import org.apache.thrift.transport.layered.TFramedTransport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -653,12 +653,7 @@ private void open() throws MetaException { throw new MetaException(e.toString()); } } else { - try { - transport = new TSocket(store.getHost(), store.getPort(), clientSocketTimeout); - } catch (TTransportException e) { - tte = e; - throw new 
MetaException(e.toString()); - } + transport = new TSocket(store.getHost(), store.getPort(), clientSocketTimeout); } if (useSasl) { @@ -696,12 +691,7 @@ private void open() throws MetaException { } } else { if (useFramedTransport) { - try { - transport = new TFramedTransport(transport); - } catch (TTransportException e) { - tte = e; - throw new MetaException(e.toString()); - } + transport = new TFramedTransport(transport); } } diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JniUtil.java b/fe/java-udf/src/main/java/org/apache/doris/udf/JniUtil.java index af1cf6e3b0d779..d949b85586f712 100644 --- a/fe/java-udf/src/main/java/org/apache/doris/udf/JniUtil.java +++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JniUtil.java @@ -90,8 +90,8 @@ public static String throwableToStackTrace(Throwable t) { * Serializes input into a byte[] using the default protocol factory. */ public static > byte[] serializeToThrift(T input) throws InternalException { + TSerializer serializer = new TSerializer(protocolFactory_); try { - TSerializer serializer = new TSerializer(protocolFactory_); return serializer.serialize(input); } catch (TException e) { throw new InternalException(e.getMessage()); @@ -103,8 +103,8 @@ public static String throwableToStackTrace(Throwable t) { */ public static , F extends TProtocolFactory> byte[] serializeToThrift( T input, F protocolFactory) throws InternalException { + TSerializer serializer = new TSerializer(protocolFactory); try { - TSerializer serializer = new TSerializer(protocolFactory); return serializer.serialize(input); } catch (TException e) { throw new InternalException(e.getMessage()); @@ -122,8 +122,8 @@ public static String throwableToStackTrace(Throwable t) { public static , F extends TProtocolFactory> void deserializeThrift( F protocolFactory, T result, byte[] thriftData) throws InternalException { // TODO: avoid creating deserializer for each query? 
+ TDeserializer deserializer = new TDeserializer(protocolFactory); try { - TDeserializer deserializer = new TDeserializer(protocolFactory); deserializer.deserialize(result, thriftData); } catch (TException e) { throw new InternalException(e.getMessage()); diff --git a/fe/pom.xml b/fe/pom.xml index ca2b90504625d5..5705864c6a341c 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -189,7 +189,7 @@ under the License. 1.1.1 5.8.2 1.2.5 - 0.16.0 + 0.13.0 8.5.86 2.18.0 2.18.0 diff --git a/fs_brokers/apache_hdfs_broker/pom.xml b/fs_brokers/apache_hdfs_broker/pom.xml index b67c119fc94202..bea0443d5645e5 100644 --- a/fs_brokers/apache_hdfs_broker/pom.xml +++ b/fs_brokers/apache_hdfs_broker/pom.xml @@ -252,7 +252,7 @@ under the License. org.apache.thrift libthrift - 0.16.0 + 0.13.0 diff --git a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java index f66872a10accc2..21cbe48acafe0e 100644 --- a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java +++ b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java @@ -904,7 +904,7 @@ public BrokerFileSystem getJuiceFileSystem(String path, Map prop fileOutputStream.close(); keytab = tmpFilePath; } else { - throw new BrokerException(TBrokerOperationStatusCode.INVALID_ARGUMENT, + throw new BrokerException(TBrokerOperationStatusCode.INVALID_ARGUMENT, "keytab is required for kerberos authentication"); } UserGroupInformation.setConfiguration(conf); @@ -1284,4 +1284,24 @@ private BrokerFileSystem updateCachedFileSystem(FileSystemIdentity fileSystemIde } return brokerFileSystem; } + + public long fileSize(String path, Map properties) { + WildcardURI pathUri = new WildcardURI(path); + BrokerFileSystem fileSystem = getFileSystem(path, properties); + Path filePath = new Path(pathUri.getPath()); + try { + FileStatus fileStatus = 
fileSystem.getDFSFileSystem().getFileStatus(filePath); + if (fileStatus.isDirectory()) { + throw new BrokerException(TBrokerOperationStatusCode.INVALID_INPUT_FILE_PATH, + "not a file: {}", path); + } + return fileStatus.getLen(); + } catch (IOException e) { + logger.error("errors while getting file size: " + path); + fileSystem.closeFileSystem(); + throw new BrokerException(TBrokerOperationStatusCode.TARGET_STORAGE_SERVICE_ERROR, + e, "errors while getting file size {}", path); + } + } } + diff --git a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/HDFSBrokerServiceImpl.java b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/HDFSBrokerServiceImpl.java index 0d7524f0de58a8..14ff74dd41e62e 100644 --- a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/HDFSBrokerServiceImpl.java +++ b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/HDFSBrokerServiceImpl.java @@ -25,6 +25,8 @@ import org.apache.doris.thrift.TBrokerCloseWriterRequest; import org.apache.doris.thrift.TBrokerDeletePathRequest; import org.apache.doris.thrift.TBrokerFD; +import org.apache.doris.thrift.TBrokerFileSizeRequest; +import org.apache.doris.thrift.TBrokerFileSizeResponse; import org.apache.doris.thrift.TBrokerFileStatus; import org.apache.doris.thrift.TBrokerListPathRequest; import org.apache.doris.thrift.TBrokerListResponse; @@ -263,4 +265,21 @@ public TBrokerOperationStatus ping(TBrokerPingBrokerRequest request) } return generateOKStatus(); } + + @Override + public TBrokerFileSizeResponse fileSize( + TBrokerFileSizeRequest request) throws TException { + logger.debug("receive a file size request, request detail: " + request); + TBrokerFileSizeResponse response = new TBrokerFileSizeResponse(); + try { + long fileSize = fileSystemManager.fileSize(request.path, request.properties); + response.setFileSize(fileSize); + response.setOpStatus(generateOKStatus()); + } catch (BrokerException e) { + 
logger.warn("failed to get file size: " + request.path, e); + TBrokerOperationStatus errorStatus = e.generateFailedOperationStatus(); + response.setOpStatus(errorStatus); + } + return response; + } } diff --git a/gensrc/thrift/PaloBrokerService.thrift b/gensrc/thrift/PaloBrokerService.thrift index f63cebe68a425b..1d4d2e876a0b2d 100644 --- a/gensrc/thrift/PaloBrokerService.thrift +++ b/gensrc/thrift/PaloBrokerService.thrift @@ -168,6 +168,17 @@ struct TBrokerPingBrokerRequest { 2: required string clientId; } +struct TBrokerFileSizeRequest { + 1: required TBrokerVersion version; + 2: required string path; + 3: optional map properties; +} + +struct TBrokerFileSizeResponse { + 1: required TBrokerOperationStatus opStatus; + 2: optional i64 fileSize; +} + service TPaloBrokerService { // return a list of files under a path @@ -232,6 +243,9 @@ service TPaloBrokerService { // close file write stream TBrokerOperationStatus closeWriter(1: TBrokerCloseWriterRequest request); - // + // ping broker service TBrokerOperationStatus ping(1: TBrokerPingBrokerRequest request); + + // get size of specified file + TBrokerFileSizeResponse fileSize(1: TBrokerFileSizeRequest request); } diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 504938b9be3d98..e5ef30f977c45b 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -340,7 +340,8 @@ struct TFileRangeDesc { 3: optional i64 start_offset; // Size of this range, if size = -1, this means that will read to the end of file 4: optional i64 size; - 5: optional i64 file_size; + // total size of file this range belongs to, -1 means unset + 5: optional i64 file_size = -1; // columns parsed from file path should be after the columns read from file 6: optional list columns_from_path; // column names from file path, in the same order with columns_from_path diff --git a/thirdparty/CHANGELOG.md b/thirdparty/CHANGELOG.md index 7f8f46432e15be..c16c0d6a6f6b04 100644 --- 
a/thirdparty/CHANGELOG.md +++ b/thirdparty/CHANGELOG.md @@ -2,9 +2,6 @@ This file contains version of the third-party dependency libraries in the build-env image. The docker build-env image is apache/doris, and the tag is `build-env-${version}` -## v20230228 -- Modified: thrift 0.13 -> 0.16 - ## v20230221 - Modified: clucene 2.4.4 -> 2.4.6 diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 7a011374c8fe2c..d50c07d1de3ed7 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -367,11 +367,11 @@ build_thrift() { if [[ "${KERNEL}" != 'Darwin' ]]; then cflags="-I${TP_INCLUDE_DIR}" - cxxflags="-I${TP_INCLUDE_DIR} ${warning_unused_but_set_variable} -Wno-inconsistent-missing-override" + cxxflags="-I${TP_INCLUDE_DIR} ${warning_unused_but_set_variable}" ldflags="-L${TP_LIB_DIR} --static" else - cflags="-I${TP_INCLUDE_DIR} -Wno-implicit-function-declaration -Wno-inconsistent-missing-override" - cxxflags="-I${TP_INCLUDE_DIR} ${warning_unused_but_set_variable} -Wno-inconsistent-missing-override" + cflags="-I${TP_INCLUDE_DIR} -Wno-implicit-function-declaration" + cxxflags="-I${TP_INCLUDE_DIR} ${warning_unused_but_set_variable}" ldflags="-L${TP_LIB_DIR}" fi diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 87c50edfc17494..002a0e513d7947 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -73,10 +73,10 @@ OPENSSL_SOURCE=openssl-OpenSSL_1_1_1s OPENSSL_MD5SUM="7e79a7560dee77c0758baa33c61af4b4" # thrift -THRIFT_DOWNLOAD="http://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.tar.gz" -THRIFT_NAME=thrift-0.16.0.tar.gz -THRIFT_SOURCE=thrift-0.16.0 -THRIFT_MD5SUM="44cf1b54b4ec1890576c85804acfa637" +THRIFT_DOWNLOAD="http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz" +THRIFT_NAME=thrift-0.13.0.tar.gz +THRIFT_SOURCE=thrift-0.13.0 +THRIFT_MD5SUM="38a27d391a2b03214b444cb13d5664f1" # protobuf PROTOBUF_DOWNLOAD="https://github.com/google/protobuf/archive/v3.15.0.tar.gz"