diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 7ba593fe2f5881..545d42f829d96d 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -240,29 +240,30 @@ set_target_properties(orc PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/lib add_library(cctz STATIC IMPORTED) set_target_properties(cctz PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libcctz.a) -# add_library(aws-sdk-core STATIC IMPORTED) -# set_target_properties(aws-sdk-core PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-cpp-sdk-core.a) +add_library(aws-sdk-core STATIC IMPORTED) +set_target_properties(aws-sdk-core PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-cpp-sdk-core.a) -# add_library(aws-sdk-s3 STATIC IMPORTED) -# set_target_properties(aws-sdk-s3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-cpp-sdk-s3.a) +add_library(aws-sdk-s3 STATIC IMPORTED) +set_target_properties(aws-sdk-s3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-cpp-sdk-s3.a) -# add_library(aws-c-cal STATIC IMPORTED) -# set_target_properties(aws-c-cal PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-cal.a) +add_library(aws-c-cal STATIC IMPORTED) +set_target_properties(aws-c-cal PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-cal.a) -# add_library(aws-c-common STATIC IMPORTED) -# set_target_properties(aws-c-common PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-common.a) +add_library(aws-c-common STATIC IMPORTED) +set_target_properties(aws-c-common PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-common.a) -# add_library(aws-c-event-stream STATIC IMPORTED) -# set_target_properties(aws-c-event-stream PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-event-stream.a) +add_library(aws-c-event-stream STATIC IMPORTED) +set_target_properties(aws-c-event-stream PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-event-stream.a) -# add_library(aws-c-io STATIC IMPORTED) -# set_target_properties(aws-c-io PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-io.a) +add_library(aws-c-io STATIC IMPORTED) +set_target_properties(aws-c-io PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-c-io.a) -# add_library(aws-checksums STATIC IMPORTED) -# set_target_properties(aws-checksums PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-checksums.a) +add_library(aws-checksums STATIC IMPORTED) +set_target_properties(aws-checksums PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libaws-checksums.a) + +add_library(aws-s2n STATIC IMPORTED) +set_target_properties(aws-s2n PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libs2n.a) -# add_library(aws-s2n STATIC IMPORTED) -# set_target_properties(aws-s2n PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/libs2n.a) find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin) @@ -387,7 +388,7 @@ include_directories( set(WL_START_GROUP "-Wl,--start-group") set(WL_END_GROUP "-Wl,--end-group") -# set(AWS_LIBS aws-sdk-s3 aws-sdk-core aws-checksums aws-c-io aws-c-event-stream aws-c-common aws-c-cal aws-s2n) +set(AWS_LIBS aws-sdk-s3 aws-sdk-core aws-checksums aws-c-io aws-c-event-stream aws-c-common aws-c-cal aws-s2n) # Set Palo libraries set(DORIS_LINK_LIBS @@ -454,7 +455,7 @@ set(DORIS_DEPENDENCIES orc odbc cctz - # ${AWS_LIBS} + ${AWS_LIBS} ${WL_END_GROUP} ) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 86e181de37ec1c..9e7efc37184359 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1237,9 
+1237,14 @@ void TaskWorkerPool::_upload_worker_thread_callback() { << ", job id:" << upload_request.job_id; std::map<int64_t, std::vector<std::string>> tablet_files; - SnapshotLoader loader(_env, upload_request.job_id, agent_task_req.signature); - Status status = loader.upload(upload_request.src_dest_map, upload_request.broker_addr, - upload_request.broker_prop, &tablet_files); + std::unique_ptr<SnapshotLoader> loader = nullptr; + if (upload_request.__isset.storage_backend && upload_request.storage_backend == TStorageBackendType::S3) { + loader.reset(new SnapshotLoader(_env, upload_request.job_id, agent_task_req.signature, upload_request.broker_prop)); + } else { + loader.reset(new SnapshotLoader(_env, upload_request.job_id, agent_task_req.signature, upload_request.broker_addr, + upload_request.broker_prop)); + } + Status status = loader->upload(upload_request.src_dest_map, &tablet_files); TStatusCode::type status_code = TStatusCode::OK; std::vector<std::string> error_msgs; @@ -1295,9 +1300,15 @@ void TaskWorkerPool::_download_worker_thread_callback() { // TODO: download std::vector<int64_t> downloaded_tablet_ids; - SnapshotLoader loader(_env, download_request.job_id, agent_task_req.signature); - Status status = loader.download(download_request.src_dest_map, download_request.broker_addr, - download_request.broker_prop, &downloaded_tablet_ids); + + std::unique_ptr<SnapshotLoader> loader = nullptr; + if (download_request.__isset.storage_backend && download_request.storage_backend == TStorageBackendType::S3) { + loader.reset(new SnapshotLoader(_env, download_request.job_id, agent_task_req.signature, download_request.broker_prop)); + } else { + loader.reset(new SnapshotLoader(_env, download_request.job_id, agent_task_req.signature, download_request.broker_addr, + download_request.broker_prop)); + } + Status status = loader->download(download_request.src_dest_map, &downloaded_tablet_ids); if (!status.ok()) { status_code = TStatusCode::RUNTIME_ERROR; diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 99977d0c4a875f..07b7a5b9088f55 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -18,6 +18,8 @@ #include "common/daemon.h" #include + +#include <aws/core/Aws.h> #include #include @@ -66,6 +68,8 @@ namespace doris { bool k_doris_exit = false; +Aws::SDKOptions aws_options; + void Daemon::tcmalloc_gc_thread() { while (!_stop_background_threads_latch.wait_for(MonoDelta::FromSeconds(10))) { size_t used_size = 0; @@ -264,6 +268,14 @@ void Daemon::init(int argc, char** argv, const std::vector<StorePath>& paths) { HllFunctions::init(); HashFunctions::init(); TopNFunctions::init(); + // disable EC2 metadata service + setenv("AWS_EC2_METADATA_DISABLED", "true", false); + Aws::Utils::Logging::LogLevel logLevel = Aws::Utils::Logging::LogLevel::Info; + aws_options.loggingOptions.logLevel = logLevel; + aws_options.loggingOptions.logger_create_fn = [logLevel] { + return std::make_shared<DorisAWSLogger>(logLevel); + }; + Aws::InitAPI(aws_options); LOG(INFO) << CpuInfo::debug_string(); LOG(INFO) << DiskInfo::debug_string(); @@ -313,6 +325,7 @@ void Daemon::stop() { if (_calculate_metrics_thread) { _calculate_metrics_thread->join(); } + Aws::ShutdownAPI(aws_options); } } // namespace doris diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index 6ba785e6f540ae..9f04d41dd7acd7 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -104,6 +104,8 @@ set(EXEC_FILES odbc_connector.cpp json_scanner.cpp assert_num_rows_node.cpp + s3_reader.cpp + s3_writer.cpp ) if (WITH_MYSQL) diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index
c2682753b05272..1844ba8442efdc 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -25,6 +25,7 @@ #include "exec/exec_node.h" #include "exec/local_file_reader.h" #include "exec/plain_text_line_reader.h" +#include "exec/s3_reader.h" #include "exec/text_converter.h" #include "exec/text_converter.hpp" #include "exprs/expr.h" @@ -154,6 +155,12 @@ Status BrokerScanner::open_file_reader() { _cur_file_reader = broker_reader; break; } + case TFileType::FILE_S3: { + S3Reader* s3_reader = new S3Reader(_params.properties, range.path, start_offset); + RETURN_IF_ERROR(s3_reader->open()); + _cur_file_reader = s3_reader; + break; + } case TFileType::FILE_STREAM: { _stream_load_pipe = _state->exec_env()->load_stream_mgr()->get(range.load_id); if (_stream_load_pipe == nullptr) { diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp index 2770435ee81f7f..feb85b6ab41ea5 100644 --- a/be/src/exec/json_scanner.cpp +++ b/be/src/exec/json_scanner.cpp @@ -22,6 +22,7 @@ #include "env/env.h" #include "exec/broker_reader.h" #include "exec/local_file_reader.h" +#include "exec/s3_reader.h" #include "exprs/expr.h" #include "exprs/json_functions.h" #include "gutil/strings/split.h" @@ -119,7 +120,12 @@ Status JsonScanner::open_next_reader() { file = broker_reader; break; } - + case TFileType::FILE_S3: { + S3Reader* s3_reader = new S3Reader(_params.properties, range.path, start_offset); + RETURN_IF_ERROR(s3_reader->open()); + file = s3_reader; + break; + } case TFileType::FILE_STREAM: { _stream_load_pipe = _state->exec_env()->load_stream_mgr()->get(range.load_id); if (_stream_load_pipe == nullptr) { diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 505cdd41daa55d..0d40a334f3bed5 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -18,7 +18,9 @@ #include "exec/orc_scanner.h" #include "exec/broker_reader.h" +#include "exec/buffered_reader.h" #include "exec/local_file_reader.h" +#include "exec/s3_reader.h" #include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" @@ -399,6 +401,11 @@ Status ORCScanner::open_next_reader() { file_size)); break; } + case TFileType::FILE_S3: { + file_reader.reset(new BufferedReader( + new S3Reader(_params.properties, range.path, range.start_offset))); + break; + } default: { std::stringstream ss; ss << "Unknown file type, type=" << range.file_type; diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp index a3a66560ed470e..50c255aa68c046 100644 --- a/be/src/exec/parquet_scanner.cpp +++ b/be/src/exec/parquet_scanner.cpp @@ -22,6 +22,7 @@ #include "exec/decompressor.h" #include "exec/local_file_reader.h" #include "exec/parquet_reader.h" +#include "exec/s3_reader.h" #include "exec/text_converter.h" #include "exec/text_converter.hpp" #include "exprs/expr.h" @@ -127,6 +128,11 @@ Status ParquetScanner::open_next_reader() { range.path, range.start_offset, file_size))); break; } + case TFileType::FILE_S3: { + file_reader.reset(new BufferedReader( + new S3Reader(_params.properties, range.path, range.start_offset))); + break; + } #if 0 case TFileType::FILE_STREAM: { diff --git a/be/src/exec/s3_reader.cpp b/be/src/exec/s3_reader.cpp new file mode 100644 index 00000000000000..3f4fa93924e19b --- /dev/null +++ b/be/src/exec/s3_reader.cpp @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/s3_reader.h" + +#include <aws/s3/S3Client.h> +#include <aws/s3/model/GetObjectRequest.h> +#include <aws/s3/model/HeadObjectRequest.h> + +#include "common/logging.h" +#include "gutil/strings/strcat.h" +#include "util/s3_util.h" + +namespace doris { + +#ifndef CHECK_S3_CLIENT +#define CHECK_S3_CLIENT(client) \ + if (!client) { \ + return Status::InternalError("init aws s3 client error."); \ + } +#endif + +S3Reader::S3Reader(const std::map<std::string, std::string>& properties, const std::string& path, + int64_t start_offset) + : _properties(properties), + _path(path), + _uri(path), + _cur_offset(start_offset), + _file_size(0), + _closed(false) { + _client = create_client(_properties); + DCHECK(_client) << "init aws s3 client error."; +} + +S3Reader::~S3Reader() {} + +Status S3Reader::open() { + CHECK_S3_CLIENT(_client); + if (!_uri.parse()) { + return Status::InvalidArgument("s3 uri is invalid: " + _path); + } + Aws::S3::Model::HeadObjectRequest request; + request.WithBucket(_uri.get_bucket()).WithKey(_uri.get_key()); + Aws::S3::Model::HeadObjectOutcome response = _client->HeadObject(request); + if (response.IsSuccess()) { + _file_size = response.GetResult().GetContentLength(); + return Status::OK(); + } else if (response.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::NotFound(_path + " not exists!"); + } else { + std::stringstream out; + out << "Error: [" << response.GetError().GetExceptionName() << ":" + << response.GetError().GetMessage(); + return Status::InternalError(out.str()); + } +} +Status S3Reader::read(uint8_t* buf, size_t* buf_len, bool* eof) { + DCHECK_NE(*buf_len, 0); + RETURN_IF_ERROR(readat(_cur_offset, (int64_t)*buf_len, (int64_t*)buf_len, buf)); + if (*buf_len == 0) { + *eof = true; + } else { + *eof = false; + } + return Status::OK(); +} +Status S3Reader::readat(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) { + CHECK_S3_CLIENT(_client); + if (position >= _file_size) { + *bytes_read = 0; + VLOG_FILE << "Read end of file: " + _path; + return Status::EndOfFile("Read end of file: " + _path); + } + Aws::S3::Model::GetObjectRequest request; + request.WithBucket(_uri.get_bucket()).WithKey(_uri.get_key()); + std::string bytes = StrCat("bytes=", position, "-"); + if (position + nbytes < _file_size) { + bytes = StrCat(bytes, position + nbytes - 1); + } + request.SetRange(bytes.c_str()); + auto response = _client->GetObject(request); + if (!response.IsSuccess()) { + *bytes_read = 0; + std::stringstream out; + out << "Error: [" << response.GetError().GetExceptionName() << ":" + << response.GetError().GetMessage(); + LOG(INFO) << out.str(); + return Status::InternalError(out.str()); + } + *bytes_read = response.GetResult().GetContentLength(); + *bytes_read = nbytes < *bytes_read ?
nbytes : *bytes_read; + _cur_offset = position + *bytes_read; + response.GetResult().GetBody().read((char*)out, *bytes_read); + return Status::OK(); +} +Status S3Reader::read_one_message(std::unique_ptr<uint8_t[]>* buf, size_t* length) { + bool eof = false; + int64_t file_size = size() - _cur_offset; + if (file_size <= 0) { + buf->reset(); + *length = 0; + return Status::OK(); + } + *length = file_size; + buf->reset(new uint8_t[file_size]); + RETURN_IF_ERROR(read(buf->get(), length, &eof)); + return Status::OK(); +} + +int64_t S3Reader::size() { + return _file_size; +} +Status S3Reader::seek(int64_t position) { + _cur_offset = position; + return Status::OK(); +} +Status S3Reader::tell(int64_t* position) { + *position = _cur_offset; + return Status::OK(); +} +void S3Reader::close() { + _closed = true; +} +bool S3Reader::closed() { + return _closed; +} + +} // end namespace doris diff --git a/be/src/exec/s3_reader.h b/be/src/exec/s3_reader.h new file mode 100644 index 00000000000000..260c9198e64db2 --- /dev/null +++ b/be/src/exec/s3_reader.h @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <map> +#include <string> + +#include "exec/file_reader.h" +#include "util/s3_uri.h" + +namespace Aws { +namespace S3 { +class S3Client; +} +} // namespace Aws + +namespace doris { +class S3Reader : public FileReader { +public: + S3Reader(const std::map<std::string, std::string>& properties, const std::string& path, + int64_t start_offset); + ~S3Reader(); + virtual Status open() override; + // Read content into 'buf'; 'buf_len' is the max size of the buffer. + // Returns OK on success, with 'buf_len' set to the size of the read content. + // If the end of file is reached, 'eof' is set to true and 'buf_len' + // is set to zero. + virtual Status read(uint8_t* buf, size_t* buf_len, bool* eof) override; + virtual Status readat(int64_t position, int64_t nbytes, int64_t* bytes_read, + void* out) override; + + /** + * This interface is used to read a whole message, for example: read a message from kafka. + * + * If it reads EOF, it returns Status::OK with length set to 0 and buf reset to NULL; + * otherwise length is set to the number of bytes read.
+ */ + virtual Status read_one_message(std::unique_ptr<uint8_t[]>* buf, size_t* length) override; + virtual int64_t size() override; + virtual Status seek(int64_t position) override; + virtual Status tell(int64_t* position) override; + virtual void close() override; + virtual bool closed() override; + +private: + const std::map<std::string, std::string>& _properties; + std::string _path; + S3URI _uri; + int64_t _cur_offset; + int64_t _file_size; + bool _closed; + std::unique_ptr<Aws::S3::S3Client> _client; +}; +} // end namespace doris diff --git a/be/src/exec/s3_writer.cpp b/be/src/exec/s3_writer.cpp new file mode 100644 index 00000000000000..557c004bf34c54 --- /dev/null +++ b/be/src/exec/s3_writer.cpp @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/s3_writer.h" + +#include <aws/core/utils/FileSystemUtils.h> +#include <aws/s3/S3Client.h> +#include <aws/s3/model/HeadObjectRequest.h> +#include <aws/s3/model/PutObjectRequest.h> + +#include "common/logging.h" +#include "util/s3_uri.h" +#include "util/s3_util.h" + +namespace doris { + +#ifndef CHECK_S3_CLIENT +#define CHECK_S3_CLIENT(client) \ + if (!client) { \ + return Status::InternalError("init aws s3 client error."); \ + } +#endif + +S3Writer::S3Writer(const std::map<std::string, std::string>& properties, const std::string& path, + int64_t start_offset) + : _properties(properties), + _path(path), + _uri(path), + _sync_needed(false), + _temp_file(Aws::MakeShared<Aws::Utils::TempFile>( + "S3WRITER", + // "/tmp/doris_tmp_", "s3tmp", + std::ios_base::binary | std::ios_base::trunc | std::ios_base::in | + std::ios_base::out)) { + _client = create_client(_properties); + DCHECK(_client) << "init aws s3 client error."; +} + +S3Writer::~S3Writer() { + close(); +} + +Status S3Writer::open() { + CHECK_S3_CLIENT(_client); + if (!_uri.parse()) { + return Status::InvalidArgument("s3 uri is invalid: " + _path); + } + Aws::S3::Model::HeadObjectRequest request; + request.WithBucket(_uri.get_bucket()).WithKey(_uri.get_key()); + Aws::S3::Model::HeadObjectOutcome response = _client->HeadObject(request); + if (response.IsSuccess()) { + return Status::AlreadyExist(_path + " already exists."); + } else if (response.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::OK(); + } else { + std::stringstream out; + out << "Error: [" << response.GetError().GetExceptionName() << ":" + << response.GetError().GetMessage(); + return Status::InternalError(out.str()); + } +} + +Status S3Writer::write(const uint8_t* buf, size_t buf_len, size_t* written_len) { + if (buf_len == 0) { + *written_len = 0; + return Status::OK(); + } + if (!_temp_file) { + return Status::BufferAllocFailed("The internal temporary file is not writable."); + } + _sync_needed = true; + _temp_file->write(reinterpret_cast<const char*>(buf), buf_len); + if (!_temp_file->good()) { + return Status::BufferAllocFailed("Could not append to the internal temporary file."); + } + *written_len = buf_len; + return Status::OK(); +}
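 + +// S3 objects cannot be appended to, so S3Writer stages every write() in a +// local Aws::Utils::TempFile and uploads the whole object with a single +// PutObject when close() runs _sync(). A minimal usage sketch follows; the +// props, bucket and key below are placeholders, not values from this patch: +// +// std::map<std::string, std::string> props; // credentials consumed by create_client() +// S3Writer writer(props, "s3://my_bucket/my_key", 0 /* offset */); +// RETURN_IF_ERROR(writer.open()); // returns AlreadyExist if the key exists +// size_t written = 0; +// RETURN_IF_ERROR(writer.write(data, data_len, &written)); +// RETURN_IF_ERROR(writer.close()); // performs the actual upload via _sync()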
+ +Status S3Writer::close() { + if (_temp_file) { + RETURN_IF_ERROR(_sync()); + _temp_file.reset(); + } + return Status::OK(); +} + +Status S3Writer::_sync() { + if (!_temp_file) { + return Status::BufferAllocFailed("The internal temporary file is not writable."); + } + if (!_sync_needed) { + return Status::OK(); + } + CHECK_S3_CLIENT(_client); + Aws::S3::Model::PutObjectRequest request; + request.WithBucket(_uri.get_bucket()).WithKey(_uri.get_key()); + long offset = _temp_file->tellp(); + _temp_file->seekg(0); + request.SetBody(_temp_file); + request.SetContentLength(offset); + auto response = _client->PutObject(request); + _temp_file->clear(); + _temp_file->seekp(offset); + if (response.IsSuccess()) { + return Status::OK(); + } else { + std::stringstream out; + out << "Error: [" << response.GetError().GetExceptionName() << ":" + << response.GetError().GetMessage(); + return Status::InternalError(out.str()); + } +} +} // end namespace doris diff --git a/be/src/exec/s3_writer.h b/be/src/exec/s3_writer.h new file mode 100644 index 00000000000000..e070a99e0c8862 --- /dev/null +++ b/be/src/exec/s3_writer.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <map> +#include <string> + +#include "exec/file_writer.h" +#include "util/s3_uri.h" + +namespace Aws { +namespace Utils { +class TempFile; +} +namespace S3 { +class S3Client; +} +} // namespace Aws + +namespace doris { +class S3Writer : public FileWriter { +public: + S3Writer(const std::map<std::string, std::string>& properties, const std::string& path, + int64_t start_offset); + ~S3Writer(); + Status open() override; + + // Writes up to buf_len bytes from the buffer pointed to by buf to the file. + // NOTE: the number of bytes actually written may be less than buf_len.
+ Status write(const uint8_t* buf, size_t buf_len, size_t* written_len) override; + + Status close() override; + +private: + Status _sync(); + + const std::map<std::string, std::string>& _properties; + std::string _path; + S3URI _uri; + bool _sync_needed; + std::shared_ptr<Aws::Utils::TempFile> _temp_file; + std::unique_ptr<Aws::S3::S3Client> _client; +}; + +} // end namespace doris diff --git a/be/src/runtime/client_cache.h b/be/src/runtime/client_cache.h index b602a2ca98e03c..8286853a3c8b00 100644 --- a/be/src/runtime/client_cache.h +++ b/be/src/runtime/client_cache.h @@ -146,7 +146,7 @@ class ClientCache; template <class T> class ClientConnection { public: - ClientConnection(ClientCache* client_cache, TNetworkAddress address, Status* status) + ClientConnection(ClientCache* client_cache, const TNetworkAddress& address, Status* status) : _client_cache(client_cache), _client(NULL) { *status = _client_cache->get_client(address, &_client, 0); @@ -155,7 +155,7 @@ class ClientConnection { } } - ClientConnection(ClientCache* client_cache, TNetworkAddress address, int timeout_ms, + ClientConnection(ClientCache* client_cache, const TNetworkAddress& address, int timeout_ms, Status* status) : _client_cache(client_cache), _client(NULL) { *status = _client_cache->get_client(address, &_client, timeout_ms); diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index 8751b93766f305..b48f4ae2b597e0 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -430,7 +430,7 @@ class DateTimeValue { static DateTimeValue _s_min_datetime_value(0, TIME_DATETIME, 0, 0, 0, 0, 0, 1, 1); return _s_min_datetime_value; } - + static DateTimeValue datetime_max_value() { static DateTimeValue _s_max_datetime_value(0, TIME_DATETIME, 23, 59, 59, 0, 9999, 12, 31); return _s_max_datetime_value; diff --git a/be/src/runtime/export_sink.cpp b/be/src/runtime/export_sink.cpp index 157abb946accc3..253fe746dbe62d 100644 --- a/be/src/runtime/export_sink.cpp +++ b/be/src/runtime/export_sink.cpp @@ -23,6 +23,7 @@ #include "exec/broker_writer.h" #include "exec/local_file_writer.h" +#include "exec/s3_writer.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "runtime/mem_tracker.h" @@ -262,6 +263,13 @@ Status ExportSink::open_file_writer() { _file_writer.reset(broker_writer); break; } + case TFileType::FILE_S3: { + S3Writer* s3_writer = new S3Writer(_t_export_sink.properties, + _t_export_sink.export_path + "/" + file_name, 0 /* offset */); + RETURN_IF_ERROR(s3_writer->open()); + _file_writer.reset(s3_writer); + break; + } default: { std::stringstream ss; ss << "Unknown file type, type=" << _t_export_sink.file_type; diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index ff763087fb59ee..1e863ed8146a7c 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -34,42 +34,50 @@ #include "olap/tablet.h" #include "runtime/broker_mgr.h" #include "runtime/exec_env.h" +#include "util/broker_storage_backend.h" #include "util/file_utils.h" +#include "util/s3_storage_backend.h" #include "util/thrift_rpc_helper.h" namespace doris { -#ifdef BE_TEST -inline BrokerServiceClientCache* client_cache(ExecEnv* env) { - static BrokerServiceClientCache s_client_cache; - return &s_client_cache; +SnapshotLoader::SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, + const TNetworkAddress& broker_addr, + const std::map<std::string, std::string>& broker_prop) + : _env(env), + _job_id(job_id), + _task_id(task_id), + _broker_addr(broker_addr), + _prop(broker_prop) { + _storage_backend.reset(new BrokerStorageBackend(_env,
_broker_addr, _prop)); } -inline const std::string& client_id(ExecEnv* env, const TNetworkAddress& addr) { - static std::string s_client_id = "doris_unit_test"; - return s_client_id; -} -#else -inline BrokerServiceClientCache* client_cache(ExecEnv* env) { - return env->broker_client_cache(); -} - -inline const std::string& client_id(ExecEnv* env, const TNetworkAddress& addr) { - return env->broker_mgr()->get_client_id(addr); -} -#endif - SnapshotLoader::SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id) - : _env(env), _job_id(job_id), _task_id(task_id) {} + : _env(env), + _job_id(job_id), + _task_id(task_id), + _broker_addr(TNetworkAddress()), + _prop(std::map<std::string, std::string>()), + _storage_backend(nullptr) {} + +SnapshotLoader::SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, const std::map<std::string, std::string>& prop) + : _env(env), + _job_id(job_id), + _task_id(task_id), + _broker_addr(TNetworkAddress()), + _prop(prop) { + _storage_backend.reset(new S3StorageBackend(prop)); + } SnapshotLoader::~SnapshotLoader() {} Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_dest_path, - const TNetworkAddress& broker_addr, - const std::map<std::string, std::string>& broker_prop, std::map<int64_t, std::vector<std::string>>* tablet_files) { + if (!_storage_backend) { + return Status::InternalError("Storage backend not initialized."); + } LOG(INFO) << "begin to upload snapshot files. num: " << src_to_dest_path.size() - << ", broker addr: " << broker_addr << ", job: " << _job_id << ", task" << _task_id; + << ", broker addr: " << _broker_addr << ", job: " << _job_id << ", task: " << _task_id; // check if job has already been cancelled int tmp_counter = 1; @@ -79,19 +87,7 @@ Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_d // 1. validate local tablet snapshot paths RETURN_IF_ERROR(_check_local_snapshot_paths(src_to_dest_path, true)); - // 2. get broker client - BrokerServiceConnection client(client_cache(_env), broker_addr, 10000, &status); - if (!status.ok()) { - std::stringstream ss; - ss << "failed to get broker client. " - << "broker addr: " << broker_addr << ". msg: " << status.get_error_msg(); - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - - std::vector<TNetworkAddress> broker_addrs; - broker_addrs.push_back(broker_addr); - // 3. for each src path, upload it to remote storage + // 2. for each src path, upload it to remote storage // we report to frontend for every 10 files, and we will cancel the job if // the job has already been cancelled in frontend. int report_counter = 0; @@ -108,8 +104,7 @@ Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_d // 2.1 get existing files from remote path std::map<std::string, FileStat> remote_files; - RETURN_IF_ERROR( - _get_existing_files_from_remote(client, dest_path, broker_prop, &remote_files)); + RETURN_IF_ERROR(_storage_backend->list(dest_path, &remote_files)); for (auto& tmp : remote_files) { VLOG_CRITICAL << "get remote file: " << tmp.first << ", checksum: " << tmp.second.md5; @@ -160,60 +155,9 @@ Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_d } // upload - // open broker writer.
file name end with ".part" - it will be rename to ".md5sum" after upload finished std::string full_remote_file = dest_path + "/" + local_file; - { - // NOTICE: broker writer must be closed before calling rename - std::unique_ptr<BrokerWriter> broker_writer; - broker_writer.reset(new BrokerWriter(_env, broker_addrs, broker_prop, - full_remote_file + ".part", 0 /* offset */)); - RETURN_IF_ERROR(broker_writer->open()); - - // read file and write to broker - std::string full_local_file = src_path + "/" + local_file; - FileHandler file_handler; - OLAPStatus ost = file_handler.open(full_local_file, O_RDONLY); - if (ost != OLAP_SUCCESS) { - return Status::InternalError("failed to open file: " + full_local_file); - } - - size_t file_len = file_handler.length(); - if (file_len == -1) { - return Status::InternalError("failed to get length of file: " + - full_local_file); - } - - constexpr size_t buf_sz = 1024 * 1024; - char read_buf[buf_sz]; - size_t left_len = file_len; - size_t read_offset = 0; - while (left_len > 0) { - size_t read_len = left_len > buf_sz ? buf_sz : left_len; - ost = file_handler.pread(read_buf, read_len, read_offset); - if (ost != OLAP_SUCCESS) { - return Status::InternalError("failed to read file: " + full_local_file); - } - // write through broker - size_t write_len = 0; - RETURN_IF_ERROR(broker_writer->write(reinterpret_cast<const uint8_t*>(read_buf), - read_len, &write_len)); - DCHECK_EQ(write_len, read_len); - - read_offset += read_len; - left_len -= read_len; - } - - // close manually, because we need to check its close status - RETURN_IF_ERROR(broker_writer->close()); - - LOG(INFO) << "finished to write file via broker. file: " << full_local_file - << ", length: " << file_len; - } - - // rename file to end with ".md5sum" - RETURN_IF_ERROR(_rename_remote_file(client, full_remote_file + ".part", - full_remote_file + "." + md5sum, broker_prop)); + std::string full_local_file = src_path + "/" + local_file; + RETURN_IF_ERROR(_storage_backend->upload_with_checksum(full_local_file, full_remote_file, md5sum)); } // end for each tablet's local files tablet_files->emplace(tablet_id, local_files_with_checksum); @@ -232,11 +176,12 @@ Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_d * may also contains severval useless files. */ Status SnapshotLoader::download(const std::map<std::string, std::string>& src_to_dest_path, - const TNetworkAddress& broker_addr, - const std::map<std::string, std::string>& broker_prop, std::vector<int64_t>* downloaded_tablet_ids) { + if (!_storage_backend) { + return Status::InternalError("Storage backend not initialized."); + } LOG(INFO) << "begin to download snapshot files. num: " << src_to_dest_path.size() - << ", broker addr: " << broker_addr << ", job: " << _job_id + << ", broker addr: " << _broker_addr << ", job: " << _job_id << ", task id: " << _task_id; // check if job has already been cancelled @@ -247,19 +192,7 @@ Status SnapshotLoader::download(const std::map<std::string, std::string>& src_to // 1. validate local tablet snapshot paths RETURN_IF_ERROR(_check_local_snapshot_paths(src_to_dest_path, false)); - // 2. get broker client - BrokerServiceConnection client(client_cache(_env), broker_addr, 10000, &status); - if (!status.ok()) { - std::stringstream ss; - ss << "failed to get broker client. " - << "broker addr: " << broker_addr << ". msg: " << status.get_error_msg(); - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - - std::vector<TNetworkAddress> broker_addrs; - broker_addrs.push_back(broker_addr); - // 3. for each src path, download it to local storage + // 2.
for each src path, download it to local storage int report_counter = 0; int total_num = src_to_dest_path.size(); int finished_num = 0; @@ -278,14 +211,13 @@ Status SnapshotLoader::download(const std::map<std::string, std::string>& src_to VLOG_CRITICAL << "get local tablet id: " << local_tablet_id << ", schema hash: " << schema_hash << ", remote tablet id: " << remote_tablet_id; - // 1. get local files + // 2.1. get local files std::vector<std::string> local_files; RETURN_IF_ERROR(_get_existing_files_from_local(local_path, &local_files)); - // 2. get remote files + // 2.2. get remote files std::map<std::string, FileStat> remote_files; - RETURN_IF_ERROR( - _get_existing_files_from_remote(client, remote_path, broker_prop, &remote_files)); + RETURN_IF_ERROR(_storage_backend->list(remote_path, &remote_files)); if (remote_files.empty()) { std::stringstream ss; ss << "get nothing from remote path: " << remote_path; @@ -356,59 +288,12 @@ Status SnapshotLoader::download(const std::map<std::string, std::string>& src_to if (data_dir->reach_capacity_limit(file_len)) { return Status::InternalError("capacity limit reached"); } + // remove the file that is about to be downloaded; it will be re-added + // to local_files once it has been downloaded successfully. + local_files.erase(find); + RETURN_IF_ERROR(_storage_backend->download(full_remote_file, full_local_file)); - { - // 1. open remote file for read - std::unique_ptr<BrokerReader> broker_reader; - broker_reader.reset(new BrokerReader(_env, broker_addrs, broker_prop, - full_remote_file, 0 /* offset */)); - RETURN_IF_ERROR(broker_reader->open()); - - // 2. remove the existing local file if exist - if (boost::filesystem::remove(full_local_file)) { - VLOG_CRITICAL << "remove the previously exist local file: " << full_local_file; - } - // remove file which will be downloaded now. - // this file will be added to local_files if it be downloaded successfully. - local_files.erase(find); - - // 3. open local file for write - FileHandler file_handler; - OLAPStatus ost = file_handler.open_with_mode( - full_local_file, O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR); - if (ost != OLAP_SUCCESS) { - return Status::InternalError("failed to open file: " + full_local_file); - } - - // 4. read remote and write to local - VLOG_CRITICAL << "read remote file: " << full_remote_file - << " to local: " << full_local_file << ". file size: " << file_len; - constexpr size_t buf_sz = 1024 * 1024; - char read_buf[buf_sz]; - size_t write_offset = 0; - bool eof = false; - while (!eof) { - size_t read_len = buf_sz; - RETURN_IF_ERROR(broker_reader->read(reinterpret_cast<uint8_t*>(read_buf), - &read_len, &eof)); - - if (eof) { - continue; - } - - if (read_len > 0) { - ost = file_handler.pwrite(read_buf, read_len, write_offset); - if (ost != OLAP_SUCCESS) { - return Status::InternalError("failed to write file: " + - full_local_file); - } - - write_offset += read_len; - } - } - } // file_handler should be closed before calculating checksum - - // 5. check md5 of the downloaded file + // 3.
check md5 of the downloaded file std::string downloaded_md5sum; status = FileUtils::md5sum(full_local_file, &downloaded_md5sum); if (!status.ok()) { @@ -656,72 +541,6 @@ Status SnapshotLoader::_check_local_snapshot_paths( return Status::OK(); } -Status SnapshotLoader::_get_existing_files_from_remote( - BrokerServiceConnection& client, const std::string& remote_path, - const std::map<std::string, std::string>& broker_prop, - std::map<std::string, FileStat>* files) { - try { - // get existing files from remote path - TBrokerListResponse list_rep; - TBrokerListPathRequest list_req; - list_req.__set_version(TBrokerVersion::VERSION_ONE); - list_req.__set_path(remote_path + "/*"); - list_req.__set_isRecursive(false); - list_req.__set_properties(broker_prop); - list_req.__set_fileNameOnly(true); // we only need file name, not abs path - - try { - client->listPath(list_rep, list_req); - } catch (apache::thrift::transport::TTransportException& e) { - RETURN_IF_ERROR(client.reopen()); - client->listPath(list_rep, list_req); - } - - if (list_rep.opStatus.statusCode == TBrokerOperationStatusCode::FILE_NOT_FOUND) { - LOG(INFO) << "path does not exist: " << remote_path; - return Status::OK(); - } else if (list_rep.opStatus.statusCode != TBrokerOperationStatusCode::OK) { - std::stringstream ss; - ss << "failed to list files from remote path: " << remote_path - << ", msg: " << list_rep.opStatus.message; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - LOG(INFO) << "finished to list files from remote path. file num: " << list_rep.files.size(); - - // split file name and checksum - for (const auto& file : list_rep.files) { - if (file.isDir) { - // this is not a file - continue; - } - - const std::string& file_name = file.path; - size_t pos = file_name.find_last_of("."); - if (pos == std::string::npos || pos == file_name.size() - 1) { - // Not found checksum separator, ignore this file - continue; - } - - FileStat stat = {std::string(file_name, 0, pos), std::string(file_name, pos + 1), - file.size}; - files->emplace(std::string(file_name, 0, pos), stat); - VLOG_CRITICAL << "split remote file: " << std::string(file_name, 0, pos) - << ", checksum: " << std::string(file_name, pos + 1); - } - - LOG(INFO) << "finished to split files.
valid file num: " << files->size(); - - } catch (apache::thrift::TException& e) { - std::stringstream ss; - ss << "failed to list files in remote path: " << remote_path << ", msg: " << e.what(); - LOG(WARNING) << ss.str(); - return Status::ThriftRpcError(ss.str()); - } - - return Status::OK(); -} - Status SnapshotLoader::_get_existing_files_from_local(const std::string& local_path, std::vector* local_files) { Status status = FileUtils::list_files(Env::Default(), local_path, local_files); @@ -737,44 +556,6 @@ Status SnapshotLoader::_get_existing_files_from_local(const std::string& local_p return Status::OK(); } -Status SnapshotLoader::_rename_remote_file(BrokerServiceConnection& client, - const std::string& orig_name, - const std::string& new_name, - const std::map& broker_prop) { - try { - TBrokerOperationStatus op_status; - TBrokerRenamePathRequest rename_req; - rename_req.__set_version(TBrokerVersion::VERSION_ONE); - rename_req.__set_srcPath(orig_name); - rename_req.__set_destPath(new_name); - rename_req.__set_properties(broker_prop); - - try { - client->renamePath(op_status, rename_req); - } catch (apache::thrift::transport::TTransportException& e) { - RETURN_IF_ERROR(client.reopen()); - client->renamePath(op_status, rename_req); - } - - if (op_status.statusCode != TBrokerOperationStatusCode::OK) { - std::stringstream ss; - ss << "Fail to rename file: " << orig_name << " to: " << new_name - << " msg:" << op_status.message; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - } catch (apache::thrift::TException& e) { - std::stringstream ss; - ss << "Fail to rename file: " << orig_name << " to: " << new_name << " msg:" << e.what(); - LOG(WARNING) << ss.str(); - return Status::ThriftRpcError(ss.str()); - } - - LOG(INFO) << "finished to rename file. 
orig: " << orig_name << ", new: " << new_name; - - return Status::OK(); -} - void SnapshotLoader::_assemble_file_name(const std::string& snapshot_path, const std::string& tablet_path, int64_t tablet_id, int64_t start_version, int64_t end_version, diff --git a/be/src/runtime/snapshot_loader.h b/be/src/runtime/snapshot_loader.h index efba4e1d33d119..8f4bd6c420ec0a 100644 --- a/be/src/runtime/snapshot_loader.h +++ b/be/src/runtime/snapshot_loader.h @@ -32,12 +32,8 @@ namespace doris { class ExecEnv; - -struct FileStat { - std::string name; - std::string md5; - int64_t size; -}; +class StorageBackend; +class FileStat; /* * Upload: @@ -62,17 +58,18 @@ struct FileStat { class SnapshotLoader { public: SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id); + SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, + const TNetworkAddress& broker_addr, + const std::map& broker_prop); + SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, + const std::map& broker_prop); ~SnapshotLoader(); Status upload(const std::map& src_to_dest_path, - const TNetworkAddress& broker_addr, - const std::map& broker_prop, std::map>* tablet_files); Status download(const std::map& src_to_dest_path, - const TNetworkAddress& broker_addr, - const std::map& broker_prop, std::vector* downloaded_tablet_ids); Status move(const std::string& snapshot_path, TabletSharedPtr tablet, bool overwrite); @@ -84,18 +81,9 @@ class SnapshotLoader { Status _check_local_snapshot_paths(const std::map& src_to_dest_path, bool check_src); - Status _get_existing_files_from_remote(BrokerServiceConnection& client, - const std::string& remote_path, - const std::map& broker_prop, - std::map* files); - Status _get_existing_files_from_local(const std::string& local_path, std::vector* local_files); - Status _rename_remote_file(BrokerServiceConnection& client, const std::string& orig_name, - const std::string& new_name, - const std::map& broker_prop); - bool _end_with(const std::string& str, const std::string& match); void _assemble_file_name(const std::string& snapshot_path, const std::string& tablet_path, @@ -115,6 +103,9 @@ class SnapshotLoader { ExecEnv* _env; int64_t _job_id; int64_t _task_id; + const TNetworkAddress& _broker_addr; + const std::map& _prop; + std::unique_ptr _storage_backend; }; } // end namespace doris diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 8ea8e8d479cf79..a95fc874e7aa6f 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -68,6 +68,7 @@ set(UTIL_FILES filesystem_util.cc load_error_hub.cpp broker_load_error_hub.cpp + broker_storage_backend.cpp null_load_error_hub.cpp time.cpp os_info.cpp @@ -103,6 +104,9 @@ set(UTIL_FILES brpc_stub_cache.cpp zlib.cpp pprof_utils.cpp + s3_uri.cpp + s3_storage_backend.cpp + s3_util.cpp topn_counter.cpp ) diff --git a/be/src/util/broker_storage_backend.cpp b/be/src/util/broker_storage_backend.cpp new file mode 100644 index 00000000000000..c0ec3653126cbf --- /dev/null +++ b/be/src/util/broker_storage_backend.cpp @@ -0,0 +1,375 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/broker_storage_backend.h" + +#include "env/env.h" +#include "exec/broker_reader.h" +#include "exec/broker_writer.h" +#include "gen_cpp/FrontendService.h" +#include "gen_cpp/FrontendService_types.h" +#include "gen_cpp/HeartbeatService_types.h" +#include "gen_cpp/PaloBrokerService_types.h" +#include "gen_cpp/TPaloBrokerService.h" +#include "olap/file_helper.h" +#include "runtime/client_cache.h" +#include "runtime/exec_env.h" + +namespace doris { + +#ifdef BE_TEST +inline BrokerServiceClientCache* client_cache(ExecEnv* env) { + static BrokerServiceClientCache s_client_cache; + return &s_client_cache; +} +#else +inline BrokerServiceClientCache* client_cache(ExecEnv* env) { + return env->broker_client_cache(); +} +#endif + +BrokerStorageBackend::BrokerStorageBackend(ExecEnv* env, const TNetworkAddress& broker_addr, + const std::map<std::string, std::string>& broker_prop) + : _env(env), _broker_addr(broker_addr), _broker_prop(broker_prop) {} + +Status BrokerStorageBackend::download(const std::string& remote, const std::string& local) { + // 1. open remote file for read + std::vector<TNetworkAddress> broker_addrs; + broker_addrs.push_back(_broker_addr); + std::unique_ptr<BrokerReader> broker_reader( + new BrokerReader(_env, broker_addrs, _broker_prop, remote, 0 /* offset */)); + RETURN_IF_ERROR(broker_reader->open()); + + // 2. remove the existing local file if exist + if (boost::filesystem::remove(local)) { + VLOG(2) << "remove the previously exist local file: " << local; + } + + // 3. open local file for write + FileHandler file_handler; + OLAPStatus ost = + file_handler.open_with_mode(local, O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR); + if (ost != OLAP_SUCCESS) { + return Status::InternalError("failed to open file: " + local); + } + + // 4.
read remote and write to local + VLOG(2) << "read remote file: " << remote << " to local: " << local; + constexpr size_t buf_sz = 1024 * 1024; + char read_buf[buf_sz]; + size_t write_offset = 0; + bool eof = false; + while (!eof) { + size_t read_len = buf_sz; + RETURN_IF_ERROR(broker_reader->read(reinterpret_cast<uint8_t*>(read_buf), &read_len, &eof)); + + if (eof) { + continue; + } + + if (read_len > 0) { + ost = file_handler.pwrite(read_buf, read_len, write_offset); + if (ost != OLAP_SUCCESS) { + return Status::InternalError("failed to write file: " + local); + } + + write_offset += read_len; + } + + } // file_handler should be closed before calculating checksum + + return Status::OK(); +} + +Status BrokerStorageBackend::upload(const std::string& local, const std::string& remote) { + // read file and write to broker + FileHandler file_handler; + OLAPStatus ost = file_handler.open(local, O_RDONLY); + if (ost != OLAP_SUCCESS) { + return Status::InternalError("failed to open file: " + local); + } + + size_t file_len = file_handler.length(); + if (file_len == -1) { + return Status::InternalError("failed to get length of file: " + local); + } + + // NOTICE: broker writer must be closed before calling rename + std::vector<TNetworkAddress> broker_addrs; + broker_addrs.push_back(_broker_addr); + std::unique_ptr<BrokerWriter> broker_writer( + new BrokerWriter(_env, broker_addrs, _broker_prop, remote, 0 /* offset */)); + RETURN_IF_ERROR(broker_writer->open()); + + constexpr size_t buf_sz = 1024 * 1024; + char read_buf[buf_sz]; + size_t left_len = file_len; + size_t read_offset = 0; + while (left_len > 0) { + size_t read_len = left_len > buf_sz ? buf_sz : left_len; + ost = file_handler.pread(read_buf, read_len, read_offset); + if (ost != OLAP_SUCCESS) { + return Status::InternalError("failed to read file: " + local); + } + // write through broker + size_t write_len = 0; + RETURN_IF_ERROR(broker_writer->write(reinterpret_cast<const uint8_t*>(read_buf), read_len, + &write_len)); + DCHECK_EQ(write_len, read_len); + + read_offset += read_len; + left_len -= read_len; + } + + // close manually, because we need to check its close status + RETURN_IF_ERROR(broker_writer->close()); + + LOG(INFO) << "finished to write file via broker. file: " << local << ", length: " << file_len; + return Status::OK(); +} + +Status BrokerStorageBackend::rename(const std::string& orig_name, const std::string& new_name) { + Status status = Status::OK(); + BrokerServiceConnection client(client_cache(_env), _broker_addr, 10000, &status); + if (!status.ok()) { + std::stringstream ss; + ss << "failed to get broker client. " + << "broker addr: " << _broker_addr << ".
msg: " << status.get_error_msg(); + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + try { + TBrokerOperationStatus op_status; + TBrokerRenamePathRequest rename_req; + rename_req.__set_version(TBrokerVersion::VERSION_ONE); + rename_req.__set_srcPath(orig_name); + rename_req.__set_destPath(new_name); + rename_req.__set_properties(_broker_prop); + + try { + client->renamePath(op_status, rename_req); + } catch (apache::thrift::transport::TTransportException& e) { + RETURN_IF_ERROR(client.reopen()); + client->renamePath(op_status, rename_req); + } + + if (op_status.statusCode != TBrokerOperationStatusCode::OK) { + std::stringstream ss; + ss << "Fail to rename file: " << orig_name << " to: " << new_name + << " msg:" << op_status.message; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + } catch (apache::thrift::TException& e) { + std::stringstream ss; + ss << "Fail to rename file: " << orig_name << " to: " << new_name << " msg:" << e.what(); + LOG(WARNING) << ss.str(); + return Status::ThriftRpcError(ss.str()); + } + + LOG(INFO) << "finished to rename file. orig: " << orig_name << ", new: " << new_name; + + return status; +} + +Status BrokerStorageBackend::list(const std::string& remote_path, + std::map* files) { + Status status = Status::OK(); + BrokerServiceConnection client(client_cache(_env), _broker_addr, 10000, &status); + if (!status.ok()) { + std::stringstream ss; + ss << "failed to get broker client. " + << "broker addr: " << _broker_addr << ". msg: " << status.get_error_msg(); + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + try { + // get existing files from remote path + TBrokerListResponse list_rep; + TBrokerListPathRequest list_req; + list_req.__set_version(TBrokerVersion::VERSION_ONE); + list_req.__set_path(remote_path + "/*"); + list_req.__set_isRecursive(false); + list_req.__set_properties(_broker_prop); + list_req.__set_fileNameOnly(true); // we only need file name, not abs path + + try { + client->listPath(list_rep, list_req); + } catch (apache::thrift::transport::TTransportException& e) { + RETURN_IF_ERROR(client.reopen()); + client->listPath(list_rep, list_req); + } + + if (list_rep.opStatus.statusCode == TBrokerOperationStatusCode::FILE_NOT_FOUND) { + LOG(INFO) << "path does not exist: " << remote_path; + return Status::OK(); + } else if (list_rep.opStatus.statusCode != TBrokerOperationStatusCode::OK) { + std::stringstream ss; + ss << "failed to list files from remote path: " << remote_path + << ", msg: " << list_rep.opStatus.message; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + LOG(INFO) << "finished to list files from remote path. file num: " << list_rep.files.size(); + + // split file name and checksum + for (const auto& file : list_rep.files) { + if (file.isDir) { + // this is not a file + continue; + } + + const std::string& file_name = file.path; + size_t pos = file_name.find_last_of("."); + if (pos == std::string::npos || pos == file_name.size() - 1) { + // Not found checksum separator, ignore this file + continue; + } + + FileStat stat = {std::string(file_name, 0, pos), std::string(file_name, pos + 1), + file.size}; + files->emplace(std::string(file_name, 0, pos), stat); + VLOG(2) << "split remote file: " << std::string(file_name, 0, pos) + << ", checksum: " << std::string(file_name, pos + 1); + } + + LOG(INFO) << "finished to split files. 
valid file num: " << files->size(); + + } catch (apache::thrift::TException& e) { + std::stringstream ss; + ss << "failed to list files in remote path: " << remote_path << ", msg: " << e.what(); + LOG(WARNING) << ss.str(); + return Status::ThriftRpcError(ss.str()); + } + + return status; +} +Status BrokerStorageBackend::direct_upload(const std::string& remote, const std::string& content) { + std::vector broker_addrs; + broker_addrs.push_back(_broker_addr); + std::unique_ptr broker_writer( + new BrokerWriter(_env, broker_addrs, _broker_prop, remote, 0 /* offset */)); + RETURN_IF_ERROR(broker_writer->open()); + size_t write_len = 0; + RETURN_IF_ERROR(broker_writer->write(reinterpret_cast(content.c_str()), + content.size(), &write_len)); + DCHECK_EQ(write_len, content.size()); + RETURN_IF_ERROR(broker_writer->close()); + return Status::OK(); +} + +Status BrokerStorageBackend::rm(const std::string& remote) { + Status status = Status::OK(); + BrokerServiceConnection client(client_cache(_env), _broker_addr, 10000, &status); + if (!status.ok()) { + std::stringstream ss; + ss << "failed to get broker client. " + << "broker addr: " << _broker_addr << ". msg: " << status.get_error_msg(); + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + try { + // rm file from remote path + TBrokerDeletePathRequest del_req; + TBrokerOperationStatus del_rep; + del_req.__set_version(TBrokerVersion::VERSION_ONE); + del_req.__set_path(remote); + del_req.__set_properties(_broker_prop); + + try { + client->deletePath(del_rep, del_req); + } catch (apache::thrift::transport::TTransportException& e) { + RETURN_IF_ERROR(client.reopen()); + client->deletePath(del_rep, del_req); + } + + if (del_rep.statusCode == TBrokerOperationStatusCode::OK) { + return Status::OK(); + } else { + std::stringstream ss; + ss << "failed to delete from remote path: " << remote << ", msg: " << del_rep.message; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + } catch (apache::thrift::TException& e) { + std::stringstream ss; + ss << "failed to delete file in remote path: " << remote << ", msg: " << e.what(); + LOG(WARNING) << ss.str(); + return Status::ThriftRpcError(ss.str()); + } +} + +Status BrokerStorageBackend::copy(const std::string& src, const std::string& dst) { + return Status::NotSupported("copy not implemented!"); +} + +Status BrokerStorageBackend::mkdir(const std::string& path) { + return Status::NotSupported("mkdir not implemented!"); +} + +Status BrokerStorageBackend::exist(const std::string& path) { + Status status = Status::OK(); + BrokerServiceConnection client(client_cache(_env), _broker_addr, 10000, &status); + if (!status.ok()) { + std::stringstream ss; + ss << "failed to get broker client. " + << "broker addr: " << _broker_addr << ". 
msg: " << status.get_error_msg(); + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } + try { + TBrokerCheckPathExistRequest check_req; + TBrokerCheckPathExistResponse check_rep; + check_req.__set_version(TBrokerVersion::VERSION_ONE); + check_req.__set_path(path); + check_req.__set_properties(_broker_prop); + + try { + client->checkPathExist(check_rep, check_req); + } catch (apache::thrift::transport::TTransportException& e) { + RETURN_IF_ERROR(client.reopen()); + client->checkPathExist(check_rep, check_req); + } + + if (check_rep.opStatus.statusCode != TBrokerOperationStatusCode::OK) { + std::stringstream ss; + ss << "failed to check exist: " << path << ", msg: " << check_rep.opStatus.message; + LOG(WARNING) << ss.str(); + return Status::InternalError(ss.str()); + } else if (!check_rep.isPathExist) { + return Status::NotFound(path + " not exists!"); + } else { + return Status::OK(); + } + } catch (apache::thrift::TException& e) { + std::stringstream ss; + ss << "failed to check exist: " << path << ", msg: " << e.what(); + LOG(WARNING) << ss.str(); + return Status::ThriftRpcError(ss.str()); + } +} + +Status BrokerStorageBackend::upload_with_checksum(const std::string& local, + const std::string& remote, + const std::string& checksum) { + std::string temp = remote + ".part"; + std::string final = remote + "." + checksum; + RETURN_IF_ERROR(upload(local, remote + ".part")); + return rename(temp, final); +} + +} // end namespace doris diff --git a/be/src/util/broker_storage_backend.h b/be/src/util/broker_storage_backend.h new file mode 100644 index 00000000000000..1dcfc1003f87c6 --- /dev/null +++ b/be/src/util/broker_storage_backend.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "util/storage_backend.h" + +namespace doris { + +class ExecEnv; +class TNetworkAddress; + +class BrokerStorageBackend : public StorageBackend { +public: + BrokerStorageBackend(ExecEnv* env, const TNetworkAddress& broker_addr, + const std::map<std::string, std::string>& broker_prop); + ~BrokerStorageBackend() {} + Status download(const std::string& remote, const std::string& local) override; + Status upload(const std::string& local, const std::string& remote) override; + Status upload_with_checksum(const std::string& local, const std::string& remote, + const std::string& checksum) override; + Status rename(const std::string& orig_name, const std::string& new_name) override; + Status list(const std::string& remote_path, std::map<std::string, FileStat>* files) override; + Status direct_upload(const std::string& remote, const std::string& content) override; + Status rm(const std::string& remote) override; + Status copy(const std::string& src, const std::string& dst) override; + Status mkdir(const std::string& path) override; + Status exist(const std::string& path) override; + +private: + ExecEnv* _env; + const TNetworkAddress& _broker_addr; + const std::map<std::string, std::string>& _broker_prop; +}; +} // end namespace doris diff --git a/be/src/util/logging.h b/be/src/util/logging.h index fe8204235a7a2a..27d7e8279b0b9f 100644 --- a/be/src/util/logging.h +++ b/be/src/util/logging.h @@ -18,6 +18,10 @@ #ifndef DORIS_BE_SRC_COMMON_UTIL_LOGGING_H #define DORIS_BE_SRC_COMMON_UTIL_LOGGING_H +#include <aws/core/utils/logging/LogLevel.h> +#include <aws/core/utils/logging/LogSystemInterface.h> + +#include <atomic> #include <string> #include "common/logging.h" @@ -36,6 +40,54 @@ void shutdown_logging(); // Format a timestamp in the same format as used by GLog. std::string FormatTimestampForLog(MicrosecondsInt64 micros_since_epoch); +class DorisAWSLogger final : public Aws::Utils::Logging::LogSystemInterface { +public: + DorisAWSLogger() : _log_level(Aws::Utils::Logging::LogLevel::Info) {} + DorisAWSLogger(Aws::Utils::Logging::LogLevel log_level) : _log_level(log_level) {} + ~DorisAWSLogger() final = default; + Aws::Utils::Logging::LogLevel GetLogLevel() const final { return _log_level; } + void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* format_str, + ...)
final { + _log_impl(log_level, tag, format_str); + } + void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const Aws::OStringStream& message_stream) final { + _log_impl(log_level, tag, message_stream.str().c_str()); + } + + void Flush() final {} + +private: + void _log_impl(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* message) { + switch (log_level) { + case Aws::Utils::Logging::LogLevel::Off: + break; + case Aws::Utils::Logging::LogLevel::Fatal: + LOG(FATAL) << "[" << tag << "] " << message; + break; + case Aws::Utils::Logging::LogLevel::Error: + LOG(ERROR) << "[" << tag << "] " << message; + break; + case Aws::Utils::Logging::LogLevel::Warn: + LOG(WARNING) << "[" << tag << "] " << message; + break; + case Aws::Utils::Logging::LogLevel::Info: + LOG(INFO) << "[" << tag << "] " << message; + break; + case Aws::Utils::Logging::LogLevel::Debug: + VLOG_ROW << "[" << tag << "] " << message; + break; + case Aws::Utils::Logging::LogLevel::Trace: + VLOG_ROW << "[" << tag << "] " << message; + break; + default: + break; + } + } + + std::atomic _log_level; +}; + } // namespace doris #endif // DORIS_BE_SRC_COMMON_UTIL_LOGGING_H diff --git a/be/src/util/s3_storage_backend.cpp b/be/src/util/s3_storage_backend.cpp new file mode 100644 index 00000000000000..bd1a7d232bc413 --- /dev/null +++ b/be/src/util/s3_storage_backend.cpp @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
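+
+// Illustrative usage sketch (bucket, endpoint, and credentials are
+// placeholders): the property keys must match those consumed by
+// create_client() in util/s3_util.cpp, and every remote path is an
+// "s3://bucket/key" URI parsed by S3URI.
+//
+//     std::map<std::string, std::string> props = {
+//             {"AWS_ACCESS_KEY", "AK"},
+//             {"AWS_SECRET_KEY", "SK"},
+//             {"AWS_ENDPOINT", "http://s3.example.com"},
+//             {"AWS_REGION", "region"}};
+//     S3StorageBackend s3(props);
+//     Status st = s3.upload("/tmp/local_file", "s3://bucket/remote_file");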
+ +#include "util/s3_storage_backend.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/logging.h" +#include "gutil/strings/strip.h" +#include "util/s3_uri.h" +#include "util/s3_util.h" + +namespace doris { + +#ifndef CHECK_S3_CLIENT +#define CHECK_S3_CLIENT(client) \ + if (!client) { \ + return Status::InternalError("init aws s3 client error."); \ + } +#endif + +#ifndef CHECK_S3_PATH +#define CHECK_S3_PATH(uri, path) \ + S3URI uri(path); \ + if (!uri.parse()) { \ + return Status::InvalidArgument("s3 uri is invalid: " + path); \ + } +#endif + +#ifndef RETRUN_S3_STATUS +#define RETRUN_S3_STATUS(response) \ + if (response.IsSuccess()) { \ + return Status::OK(); \ + } else { \ + return Status::InternalError(error_msg(response)); \ + } +#endif + +S3StorageBackend::S3StorageBackend(const std::map& prop) + : _properties(prop) { + _client = create_client(_properties); + DCHECK(_client) << "init aws s3 client error."; +} + +S3StorageBackend::~S3StorageBackend() {} + +Status S3StorageBackend::download(const std::string& remote, const std::string& local) { + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, remote); + Aws::S3::Model::GetObjectRequest request; + request.WithBucket(uri.get_bucket()).WithKey(uri.get_key()); + Aws::S3::Model::GetObjectOutcome response = _client->GetObject(request); + if (response.IsSuccess()) { + Aws::OFStream local_file; + local_file.open(local, std::ios::out | std::ios::binary); + if (local_file.good()) { + local_file << response.GetResult().GetBody().rdbuf(); + } + if (!local_file.good()) { + return Status::InternalError("failed to write file: " + local); + } + } else { + return Status::IOError("s3 download error: " + error_msg(response)); + } + return Status::OK(); +} + +Status S3StorageBackend::upload(const std::string& local, const std::string& remote) { + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, remote); + Aws::S3::Model::PutObjectRequest request; + request.WithBucket(uri.get_bucket()).WithKey(uri.get_key()); + + const std::shared_ptr input_data = Aws::MakeShared( + local.c_str(), local.c_str(), std::ios_base::in | std::ios_base::binary); + if (input_data->good()) { + request.SetBody(input_data); + } + if (!input_data->good()) { + return Status::InternalError("failed to read file: " + local); + } + Aws::S3::Model::PutObjectOutcome response = _client->PutObject(request); + + RETRUN_S3_STATUS(response); +} + +Status S3StorageBackend::list(const std::string& remote_path, + std::map* files) { + std::string normal_str(remote_path); + if (!normal_str.empty() && normal_str.at(normal_str.size() - 1) != '/') { + normal_str += '/'; + } + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, normal_str); + + Aws::S3::Model::ListObjectsRequest request; + request.WithBucket(uri.get_bucket()).WithPrefix(uri.get_key()).WithDelimiter("/"); + Aws::S3::Model::ListObjectsOutcome response = _client->ListObjects(request); + if (response.IsSuccess()) { + Aws::Vector objects = response.GetResult().GetContents(); + + for (Aws::S3::Model::Object& object : objects) { + std::string key = object.GetKey(); + if (key.at(key.size() - 1) == '/') { + continue; + } + size_t pos = key.find_last_of("/"); + if (pos != std::string::npos && pos != key.size() - 1) { + key = std::string(key, pos + 1); + } + pos = key.find_last_of("."); + if (pos == std::string::npos || pos == key.size() - 1) { + // Not found checksum separator, ignore this file + continue; + } + FileStat stat = {std::string(key, 0, pos), std::string(key, pos + 1), 
object.GetSize()}; + files->emplace(std::string(key, 0, pos), stat); + } + return Status::OK(); + } else { + return Status::InternalError("list form s3 error: " + error_msg(response)); + } +} + +Status S3StorageBackend::rename(const std::string& orig_name, const std::string& new_name) { + RETURN_IF_ERROR(copy(orig_name, new_name)); + return rm(orig_name); +} + +Status S3StorageBackend::direct_upload(const std::string& remote, const std::string& content) { + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, remote); + Aws::S3::Model::PutObjectRequest request; + request.WithBucket(uri.get_bucket()).WithKey(uri.get_key()); + const std::shared_ptr input_data = + Aws::MakeShared("upload_directly"); + *input_data << content.c_str(); + if (input_data->good()) { + request.SetBody(input_data); + } + if (!input_data->good()) { + return Status::InternalError("failed to read from string"); + } + Aws::S3::Model::PutObjectOutcome response = _client->PutObject(request); + + RETRUN_S3_STATUS(response); +} + +Status S3StorageBackend::rm(const std::string& remote) { + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, remote); + Aws::S3::Model::DeleteObjectRequest request; + + request.WithKey(uri.get_key()).WithBucket(uri.get_bucket()); + + Aws::S3::Model::DeleteObjectOutcome response = _client->DeleteObject(request); + + RETRUN_S3_STATUS(response); +} + +Status S3StorageBackend::copy(const std::string& src, const std::string& dst) { + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(src_uri, src); + CHECK_S3_PATH(dst_uri, dst); + Aws::S3::Model::CopyObjectRequest request; + request.WithCopySource(src_uri.get_bucket() + "/" + src_uri.get_key()) + .WithKey(dst_uri.get_key()) + .WithBucket(dst_uri.get_bucket()); + + Aws::S3::Model::CopyObjectOutcome response = _client->CopyObject(request); + + RETRUN_S3_STATUS(response); +} + +Status S3StorageBackend::mkdir(const std::string& path) { + std::string normal_str(path); + if (!normal_str.empty() && normal_str.at(normal_str.size() - 1) != '/') { + normal_str += '/'; + } + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, normal_str); + Aws::S3::Model::PutObjectRequest request; + request.WithBucket(uri.get_bucket()).WithKey(uri.get_key()).WithContentLength(0); + Aws::S3::Model::PutObjectOutcome response = _client->PutObject(request); + RETRUN_S3_STATUS(response); +} + +Status S3StorageBackend::exist(const std::string& path) { + CHECK_S3_CLIENT(_client); + CHECK_S3_PATH(uri, path); + Aws::S3::Model::HeadObjectRequest request; + request.WithBucket(uri.get_bucket()).WithKey(uri.get_key()); + Aws::S3::Model::HeadObjectOutcome response = _client->HeadObject(request); + if (response.IsSuccess()) { + return Status::OK(); + } else if (response.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::NotFound(path + " not exists!"); + } else { + return Status::InternalError(error_msg(response)); + } +} + +Status S3StorageBackend::upload_with_checksum(const std::string& local, const std::string& remote, + const std::string& checksum) { + return upload(local, remote + "." 
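+
+// Snapshot files are stored remotely as "<name>.<md5>", which is why list()
+// above splits the last "." out into FileStat::md5. A rough round trip
+// (paths and the checksum value are placeholders, assuming a reachable
+// bucket):
+//
+//     S3StorageBackend s3(props);
+//     s3.upload_with_checksum("/tmp/f", "s3://bucket/repo/f", "0123abcd");
+//     std::map<std::string, FileStat> files;
+//     s3.list("s3://bucket/repo", &files);  // files["f"].md5 == "0123abcd"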
+
+Status S3StorageBackend::upload_with_checksum(const std::string& local, const std::string& remote,
+                                              const std::string& checksum) {
+    return upload(local, remote + "." + checksum);
+}
+
+template <typename AwsOutcome>
+std::string S3StorageBackend::error_msg(const AwsOutcome& outcome) {
+    std::stringstream out;
+    out << "Error: [" << outcome.GetError().GetExceptionName() << ":"
+        << outcome.GetError().GetMessage() << "]";
+    return out.str();
+}
+
+} // end namespace doris
diff --git a/be/src/util/s3_storage_backend.h b/be/src/util/s3_storage_backend.h
new file mode 100644
index 00000000000000..09c52f8665e99b
--- /dev/null
+++ b/be/src/util/s3_storage_backend.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "util/storage_backend.h"
+
+namespace Aws {
+namespace S3 {
+class S3Client;
+} // namespace S3
+} // namespace Aws
+
+namespace doris {
+
+class S3StorageBackend : public StorageBackend {
+public:
+    S3StorageBackend(const std::map<std::string, std::string>& prop);
+    ~S3StorageBackend();
+    Status download(const std::string& remote, const std::string& local) override;
+    Status upload(const std::string& local, const std::string& remote) override;
+    Status upload_with_checksum(const std::string& local, const std::string& remote,
+                                const std::string& checksum) override;
+    Status list(const std::string& remote_path, std::map<std::string, FileStat>* files) override;
+    Status rename(const std::string& orig_name, const std::string& new_name) override;
+    Status direct_upload(const std::string& remote, const std::string& content) override;
+    Status rm(const std::string& remote) override;
+    Status copy(const std::string& src, const std::string& dst) override;
+    Status mkdir(const std::string& path) override;
+    Status exist(const std::string& path) override;
+
+private:
+    template <typename AwsOutcome>
+    std::string error_msg(const AwsOutcome& outcome);
+    const std::map<std::string, std::string>& _properties;
+    std::unique_ptr<Aws::S3::S3Client> _client;
+};
+
+} // end namespace doris
diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp
new file mode 100644
index 00000000000000..9744168f7e734e
--- /dev/null
+++ b/be/src/util/s3_uri.cpp
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +#include "util/s3_uri.h" + +#include + +#include "gutil/strings/split.h" +#include "gutil/strings/strip.h" +#include "util/logging.h" + +namespace doris { + +const std::string S3URI::_SCHEME_DELIM = "://"; +const std::string S3URI::_PATH_DELIM = "/"; +const std::string S3URI::_QUERY_DELIM = "?"; +const std::string S3URI::_FRAGMENT_DELIM = "#"; +const StringCaseSet S3URI::_VALID_SCHEMES = {"http", "https", "s3", "s3a", "s3n", "bos"}; +bool S3URI::parse() { + if (_location.empty()) { + return false; + } + std::vector scheme_split = strings::Split(_location, _SCHEME_DELIM); + if (scheme_split.size() != 2) { + LOG(WARNING) << "Invalid S3 URI: " << _location; + return false; + } + _scheme = scheme_split[0]; + if (_VALID_SCHEMES.find(_scheme) == _VALID_SCHEMES.end()) { + LOG(WARNING) << "Invalid scheme: " << _scheme; + return false; + } + std::vector authority_split = + strings::Split(scheme_split[1], strings::delimiter::Limit(_PATH_DELIM, 1)); + if (authority_split.size() != 2) { + LOG(WARNING) << "Invalid S3 URI: " << _location; + return false; + } + _key = authority_split[1]; + StripWhiteSpace(&_key); + if (_key.empty()) { + LOG(WARNING) << "Invalid S3 key: " << _location; + return false; + } + _bucket = authority_split[0]; + // Strip query and fragment if they exist + std::vector _query_split = strings::Split(_key, _QUERY_DELIM); + std::vector _fragment_split = strings::Split(_query_split[0], _FRAGMENT_DELIM); + _key = _fragment_split[0]; + return true; +} + +} // end namespace doris diff --git a/be/src/util/s3_uri.h b/be/src/util/s3_uri.h new file mode 100644 index 00000000000000..be8579c320f0fc --- /dev/null +++ b/be/src/util/s3_uri.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#pragma once
+
+#include <string>
+
+#include "util/string_util.h"
+
+namespace doris {
+
+class S3URI {
+public:
+    S3URI(const std::string& location) : _location(location) {}
+    bool parse();
+    inline const std::string& get_bucket() const { return _bucket; }
+    inline const std::string& get_key() const { return _key; }
+    inline const std::string& get_location() const { return _location; }
+    inline const std::string& get_scheme() const { return _scheme; }
+
+private:
+    static const std::string _SCHEME_DELIM;
+    static const std::string _PATH_DELIM;
+    static const std::string _QUERY_DELIM;
+    static const std::string _FRAGMENT_DELIM;
+    static const StringCaseSet _VALID_SCHEMES;
+
+    std::string _location;
+    std::string _bucket;
+    std::string _key;
+    std::string _scheme;
+};
+} // end namespace doris
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
new file mode 100644
index 00000000000000..17baa899cabf25
--- /dev/null
+++ b/be/src/util/s3_util.cpp
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/s3_util.h"
+
+#include <aws/core/auth/AWSCredentials.h>
+#include <aws/core/client/ClientConfiguration.h>
+#include <aws/s3/S3Client.h>
+
+#include "common/logging.h"
+#include "util/string_util.h"
+
+namespace doris {
+
+const static std::string S3_AK = "AWS_ACCESS_KEY";
+const static std::string S3_SK = "AWS_SECRET_KEY";
+const static std::string S3_ENDPOINT = "AWS_ENDPOINT";
+const static std::string S3_REGION = "AWS_REGION";
+
+// Creates an S3 client only when all four required properties
+// (AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_ENDPOINT, AWS_REGION) are present;
+// otherwise returns nullptr and the caller must handle the failure.
+// Property keys are matched case-insensitively.
+std::unique_ptr<Aws::S3::S3Client> create_client(const std::map<std::string, std::string>& prop) {
+    StringCaseMap<std::string> properties(prop.begin(), prop.end());
+    Aws::Auth::AWSCredentials aws_cred;
+    Aws::Client::ClientConfiguration aws_config;
+    std::unique_ptr<Aws::S3::S3Client> client;
+    if (properties.find(S3_AK) != properties.end() && properties.find(S3_SK) != properties.end() &&
+        properties.find(S3_ENDPOINT) != properties.end() &&
+        properties.find(S3_REGION) != properties.end()) {
+        aws_cred.SetAWSAccessKeyId(properties.find(S3_AK)->second);
+        aws_cred.SetAWSSecretKey(properties.find(S3_SK)->second);
+        DCHECK(!aws_cred.IsExpiredOrEmpty());
+        aws_config.endpointOverride = properties.find(S3_ENDPOINT)->second;
+        aws_config.region = properties.find(S3_REGION)->second;
+        client.reset(new Aws::S3::S3Client(aws_cred, aws_config));
+    } else {
+        client.reset(nullptr);
+    }
+    return client;
+}
+} // end namespace doris
diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h
new file mode 100644
index 00000000000000..57a9314bc1be92
--- /dev/null
+++ b/be/src/util/s3_util.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+
+namespace Aws {
+namespace S3 {
+class S3Client;
+} // namespace S3
+} // namespace Aws
+
+namespace doris {
+
+std::unique_ptr<Aws::S3::S3Client> create_client(const std::map<std::string, std::string>& prop);
+
+} // end namespace doris
diff --git a/be/src/util/storage_backend.h b/be/src/util/storage_backend.h
new file mode 100644
index 00000000000000..7c96f5480e43f5
--- /dev/null
+++ b/be/src/util/storage_backend.h
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
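+
+// Informal contract, inferred from the broker and S3 implementations in this
+// patch: every method returns Status::OK() on success, and exist() reports a
+// missing path as Status::NotFound rather than as a generic error, e.g.
+//
+//     std::unique_ptr<StorageBackend> backend(new S3StorageBackend(props));
+//     if (backend->exist(remote_path).code() == TStatusCode::NOT_FOUND) {
+//         backend->direct_upload(remote_path, content);
+//     }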
+
+#pragma once
+
+#include <map>
+#include <string>
+
+#include "common/status.h"
+
+namespace doris {
+
+struct FileStat {
+    std::string name;
+    std::string md5;
+    int64_t size;
+};
+
+class StorageBackend {
+public:
+    // Virtual destructor so concrete backends can be deleted through a
+    // StorageBackend pointer.
+    virtual ~StorageBackend() = default;
+    virtual Status download(const std::string& remote, const std::string& local) = 0;
+    virtual Status upload(const std::string& local, const std::string& remote) = 0;
+    virtual Status upload_with_checksum(const std::string& local, const std::string& remote,
+                                        const std::string& checksum) = 0;
+    virtual Status list(const std::string& remote_path,
+                        std::map<std::string, FileStat>* files) = 0;
+    virtual Status rename(const std::string& orig_name, const std::string& new_name) = 0;
+    virtual Status direct_upload(const std::string& remote, const std::string& content) = 0;
+    virtual Status copy(const std::string& src, const std::string& dst) = 0;
+    virtual Status rm(const std::string& remote) = 0;
+    virtual Status mkdir(const std::string& path) = 0;
+    virtual Status exist(const std::string& path) = 0;
+};
+} // end namespace doris
diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt
index 8d2bb42cac7c10..7cd6a935b6185b 100644
--- a/be/test/exec/CMakeLists.txt
+++ b/be/test/exec/CMakeLists.txt
@@ -72,3 +72,4 @@ ADD_BE_TEST(unix_odbc_test)
 #ADD_BE_TEST(schema_scanner/schema_engines_scanner_test)
 #ADD_BE_TEST(schema_scanner/schema_collations_scanner_test)
 #ADD_BE_TEST(schema_scanner/schema_charsets_scanner_test)
+ADD_BE_TEST(s3_reader_test)
diff --git a/be/test/exec/s3_reader_test.cpp b/be/test/exec/s3_reader_test.cpp
new file mode 100644
index 00000000000000..0fc26196893a55
--- /dev/null
+++ b/be/test/exec/s3_reader_test.cpp
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +#include "exec/s3_reader.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "exec/s3_writer.h" + +namespace doris { +static const std::string AK = "353b8de00d85438c8ea0818704b156d3"; +static const std::string SK = "ea15a18b4409479fa8cca029f78e8d77"; +static const std::string ENDPOINT = "http://s3.bj.bcebos.com"; +static const std::string REGION = "bj"; +static const std::string BUCKET = "s3://yang-repo/"; +class S3ReaderTest : public testing::Test { +public: + S3ReaderTest() + : _aws_properties({{"AWS_ACCESS_KEY", AK}, + {"AWS_SECRET_KEY", SK}, + {"AWS_ENDPOINT", ENDPOINT}, + {"AWS_REGION", "bj"}}) { + _s3_base_path = BUCKET + "s3/" + gen_uuid(); + } + +protected: + virtual void SetUp() {} + virtual void TearDown() {} + std::string gen_uuid() { + auto id = boost::uuids::random_generator()(); + return boost::lexical_cast(id); + } + std::map _aws_properties; + std::string _s3_base_path; + std::string _content = + "O wild West Wind, thou breath of Autumn's being\n" + "Thou, from whose unseen presence the leaves dead\n" + "Are driven, like ghosts from an enchanter fleeing,\n" + "Yellow, and black, and pale, and hectic red,\n" + "Pestilence-stricken multitudes:O thou\n" + "Who chariotest to their dark wintry bed\n" + "The winged seeds, where they lie cold and low,\n" + "Each like a corpse within its grave, until\n" + "Thine azure sister of the Spring shall blow\n" + "Her clarion o'er the dreaming earth, and fill\n" + "(Driving sweet buds like flocks to feed in air)\n" + "With living hues and odors plain and hill:\n" + "Wild Spirit, which art moving everywhere;\n" + "Destroyer and preserver; hear, oh, hear!"; +}; + +TEST_F(S3ReaderTest, normal) { + std::string path = _s3_base_path + "/test_file"; + std::unique_ptr writer(new S3Writer(_aws_properties, path, 0)); + auto st = writer->open(); + ASSERT_TRUE(st.ok()); + size_t l = 0; + st = writer->write(reinterpret_cast(_content.c_str()), _content.length(), &l); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(_content.length(), l); + st = writer->close(); + ASSERT_TRUE(st.ok()); + std::unique_ptr writer1(new S3Writer(_aws_properties, path, 0)); + st = writer1->open(); + ASSERT_TRUE(st.is_already_exist()); + std::unique_ptr reader(new S3Reader(_aws_properties, path, 0)); + st = reader->open(); + ASSERT_TRUE(st.ok()); + std::unique_ptr reader1(new S3Reader(_aws_properties, path + "xx", 0)); + st = reader1->open(); + ASSERT_TRUE(st.is_not_found()); + ASSERT_EQ(_content.length(), reader->size()); + std::string verification_contents; + verification_contents.resize(_content.length()); + size_t total_read = _content.length(); + bool eof = false; + st = reader->read((uint8_t*)&verification_contents[0], &total_read, &eof); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(_content, verification_contents); + ASSERT_EQ(_content.length(), total_read); + ASSERT_TRUE(eof); + int64_t t = 0; + st = reader->tell(&t); + ASSERT_TRUE(st.ok()); + ASSERT_EQ(_content.length(), t); + st = reader->readat(_content.length(), _content.length(), (int64_t*)(&total_read), + (uint8_t*)&verification_contents[0]); + LOG(INFO) << total_read; + ASSERT_TRUE(total_read==0); +} +} // end namespace doris + +int main(int argc, char** argv) { + // std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; + // if (!doris::config::init(conffile.c_str(), false)) { + // fprintf(stderr, "error read config file. 
\n"); + // return -1; + // } + // doris::init_glog("be-test"); + // doris::CpuInfo::init(); + ::testing::InitGoogleTest(&argc, argv); + int ret = 0; + Aws::SDKOptions options; + options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Debug; + Aws::InitAPI(options); + // ak sk is secret + // ret = RUN_ALL_TESTS(); + Aws::ShutdownAPI(options); + return ret; +} diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt index a26ee994cf6847..d6bab33fe69066 100644 --- a/be/test/util/CMakeLists.txt +++ b/be/test/util/CMakeLists.txt @@ -67,4 +67,7 @@ ADD_BE_TEST(trace_test) ADD_BE_TEST(easy_json-test) ADD_BE_TEST(http_channel_test) ADD_BE_TEST(histogram_test) +ADD_BE_TEST(s3_uri_test) +ADD_BE_TEST(s3_storage_backend_test) +ADD_BE_TEST(broker_storage_backend_test) ADD_BE_TEST(sort_heap_test) diff --git a/be/test/util/broker_storage_backend_test.cpp b/be/test/util/broker_storage_backend_test.cpp new file mode 100644 index 00000000000000..91a4a5897232ed --- /dev/null +++ b/be/test/util/broker_storage_backend_test.cpp @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "util/broker_storage_backend.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "runtime/exec_env.h" +#include "util/file_utils.h" +#include "util/storage_backend.h" + +namespace doris { +static const std::string AK = "AK"; +static const std::string SK = "SK"; +static const std::string ENDPOINT = "http://bj.bcebos.com"; +static const std::string BUCKET = "bos://yang-repo/"; +static const std::string BROKER_IP = "127.0.0.1"; +static const int BROKER_PORT = 8111; +class StorageBackendTest : public testing::Test { +public: + StorageBackendTest() + : _broker_properties({{"bos_accesskey", AK}, + {"bos_secret_accesskey", SK}, + {"bos_endpoint", ENDPOINT}}), + _env(ExecEnv::GetInstance()) { + _broker_addr.__set_hostname("127.0.0.1"); + _broker_addr.__set_port(8111); + _broker.reset(new BrokerStorageBackend(_env, _broker_addr, _broker_properties)); + _broker_base_path = BUCKET + "broker/" + gen_uuid(); + } + virtual ~StorageBackendTest() {} + +protected: + virtual void SetUp() { + _test_file = "/tmp/" + gen_uuid(); + std::ofstream out(_test_file); + out << _content; + out.close(); + } + virtual void TearDown() { remove(_test_file.c_str()); } + std::string gen_uuid() { + auto id = boost::uuids::random_generator()(); + return boost::lexical_cast(id); + } + std::unique_ptr _broker; + std::map _broker_properties; + std::string _test_file; + ExecEnv* _env; + std::string _broker_base_path; + TNetworkAddress _broker_addr; + std::string _content = + "O wild West Wind, thou breath of Autumn's being\n" + "Thou, from whose unseen presence the leaves dead\n" + "Are driven, like ghosts from an enchanter fleeing,\n" + "Yellow, and black, and pale, and hectic red,\n" + "Pestilence-stricken multitudes:O thou\n" + "Who chariotest to their dark wintry bed\n" + "The winged seeds, where they lie cold and low,\n" + "Each like a corpse within its grave, until\n" + "Thine azure sister of the Spring shall blow\n" + "Her clarion o'er the dreaming earth, and fill\n" + "(Driving sweet buds like flocks to feed in air)\n" + "With living hues and odors plain and hill:\n" + "Wild Spirit, which art moving everywhere;\n" + "Destroyer and preserver; hear, oh, hear!"; +}; + +TEST_F(StorageBackendTest, broker_upload) { + Status status = _broker->upload(_test_file, _broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + std::string orig_md5sum; + FileUtils::md5sum(_test_file, &orig_md5sum); + status = _broker->download(_broker_base_path + "/Ode_to_the_West_Wind.txt", + _test_file + ".download"); + ASSERT_TRUE(status.ok()); + std::string download_md5sum; + FileUtils::md5sum(_test_file + ".download", &download_md5sum); + ASSERT_EQ(orig_md5sum, download_md5sum); + status = _broker->upload(_test_file + "_not_found", + _broker_base_path + "/Ode_to_the_West_Wind1.txt"); + ASSERT_FALSE(status.ok()); + status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind1.txt"); + ASSERT_EQ(TStatusCode::NOT_FOUND, status.code()); +} + +TEST_F(StorageBackendTest, broker_direct_upload) { + Status status = + _broker->direct_upload(_broker_base_path + "/Ode_to_the_West_Wind.txt", _content); + ASSERT_TRUE(status.ok()); + status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + std::string orig_md5sum; + FileUtils::md5sum(_test_file, &orig_md5sum); + status = _broker->download(_broker_base_path + 
"/Ode_to_the_West_Wind.txt", + _test_file + ".download"); + ASSERT_TRUE(status.ok()); + std::string download_md5sum; + FileUtils::md5sum(_test_file + ".download", &download_md5sum); + ASSERT_EQ(orig_md5sum, download_md5sum); +} + +TEST_F(StorageBackendTest, broker_download) { + Status status = _broker->upload(_test_file, _broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + std::string orig_md5sum; + FileUtils::md5sum(_test_file, &orig_md5sum); + status = _broker->download(_broker_base_path + "/Ode_to_the_West_Wind.txt", + _test_file + ".download"); + ASSERT_TRUE(status.ok()); + std::string download_md5sum; + FileUtils::md5sum(_test_file + ".download", &download_md5sum); + ASSERT_EQ(orig_md5sum, download_md5sum); + status = _broker->download(_broker_base_path + "/Ode_to_the_West_Wind.txt.not_found", + _test_file + ".download"); + ASSERT_FALSE(status.ok()); + status = _broker->download(_broker_base_path + "/Ode_to_the_West_Wind.txt.not_found", + "/not_permitted.download"); + ASSERT_FALSE(status.ok()); +} + +TEST_F(StorageBackendTest, broker_rename) { + Status status = + _broker->direct_upload(_broker_base_path + "/Ode_to_the_West_Wind.txt", _content); + ASSERT_TRUE(status.ok()); + status = _broker->rename(_broker_base_path + "/Ode_to_the_West_Wind.txt", + _broker_base_path + "/Ode_to_the_West_Wind.txt.new"); + ASSERT_TRUE(status.ok()); + // rm by broker old file may exist for a few moment + // status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind.txt"); + // ASSERT_TRUE(status.code() == TStatusCode::NOT_FOUND); + status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind.txt.new"); + ASSERT_TRUE(status.ok()); +} + +TEST_F(StorageBackendTest, broker_list) { + Status status = + _broker->direct_upload(_broker_base_path + "/Ode_to_the_West_Wind.md5", _content); + ASSERT_TRUE(status.ok()); + status = _broker->direct_upload(_broker_base_path + "/Ode_to_the_West_Wind1.md5", _content); + ASSERT_TRUE(status.ok()); + status = _broker->direct_upload(_broker_base_path + "/Ode_to_the_West_Wind2.md5", _content); + ASSERT_TRUE(status.ok()); + std::map files; + status = _broker->list(_broker_base_path, &files); + ASSERT_TRUE(status.ok()); + ASSERT_TRUE(files.find("Ode_to_the_West_Wind") != files.end()); + ASSERT_TRUE(files.find("Ode_to_the_West_Wind1") != files.end()); + ASSERT_TRUE(files.find("Ode_to_the_West_Wind2") != files.end()); + ASSERT_EQ(3, files.size()); +} + +TEST_F(StorageBackendTest, broker_rm) { + Status status = + _broker->direct_upload(_broker_base_path + "/Ode_to_the_West_Wind.txt", _content); + ASSERT_TRUE(status.ok()); + status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + status = _broker->rm(_broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + status = _broker->exist(_broker_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.code() == TStatusCode::NOT_FOUND); +} + +} // end namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + doris::CpuInfo::init(); + int ret = 0; + // ak sk is secret + // ret = RUN_ALL_TESTS(); + return ret; +} \ No newline at end of file diff --git a/be/test/util/s3_storage_backend_test.cpp b/be/test/util/s3_storage_backend_test.cpp new file mode 100644 index 00000000000000..a8ea878f145633 --- /dev/null +++ b/be/test/util/s3_storage_backend_test.cpp @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/s3_storage_backend.h"
+
+#include <aws/core/Aws.h>
+#include <gtest/gtest.h>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/uuid/uuid.hpp>
+#include <boost/uuid/uuid_generators.hpp>
+#include <boost/uuid/uuid_io.hpp>
+#include <fstream>
+#include <map>
+#include <string>
+
+#include "util/file_utils.h"
+#include "util/storage_backend.h"
+
+namespace doris {
+static const std::string AK = "AK";
+static const std::string SK = "SK";
+static const std::string ENDPOINT = "http://s3.bj.bcebos.com";
+static const std::string REGION = "bj";
+static const std::string BUCKET = "s3://yang-repo/";
+class S3StorageBackendTest : public testing::Test {
+public:
+    S3StorageBackendTest()
+            : _aws_properties({{"AWS_ACCESS_KEY", AK},
+                               {"AWS_SECRET_KEY", SK},
+                               {"AWS_ENDPOINT", ENDPOINT},
+                               {"AWS_REGION", REGION}}) {
+        _s3.reset(new S3StorageBackend(_aws_properties));
+        _s3_base_path = BUCKET + "s3/" + gen_uuid();
+    }
+    virtual ~S3StorageBackendTest() {}
+
+protected:
+    virtual void SetUp() {
+        _test_file = "/tmp/" + gen_uuid();
+        std::ofstream out(_test_file);
+        out << _content;
+        out.close();
+    }
+    virtual void TearDown() {
+        remove(_test_file.c_str());
+        _s3->rm(_s3_base_path);
+    }
+    std::string gen_uuid() {
+        auto id = boost::uuids::random_generator()();
+        return boost::lexical_cast<std::string>(id);
+    }
+    std::unique_ptr<S3StorageBackend> _s3;
+    std::map<std::string, std::string> _aws_properties;
+    std::string _test_file;
+    std::string _s3_base_path;
+    std::string _content =
+            "O wild West Wind, thou breath of Autumn's being\n"
+            "Thou, from whose unseen presence the leaves dead\n"
+            "Are driven, like ghosts from an enchanter fleeing,\n"
+            "Yellow, and black, and pale, and hectic red,\n"
+            "Pestilence-stricken multitudes:O thou\n"
+            "Who chariotest to their dark wintry bed\n"
+            "The winged seeds, where they lie cold and low,\n"
+            "Each like a corpse within its grave, until\n"
+            "Thine azure sister of the Spring shall blow\n"
+            "Her clarion o'er the dreaming earth, and fill\n"
+            "(Driving sweet buds like flocks to feed in air)\n"
+            "With living hues and odors plain and hill:\n"
+            "Wild Spirit, which art moving everywhere;\n"
+            "Destroyer and preserver; hear, oh, hear!";
+};
+
+TEST_F(S3StorageBackendTest, s3_upload) {
+    Status status = _s3->upload(_test_file, _s3_base_path + "/Ode_to_the_West_Wind.txt");
+    ASSERT_TRUE(status.ok());
+    status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind.txt");
+    ASSERT_TRUE(status.ok());
+    std::string orig_md5sum;
+    FileUtils::md5sum(_test_file, &orig_md5sum);
+    status = _s3->download(_s3_base_path + "/Ode_to_the_West_Wind.txt", _test_file + ".download");
+    ASSERT_TRUE(status.ok());
+    std::string download_md5sum;
+    FileUtils::md5sum(_test_file + ".download", &download_md5sum);
+    ASSERT_EQ(orig_md5sum, download_md5sum);
+    status = _s3->upload(_test_file + "_not_found", _s3_base_path + "/Ode_to_the_West_Wind1.txt");
+    ASSERT_FALSE(status.ok());
+    status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind1.txt");
+    ASSERT_TRUE(status.code() == TStatusCode::NOT_FOUND);
+} + +TEST_F(S3StorageBackendTest, s3_direct_upload) { + Status status = _s3->direct_upload(_s3_base_path + "/Ode_to_the_West_Wind.txt", _content); + ASSERT_TRUE(status.ok()); + status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + std::string orig_md5sum; + FileUtils::md5sum(_test_file, &orig_md5sum); + status = _s3->download(_s3_base_path + "/Ode_to_the_West_Wind.txt", _test_file + ".download"); + ASSERT_TRUE(status.ok()); + std::string download_md5sum; + FileUtils::md5sum(_test_file + ".download", &download_md5sum); + ASSERT_EQ(orig_md5sum, download_md5sum); +} + +TEST_F(S3StorageBackendTest, s3_download) { + Status status = _s3->upload(_test_file, _s3_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + std::string orig_md5sum; + FileUtils::md5sum(_test_file, &orig_md5sum); + status = _s3->download(_s3_base_path + "/Ode_to_the_West_Wind.txt", _test_file + ".download"); + ASSERT_TRUE(status.ok()); + std::string download_md5sum; + FileUtils::md5sum(_test_file + ".download", &download_md5sum); + ASSERT_EQ(orig_md5sum, download_md5sum); + status = _s3->download(_s3_base_path + "/Ode_to_the_West_Wind.txt.not_found", + _test_file + ".download"); + ASSERT_FALSE(status.ok()); + status = _s3->download(_s3_base_path + "/Ode_to_the_West_Wind.txt.not_found", + "/not_permitted.download"); + ASSERT_FALSE(status.ok()); +} + +TEST_F(S3StorageBackendTest, s3_rename) { + Status status = _s3->direct_upload(_s3_base_path + "/Ode_to_the_West_Wind.txt", _content); + ASSERT_TRUE(status.ok()); + status = _s3->rename(_s3_base_path + "/Ode_to_the_West_Wind.txt", + _s3_base_path + "/Ode_to_the_West_Wind.txt.new"); + ASSERT_TRUE(status.ok()); + status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.code() == TStatusCode::NOT_FOUND); + status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind.txt.new"); + ASSERT_TRUE(status.ok()); +} + +TEST_F(S3StorageBackendTest, s3_list) { + Status status = _s3->direct_upload(_s3_base_path + "/Ode_to_the_West_Wind.md5", _content); + ASSERT_TRUE(status.ok()); + status = _s3->direct_upload(_s3_base_path + "/Ode_to_the_West_Wind1.md5", _content); + ASSERT_TRUE(status.ok()); + status = _s3->direct_upload(_s3_base_path + "/Ode_to_the_West_Wind2.md5", _content); + ASSERT_TRUE(status.ok()); + std::map files; + status = _s3->list(_s3_base_path, &files); + ASSERT_TRUE(status.ok()); + ASSERT_TRUE(files.find("Ode_to_the_West_Wind") != files.end()); + ASSERT_TRUE(files.find("Ode_to_the_West_Wind1") != files.end()); + ASSERT_TRUE(files.find("Ode_to_the_West_Wind2") != files.end()); + ASSERT_EQ(3, files.size()); +} + +TEST_F(S3StorageBackendTest, s3_rm) { + Status status = _s3->direct_upload(_s3_base_path + "/Ode_to_the_West_Wind.txt", _content); + ASSERT_TRUE(status.ok()); + status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + status = _s3->rm(_s3_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.ok()); + status = _s3->exist(_s3_base_path + "/Ode_to_the_West_Wind.txt"); + ASSERT_TRUE(status.code() == TStatusCode::NOT_FOUND); +} + +TEST_F(S3StorageBackendTest, s3_mkdir) { + Status status = _s3->mkdir(_s3_base_path + "/dir"); + ASSERT_TRUE(status.ok()); + status = _s3->exist(_s3_base_path + "/dir"); + ASSERT_TRUE(status.code() == TStatusCode::NOT_FOUND); + status = _s3->exist(_s3_base_path + "/dir/"); + ASSERT_TRUE(status.ok()); +} + +} // end namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + int 
ret = 0;
+    Aws::SDKOptions options;
+    Aws::InitAPI(options);
+    // The AK/SK above are placeholders ("ak sk is secret"), so running the
+    // suite is disabled by default.
+    // ret = RUN_ALL_TESTS();
+    Aws::ShutdownAPI(options);
+    return ret;
+}
\ No newline at end of file
diff --git a/be/test/util/s3_uri_test.cpp b/be/test/util/s3_uri_test.cpp
new file mode 100644
index 00000000000000..1f62102891b0d4
--- /dev/null
+++ b/be/test/util/s3_uri_test.cpp
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/s3_uri.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+#include "util/logging.h"
+
+namespace doris {
+
+class S3URITest : public testing::Test {
+public:
+    S3URITest() {}
+    ~S3URITest() {}
+}; // end class S3URITest
+
+TEST_F(S3URITest, LocationParsing) {
+    std::string p1 = "s3://bucket/path/to/file";
+    S3URI uri1(p1);
+    ASSERT_TRUE(uri1.parse());
+    ASSERT_EQ("bucket", uri1.get_bucket());
+    ASSERT_EQ("path/to/file", uri1.get_key());
+}
+
+TEST_F(S3URITest, PathLocationParsing) {
+    std::string p1 = "s3://bucket/path/";
+    S3URI uri1(p1);
+    ASSERT_TRUE(uri1.parse());
+    ASSERT_EQ("bucket", uri1.get_bucket());
+    ASSERT_EQ("path/", uri1.get_key());
+}
+
+TEST_F(S3URITest, EncodedString) {
+    std::string p1 = "s3://bucket/path%20to%20file";
+    S3URI uri1(p1);
+    ASSERT_TRUE(uri1.parse());
+    ASSERT_EQ("bucket", uri1.get_bucket());
+    ASSERT_EQ("path%20to%20file", uri1.get_key());
+}
+
+TEST_F(S3URITest, MissingKey) {
+    std::string p1 = "https://bucket/";
+    S3URI uri1(p1);
+    ASSERT_FALSE(uri1.parse());
+    std::string p2 = "s3://bucket/";
+    S3URI uri2(p2);
+    ASSERT_FALSE(uri2.parse());
+}
+
+TEST_F(S3URITest, RelativePathing) {
+    std::string p1 = "/path/to/file";
+    S3URI uri1(p1);
+    ASSERT_FALSE(uri1.parse());
+}
+
+TEST_F(S3URITest, InvalidScheme) {
+    std::string p1 = "ftp://bucket/";
+    S3URI uri1(p1);
+    ASSERT_FALSE(uri1.parse());
+}
+
+TEST_F(S3URITest, QueryAndFragment) {
+    std::string p1 = "s3://bucket/path/to/file?query=foo#bar";
+    S3URI uri1(p1);
+    ASSERT_TRUE(uri1.parse());
+    ASSERT_EQ("bucket", uri1.get_bucket());
+    ASSERT_EQ("path/to/file", uri1.get_key());
+}
+
+} // end namespace doris
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
index 95017f7544aafd..d21f3a2ad59428 100644
--- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
+++ b/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
@@ -26,50 +26,62 @@ under the License.
 # CREATE REPOSITORY
 ## Description
-This statement is used to create the warehouse. The warehouse is used for backup or recovery. Only root or superuser users can create warehouses.
-Grammar:
-CREATE [READ ONLY] REPOSITORY `repo_name`
-WITH BROKER `broker_name`
-ON LOCATION `repo_location`
-PROPERTIES ("key"="value", ...);
+    This statement is used to create a repository. A repository is used for backup or recovery. Only root or superuser users can create repositories.
+    Grammar:
+    CREATE [READ ONLY] REPOSITORY `repo_name`
+    WITH [BROKER `broker_name`|S3]
+    ON LOCATION `repo_location`
+    PROPERTIES ("key"="value", ...);
 
-Explain:
-1. The creation of warehouses depends on existing brokers
-2. If it is a read-only warehouse, it can only be restored on the warehouse. If not, you can backup and restore operations.
-3. According to the different types of broker, PROPERTIES is different, see the example.
+    Explain:
+    1. The creation of a repository depends on an existing broker, or uses the AWS S3 protocol to connect to cloud storage directly.
+    2. If it is a read-only repository, it can only be used for restore. Otherwise, it can be used for both backup and restore.
+    3. PROPERTIES differs according to the broker type or S3, see the examples.
 
 ## example
-1. Create a warehouse named bos_repo, which relies on BOS broker "bos_broker", and the data root directory is: bos://palo_backup.
-CREATE REPOSITORY `bos_repo`
-WITH BROKER `bos_broker`
-ON LOCATION "bos://palo_backup"
-PROPERTIES
-(
-"bos_endpoint" ="http://gz.bcebos.com",
-"bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22",
-"bos_secret_accesskey"="70999999999999de274d59eaa980a"
-);
+    1. Create a repository named bos_repo, which relies on BOS broker "bos_broker", with data root directory bos://palo_backup.
+    CREATE REPOSITORY `bos_repo`
+    WITH BROKER `bos_broker`
+    ON LOCATION "bos://palo_backup"
+    PROPERTIES
+    (
+        "bos_endpoint" = "http://gz.bcebos.com",
+        "bos_accesskey" = "bos_accesskey",
+        "bos_secret_accesskey" = "bos_secret_accesskey"
+    );
+
+    2. Create the same repository as in Example 1, but with the read-only attribute:
+    CREATE READ ONLY REPOSITORY `bos_repo`
+    WITH BROKER `bos_broker`
+    ON LOCATION "bos://palo_backup"
+    PROPERTIES
+    (
+        "bos_endpoint" = "http://gz.bcebos.com",
+        "bos_accesskey" = "bos_accesskey",
+        "bos_secret_accesskey" = "bos_secret_accesskey"
+    );
+
+    3. Create a repository named hdfs_repo, which relies on Baidu HDFS broker "hdfs_broker", with data root directory hdfs://hadoop-name-node:54310/path/to/repo/.
+    CREATE REPOSITORY `hdfs_repo`
+    WITH BROKER `hdfs_broker`
+    ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/"
+    PROPERTIES
+    (
+        "username" = "user",
+        "password" = "password"
+    );
 
-2. Create the same warehouse as in Example 1, but with read-only attributes:
-CREATE READ ONLY REPOSITORY `bos_repo`
-WITH BROKER `bos_broker`
-ON LOCATION "bos://palo_backup"
-PROPERTIES
-(
-"bos_endpoint" ="http://gz.bcebos.com",
-"bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22",
-"bos_secret_accesskey"="70999999999999de274d59eaa980a"
-);
-
-3. Create a warehouse named hdfs_repo, which relies on Baidu HDFS broker "hdfs_broker", and the data root directory is: hdfs://hadoop-name-node:54310/path/to/repo/
-CREATE REPOSITORY `hdfs_repo`
-WITH BROKER `hdfs_broker`
-ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/"
-PROPERTIES
-(
-"Username" = "User"
-"password" = "password"
-);
+
+    4. Create a repository named s3_repo that connects to cloud storage directly, without going through a broker.
+    CREATE REPOSITORY `s3_repo`
+    WITH S3
+    ON LOCATION "s3://s3-repo"
+    PROPERTIES
+    (
+        "AWS_ENDPOINT" = "http://s3-REGION.amazonaws.com",
+        "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
+        "AWS_SECRET_KEY" = "AWS_SECRET_KEY",
+        "AWS_REGION" = "REGION"
+    );
 
 ## keyword
 CREATE REPOSITORY
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
index b3839f87d351e2..d6d3a8f0de4706 100644
--- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md
@@ -29,14 +29,14 @@ under the License.
     该语句用于创建仓库。仓库用于属于备份或恢复。仅 root 或 superuser 用户可以创建仓库。
     语法:
         CREATE [READ ONLY] REPOSITORY `repo_name`
-        WITH BROKER `broker_name`
+        WITH [BROKER `broker_name`|S3]
         ON LOCATION `repo_location`
         PROPERTIES ("key"="value", ...);
 
     说明:
-        1. 仓库的创建,依赖于已存在的 broker
+        1. 仓库的创建,依赖于已存在的 broker,或者直接通过 AWS S3 协议访问云存储
         2. 如果是只读仓库,则只能在仓库上进行恢复。如果不是,则可以进行备份和恢复操作。
-        3. 根据 broker 的不同类型,PROPERTIES 有所不同,具体见示例。
+        3. 根据 broker 或者 S3 的不同类型,PROPERTIES 有所不同,具体见示例。
 
 ## example
     1. 创建名为 bos_repo 的仓库,依赖 BOS broker "bos_broker",数据根目录为:bos://palo_backup
@@ -46,8 +46,8 @@ under the License.
     PROPERTIES
     (
         "bos_endpoint" = "http://gz.bcebos.com",
-        "bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22",
-        "bos_secret_accesskey"="70999999999999de274d59eaa980a"
+        "bos_accesskey" = "bos_accesskey",
+        "bos_secret_accesskey" = "bos_secret_accesskey"
    );
 
     2. 创建和示例 1 相同的仓库,但属性为只读:
@@ -57,8 +57,8 @@ under the License.
    PROPERTIES
    (
        "bos_endpoint" = "http://gz.bcebos.com",
-        "bos_accesskey" = "069fc2786e664e63a5f111111114ddbs22",
-        "bos_secret_accesskey"="70999999999999de274d59eaa980a"
+        "bos_accesskey" = "bos_accesskey",
+        "bos_secret_accesskey" = "bos_secret_accesskey"
    );
 
    3. 创建名为 hdfs_repo 的仓库,依赖 Baidu hdfs broker "hdfs_broker",数据根目录为:hdfs://hadoop-name-node:54310/path/to/repo/
@@ -70,6 +70,18 @@ under the License.
         "username" = "user",
         "password" = "password"
     );
+
+    4. 创建名为 s3_repo 的仓库,直接连接云存储,而不通过 broker。
+    CREATE REPOSITORY `s3_repo`
+    WITH S3
+    ON LOCATION "s3://s3-repo"
+    PROPERTIES
+    (
+        "AWS_ENDPOINT" = "http://s3-REGION.amazonaws.com",
+        "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY",
+        "AWS_SECRET_KEY" = "AWS_SECRET_KEY",
+        "AWS_REGION" = "REGION"
+    );
 
 ## keyword
 CREATE REPOSITORY
diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml
index 3218be142403c4..5112e6b5a94f65 100644
--- a/fe/fe-core/pom.xml
+++ b/fe/fe-core/pom.xml
@@ -85,6 +85,17 @@ under the License.
             <artifactId>springloaded</artifactId>
             <version>1.2.6.RELEASE</version>
         </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-data-ldap</artifactId>
+            <version>2.3.3.RELEASE</version>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-pool</groupId>
+            <artifactId>commons-pool</artifactId>
+            <version>1.5.1</version>
+        </dependency>
         <dependency>
             <groupId>org.apache
@@ -279,6 +290,10 @@ under the License.
                     <artifactId>log4j-slf4j-impl</artifactId>
                 </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
                 <exclusion>
                     <groupId>io.dropwizard.metrics
@@ -485,6 +500,22 @@ under the License.
             <version>5.1.0.Final</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-aws</artifactId>
+            <version>2.7.3</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
         <dependency>
             <groupId>org.springframework.boot
@@ -554,12 +585,23 @@ under the License.
             <version>5.5.0</version>
         </dependency>
+        <dependency>
+            <groupId>software.amazon.awssdk</groupId>
+            <artifactId>s3</artifactId>
+            <version>2.15.45</version>
+        </dependency>
         <dependency>
             <groupId>org.awaitility</groupId>
             <artifactId>awaitility</artifactId>
             <version>4.0.3</version>
         </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+        </dependency>
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index 41410d1576888e..71bd5bc76c9ebe 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -239,7 +239,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A
     KW_EXISTS, KW_EXPORT, KW_EXTERNAL, KW_EXTRACT, KW_FALSE, KW_FEATURE, KW_FOLLOWER, KW_FOLLOWING,
     KW_FREE, KW_FROM, KW_FILE, KW_FILTER, KW_FIRST, KW_FLOAT, KW_FOR, KW_FORCE, KW_FORMAT, KW_FRONTEND, KW_FRONTENDS,
     KW_FULL, KW_FUNCTION, KW_FUNCTIONS, KW_GLOBAL, KW_GRANT, KW_GRANTS, KW_GROUP, KW_GROUPING,
-    KW_HASH, KW_HAVING, KW_HELP, KW_HLL, KW_HLL_UNION, KW_HOUR, KW_HUB,
+    KW_HASH, KW_HAVING, KW_HDFS, KW_HELP, KW_HLL, KW_HLL_UNION, KW_HOUR, KW_HUB,
     KW_IDENTIFIED, KW_IF, KW_IN, KW_INDEX, KW_INDEXES, KW_INFILE, KW_INSTALL, KW_INNER, KW_INSERT, KW_INT,
     KW_INTERMEDIATE, KW_INTERSECT, KW_INTERVAL, KW_INTO, KW_IS, KW_ISNULL, KW_ISOLATION, KW_JOIN,
@@ -257,7 +257,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A
     KW_RANDOM, KW_RANGE, KW_READ, KW_RECOVER, KW_REGEXP, KW_RELEASE, KW_RENAME, KW_REPAIR, KW_REPEATABLE,
     KW_REPOSITORY, KW_REPOSITORIES, KW_REPLACE, KW_REPLACE_IF_NOT_NULL, KW_REPLICA, KW_RESOURCE, KW_RESOURCES,
     KW_RESTORE, KW_RETURNS, KW_RESUME, KW_REVOKE, KW_RIGHT, KW_ROLE, KW_ROLES, KW_ROLLBACK, KW_ROLLUP, KW_ROUTINE, KW_ROW, KW_ROWS,
-    KW_SCHEMA, KW_SCHEMAS, KW_SECOND, KW_SELECT, KW_SEMI, KW_SERIALIZABLE, KW_SESSION, KW_SET, KW_SETS, KW_SET_VAR, KW_SHOW, KW_SIGNED,
+    KW_S3, KW_SCHEMA, KW_SCHEMAS, KW_SECOND, KW_SELECT, KW_SEMI, KW_SERIALIZABLE, KW_SESSION, KW_SET, KW_SETS, KW_SET_VAR, KW_SHOW, KW_SIGNED,
     KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STOP, KW_STORAGE, KW_STRING,
     KW_SUM, KW_SUPERUSER, KW_SYNC, KW_SYSTEM, KW_TABLE, KW_TABLES, KW_TABLET, KW_TASK, KW_TEMPORARY, KW_TERMINATED, KW_THAN, KW_TIME, KW_THEN, KW_TIMESTAMP, KW_TINYINT,
@@ -304,7 +304,7 @@ nonterminal ValueList value_clause;
 // No return.
 nonterminal describe_command, opt_full, opt_inner, opt_outer, from_or_in, keys_or_index,
     opt_storage, opt_wild_where,
-    charset, opt_charset_name, equal, transaction_characteristics, isolation_level,
+    charset, equal, transaction_characteristics, isolation_level,
     transaction_access_mode, isolation_types;
 
 // String
@@ -349,7 +349,6 @@ nonterminal FunctionName function_name;
 nonterminal Expr pre_filter_clause;
 nonterminal Expr where_clause;
 nonterminal Expr delete_on_clause;
-nonterminal Expr where_clause_without_null;
 nonterminal String sequence_col_clause;
 nonterminal Predicate predicate, between_predicate, comparison_predicate, compound_predicate,
     in_predicate, like_predicate, exists_predicate;
@@ -436,7 +435,7 @@ nonterminal String opt_with_label;
 nonterminal String opt_system;
 nonterminal BrokerDesc opt_broker;
 nonterminal ResourceDesc resource_desc;
-nonterminal List opt_col_list, col_list, opt_dup_keys, opt_columns_from_path;
+nonterminal List opt_col_list, opt_dup_keys, opt_columns_from_path;
 nonterminal List opt_col_with_comment_list, col_with_comment_list;
 nonterminal ColWithComment col_with_comment;
 nonterminal List opt_col_mapping_list;
@@ -456,7 +455,7 @@ nonterminal String opt_from_rollup, opt_to_rollup;
 nonterminal ColumnPosition opt_col_pos;
 
 // Alter statement
-nonterminal AlterClause alter_system_clause, alter_cluster_clause, alter_table_clause;
+nonterminal AlterClause alter_system_clause, alter_table_clause;
 nonterminal List alter_table_clause_list, opt_rollup, add_rollup_clause_list, drop_rollup_clause_list;
 nonterminal AddRollupClause add_rollup_clause;
 nonterminal DropRollupClause drop_rollup_clause;
@@ -488,6 +487,8 @@ nonterminal RoutineLoadDataSourceProperties opt_datasource_properties;
 
 nonterminal Boolean opt_signed_unsigned;
 
+nonterminal StorageBackend storage_backend;
+
 precedence nonassoc COMMA;
 precedence nonassoc STRING_LITERAL;
 precedence nonassoc KW_COLUMNS;
@@ -519,6 +520,10 @@ precedence left KW_PARTITIONS;
 precedence right KW_TEMPORARY;
 precedence right LBRACKET;
 
+// unused
+// terminal KW_BOTH, KW_PRIMARY;
+// nonterminal Expr where_clause_without_null, List col_list, opt_charset_name, AlterClause alter_cluster_clause;
+
 start with stmts;
 
 stmts ::=
@@ -1074,13 +1079,6 @@ alter_system_clause ::=
     :}
     ;
 
-alter_cluster_clause ::=
-    KW_MODIFY opt_properties:properties
-    {:
-        RESULT = new AlterClusterClause(AlterClusterType.ALTER_CLUSTER_PROPERTIES, properties);
-    :}
-    ;
-
 // Sync Stmt
 sync_stmt ::=
     KW_SYNC
@@ -1167,11 +1165,9 @@ create_stmt ::=
     {:
         RESULT = new CreateClusterStmt(name, properties, password);
    :}
-    | KW_CREATE opt_read_only:isReadOnly KW_REPOSITORY ident:repoName KW_WITH KW_BROKER ident:brokerName
-        KW_ON KW_LOCATION STRING_LITERAL:location
-        opt_properties:properties
+    | KW_CREATE opt_read_only:isReadOnly KW_REPOSITORY ident:repoName KW_WITH storage_backend:storage
     {:
-        RESULT = new CreateRepositoryStmt(isReadOnly, repoName, brokerName, location, properties);
+        RESULT = new CreateRepositoryStmt(isReadOnly, repoName, storage);
     :}
     | KW_CREATE KW_ROLE ident:role
     {:
@@ -1206,6 +1202,25 @@ opt_aggregate ::=
     :}
     ;
 
+storage_backend ::=
+    KW_BROKER ident:brokerName KW_ON KW_LOCATION STRING_LITERAL:location opt_properties:properties
+    {:
+        RESULT = new StorageBackend(brokerName, location, StorageBackend.StorageType.BROKER, properties);
+    :}
+    | KW_S3 KW_ON KW_LOCATION STRING_LITERAL:location opt_properties:properties
+    {:
+        RESULT = new StorageBackend("", location, StorageBackend.StorageType.S3, properties);
+    :}
+    | KW_HDFS KW_ON KW_LOCATION
STRING_LITERAL:location opt_properties:properties + {: + RESULT = new StorageBackend("", location, StorageBackend.StorageType.HDFS, properties); + :} + | KW_LOCAL KW_ON KW_LOCATION STRING_LITERAL:location opt_properties:properties + {: + RESULT = new StorageBackend("", location, StorageBackend.StorageType.LOCAL, properties); + :} + ; + opt_read_only ::= {: RESULT = false; @@ -1495,13 +1510,6 @@ col_with_comment ::= :} ; -col_list ::= - KW_COLUMNS LPAREN ident_list:colList RPAREN - {: - RESULT = colList; - :} - ; - opt_col_mapping_list ::= /* Empty */ {: @@ -1527,6 +1535,18 @@ opt_broker ::= {: RESULT = null; :} + | KW_WITH KW_S3 LPAREN key_value_map:properties RPAREN + {: + RESULT = new BrokerDesc("S3", StorageBackend.StorageType.S3, properties); + :} + | KW_WITH KW_HDFS LPAREN key_value_map:properties RPAREN + {: + RESULT = new BrokerDesc("HDFS", StorageBackend.StorageType.HDFS, properties); + :} + | KW_WITH KW_LOCAL LPAREN key_value_map:properties RPAREN + {: + RESULT = new BrokerDesc("LOCAL", StorageBackend.StorageType.LOCAL, properties); + :} | KW_WITH KW_BROKER ident_or_text:name {: RESULT = new BrokerDesc(name, null); @@ -2531,11 +2551,6 @@ old_or_new_charset_name_or_default ::= :} ; -opt_charset_name ::= - /* empty */ - | charset old_or_new_charset_name_or_default - ; - opt_collate ::= /* Empty */ {: @@ -3784,11 +3799,6 @@ pre_filter_clause ::= {: RESULT = e; :} ; -where_clause_without_null ::= - KW_WHERE expr:e - {: RESULT = e; :} - ; - grouping_set ::= LPAREN RPAREN {: @@ -4666,6 +4676,10 @@ keyword ::= {: RESULT = id; :} | KW_BROKER:id {: RESULT = id; :} + | KW_S3:id + {: RESULT = id; :} + | KW_HDFS:id + {: RESULT = id; :} | KW_BACKENDS:id {: RESULT = id; :} | KW_BUILTIN:id @@ -5036,3 +5050,29 @@ time_unit ::= // // Specify a scalar value, a row, or a table derived from a . 
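All of the new productions above reduce the `WITH BROKER|S3|HDFS|LOCAL` syntax to two plain constructor calls defined later in this diff. A minimal sketch of what the parser builds for an S3 repository clause and an S3 load clause follows; the property keys mirror the S3Storage constants referenced further down, and the concrete values are placeholders, not the PR's exact code:

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.doris.analysis.BrokerDesc;
import org.apache.doris.analysis.StorageBackend;

public class ParsedStorageExample {
    public static void main(String[] args) {
        // Contents of the PROPERTIES(...) clause; keys follow the S3Storage
        // constants used elsewhere in this diff, values are illustrative.
        Map<String, String> props = new HashMap<>();
        props.put("AWS_ENDPOINT", "http://s3.gz.bcebos.com");
        props.put("AWS_REGION", "gz");
        props.put("AWS_ACCESS_KEY", "ak");
        props.put("AWS_SECRET_KEY", "sk");

        // CREATE REPOSITORY ... WITH S3 ON LOCATION "s3://bucket/backup" PROPERTIES(...)
        // reduces to:
        StorageBackend repoStorage =
                new StorageBackend("", "s3://bucket/backup", StorageBackend.StorageType.S3, props);

        // LOAD ... WITH S3 (...) and EXPORT ... WITH S3 (...) reduce to:
        BrokerDesc s3Desc = new BrokerDesc("S3", StorageBackend.StorageType.S3, props);
        System.out.println(s3Desc.getFileType()); // TFileType.FILE_S3
    }
}
```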
// subquery ::= // LPAREN query_expr_body RPAREN; + +// unused +// +// where_clause_without_null ::= +// KW_WHERE expr:e +// {: RESULT = e; :} +// ; +// +// alter_cluster_clause ::= +// KW_MODIFY opt_properties:properties +// {: +// RESULT = new AlterClusterClause(AlterClusterType.ALTER_CLUSTER_PROPERTIES, properties); +// :} +// ; +// +// col_list ::= +// KW_COLUMNS LPAREN ident_list:colList RPAREN +// {: +// RESULT = colList; +// :} +// ; +// +// opt_charset_name ::= +// /* empty */ +// | charset old_or_new_charset_name_or_default +// ; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java index eed2cce256730a..22526c4b2dfc92 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BrokerDesc.java @@ -17,6 +17,7 @@ package org.apache.doris.analysis; +import org.apache.doris.backup.BlobStorage; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.PrintableMap; @@ -24,6 +25,9 @@ import com.google.common.collect.Maps; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -37,16 +41,17 @@ // "username" = "user0", // "password" = "password0" // ) -public class BrokerDesc implements Writable { +public class BrokerDesc extends StorageDesc implements Writable { + private final static Logger LOG = LogManager.getLogger(BrokerDesc.class); + // just for multi load public final static String MULTI_LOAD_BROKER = "__DORIS_MULTI_LOAD_BROKER__"; public final static String MULTI_LOAD_BROKER_BACKEND_KEY = "__DORIS_MULTI_LOAD_BROKER_BACKEND__"; - private String name; - private Map properties; // Only used for recovery private BrokerDesc() { this.properties = Maps.newHashMap(); + this.storageType = StorageBackend.StorageType.BROKER; } public BrokerDesc(String name, Map properties) { @@ -55,6 +60,17 @@ public BrokerDesc(String name, Map properties) { if (this.properties == null) { this.properties = Maps.newHashMap(); } + this.storageType = StorageBackend.StorageType.BROKER; + tryConvertToS3(); + } + public BrokerDesc(String name, StorageBackend.StorageType storageType, Map properties) { + this.name = name; + this.properties = properties; + if (this.properties == null) { + this.properties = Maps.newHashMap(); + } + this.storageType = storageType; + tryConvertToS3(); } public String getName() { @@ -65,6 +81,10 @@ public Map getProperties() { return properties; } + public StorageBackend.StorageType getStorageType() { + return storageType; + } + public boolean isMultiLoadBroker() { return this.name.equalsIgnoreCase(MULTI_LOAD_BROKER); } @@ -73,12 +93,25 @@ public TFileType getFileType() { if (isMultiLoadBroker()) { return TFileType.FILE_LOCAL; } + if (storageType == StorageBackend.StorageType.BROKER) { + return TFileType.FILE_BROKER; + } + if (storageType == StorageBackend.StorageType.S3) { + return TFileType.FILE_S3; + } + if (storageType == StorageBackend.StorageType.HDFS) { + return TFileType.FILE_HDFS; + } return TFileType.FILE_BROKER; } + public StorageBackend.StorageType storageType() { + return storageType; + } @Override public void write(DataOutput out) throws IOException { Text.writeString(out, name); + properties.put(BlobStorage.STORAGE_TYPE, storageType.name()); out.writeInt(properties.size()); for (Map.Entry entry : properties.entrySet()) { 
Text.writeString(out, entry.getKey()); @@ -95,6 +128,16 @@ public void readFields(DataInput in) throws IOException { final String val = Text.readString(in); properties.put(key, val); } + StorageBackend.StorageType st = StorageBackend.StorageType.BROKER; + String typeStr = properties.remove(BlobStorage.STORAGE_TYPE); + if (typeStr != null) { + try { + st = StorageBackend.StorageType.valueOf(typeStr); + } catch (IllegalArgumentException e) { + LOG.warn("set to BROKER, because of exception", e); + } + } + storageType = st; } public static BrokerDesc read(DataInput in) throws IOException { @@ -105,7 +148,11 @@ public static BrokerDesc read(DataInput in) throws IOException { public String toSql() { StringBuilder sb = new StringBuilder(); - sb.append("WITH BROKER ").append(name); + if (storageType == StorageBackend.StorageType.BROKER) { + sb.append("WITH BROKER ").append(name); + } else { + sb.append("WITH ").append(storageType.name()); + } if (properties != null && !properties.isEmpty()) { PrintableMap printableMap = new PrintableMap<>(properties, " = ", true, false, true); sb.append(" (").append(printableMap.toString()).append(")"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRepositoryStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRepositoryStmt.java index c8724051bfbcac..a83e10bbf17c77 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRepositoryStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRepositoryStmt.java @@ -18,33 +18,23 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Catalog; -import org.apache.doris.common.AnalysisException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; -import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.UserException; -import org.apache.doris.common.util.PrintableMap; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -import com.google.common.base.Strings; - import java.util.Map; public class CreateRepositoryStmt extends DdlStmt { private boolean isReadOnly; private String name; - private String brokerName; - private String location; - private Map properties; + private StorageBackend storage; - public CreateRepositoryStmt(boolean isReadOnly, String name, String brokerName, String location, - Map properties) { + public CreateRepositoryStmt(boolean isReadOnly, String name, StorageBackend storage) { this.isReadOnly = isReadOnly; this.name = name; - this.brokerName = brokerName; - this.location = location; - this.properties = properties; + this.storage = storage; } public boolean isReadOnly() { @@ -56,35 +46,29 @@ public String getName() { } public String getBrokerName() { - return brokerName; + return storage.getStorageName(); + } + + public StorageBackend.StorageType getStorageType() { + return storage.getStorageType(); } public String getLocation() { - return location; + return storage.getLocation(); } public Map getProperties() { - return properties; + return storage.getProperties(); } @Override public void analyze(Analyzer analyzer) throws UserException { super.analyze(analyzer); - + storage.analyze(analyzer); // check auth if (!Catalog.getCurrentCatalog().getAuth().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN"); } - - FeNameFormat.checkCommonName("repository", name); - - if (Strings.isNullOrEmpty(brokerName)) { - throw new 
AnalysisException("You must specify the broker of the repository"); - } - - if (Strings.isNullOrEmpty(location)) { - throw new AnalysisException("You must specify a location on the repository"); - } } @Override @@ -94,8 +78,7 @@ public String toSql() { if (isReadOnly) { sb.append("READ_ONLY "); } - sb.append("REPOSITORY `").append(name).append("` ").append("WITH BROKER `").append(brokerName).append("` "); - sb.append("PROPERTIES(").append(new PrintableMap<>(properties, " = ", true, false)).append(")"); + sb.append("REPOSITORY `").append(name).append("` WITH ").append(storage.toSql()); return sb.toString(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java index 9af531996c97bd..a3ff5a8eff6d18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java @@ -150,21 +150,22 @@ public void analyze(Analyzer analyzer) throws AnalysisException, UserException { // check table && partitions whether exist checkTable(analyzer.getCatalog()); - // check path is valid - checkPath(path); - // check broker whether exist if (brokerDesc == null) { throw new AnalysisException("broker is not provided"); } - if (!analyzer.getCatalog().getBrokerMgr().containsBroker(brokerDesc.getName())) { - throw new AnalysisException("broker " + brokerDesc.getName() + " does not exist"); - } + // check path is valid + checkPath(path, brokerDesc.getStorageType()); + if (brokerDesc.getStorageType() == StorageBackend.StorageType.BROKER) { + if (!analyzer.getCatalog().getBrokerMgr().containsBroker(brokerDesc.getName())) { + throw new AnalysisException("broker " + brokerDesc.getName() + " does not exist"); + } - FsBroker broker = analyzer.getCatalog().getBrokerMgr().getAnyBroker(brokerDesc.getName()); - if (broker == null) { - throw new AnalysisException("failed to get alive broker"); + FsBroker broker = analyzer.getCatalog().getBrokerMgr().getAnyBroker(brokerDesc.getName()); + if (broker == null) { + throw new AnalysisException("failed to get alive broker"); + } } // check properties @@ -216,7 +217,7 @@ private void checkTable(Catalog catalog) throws AnalysisException { } } - private static void checkPath(String path) throws AnalysisException { + public static void checkPath(String path, StorageBackend.StorageType type) throws AnalysisException { if (Strings.isNullOrEmpty(path)) { throw new AnalysisException("No dest path specified."); } @@ -224,9 +225,23 @@ private static void checkPath(String path) throws AnalysisException { try { URI uri = new URI(path); String schema = uri.getScheme(); - if (schema == null || (!schema.equalsIgnoreCase("bos") && !schema.equalsIgnoreCase("afs") + if (type == StorageBackend.StorageType.BROKER) { + if (schema == null || (!schema.equalsIgnoreCase("bos") && !schema.equalsIgnoreCase("afs") && !schema.equalsIgnoreCase("hdfs"))) { - throw new AnalysisException("Invalid export path. please use valid 'HDFS://', 'AFS://' or 'BOS://' path."); + throw new AnalysisException("Invalid export path. please use valid 'HDFS://', 'AFS://' or 'BOS://' path."); + } + } else if (type == StorageBackend.StorageType.S3) { + if (schema == null || !schema.equalsIgnoreCase("s3")) { + throw new AnalysisException("Invalid export path. 
please use valid 'S3://' path."); + } + } else if (type == StorageBackend.StorageType.HDFS) { + if (schema == null || !schema.equalsIgnoreCase("hdfs")) { + throw new AnalysisException("Invalid export path. please use valid 'HDFS://' path."); + } + } else if (type == StorageBackend.StorageType.LOCAL) { + if (schema != null && !schema.equalsIgnoreCase("file")) { + throw new AnalysisException("Invalid export path. please use valid 'file://' path."); + } } } catch (URISyntaxException e) { throw new AnalysisException("Invalid path format. " + e.getMessage()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 263c30dc9837b5..69c996c3c2f4a1 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -362,6 +362,8 @@ public final void analyze(Analyzer analyzer) throws AnalysisException { throw new AnalysisException(String.format("Exceeded the maximum depth of an " + "expression tree (%s).", Config.expr_depth_limit)); } + } else { + throw new AnalysisException("analyzer is null."); } for (Expr child: children) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java index 8ebee50ad0e9ab..8de1adb0089631 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java @@ -317,6 +317,15 @@ public void analyze(Analyzer analyzer) throws UserException { && !((table instanceof OlapTable) && ((OlapTable) table).hasDeleteSign()) ) { throw new AnalysisException("load by MERGE or DELETE need to upgrade table to support batch delete."); } + if (brokerDesc != null && !brokerDesc.isMultiLoadBroker()) { + for (int i = 0; i < dataDescription.getFilePaths().size(); i++) { + dataDescription.getFilePaths().set(i, + brokerDesc.convertPathToS3(dataDescription.getFilePaths().get(i))); + } + for (String path : dataDescription.getFilePaths()) { + ExportStmt.checkPath(path, brokerDesc.getStorageType()); + } + } } if (isLoadFromTable) { if (dataDescriptions.size() > 1) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowRepositoriesStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowRepositoriesStmt.java index 95bff25821963a..d5a8476c6e617a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowRepositoriesStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowRepositoriesStmt.java @@ -26,7 +26,7 @@ public class ShowRepositoriesStmt extends ShowStmt { public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() .add("RepoId").add("RepoName").add("CreateTime").add("IsReadOnly").add("Location") - .add("Broker").add("ErrMsg") + .add("Broker").add("Type").add("ErrMsg") .build(); public ShowRepositoriesStmt() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java new file mode 100644 index 00000000000000..921c727e95ea68 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.FeNameFormat; +import org.apache.doris.common.NotImplementedException; +import org.apache.doris.common.UserException; +import org.apache.doris.common.util.PrintableMap; +import org.apache.doris.thrift.TStorageBackendType; + +import com.google.common.base.Strings; + +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.Logger; + +import java.util.Map; + +public class StorageBackend extends StorageDesc implements ParseNode { + private static Logger LOG = Logger.getLogger(StorageBackend.class); + + private String location; + private StorageType storageType; + private Map properties; + + public StorageBackend(String storageName, String location, StorageType storageType, Map properties) { + this.name = storageName; + this.location = location; + this.storageType = storageType; + this.properties = properties; + tryConvertToS3(); + this.location = convertPathToS3(location); + } + + public StorageType getStorageType() { + return storageType; + } + + public void setStorageType(StorageType storageType) { + this.storageType = storageType; + } + + public String getStorageName() { + return name; + } + + public void setStorageName(String storageName) { + this.name = storageName; + } + + public String getLocation() { + return location; + } + + public void setLocation(String location) { + this.location = location; + } + + public Map getProperties() { + return properties; + } + + public void setProperties(Map properties) { + this.properties = properties; + } + + @Override + public void analyze(Analyzer analyzer) throws UserException { + if (this.storageType != StorageType.BROKER && StringUtils.isEmpty(name)) { + name = this.storageType.name(); + } + if (this.storageType != StorageType.BROKER && this.storageType != StorageType.S3) { + throw new NotImplementedException(this.storageType.toString() + " is not support now."); + } + FeNameFormat.checkCommonName("repository", name); + + if (Strings.isNullOrEmpty(location)) { + throw new AnalysisException("You must specify a location on the repository"); + } + ExportStmt.checkPath(location, storageType); + } + + @Override + public String toSql() { + StringBuilder sb = new StringBuilder(); + sb.append(storageType.name()); + if (storageType == StorageType.BROKER) { + sb.append(" `").append(name).append("`"); + } + sb.append(" ON LOCATION ").append(location).append(" PROPERTIES(") + .append(new PrintableMap<>(properties, " = ", true, false)).append(")"); + return sb.toString(); + } + + public enum StorageType { + BROKER("Doris Broker"), + S3("Amazon S3 Simple Storage Service"), + // the following is not used currently + HDFS("Hadoop Distributed File System"), + LOCAL("Local file system"); + + private final String description; + + StorageType(String description) { + this.description = description; + } + + @Override + public String toString() { + return 
description; + } + + public TStorageBackendType toThrift() { + switch (this) { + case S3: + return TStorageBackendType.S3; + case HDFS: + return TStorageBackendType.HDFS; + case LOCAL: + return TStorageBackendType.LOCAL; + default: + return TStorageBackendType.BROKER; + } + } + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageDesc.java new file mode 100644 index 00000000000000..83e51105f28ab6 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageDesc.java @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.backup.S3Storage; +import org.apache.doris.common.Config; + +import org.apache.commons.collections.map.CaseInsensitiveMap; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.client.utils.URIBuilder; +import org.apache.log4j.Logger; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Map; + +public abstract class StorageDesc { + private static Logger LOG = Logger.getLogger(StorageBackend.class); + // for dfs + public static final String HADOOP_JOB_UGI = "hadoop.job.ugi"; + public static final String HADOOP_JOB_GROUP_NAME = "hadoop.job.group.name"; + public static final String USER_NAME_KEY = "username"; + public static final String PASSWORD_KEY = "password"; + public static final String FS_DEFAULT_NAME = "fs.default.name"; + public static final String FS_HDFS_IMPL = "fs.hdfs.impl"; + public static final String FS_AFS_IMPL = "fs.afs.impl"; + public static final String DFS_AGENT_PORT = "dfs.agent.port"; + public static final String DFS_CLIENT_AUTH_METHOD = "dfs.client.auth.method"; + // for bos + public static final String BOS_ENDPOINT = "bos_endpoint"; + public static final String BOS_ACCESS_KEY = "bos_accesskey"; + public static final String BOS_SECRET_ACCESS_KEY = "bos_secret_accesskey"; + public static final String FS_BOS_IMPL = "fs.bos.impl"; + public static final String FS_BOS_ACCESS_KEY = "fs.bos.access.key"; + public static final String FS_BOS_SECRET_ACCESS_KEY = "fs.bos.secret.access.key"; + public static final String FS_BOS_ENDPOINT = "fs.bos.endpoint"; + public static final String FS_BOS_MULTIPART_UPLOADS_BLOCK_SIZE = "fs.bos.multipart.uploads.block.size"; + + protected StorageBackend.StorageType storageType; + protected Map properties; + protected String name; + protected boolean convertedToS3 = false; + + protected void tryConvertToS3() { + if (!Config.enable_access_file_without_broker || storageType != StorageBackend.StorageType.BROKER) { + return; + } + CaseInsensitiveMap ciProperties = new CaseInsensitiveMap(); + ciProperties.putAll(properties); + if 
(StringUtils.isNotEmpty(ciProperties.get(BOS_ENDPOINT).toString()) && + StringUtils.isNotEmpty(ciProperties.get(BOS_ACCESS_KEY).toString()) && + StringUtils.isNotEmpty(ciProperties.get(BOS_SECRET_ACCESS_KEY).toString())) { + // bos endpoint like http[s]://gz.bcebos.com, we want to extract region gz, + // and convert to s3 endpoint http[s]://s3.gz.bcebos.com + String bosEndpiont = ciProperties.get(BOS_ENDPOINT).toString(); + try { + URI uri = new URI(bosEndpiont); + String host = uri.getHost(); + String[] hostSplit = host.split("\\."); + if (hostSplit.length < 3) { + return; + } + String region = hostSplit[0]; + String s3Endpoint = new URIBuilder(uri).setHost("s3." + host).build().toString(); + properties.clear(); + properties.put(S3Storage.S3_ENDPOINT, s3Endpoint); + properties.put(S3Storage.S3_REGION, region); + properties.put(S3Storage.S3_AK, ciProperties.get(BOS_ACCESS_KEY).toString()); + properties.put(S3Storage.S3_SK, ciProperties.get(BOS_SECRET_ACCESS_KEY).toString()); + storageType = StorageBackend.StorageType.S3; + convertedToS3 = true; + LOG.info("skip BROKER and access S3 directly."); + } catch (URISyntaxException e) { + LOG.warn(BOS_ENDPOINT + ": " + bosEndpiont + " is invalid."); + } + } + } + + protected String convertPathToS3(String path) { + if (!convertedToS3) { + return path; + } + try { + URI orig = new URI(path); + URI s3url = new URI("s3", orig.getRawAuthority(), + orig.getRawPath(), orig.getRawQuery(), orig.getRawFragment()); + return s3url.toString(); + } catch (URISyntaxException e) { + return path; + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java index 77c111bd871ae1..bf5e43cb254c3c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -26,6 +26,7 @@ import org.apache.doris.analysis.DropRepositoryStmt; import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.RestoreStmt; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.analysis.TableName; import org.apache.doris.analysis.TableRef; import org.apache.doris.backup.AbstractJob.JobType; @@ -172,11 +173,12 @@ protected void runAfterCatalogReady() { // handle create repository stmt public void createRepository(CreateRepositoryStmt stmt) throws DdlException { - if (!catalog.getBrokerMgr().containsBroker(stmt.getBrokerName())) { + if (!catalog.getBrokerMgr().containsBroker(stmt.getBrokerName()) + && stmt.getStorageType() == StorageBackend.StorageType.BROKER) { ErrorReport.reportDdlException(ErrorCode.ERR_COMMON_ERROR, "broker does not exist: " + stmt.getBrokerName()); } - BlobStorage storage = new BlobStorage(stmt.getBrokerName(), stmt.getProperties()); + BlobStorage storage = BlobStorage.create(stmt.getBrokerName(),stmt.getStorageType(), stmt.getProperties()); long repoId = catalog.getNextId(); Repository repo = new Repository(repoId, stmt.getName(), stmt.isReadOnly(), stmt.getLocation(), storage); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java index cbb6c8a47d47da..b9791b7c8db4de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java @@ -607,7 +607,7 @@ private void uploadSnapshot() { } long signature = catalog.getNextId(); UploadTask task = new UploadTask(null, beId, 
signature, jobId, dbId, srcToDest, - brokers.get(0), repo.getStorage().getProperties()); + brokers.get(0), repo.getStorage().getProperties(), repo.getStorage().getStorageType()); batchTask.addTask(task); unfinishedTaskIds.put(signature, beId); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java index 77be204796163f..10fb1d475274db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java @@ -17,746 +17,122 @@ package org.apache.doris.backup; -import org.apache.doris.backup.Status.ErrCode; -import org.apache.doris.catalog.Catalog; -import org.apache.doris.catalog.FsBroker; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.ClientPool; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.common.Config; -import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; -import org.apache.doris.common.util.BrokerUtil; import org.apache.doris.service.FrontendOptions; -import org.apache.doris.thrift.TBrokerCheckPathExistRequest; -import org.apache.doris.thrift.TBrokerCheckPathExistResponse; -import org.apache.doris.thrift.TBrokerCloseReaderRequest; -import org.apache.doris.thrift.TBrokerCloseWriterRequest; -import org.apache.doris.thrift.TBrokerDeletePathRequest; -import org.apache.doris.thrift.TBrokerFD; -import org.apache.doris.thrift.TBrokerFileStatus; -import org.apache.doris.thrift.TBrokerListPathRequest; -import org.apache.doris.thrift.TBrokerListResponse; -import org.apache.doris.thrift.TBrokerOpenMode; -import org.apache.doris.thrift.TBrokerOpenReaderRequest; -import org.apache.doris.thrift.TBrokerOpenReaderResponse; -import org.apache.doris.thrift.TBrokerOpenWriterRequest; -import org.apache.doris.thrift.TBrokerOpenWriterResponse; -import org.apache.doris.thrift.TBrokerOperationStatus; -import org.apache.doris.thrift.TBrokerOperationStatusCode; -import org.apache.doris.thrift.TBrokerPReadRequest; -import org.apache.doris.thrift.TBrokerPWriteRequest; -import org.apache.doris.thrift.TBrokerReadResponse; -import org.apache.doris.thrift.TBrokerRenamePathRequest; -import org.apache.doris.thrift.TBrokerVersion; -import org.apache.doris.thrift.TNetworkAddress; -import org.apache.doris.thrift.TPaloBrokerService; -import com.google.common.base.Preconditions; import com.google.common.collect.Maps; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.thrift.TException; -import org.apache.thrift.transport.TTransportException; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; import java.io.DataInput; import java.io.DataOutput; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; -import java.nio.file.FileVisitOption; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Comparator; import java.util.List; import java.util.Map; -public class BlobStorage implements Writable { - private static final Logger LOG = LogManager.getLogger(BlobStorage.class); +public abstract class BlobStorage implements Writable { - private String brokerName; + public static final String STORAGE_TYPE = "_DORIS_STORAGE_TYPE_"; 
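For the S3 branch of the `create()` factory below, `new S3Storage(properties)` ultimately has to turn the property map into an AWS SDK v2 client, which is what the `software.amazon.awssdk:s3` dependency added to the FE pom exists for. A minimal, hypothetical sketch of that construction; this is illustrative, not the PR's actual S3Storage code:

```java
import java.net.URI;

import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;

public class S3ClientSketch {
    // Builds a client against an S3-compatible endpoint, e.g. the converted
    // BOS endpoint produced by StorageDesc.tryConvertToS3().
    public static S3Client build(String endpoint, String region, String ak, String sk) {
        return S3Client.builder()
                .endpointOverride(URI.create(endpoint))
                .region(Region.of(region))
                .credentialsProvider(StaticCredentialsProvider.create(
                        AwsBasicCredentials.create(ak, sk)))
                .build();
    }
}
```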
private Map properties = Maps.newHashMap(); + private String name; + private StorageBackend.StorageType type; + private String location; - private BlobStorage() { - // for persist - } - - public BlobStorage(String brokerName, Map properties) { - this.brokerName = brokerName; - this.properties = properties; - } - - public String getBrokerName() { - return brokerName; - } - - public Map getProperties() { - return properties; + public static String clientId() { + return FrontendOptions.getLocalHostAddress() + ":" + Config.edit_log_port; } - public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { - LOG.debug("download from {} to {}, file size: {}.", - remoteFilePath, localFilePath, fileSize); - - long start = System.currentTimeMillis(); - - // 1. get a proper broker - Pair pair = new Pair( - null, null); - Status st = getBroker(pair); - if (!st.ok()) { - return st; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // 2. open file reader with broker - TBrokerFD fd = null; - try { - TBrokerOpenReaderRequest req = new TBrokerOpenReaderRequest(TBrokerVersion.VERSION_ONE, remoteFilePath, - 0, clientId(), properties); - TBrokerOpenReaderResponse rep = client.openReader(req); - TBrokerOperationStatus opst = rep.getOpStatus(); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to open reader on broker " + BrokerUtil.printBroker(brokerName, address) - + " for file: " + remoteFilePath + ". msg: " + opst.getMessage()); - } - - fd = rep.getFd(); - LOG.info("finished to open reader. fd: {}. download {} to {}.", - fd, remoteFilePath, localFilePath); - } catch (TException e) { - return new Status(ErrCode.COMMON_ERROR, - "failed to open reader on broker " + BrokerUtil.printBroker(brokerName, address) - + " for file: " + remoteFilePath + ". msg: " + e.getMessage()); - } - Preconditions.checkNotNull(fd); - - // 3. delete local file if exist - File localFile = new File(localFilePath); - if (localFile.exists()) { - try { - Files.walk(Paths.get(localFilePath), - FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); - } catch (IOException e) { - return new Status(ErrCode.COMMON_ERROR, "failed to delete exist local file: " + localFilePath); - } - } - - // 4. create local file - Status status = Status.OK; - try { - if (!localFile.createNewFile()) { - return new Status(ErrCode.COMMON_ERROR, "failed to create local file: " + localFilePath); - } - } catch (IOException e) { - return new Status(ErrCode.COMMON_ERROR, "failed to create local file: " - + localFilePath + ", msg: " + e.getMessage()); - } - - // 5. read remote file with broker and write to local - String lastErrMsg = null; - try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(localFile))) { - final long bufSize = 1024 * 1024; // 1MB - long leftSize = fileSize; - long readOffset = 0; - while (leftSize > 0) { - long readLen = leftSize > bufSize ? bufSize : leftSize; - TBrokerReadResponse rep = null; - // We only retry if we encounter a timeout thrift exception. - int tryTimes = 0; - while (tryTimes < 3) { - try { - TBrokerPReadRequest req = new TBrokerPReadRequest(TBrokerVersion.VERSION_ONE, - fd, readOffset, readLen); - rep = client.pread(req); - if (rep.getOpStatus().getStatusCode() != TBrokerOperationStatusCode.OK) { - // pread return failure. - lastErrMsg = String.format("failed to read via broker %s. 
" - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s, err code: %d, msg: %s", - BrokerUtil.printBroker(brokerName, address), - readOffset, readLen, fileSize, - remoteFilePath, rep.getOpStatus().getStatusCode(), - rep.getOpStatus().getMessage()); - LOG.warn(lastErrMsg); - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - } - LOG.debug("download. readLen: {}, read data len: {}, left size:{}. total size: {}", - readLen, rep.getData().length, leftSize, fileSize); - break; - } catch (TTransportException e) { - if (e.getType() == TTransportException.TIMED_OUT) { - // we only retry when we encounter timeout exception. - lastErrMsg = String.format("failed to read via broker %s. " - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s, timeout.", - BrokerUtil.printBroker(brokerName, address), - readOffset, readLen, fileSize, - remoteFilePath); - tryTimes++; - continue; - } - - lastErrMsg = String.format("failed to read via broker %s. " - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s. msg: %s", - BrokerUtil.printBroker(brokerName, address), - readOffset, readLen, fileSize, - remoteFilePath, e.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - break; - } catch (TException e) { - lastErrMsg = String.format("failed to read via broker %s. " - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s. msg: %s", - BrokerUtil.printBroker(brokerName, address), - readOffset, readLen, fileSize, - remoteFilePath, e.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } // end of retry loop - - if (status.ok() && tryTimes < 3) { - // read succeed, write to local file - Preconditions.checkNotNull(rep); - // NOTICE(cmy): Sometimes the actual read length does not equal to the expected read length, - // even if the broker's read buffer size is large enough. - // I don't know why, but have to adapt to it. - if (rep.getData().length != readLen) { - LOG.warn("the actual read length does not equal to " - + "the expected read length: {} vs. {}, file: {}, broker: {}", - rep.getData().length, readLen, remoteFilePath, - BrokerUtil.printBroker(brokerName, address)); - } - - out.write(rep.getData()); - readOffset += rep.getData().length; - leftSize -= rep.getData().length; - } else { - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } // end of reading remote file - } catch (IOException e) { - return new Status(ErrCode.COMMON_ERROR, "Got exception: " + e.getMessage() + ", broker: " + - BrokerUtil.printBroker(brokerName, address)) ; - } finally { - // close broker reader - Status closeStatus = closeReader(client, address, fd); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - // we return close write error only if no other error has been encountered. - status = closeStatus; - } - ClientPool.brokerPool.invalidateObject(address, client); - } else { - ClientPool.brokerPool.returnObject(address, client); - } + public static BlobStorage create(String name, StorageBackend.StorageType type, Map properties) { + if (type == StorageBackend.StorageType.S3) { + return new S3Storage(properties); + } else if (type == StorageBackend.StorageType.BROKER) { + return new BrokerStorage(name, properties); + } else { + throw new UnsupportedOperationException(type.toString() + "backend is not implemented"); } - - LOG.info("finished to download from {} to {} with size: {}. 
cost {} ms", remoteFilePath, localFilePath, - fileSize, (System.currentTimeMillis() - start)); - return status; } - // directly upload the content to remote file - public Status directUpload(String content, String remoteFile) { - Status status = Status.OK; - - // 1. get a proper broker - Pair pair = new Pair( - null, null); - status = getBroker(pair); - if (!status.ok()) { - return status; + public static BlobStorage read(DataInput in) throws IOException { + String name = Text.readString(in); + Map properties = Maps.newHashMap(); + StorageBackend.StorageType type = StorageBackend.StorageType.BROKER; + int size = in.readInt(); + for (int i = 0; i < size; i++) { + String key = Text.readString(in); + String value = Text.readString(in); + properties.put(key, value); } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - TBrokerFD fd = new TBrokerFD(); - try { - // 2. open file writer with broker - status = openWriter(client, address, remoteFile, fd); - if (!status.ok()) { - return status; - } - - // 3. write content - try { - ByteBuffer bb = ByteBuffer.wrap(content.getBytes("UTF-8")); - TBrokerPWriteRequest req = new TBrokerPWriteRequest(TBrokerVersion.VERSION_ONE, fd, 0, bb); - TBrokerOperationStatus opst = client.pwrite(req); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - // pwrite return failure. - status = new Status(ErrCode.COMMON_ERROR, "write failed: " + opst.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } - } catch (TException e) { - status = new Status(ErrCode.BAD_CONNECTION, "write exception: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } catch (UnsupportedEncodingException e) { - status = new Status(ErrCode.COMMON_ERROR, "unsupported encoding: " + e.getMessage()); - } - } finally { - Status closeStatus = closeWriter(client, address, fd); - if (closeStatus.getErrCode() == ErrCode.BAD_CONNECTION || status.getErrCode() == ErrCode.BAD_CONNECTION) { - ClientPool.brokerPool.invalidateObject(address, client); - } else { - ClientPool.brokerPool.returnObject(address, client); - } + if (properties.containsKey(STORAGE_TYPE)) { + type = StorageBackend.StorageType.valueOf(properties.get(STORAGE_TYPE)); + properties.remove(STORAGE_TYPE); } - - return status; + return BlobStorage.create(name, type, properties); } - public Status upload(String localPath, String remotePath) { - long start = System.currentTimeMillis(); - - Status status = Status.OK; - - // 1. get a proper broker - Pair pair = new Pair(null, null); - status = getBroker(pair); - if (!status.ok()) { - return status; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // 2. open file write with broker - TBrokerFD fd = new TBrokerFD(); - status = openWriter(client, address, remotePath, fd); - if (!status.ok()) { - return status; - } - - // 3. read local file and write to remote with broker - File localFile = new File(localPath); - long fileLength = localFile.length(); - byte[] readBuf = new byte[1024]; - try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(localFile))) { - // save the last err msg - String lastErrMsg = null; - // save the current write offset of remote file - long writeOffset = 0; - // read local file, 1MB at a time - int bytesRead = 0; - while ((bytesRead = in.read(readBuf)) != -1) { - ByteBuffer bb = ByteBuffer.wrap(readBuf, 0, bytesRead); - - // We only retry if we encounter a timeout thrift exception. 
- int tryTimes = 0; - while (tryTimes < 3) { - try { - TBrokerPWriteRequest req = new TBrokerPWriteRequest(TBrokerVersion.VERSION_ONE, fd, writeOffset, bb); - TBrokerOperationStatus opst = client.pwrite(req); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - // pwrite return failure. - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s, err code: %d, msg: %s", - BrokerUtil.printBroker(brokerName, address), - writeOffset, bytesRead, fileLength, - remotePath, opst.getStatusCode(), opst.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - } - break; - } catch (TTransportException e) { - if (e.getType() == TTransportException.TIMED_OUT) { - // we only retry when we encounter timeout exception. - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s. timeout", - BrokerUtil.printBroker(brokerName, address), - writeOffset, bytesRead, fileLength, - remotePath); - tryTimes++; - continue; - } - - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s. encounter TTransportException: %s", - BrokerUtil.printBroker(brokerName, address), - writeOffset, bytesRead, fileLength, - remotePath, e.getMessage()); - LOG.warn(lastErrMsg, e); - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - break; - } catch (TException e) { - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s. encounter TException: %s", - BrokerUtil.printBroker(brokerName, address), - writeOffset, bytesRead, fileLength, - remotePath, e.getMessage()); - LOG.warn(lastErrMsg, e); - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } - - if (status.ok() && tryTimes < 3) { - // write succeed, update current write offset - writeOffset += bytesRead; - } else { - status = new Status(ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } // end of read local file loop - } catch (FileNotFoundException e1) { - return new Status(ErrCode.COMMON_ERROR, "encounter file not found exception: " + e1.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } catch (IOException e1) { - return new Status(ErrCode.COMMON_ERROR, "encounter io exception: " + e1.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } finally { - // close write - Status closeStatus = closeWriter(client, address, fd); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - // we return close write error only if no other error has been encountered. - status = closeStatus; - } - ClientPool.brokerPool.invalidateObject(address, client); - } else { - ClientPool.brokerPool.returnObject(address, client); - } - } - - if (status.ok()) { - LOG.info("finished to upload {} to remote path {}. cost: {} ms", - localPath, remotePath, (System.currentTimeMillis() - start)); - } - return status; + public String getLocation() { + return location; } - public Status rename(String origFilePath, String destFilePath) { - long start = System.currentTimeMillis(); - Status status = Status.OK; - - // 1. 
get a proper broker - Pair pair = new Pair( - null, null); - status = getBroker(pair); - if (!status.ok()) { - return status; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // 2. rename - boolean needReturn = true; - try { - TBrokerRenamePathRequest req = new TBrokerRenamePathRequest(TBrokerVersion.VERSION_ONE, origFilePath, - destFilePath, properties); - TBrokerOperationStatus ost = client.renamePath(req); - if (ost.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to rename " + origFilePath + " to " + destFilePath + ", msg: " + ost.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } - } catch (TException e) { - needReturn = false; - return new Status(ErrCode.COMMON_ERROR, - "failed to rename " + origFilePath + " to " + destFilePath + ", msg: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - - LOG.info("finished to rename {} to {}. cost: {} ms", - origFilePath, destFilePath, (System.currentTimeMillis() - start)); - return Status.OK; + public void setLocation(String location) { + this.location = location; } - public Status delete(String remotePath) { - // get a proper broker - Pair pair = new Pair( - null, null); - Status st = getBroker(pair); - if (!st.ok()) { - return st; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // delete - boolean needReturn = true; - try { - TBrokerDeletePathRequest req = new TBrokerDeletePathRequest(TBrokerVersion.VERSION_ONE, remotePath, - properties); - TBrokerOperationStatus opst = client.deletePath(req); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to delete remote path: " + remotePath + ". msg: " + opst.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } - - LOG.info("finished to delete remote path {}.", remotePath); - } catch (TException e) { - needReturn = false; - return new Status(ErrCode.COMMON_ERROR, - "failed to delete remote path: " + remotePath + ". msg: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - - return Status.OK; + public String getName() { + return name; } - // List files in remotePath - // The remote file name will only contains file name only(Not full path) - public Status list(String remotePath, List result) { - // get a proper broker - Pair pair = new Pair(null, null); - Status st = getBroker(pair); - if (!st.ok()) { - return st; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // list - boolean needReturn = true; - try { - TBrokerListPathRequest req = new TBrokerListPathRequest(TBrokerVersion.VERSION_ONE, remotePath, - false /* not recursive */, properties); - req.setFileNameOnly(true); - TBrokerListResponse rep = client.listPath(req); - TBrokerOperationStatus opst = rep.getOpStatus(); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to list remote path: " + remotePath + ". 
msg: " + opst.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } - - List fileStatus = rep.getFiles(); - for (TBrokerFileStatus tFile : fileStatus) { - RemoteFile file = new RemoteFile(tFile.path, !tFile.isDir, tFile.size); - result.add(file); - } - LOG.info("finished to list remote path {}. get files: {}", remotePath, result); - } catch (TException e) { - needReturn = false; - return new Status(ErrCode.COMMON_ERROR, - "failed to list remote path: " + remotePath + ". msg: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(brokerName, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - - return Status.OK; + public void setName(String name) { + this.name = name; } - public Status makeDir(String remotePath) { - // 1. get a proper broker - Pair pair = new Pair( - null, null); - Status st = getBroker(pair); - if (!st.ok()) { - return st; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // TODO: mkdir - return Status.OK; + public StorageBackend.StorageType getType() { + return type; } - public Status checkPathExist(String remotePath) { - // 1. get a proper broker - Pair pair = new Pair( - null, null); - Status st = getBroker(pair); - if (!st.ok()) { - return st; - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // check path - boolean needReturn = true; - try { - TBrokerCheckPathExistRequest req = new TBrokerCheckPathExistRequest(TBrokerVersion.VERSION_ONE, - remotePath, properties); - TBrokerCheckPathExistResponse rep = client.checkPathExist(req); - TBrokerOperationStatus opst = rep.getOpStatus(); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to check remote path exist: " + remotePath - + ", broker: " + BrokerUtil.printBroker(brokerName, address) - + ". msg: " + opst.getMessage()); - } - - if (!rep.isIsPathExist()) { - return new Status(ErrCode.NOT_FOUND, "remote path does not exist: " + remotePath); - } - - return Status.OK; - } catch (TException e) { - needReturn = false; - return new Status(ErrCode.COMMON_ERROR, - "failed to check remote path exist: " + remotePath - + ", broker: " + BrokerUtil.printBroker(brokerName, address) - + ". 
msg: " + e.getMessage()); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } + public void setType(StorageBackend.StorageType type) { + this.type = type; } - public static String clientId() { - return FrontendOptions.getLocalHostAddress() + ":" + Config.edit_log_port; + public Map getProperties() { + return properties; } - private Status getBroker(Pair result) { - FsBroker broker = null; - try { - String localIP = FrontendOptions.getLocalHostAddress(); - broker = Catalog.getCurrentCatalog().getBrokerMgr().getBroker(brokerName, localIP); - } catch (AnalysisException e) { - return new Status(ErrCode.COMMON_ERROR, "failed to get a broker address: " + e.getMessage()); - } - TNetworkAddress address = new TNetworkAddress(broker.ip, broker.port); - TPaloBrokerService.Client client = null; - try { - client = ClientPool.brokerPool.borrowObject(address); - } catch (Exception e) { - return new Status(ErrCode.COMMON_ERROR, "failed to get broker client: " + e.getMessage()); - } - - result.first = client; - result.second = address; - LOG.info("get broker: {}", BrokerUtil.printBroker(brokerName, address)); - return Status.OK; + public void setProperties(Map properties) { + this.properties = properties; } - private Status openWriter(TPaloBrokerService.Client client, TNetworkAddress address, String remoteFile, - TBrokerFD fd) { - try { - TBrokerOpenWriterRequest req = new TBrokerOpenWriterRequest(TBrokerVersion.VERSION_ONE, - remoteFile, TBrokerOpenMode.APPEND, clientId(), properties); - TBrokerOpenWriterResponse rep = client.openWriter(req); - TBrokerOperationStatus opst = rep.getOpStatus(); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to open writer on broker " + BrokerUtil.printBroker(brokerName, address) - + " for file: " + remoteFile + ". msg: " + opst.getMessage()); - } + public abstract Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize); - fd.setHigh(rep.getFd().getHigh()); - fd.setLow(rep.getFd().getLow()); - LOG.info("finished to open writer. fd: {}. directly upload to remote path {}.", - fd, remoteFile); - } catch (TException e) { - return new Status(ErrCode.BAD_CONNECTION, - "failed to open writer on broker " + BrokerUtil.printBroker(brokerName, address) - + ", err: " + e.getMessage()); - } + // directly upload the content to remote file + public abstract Status directUpload(String content, String remoteFile); - return Status.OK; - } + public abstract Status upload(String localPath, String remotePath); - private Status closeWriter(TPaloBrokerService.Client client, TNetworkAddress address, TBrokerFD fd) { - try { - TBrokerCloseWriterRequest req = new TBrokerCloseWriterRequest(TBrokerVersion.VERSION_ONE, fd); - TBrokerOperationStatus st = client.closeWriter(req); - if (st.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to close writer on broker " + BrokerUtil.printBroker(brokerName, address) - + " for fd: " + fd); - } + public abstract Status rename(String origFilePath, String destFilePath); - LOG.info("finished to close writer. 
fd: {}.", fd); - } catch (TException e) { - return new Status(ErrCode.BAD_CONNECTION, - "failed to close writer on broker " + BrokerUtil.printBroker(brokerName, address) - + ", fd " + fd + ", msg: " + e.getMessage()); - } + public abstract Status delete(String remotePath); - return Status.OK; - } + // List files in remotePath + // The remote file name will only contains file name only(Not full path) + public abstract Status list(String remotePath, List result); - private Status closeReader(TPaloBrokerService.Client client, TNetworkAddress address, TBrokerFD fd) { - try { - TBrokerCloseReaderRequest req = new TBrokerCloseReaderRequest(TBrokerVersion.VERSION_ONE, fd); - TBrokerOperationStatus st = client.closeReader(req); - if (st.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(ErrCode.COMMON_ERROR, - "failed to close reader on broker " + BrokerUtil.printBroker(brokerName, address) - + " for fd: " + fd); - } + public abstract Status makeDir(String remotePath); - LOG.info("finished to close reader. fd: {}.", fd); - } catch (TException e) { - return new Status(ErrCode.BAD_CONNECTION, - "failed to close reader on broker " + BrokerUtil.printBroker(brokerName, address) - + ", fd " + fd + ", msg: " + e.getMessage()); - } + public abstract Status checkPathExist(String remotePath); - return Status.OK; - } - - public static BlobStorage read(DataInput in) throws IOException { - BlobStorage blobStorage = new BlobStorage(); - blobStorage.readFields(in); - return blobStorage; - } + public abstract StorageBackend.StorageType getStorageType(); @Override public void write(DataOutput out) throws IOException { // must write type first - Text.writeString(out, brokerName); - - out.writeInt(properties.size()); - for (Map.Entry entry : properties.entrySet()) { + Text.writeString(out, name); + properties.put(STORAGE_TYPE, type.name()); + out.writeInt(getProperties().size()); + for (Map.Entry entry : getProperties().entrySet()) { Text.writeString(out, entry.getKey()); Text.writeString(out, entry.getValue()); } } - public void readFields(DataInput in) throws IOException { - brokerName = Text.readString(in); - - // properties - int size = in.readInt(); - for (int i = 0; i < size; i++) { - String key = Text.readString(in); - String value = Text.readString(in); - properties.put(key, value); - } - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BrokerStorage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BrokerStorage.java new file mode 100644 index 00000000000000..1200e268ecbd5d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BrokerStorage.java @@ -0,0 +1,701 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.backup; + +import org.apache.doris.analysis.StorageBackend; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.FsBroker; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ClientPool; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.BrokerUtil; +import org.apache.doris.service.FrontendOptions; +import org.apache.doris.thrift.TBrokerCheckPathExistRequest; +import org.apache.doris.thrift.TBrokerCheckPathExistResponse; +import org.apache.doris.thrift.TBrokerCloseReaderRequest; +import org.apache.doris.thrift.TBrokerCloseWriterRequest; +import org.apache.doris.thrift.TBrokerDeletePathRequest; +import org.apache.doris.thrift.TBrokerFD; +import org.apache.doris.thrift.TBrokerFileStatus; +import org.apache.doris.thrift.TBrokerListPathRequest; +import org.apache.doris.thrift.TBrokerListResponse; +import org.apache.doris.thrift.TBrokerOpenMode; +import org.apache.doris.thrift.TBrokerOpenReaderRequest; +import org.apache.doris.thrift.TBrokerOpenReaderResponse; +import org.apache.doris.thrift.TBrokerOpenWriterRequest; +import org.apache.doris.thrift.TBrokerOpenWriterResponse; +import org.apache.doris.thrift.TBrokerOperationStatus; +import org.apache.doris.thrift.TBrokerOperationStatusCode; +import org.apache.doris.thrift.TBrokerPReadRequest; +import org.apache.doris.thrift.TBrokerPWriteRequest; +import org.apache.doris.thrift.TBrokerReadResponse; +import org.apache.doris.thrift.TBrokerRenamePathRequest; +import org.apache.doris.thrift.TBrokerVersion; +import org.apache.doris.thrift.TNetworkAddress; +import org.apache.doris.thrift.TPaloBrokerService; + +import com.google.common.base.Preconditions; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.thrift.TException; +import org.apache.thrift.transport.TTransportException; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileVisitOption; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +public class BrokerStorage extends BlobStorage { + private static final Logger LOG = LogManager.getLogger(BrokerStorage.class); + + public BrokerStorage(String brokerName, Map properties) { + setName(brokerName); + setProperties(properties); + setType(StorageBackend.StorageType.BROKER); + } + + public String getBrokerName() { + return getName(); + } + + @Override + public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { + LOG.debug("download from {} to {}, file size: {}.", + remoteFilePath, localFilePath, fileSize); + + long start = System.currentTimeMillis(); + + // 1. get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + // 2. 
open file reader with broker + TBrokerFD fd; + try { + TBrokerOpenReaderRequest req = new TBrokerOpenReaderRequest(TBrokerVersion.VERSION_ONE, remoteFilePath, + 0, clientId(), getProperties()); + TBrokerOpenReaderResponse rep = client.openReader(req); + TBrokerOperationStatus opst = rep.getOpStatus(); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to open reader on broker " + BrokerUtil.printBroker(getName(), address) + + " for file: " + remoteFilePath + ". msg: " + opst.getMessage()); + } + + fd = rep.getFd(); + LOG.info("finished to open reader. fd: {}. download {} to {}.", + fd, remoteFilePath, localFilePath); + } catch (TException e) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to open reader on broker " + BrokerUtil.printBroker(getName(), address) + + " for file: " + remoteFilePath + ". msg: " + e.getMessage()); + } + Preconditions.checkNotNull(fd); + + // 3. delete local file if exist + File localFile = new File(localFilePath); + if (localFile.exists()) { + try { + Files.walk(Paths.get(localFilePath), + FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } catch (IOException e) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to delete exist local file: " + localFilePath); + } + } + + // 4. create local file + Status status = Status.OK; + try { + if (!localFile.createNewFile()) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to create local file: " + localFilePath); + } + } catch (IOException e) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to create local file: " + + localFilePath + ", msg: " + e.getMessage()); + } + + // 5. read remote file with broker and write to local + String lastErrMsg = null; + try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(localFile))) { + final long bufSize = 1024 * 1024; // 1MB + long leftSize = fileSize; + long readOffset = 0; + while (leftSize > 0) { + long readLen = Math.min(leftSize, bufSize); + TBrokerReadResponse rep = null; + // We only retry if we encounter a timeout thrift exception. + int tryTimes = 0; + while (tryTimes < 3) { + try { + TBrokerPReadRequest req = new TBrokerPReadRequest(TBrokerVersion.VERSION_ONE, + fd, readOffset, readLen); + rep = client.pread(req); + if (rep.getOpStatus().getStatusCode() != TBrokerOperationStatusCode.OK) { + // pread return failure. + lastErrMsg = String.format("failed to read via broker %s. " + + "current read offset: %d, read length: %d," + + " file size: %d, file: %s, err code: %d, msg: %s", + BrokerUtil.printBroker(getName(), address), + readOffset, readLen, fileSize, + remoteFilePath, rep.getOpStatus().getStatusCode().getValue(), + rep.getOpStatus().getMessage()); + LOG.warn(lastErrMsg); + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + } + if (rep.opStatus.statusCode != TBrokerOperationStatusCode.END_OF_FILE) { + LOG.debug("download. readLen: {}, read data len: {}, left size:{}. total size: {}", + readLen, rep.getData().length, leftSize, fileSize); + } else { + LOG.debug("read eof: " + remoteFilePath); + } + break; + } catch (TTransportException e) { + if (e.getType() == TTransportException.TIMED_OUT) { + // we only retry when we encounter timeout exception. + lastErrMsg = String.format("failed to read via broker %s. 
" + + "current read offset: %d, read length: %d," + + " file size: %d, file: %s, timeout.", + BrokerUtil.printBroker(getName(), address), + readOffset, readLen, fileSize, + remoteFilePath); + tryTimes++; + continue; + } + + lastErrMsg = String.format("failed to read via broker %s. " + + "current read offset: %d, read length: %d," + + " file size: %d, file: %s. msg: %s", + BrokerUtil.printBroker(getName(), address), + readOffset, readLen, fileSize, + remoteFilePath, e.getMessage()); + LOG.warn(lastErrMsg); + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + break; + } catch (TException e) { + lastErrMsg = String.format("failed to read via broker %s. " + + "current read offset: %d, read length: %d," + + " file size: %d, file: %s. msg: %s", + BrokerUtil.printBroker(getName(), address), + readOffset, readLen, fileSize, + remoteFilePath, e.getMessage()); + LOG.warn(lastErrMsg); + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + break; + } + } // end of retry loop + + if (status.ok() && tryTimes < 3) { + // read succeed, write to local file + Preconditions.checkNotNull(rep); + // NOTICE(cmy): Sometimes the actual read length does not equal to the expected read length, + // even if the broker's read buffer size is large enough. + // I don't know why, but have to adapt to it. + if (rep.getData().length != readLen) { + LOG.warn("the actual read length does not equal to " + + "the expected read length: {} vs. {}, file: {}, broker: {}", + rep.getData().length, readLen, remoteFilePath, + BrokerUtil.printBroker(getName(), address)); + } + + out.write(rep.getData()); + readOffset += rep.getData().length; + leftSize -= rep.getData().length; + } else { + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + break; + } + } // end of reading remote file + } catch (IOException e) { + return new Status(Status.ErrCode.COMMON_ERROR, "Got exception: " + e.getMessage() + ", broker: " + + BrokerUtil.printBroker(getName(), address)); + } finally { + // close broker reader + Status closeStatus = closeReader(client, address, fd); + if (!closeStatus.ok()) { + LOG.warn(closeStatus.getErrMsg()); + if (status.ok()) { + // we return close write error only if no other error has been encountered. + status = closeStatus; + } + ClientPool.brokerPool.invalidateObject(address, client); + } else { + ClientPool.brokerPool.returnObject(address, client); + } + } + + LOG.info("finished to download from {} to {} with size: {}. cost {} ms", remoteFilePath, localFilePath, + fileSize, (System.currentTimeMillis() - start)); + return status; + } + + // directly upload the content to remote file + @Override + public Status directUpload(String content, String remoteFile) { + // 1. get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + TBrokerFD fd = new TBrokerFD(); + Status status = Status.OK; + try { + // 2. open file writer with broker + status = openWriter(client, address, remoteFile, fd); + if (!status.ok()) { + return status; + } + + // 3. write content + try { + ByteBuffer bb = ByteBuffer.wrap(content.getBytes(StandardCharsets.UTF_8)); + TBrokerPWriteRequest req = new TBrokerPWriteRequest(TBrokerVersion.VERSION_ONE, fd, 0, bb); + TBrokerOperationStatus opst = client.pwrite(req); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + // pwrite return failure. 
+ status = new Status(Status.ErrCode.COMMON_ERROR, "write failed: " + opst.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } + } catch (TException e) { + status = new Status(Status.ErrCode.BAD_CONNECTION, "write exception: " + e.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } + } finally { + Status closeStatus = closeWriter(client, address, fd); + if (closeStatus.getErrCode() == Status.ErrCode.BAD_CONNECTION || status.getErrCode() == Status.ErrCode.BAD_CONNECTION) { + ClientPool.brokerPool.invalidateObject(address, client); + } else { + ClientPool.brokerPool.returnObject(address, client); + } + } + + return status; + } + + @Override + public Status upload(String localPath, String remotePath) { + long start = System.currentTimeMillis(); + // 1. get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + // 2. open file write with broker + TBrokerFD fd = new TBrokerFD(); + Status status = openWriter(client, address, remotePath, fd); + if (!status.ok()) { + return status; + } + + // 3. read local file and write to remote with broker + File localFile = new File(localPath); + long fileLength = localFile.length(); + byte[] readBuf = new byte[1024]; + try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(localFile))) { + // save the last err msg + String lastErrMsg = null; + // save the current write offset of remote file + long writeOffset = 0; + // read local file, 1MB at a time + int bytesRead; + while ((bytesRead = in.read(readBuf)) != -1) { + ByteBuffer bb = ByteBuffer.wrap(readBuf, 0, bytesRead); + + // We only retry if we encounter a timeout thrift exception. + int tryTimes = 0; + while (tryTimes < 3) { + try { + TBrokerPWriteRequest req = new TBrokerPWriteRequest(TBrokerVersion.VERSION_ONE, fd, writeOffset, bb); + TBrokerOperationStatus opst = client.pwrite(req); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + // pwrite return failure. + lastErrMsg = String.format("failed to write via broker %s. " + + "current write offset: %d, write length: %d," + + " file length: %d, file: %s, err code: %d, msg: %s", + BrokerUtil.printBroker(getName(), address), + writeOffset, bytesRead, fileLength, + remotePath, opst.getStatusCode().getValue(), opst.getMessage()); + LOG.warn(lastErrMsg); + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + } + break; + } catch (TTransportException e) { + if (e.getType() == TTransportException.TIMED_OUT) { + // we only retry when we encounter timeout exception. + lastErrMsg = String.format("failed to write via broker %s. " + + "current write offset: %d, write length: %d," + + " file length: %d, file: %s. timeout", + BrokerUtil.printBroker(getName(), address), + writeOffset, bytesRead, fileLength, + remotePath); + tryTimes++; + continue; + } + + lastErrMsg = String.format("failed to write via broker %s. " + + "current write offset: %d, write length: %d," + + " file length: %d, file: %s. encounter TTransportException: %s", + BrokerUtil.printBroker(getName(), address), + writeOffset, bytesRead, fileLength, + remotePath, e.getMessage()); + LOG.warn(lastErrMsg, e); + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + break; + } catch (TException e) { + lastErrMsg = String.format("failed to write via broker %s. 
" + + "current write offset: %d, write length: %d," + + " file length: %d, file: %s. encounter TException: %s", + BrokerUtil.printBroker(getName(), address), + writeOffset, bytesRead, fileLength, + remotePath, e.getMessage()); + LOG.warn(lastErrMsg, e); + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + break; + } + } + + if (status.ok() && tryTimes < 3) { + // write succeed, update current write offset + writeOffset += bytesRead; + } else { + status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); + break; + } + } // end of read local file loop + } catch (FileNotFoundException e1) { + return new Status(Status.ErrCode.COMMON_ERROR, "encounter file not found exception: " + e1.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } catch (IOException e1) { + return new Status(Status.ErrCode.COMMON_ERROR, "encounter io exception: " + e1.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } finally { + // close write + Status closeStatus = closeWriter(client, address, fd); + if (!closeStatus.ok()) { + LOG.warn(closeStatus.getErrMsg()); + if (status.ok()) { + // we return close write error only if no other error has been encountered. + status = closeStatus; + } + ClientPool.brokerPool.invalidateObject(address, client); + } else { + ClientPool.brokerPool.returnObject(address, client); + } + } + + if (status.ok()) { + LOG.info("finished to upload {} to remote path {}. cost: {} ms", + localPath, remotePath, (System.currentTimeMillis() - start)); + } + return status; + } + + @Override + public Status rename(String origFilePath, String destFilePath) { + long start = System.currentTimeMillis(); + // 1. get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + // 2. rename + boolean needReturn = true; + try { + TBrokerRenamePathRequest req = new TBrokerRenamePathRequest(TBrokerVersion.VERSION_ONE, origFilePath, + destFilePath, getProperties()); + TBrokerOperationStatus ost = client.renamePath(req); + if (ost.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to rename " + origFilePath + " to " + destFilePath + ", msg: " + ost.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } + } catch (TException e) { + needReturn = false; + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to rename " + origFilePath + " to " + destFilePath + ", msg: " + e.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } finally { + if (needReturn) { + ClientPool.brokerPool.returnObject(address, client); + } else { + ClientPool.brokerPool.invalidateObject(address, client); + } + } + + LOG.info("finished to rename {} to {}. 
cost: {} ms", + origFilePath, destFilePath, (System.currentTimeMillis() - start)); + return Status.OK; + } + + @Override + public Status delete(String remotePath) { + // get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + // delete + boolean needReturn = true; + try { + TBrokerDeletePathRequest req = new TBrokerDeletePathRequest(TBrokerVersion.VERSION_ONE, remotePath, + getProperties()); + TBrokerOperationStatus opst = client.deletePath(req); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to delete remote path: " + remotePath + ". msg: " + opst.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } + + LOG.info("finished to delete remote path {}.", remotePath); + } catch (TException e) { + needReturn = false; + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to delete remote path: " + remotePath + ". msg: " + e.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } finally { + if (needReturn) { + ClientPool.brokerPool.returnObject(address, client); + } else { + ClientPool.brokerPool.invalidateObject(address, client); + } + } + + return Status.OK; + } + + // List files in remotePath + // The remote file name will only contains file name only(Not full path) + @Override + public Status list(String remotePath, List result) { + // get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + // list + boolean needReturn = true; + try { + TBrokerListPathRequest req = new TBrokerListPathRequest(TBrokerVersion.VERSION_ONE, remotePath, + false /* not recursive */, getProperties()); + req.setFileNameOnly(true); + TBrokerListResponse rep = client.listPath(req); + TBrokerOperationStatus opst = rep.getOpStatus(); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to list remote path: " + remotePath + ". msg: " + opst.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } + + List fileStatus = rep.getFiles(); + for (TBrokerFileStatus tFile : fileStatus) { + RemoteFile file = new RemoteFile(tFile.path, !tFile.isDir, tFile.size); + result.add(file); + } + LOG.info("finished to list remote path {}. get files: {}", remotePath, result); + } catch (TException e) { + needReturn = false; + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to list remote path: " + remotePath + ". msg: " + e.getMessage() + + ", broker: " + BrokerUtil.printBroker(getName(), address)); + } finally { + if (needReturn) { + ClientPool.brokerPool.returnObject(address, client); + } else { + ClientPool.brokerPool.invalidateObject(address, client); + } + } + + return Status.OK; + } + + @Override + public Status makeDir(String remotePath) { + return new Status(Status.ErrCode.COMMON_ERROR, "mkdir is not implemented."); + } + + @Override + public Status checkPathExist(String remotePath) { + // 1. 
get a proper broker + Pair pair = getBroker(); + if (pair == null) { + return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); + } + TPaloBrokerService.Client client = pair.first; + TNetworkAddress address = pair.second; + + // check path + boolean needReturn = true; + try { + TBrokerCheckPathExistRequest req = new TBrokerCheckPathExistRequest(TBrokerVersion.VERSION_ONE, + remotePath, getProperties()); + TBrokerCheckPathExistResponse rep = client.checkPathExist(req); + TBrokerOperationStatus opst = rep.getOpStatus(); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to check remote path exist: " + remotePath + + ", broker: " + BrokerUtil.printBroker(getName(), address) + + ". msg: " + opst.getMessage()); + } + + if (!rep.isIsPathExist()) { + return new Status(Status.ErrCode.NOT_FOUND, "remote path does not exist: " + remotePath); + } + + return Status.OK; + } catch (TException e) { + needReturn = false; + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to check remote path exist: " + remotePath + + ", broker: " + BrokerUtil.printBroker(getName(), address) + + ". msg: " + e.getMessage()); + } finally { + if (needReturn) { + ClientPool.brokerPool.returnObject(address, client); + } else { + ClientPool.brokerPool.invalidateObject(address, client); + } + } + } + + @Override + public StorageBackend.StorageType getStorageType() { + return StorageBackend.StorageType.BROKER; + } + + public Pair getBroker() { + Pair result = new Pair<>(null, null); + FsBroker broker; + try { + String localIP = FrontendOptions.getLocalHostAddress(); + broker = Catalog.getCurrentCatalog().getBrokerMgr().getBroker(getName(), localIP); + } catch (AnalysisException e) { + LOG.warn("failed to get a broker address: " + e.getMessage()); + return null; + } + TNetworkAddress address = new TNetworkAddress(broker.ip, broker.port); + TPaloBrokerService.Client client; + try { + client = ClientPool.brokerPool.borrowObject(address); + } catch (Exception e) { + LOG.warn("failed to get broker client: " + e.getMessage()); + return null; + } + + result.first = client; + result.second = address; + LOG.info("get broker: {}", BrokerUtil.printBroker(getName(), address)); + return result; + } + + private Status openWriter(TPaloBrokerService.Client client, TNetworkAddress address, String remoteFile, + TBrokerFD fd) { + try { + TBrokerOpenWriterRequest req = new TBrokerOpenWriterRequest(TBrokerVersion.VERSION_ONE, + remoteFile, TBrokerOpenMode.APPEND, clientId(), getProperties()); + TBrokerOpenWriterResponse rep = client.openWriter(req); + TBrokerOperationStatus opst = rep.getOpStatus(); + if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to open writer on broker " + BrokerUtil.printBroker(getName(), address) + + " for file: " + remoteFile + ". msg: " + opst.getMessage()); + } + + fd.setHigh(rep.getFd().getHigh()); + fd.setLow(rep.getFd().getLow()); + LOG.info("finished to open writer. fd: {}. 
directly upload to remote path {}.", + fd, remoteFile); + } catch (TException e) { + return new Status(Status.ErrCode.BAD_CONNECTION, + "failed to open writer on broker " + BrokerUtil.printBroker(getName(), address) + + ", err: " + e.getMessage()); + } + + return Status.OK; + } + + private Status closeWriter(TPaloBrokerService.Client client, TNetworkAddress address, TBrokerFD fd) { + try { + TBrokerCloseWriterRequest req = new TBrokerCloseWriterRequest(TBrokerVersion.VERSION_ONE, fd); + TBrokerOperationStatus st = client.closeWriter(req); + if (st.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to close writer on broker " + BrokerUtil.printBroker(getName(), address) + + " for fd: " + fd); + } + + LOG.info("finished to close writer. fd: {}.", fd); + } catch (TException e) { + return new Status(Status.ErrCode.BAD_CONNECTION, + "failed to close writer on broker " + BrokerUtil.printBroker(getName(), address) + + ", fd " + fd + ", msg: " + e.getMessage()); + } + + return Status.OK; + } + + private Status closeReader(TPaloBrokerService.Client client, TNetworkAddress address, TBrokerFD fd) { + try { + TBrokerCloseReaderRequest req = new TBrokerCloseReaderRequest(TBrokerVersion.VERSION_ONE, fd); + TBrokerOperationStatus st = client.closeReader(req); + if (st.getStatusCode() != TBrokerOperationStatusCode.OK) { + return new Status(Status.ErrCode.COMMON_ERROR, + "failed to close reader on broker " + BrokerUtil.printBroker(getName(), address) + + " for fd: " + fd); + } + + LOG.info("finished to close reader. fd: {}.", fd); + } catch (TException e) { + return new Status(Status.ErrCode.BAD_CONNECTION, + "failed to close reader on broker " + BrokerUtil.printBroker(getName(), address) + + ", fd " + fd + ", msg: " + e.getMessage()); + } + + return Status.OK; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java b/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java index a624f5d0c32520..ae8ce5c6d72099 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java @@ -17,6 +17,9 @@ package org.apache.doris.backup; +import org.apache.commons.codec.digest.DigestUtils; + +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.Status.ErrCode; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.FsBroker; @@ -27,13 +30,6 @@ import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.system.Backend; - -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; - -import org.apache.commons.codec.digest.DigestUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.JSONObject; @@ -50,10 +46,15 @@ import java.text.SimpleDateFormat; import java.util.List; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; + /* * Repository represents a remote storage for backup to or restore from * File organization in repository is: - * + * * * __palo_repository_repo_name/ * * __repo_info * * __ss_my_ss1/ @@ -76,8 +77,6 @@ * * __10023.hdr.dnmwDDWI92dDko */ public class Repository implements Writable { - private static final Logger LOG = LogManager.getLogger(Repository.class); - 
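That completes BrokerStorage. Every method in it follows the same pool discipline: borrow a thrift client from ClientPool.brokerPool, perform the RPC, then return the client on success or invalidate it once a TException has left the connection in an unknown state. Condensed into one sketch (BrokerRpc is a hypothetical functional interface standing in for listPath, deletePath, renamePath and friends):

```java
import org.apache.doris.common.ClientPool;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TPaloBrokerService;

import org.apache.thrift.TException;

// Condensed sketch of the borrow/return/invalidate pattern used throughout
// BrokerStorage. A client that threw may hold a broken connection, so it is
// invalidated rather than returned to the pool.
final class BrokerClientHelper {
    interface BrokerRpc {
        void call(TPaloBrokerService.Client client) throws TException;
    }

    static void withClient(TNetworkAddress address, BrokerRpc rpc) throws Exception {
        TPaloBrokerService.Client client = ClientPool.brokerPool.borrowObject(address);
        boolean needReturn = true;
        try {
            rpc.call(client);
        } catch (TException e) {
            needReturn = false;    // connection state unknown: do not reuse
            throw e;
        } finally {
            if (needReturn) {
                ClientPool.brokerPool.returnObject(address, client);
            } else {
                ClientPool.brokerPool.invalidateObject(address, client);
            }
        }
    }
}
```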
public static final String PREFIX_REPO = "__palo_repository_"; public static final String PREFIX_SNAPSHOT_DIR = "__ss_"; public static final String PREFIX_DB = "__db_"; @@ -86,14 +85,11 @@ public class Repository implements Writable { public static final String PREFIX_IDX = "__idx_"; public static final String PREFIX_COMMON = "__"; public static final String PREFIX_JOB_INFO = "__info_"; - public static final String SUFFIX_TMP_FILE = "part"; - public static final String FILE_REPO_INFO = "__repo_info"; public static final String FILE_META_INFO = "__meta"; - public static final String DIR_SNAPSHOT_CONTENT = "__ss_content"; - + private static final Logger LOG = LogManager.getLogger(Repository.class); private static final String PATH_DELIMITER = "/"; private static final String CHECKSUM_SEPARATOR = "."; @@ -124,6 +120,58 @@ public Repository(long id, String name, boolean isReadOnly, String location, Blo this.createTime = System.currentTimeMillis(); } + // join job info file name with timestamp + // eg: __info_2018-01-01-08-00-00 + private static String jobInfoFileNameWithTimestamp(long createTime) { + if (createTime == -1) { + return PREFIX_JOB_INFO; + } else { + return PREFIX_JOB_INFO + + TimeUtils.longToTimeString(createTime, new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")); + } + } + + // join the name with specified prefix + private static String joinPrefix(String prefix, Object name) { + return prefix + name; + } + + // disjoint the name with specified prefix + private static String disjoinPrefix(String prefix, String nameWithPrefix) { + return nameWithPrefix.substring(prefix.length()); + } + + private static String assembleFileNameWithSuffix(String filePath, String md5sum) { + return filePath + CHECKSUM_SEPARATOR + md5sum; + } + + public static Pair decodeFileNameWithChecksum(String fileNameWithChecksum) { + int index = fileNameWithChecksum.lastIndexOf(CHECKSUM_SEPARATOR); + if (index == -1) { + return null; + } + String fileName = fileNameWithChecksum.substring(0, index); + String md5sum = fileNameWithChecksum.substring(index + CHECKSUM_SEPARATOR.length()); + + if (md5sum.length() != 32) { + return null; + } + + return Pair.create(fileName, md5sum); + } + + // in: /path/to/orig_file + // out: /path/to/orig_file.BUWDnl831e4nldsf + public static String replaceFileNameWithChecksumFileName(String origPath, String fileNameWithChecksum) { + return origPath.substring(0, origPath.lastIndexOf(PATH_DELIMITER) + 1) + fileNameWithChecksum; + } + + public static Repository read(DataInput in) throws IOException { + Repository repo = new Repository(); + repo.readFields(in); + return repo; + } + public long getId() { return id; } @@ -166,7 +214,7 @@ public Status initRepository() { if (!remoteFile.isFile()) { return new Status(ErrCode.COMMON_ERROR, "the existing repo info is not a file"); } - + // exist, download and parse the repo info file String localFilePath = BackupHandler.BACKUP_ROOT_DIR + "/tmp_info_" + System.currentTimeMillis(); try { @@ -182,7 +230,7 @@ public Status initRepository() { createTime = TimeUtils.timeStringToLong((String) root.get("create_time")); if (createTime == -1) { return new Status(ErrCode.COMMON_ERROR, - "failed to parse create time of repository: " + (String) root.get("create_time")); + "failed to parse create time of repository: " + root.get("create_time")); } return Status.OK; @@ -192,7 +240,7 @@ public Status initRepository() { File localFile = new File(localFilePath); localFile.delete(); } - + } else if (remoteFiles.size() > 1) { return new Status(ErrCode.COMMON_ERROR, 
"Invalid repository dir. expected one repo info file. get more: " + remoteFiles); @@ -209,49 +257,49 @@ public Status initRepository() { // eg: location/__palo_repository_repo_name/__repo_info public String assembleRepoInfoFilePath() { return Joiner.on(PATH_DELIMITER).join(location, - joinPrefix(PREFIX_REPO, name), - FILE_REPO_INFO); + joinPrefix(PREFIX_REPO, name), + FILE_REPO_INFO); } // eg: location/__palo_repository_repo_name/__my_sp1/__meta public String assembleMetaInfoFilePath(String label) { return Joiner.on(PATH_DELIMITER).join(location, joinPrefix(PREFIX_REPO, name), - joinPrefix(PREFIX_SNAPSHOT_DIR, label), - FILE_META_INFO); + joinPrefix(PREFIX_SNAPSHOT_DIR, label), + FILE_META_INFO); } // eg: location/__palo_repository_repo_name/__my_sp1/__info_2018-01-01-08-00-00 public String assembleJobInfoFilePath(String label, long createTime) { return Joiner.on(PATH_DELIMITER).join(location, joinPrefix(PREFIX_REPO, name), - joinPrefix(PREFIX_SNAPSHOT_DIR, label), - jobInfoFileNameWithTimestamp(createTime)); + joinPrefix(PREFIX_SNAPSHOT_DIR, label), + jobInfoFileNameWithTimestamp(createTime)); } // eg: // __palo_repository_repo_name/__ss_my_ss1/__ss_content/__db_10001/__tbl_10020/__part_10031/__idx_10020/__10022/ public String getRepoTabletPathBySnapshotInfo(String label, SnapshotInfo info) { return Joiner.on(PATH_DELIMITER).join(location, joinPrefix(PREFIX_REPO, name), - joinPrefix(PREFIX_SNAPSHOT_DIR, label), - DIR_SNAPSHOT_CONTENT, - joinPrefix(PREFIX_DB, info.getDbId()), - joinPrefix(PREFIX_TBL, info.getTblId()), - joinPrefix(PREFIX_PART, info.getPartitionId()), - joinPrefix(PREFIX_IDX, info.getIndexId()), - joinPrefix(PREFIX_COMMON, info.getTabletId())); + joinPrefix(PREFIX_SNAPSHOT_DIR, label), + DIR_SNAPSHOT_CONTENT, + joinPrefix(PREFIX_DB, info.getDbId()), + joinPrefix(PREFIX_TBL, info.getTblId()), + joinPrefix(PREFIX_PART, info.getPartitionId()), + joinPrefix(PREFIX_IDX, info.getIndexId()), + joinPrefix(PREFIX_COMMON, info.getTabletId())); } public String getRepoPath(String label, String childPath) { return Joiner.on(PATH_DELIMITER).join(location, joinPrefix(PREFIX_REPO, name), - joinPrefix(PREFIX_SNAPSHOT_DIR, label), - DIR_SNAPSHOT_CONTENT, - childPath); + joinPrefix(PREFIX_SNAPSHOT_DIR, label), + DIR_SNAPSHOT_CONTENT, + childPath); } // Check if this repo is available. // If failed to connect this repo, set errMsg and return false. 
public boolean ping() { String checkPath = Joiner.on(PATH_DELIMITER).join(location, - joinPrefix(PREFIX_REPO, name)); + joinPrefix(PREFIX_REPO, name)); Status st = storage.checkPathExist(checkPath); if (!st.ok()) { errMsg = TimeUtils.longToTimeString(System.currentTimeMillis()) + ": " + st.getErrMsg(); @@ -297,23 +345,23 @@ public boolean prepareSnapshotInfo() { // /location/__palo_repository_repo_name/__ss_my_ss1/__ss_content/__db_10001/__tbl_10020/__part_10031/__idx_10032/__10023/__3481721 public String assembleRemoteSnapshotPath(String label, SnapshotInfo info) { String path = Joiner.on(PATH_DELIMITER).join(location, - joinPrefix(PREFIX_REPO, name), - joinPrefix(PREFIX_SNAPSHOT_DIR, label), - DIR_SNAPSHOT_CONTENT, - joinPrefix(PREFIX_DB, info.getDbId()), - joinPrefix(PREFIX_TBL, info.getTblId()), - joinPrefix(PREFIX_PART, info.getPartitionId()), - joinPrefix(PREFIX_IDX, info.getIndexId()), - joinPrefix(PREFIX_COMMON, info.getTabletId()), - joinPrefix(PREFIX_COMMON, info.getSchemaHash())); + joinPrefix(PREFIX_REPO, name), + joinPrefix(PREFIX_SNAPSHOT_DIR, label), + DIR_SNAPSHOT_CONTENT, + joinPrefix(PREFIX_DB, info.getDbId()), + joinPrefix(PREFIX_TBL, info.getTblId()), + joinPrefix(PREFIX_PART, info.getPartitionId()), + joinPrefix(PREFIX_IDX, info.getIndexId()), + joinPrefix(PREFIX_COMMON, info.getTabletId()), + joinPrefix(PREFIX_COMMON, info.getSchemaHash())); LOG.debug("get remote tablet snapshot path: {}", path); return path; } public Status getSnapshotInfoFile(String label, String backupTimestamp, List infos) { - String remoteInfoFilePath = assembleJobInfoFilePath(label, -1) + backupTimestamp; - File localInfoFile = new File(BackupHandler.BACKUP_ROOT_DIR + PATH_DELIMITER - + "info_" + System.currentTimeMillis()); + String remoteInfoFilePath = assembleJobInfoFilePath(label, -1) + backupTimestamp; + File localInfoFile = new File(BackupHandler.BACKUP_ROOT_DIR + PATH_DELIMITER + + "info_" + System.currentTimeMillis()); try { Status st = download(remoteInfoFilePath, localInfoFile.getPath()); if (!st.ok()) { @@ -372,32 +420,48 @@ public Status upload(String localFilePath, String remoteFilePath) { return new Status(ErrCode.COMMON_ERROR, "failed to get md5sum of file: " + localFilePath); } Preconditions.checkState(!Strings.isNullOrEmpty(md5sum)); - String tmpRemotePath = assembleFileNameWithSuffix(remoteFilePath, SUFFIX_TMP_FILE); String finalRemotePath = assembleFileNameWithSuffix(remoteFilePath, md5sum); - LOG.debug("get md5sum of file: {}. tmp remote path: {}. final remote path: {}", localFilePath, tmpRemotePath, finalRemotePath); - // this may be a retry, so we should first delete remote file - Status st = storage.delete(tmpRemotePath); - if (!st.ok()) { - return st; - } + Status st = Status.OK; + if (storage instanceof BrokerStorage) { + // this may be a retry, so we should first delete remote file + String tmpRemotePath = assembleFileNameWithSuffix(remoteFilePath, SUFFIX_TMP_FILE); + LOG.debug("get md5sum of file: {}. tmp remote path: {}. 
final remote path: {}", localFilePath, tmpRemotePath, finalRemotePath); + st = storage.delete(tmpRemotePath); + if (!st.ok()) { + return st; + } - st = storage.delete(finalRemotePath); - if (!st.ok()) { - return st; - } + st = storage.delete(finalRemotePath); + if (!st.ok()) { + return st; + } - // upload tmp file - st = storage.upload(localFilePath, tmpRemotePath); - if (!st.ok()) { - return st; - } + // upload tmp file + st = storage.upload(localFilePath, tmpRemotePath); + if (!st.ok()) { + return st; + } - // rename tmp file with checksum named file - st = storage.rename(tmpRemotePath, finalRemotePath); - if (!st.ok()) { - return st; + // rename tmp file with checksum named file + st = storage.rename(tmpRemotePath, finalRemotePath); + if (!st.ok()) { + return st; + } + } else if (storage instanceof S3Storage) { + LOG.debug("get md5sum of file: {}. final remote path: {}", localFilePath, finalRemotePath); + st = storage.delete(finalRemotePath); + if (!st.ok()) { + return st; + } + + // upload final file + st = storage.upload(localFilePath, finalRemotePath); + if (!st.ok()) { + return st; + } } + LOG.info("finished to upload local file {} to remote file: {}", localFilePath, finalRemotePath); return st; } @@ -419,7 +483,7 @@ public Status download(String remoteFilePath, String localFilePath) { } String remoteFilePathWithChecksum = replaceFileNameWithChecksumFileName(remoteFilePath, - remoteFiles.get(0).getName()); + remoteFiles.get(0).getName()); LOG.debug("get download filename with checksum: " + remoteFilePathWithChecksum); // 1. get checksum from remote file name @@ -457,52 +521,6 @@ public Status download(String remoteFilePath, String localFilePath) { return Status.OK; } - // join job info file name with timestamp - // eg: __info_2018-01-01-08-00-00 - private static String jobInfoFileNameWithTimestamp(long createTime) { - if (createTime == -1) { - return PREFIX_JOB_INFO; - } else { - return PREFIX_JOB_INFO - + TimeUtils.longToTimeString(createTime, new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")); - } - } - - // join the name with specified prefix - private static String joinPrefix(String prefix, Object name) { - return prefix + name; - } - - // disjoint the name with specified prefix - private static String disjoinPrefix(String prefix, String nameWithPrefix) { - return nameWithPrefix.substring(prefix.length()); - } - - private static String assembleFileNameWithSuffix(String filePath, String md5sum) { - return filePath + CHECKSUM_SEPARATOR + md5sum; - } - - public static Pair decodeFileNameWithChecksum(String fileNameWithChecksum) { - int index = fileNameWithChecksum.lastIndexOf(CHECKSUM_SEPARATOR); - if (index == -1) { - return null; - } - String fileName = fileNameWithChecksum.substring(0, index); - String md5sum = fileNameWithChecksum.substring(index + CHECKSUM_SEPARATOR.length()); - - if (md5sum.length() != 32) { - return null; - } - - return Pair.create(fileName, md5sum); - } - - // in: /path/to/orig_file - // out: /path/to/orig_file.BUWDnl831e4nldsf - public static String replaceFileNameWithChecksumFileName(String origPath, String fileNameWithChecksum) { - return origPath.substring(0, origPath.lastIndexOf(PATH_DELIMITER) + 1) + fileNameWithChecksum; - } - public Status getBrokerAddress(Long beId, Catalog catalog, List brokerAddrs) { // get backend Backend be = Catalog.getCurrentSystemInfo().getBackend(beId); @@ -510,19 +528,24 @@ public Status getBrokerAddress(Long beId, Catalog catalog, List broker return new Status(ErrCode.COMMON_ERROR, "backend " + beId + " is missing. 
" + "failed to send upload snapshot task"); } + // only Broker storage backend need to get broker addr, other type return a fake one; + if (storage.getStorageType() != StorageBackend.StorageType.BROKER) { + brokerAddrs.add(new FsBroker("127.0.0.1", 0)); + return Status.OK; + } // get proper broker for this backend FsBroker brokerAddr = null; try { - brokerAddr = catalog.getBrokerMgr().getBroker(storage.getBrokerName(), be.getHost()); + brokerAddr = catalog.getBrokerMgr().getBroker(((BrokerStorage) storage).getBrokerName(), be.getHost()); } catch (AnalysisException e) { return new Status(ErrCode.COMMON_ERROR, "failed to get address of broker " - + storage.getBrokerName() + " when try to send upload snapshot task: " + + ((BrokerStorage) storage).getBrokerName() + " when try to send upload snapshot task: " + e.getMessage()); } if (brokerAddr == null) { return new Status(ErrCode.COMMON_ERROR, "failed to get address of broker " - + storage.getBrokerName() + " when try to send upload snapshot task"); + + ((BrokerStorage) storage).getBrokerName() + " when try to send upload snapshot task"); } brokerAddrs.add(brokerAddr); return Status.OK; @@ -535,7 +558,8 @@ public List getInfo() { info.add(TimeUtils.longToTimeString(createTime)); info.add(String.valueOf(isReadOnly)); info.add(location); - info.add(storage.getBrokerName()); + info.add(storage.getType() != StorageBackend.StorageType.BROKER ? "-" : storage.getName()); + info.add(storage.getStorageType().name()); info.add(errMsg == null ? FeConstants.null_string : errMsg); return info; } @@ -636,12 +660,6 @@ private List getSnapshotInfo(String snapshotName, String timestamp) { return info; } - public static Repository read(DataInput in) throws IOException { - Repository repo = new Repository(); - repo.readFields(in); - return repo; - } - @Override public void write(DataOutput out) throws IOException { out.writeLong(id); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 12d4ef1d85ef15..3be93d30150a2a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -1281,7 +1281,8 @@ private void downloadSnapshots() { } long signature = catalog.getNextId(); DownloadTask task = new DownloadTask(null, beId, signature, jobId, dbId, - srcToDest, brokerAddrs.get(0), repo.getStorage().getProperties()); + srcToDest, brokerAddrs.get(0), repo.getStorage().getProperties(), + repo.getStorage().getStorageType()); batchTask.addTask(task); unfinishedSignatureToId.put(signature, beId); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java new file mode 100644 index 00000000000000..a0e4dd56f30c17 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java @@ -0,0 +1,364 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.backup; + +import org.apache.doris.analysis.StorageBackend; +import org.apache.doris.common.UserException; +import org.apache.doris.common.util.S3URI; + +import org.apache.commons.collections.map.CaseInsensitiveMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.http.HttpStatus; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.nio.file.FileVisitOption; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.auth.signer.AwsS3V4Signer; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.core.retry.backoff.EqualJitterBackoffStrategy; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.CopyObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + +public class S3Storage extends BlobStorage { + public static final String S3_AK = "AWS_ACCESS_KEY"; + public static final String S3_SK = "AWS_SECRET_KEY"; + public static final String S3_ENDPOINT = "AWS_ENDPOINT"; + public static final String S3_REGION = "AWS_REGION"; + private static final Logger LOG = LogManager.getLogger(S3Storage.class); + private final CaseInsensitiveMap caseInsensitiveProperties; + private S3Client client; + + public S3Storage(Map properties) { + caseInsensitiveProperties = new CaseInsensitiveMap(); + client = null; + setProperties(properties); + setType(StorageBackend.StorageType.S3); + setName(StorageBackend.StorageType.S3.name()); + } + + @Override + public void setProperties(Map properties) { + super.setProperties(properties); + caseInsensitiveProperties.putAll(properties); + + } + private void checkS3() throws UserException { + if (!caseInsensitiveProperties.containsKey(S3_REGION)) { + throw new UserException("AWS_REGION not 
found."); + } + if (!caseInsensitiveProperties.containsKey(S3_ENDPOINT)) { + throw new UserException("AWS_ENDPOINT not found."); + } + if (!caseInsensitiveProperties.containsKey(S3_AK)) { + throw new UserException("AWS_ACCESS_KEY not found."); + } + if (!caseInsensitiveProperties.containsKey(S3_SK)) { + throw new UserException("AWS_SECRET_KEY not found."); + } + } + + private S3Client getClient() throws UserException { + if (client == null) { + checkS3(); + URI endpoint = URI.create(caseInsensitiveProperties.get(S3_ENDPOINT).toString()); + AwsBasicCredentials awsBasic = AwsBasicCredentials.create( + caseInsensitiveProperties.get(S3_AK).toString(), + caseInsensitiveProperties.get(S3_SK).toString()); + StaticCredentialsProvider scp = StaticCredentialsProvider.create(awsBasic); + EqualJitterBackoffStrategy backoffStrategy = EqualJitterBackoffStrategy + .builder() + .baseDelay(Duration.ofSeconds(1)) + .maxBackoffTime(Duration.ofMinutes(1)) + .build(); + // retry 3 time with Equal backoff + RetryPolicy retryPolicy = RetryPolicy + .builder() + .numRetries(3) + .backoffStrategy(backoffStrategy) + .build(); + ClientOverrideConfiguration clientConf = ClientOverrideConfiguration + .builder() + // set retry policy + .retryPolicy(retryPolicy) + // using AwsS3V4Signer + .putAdvancedOption(SdkAdvancedClientOption.SIGNER, AwsS3V4Signer.create()) + .build(); + client = S3Client.builder() + .endpointOverride(endpoint) + .credentialsProvider(scp) + .region(Region.of(caseInsensitiveProperties.get(S3_REGION).toString())) + .overrideConfiguration(clientConf) + // disable chunkedEncoding because of bos not supported + .serviceConfiguration(S3Configuration.builder().chunkedEncodingEnabled(false).build()) + .build(); + } + return client; + } + + @Override + public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { + long start = System.currentTimeMillis(); + S3URI uri = new S3URI(remoteFilePath); + // Write the data to a local file + File localFile = new File(localFilePath); + if (localFile.exists()) { + try { + Files.walk(Paths.get(localFilePath), FileVisitOption.FOLLOW_LINKS) + .sorted(Comparator.reverseOrder()) + .map(Path::toFile) + .forEach(File::delete); + } catch (IOException e) { + return new Status( + Status.ErrCode.COMMON_ERROR, "failed to delete exist local file: " + localFilePath); + } + } + try { + GetObjectRequest getObjectRequest = + GetObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(); + GetObjectResponse response = getClient().getObject(getObjectRequest, localFile.toPath()); + if (localFile.length() == fileSize) { + LOG.info( + "finished to download from {} to {} with size: {}. 
cost {} ms", + remoteFilePath, + localFilePath, + fileSize, + (System.currentTimeMillis() - start)); + return Status.OK; + } else { + return new Status(Status.ErrCode.COMMON_ERROR, response.toString()); + } + } catch (S3Exception s3Exception) { + return new Status( + Status.ErrCode.COMMON_ERROR, + "get file from s3 error: " + s3Exception.awsErrorDetails().errorMessage()); + } catch (UserException ue) { + LOG.error("connect to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } catch (Exception e) { + return new Status(Status.ErrCode.COMMON_ERROR, e.toString()); + } + } + + @Override + public Status directUpload(String content, String remoteFile) { + S3URI uri = new S3URI(remoteFile); + try { + PutObjectResponse response = + getClient() + .putObject( + PutObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), + RequestBody.fromBytes(content.getBytes())); + LOG.info("upload content success: " + response.eTag()); + return Status.OK; + } catch (S3Exception e) { + LOG.error("write content failed:", e); + return new Status(Status.ErrCode.COMMON_ERROR, "write content failed: " + e.getMessage()); + } catch (UserException ue) { + LOG.error("connect to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } + } + + public Status copy(String origFilePath, String destFilePath) { + S3URI origUri = new S3URI(origFilePath); + S3URI descUri = new S3URI(destFilePath); + try { + getClient() + .copyObject( + CopyObjectRequest.builder() + .copySource(origUri.getBucket() + "/" + origUri.getKey()) + .destinationBucket(descUri.getBucket()) + .destinationKey(descUri.getKey()) + .build()); + return Status.OK; + } catch (S3Exception e) { + LOG.error("copy file failed: ", e); + return new Status(Status.ErrCode.COMMON_ERROR, "copy file failed: " + e.getMessage()); + } catch (UserException ue) { + LOG.error("copy to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } + } + + @Override + public Status upload(String localPath, String remotePath) { + S3URI uri = new S3URI(remotePath); + try { + PutObjectResponse response = + getClient() + .putObject( + PutObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), + RequestBody.fromFile(new File(localPath))); + LOG.info("upload file " + localPath + " success: " + response.eTag()); + return Status.OK; + } catch (S3Exception e) { + LOG.error("write file failed:", e); + return new Status(Status.ErrCode.COMMON_ERROR, "write file failed: " + e.getMessage()); + } catch (UserException ue) { + LOG.error("connect to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } + } + + @Override + public Status rename(String origFilePath, String destFilePath) { + Status status = copy(origFilePath, destFilePath); + if (status.ok()) { + return delete(origFilePath); + } else { + return status; + } + } + + @Override + public Status delete(String remotePath) { + S3URI uri = new S3URI(remotePath); + try { + DeleteObjectResponse response = + getClient() + .deleteObject( + DeleteObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); + LOG.info("delete file " + remotePath + " success: " + response.toString()); + return Status.OK; + } catch (S3Exception e) { + LOG.error("delete file failed: ", e); + if (e.statusCode() == HttpStatus.SC_NOT_FOUND) { + return Status.OK; + } + return new 
Status(Status.ErrCode.COMMON_ERROR, "delete file failed: " + e.getMessage()); + } catch (UserException ue) { + LOG.error("connect to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } + } + + @Override + public Status list(String remotePath, List result) { + return list(remotePath, result, true); + } + + // broker file pattern glob is too complex, so we use hadoop directly + public Status list(String remotePath, List result, boolean fileNameOnly) { + try { + checkS3(); + Configuration conf = new Configuration(); + String s3AK = caseInsensitiveProperties.get(S3_AK).toString(); + String s3Sk = caseInsensitiveProperties.get(S3_SK).toString(); + String s3Endpoint = caseInsensitiveProperties.get(S3_ENDPOINT).toString(); + conf.set("fs.s3a.access.key", s3AK); + conf.set("fs.s3a.secret.key", s3Sk); + conf.set("fs.s3a.endpoint", s3Endpoint); + conf.set("fs.s3a.impl.disable.cache", "true"); + conf.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem"); + FileSystem s3AFileSystem = FileSystem.get(new URI(remotePath), conf); + org.apache.hadoop.fs.Path pathPattern = new org.apache.hadoop.fs.Path(remotePath); + FileStatus[] files = s3AFileSystem.globStatus(pathPattern); + if (files == null) { + return Status.OK; + } + for (FileStatus fileStatus : files) { + RemoteFile remoteFile = new RemoteFile(fileNameOnly?fileStatus.getPath().getName():fileStatus.getPath().toString(), !fileStatus.isDirectory(), fileStatus.isDirectory()? -1:fileStatus.getLen()); + result.add(remoteFile); + } + } catch (FileNotFoundException e) { + LOG.info("file not found: " + e.getMessage()); + return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + e.getMessage()); + } catch (Exception e) { + LOG.error("errors while get file status ", e); + return new Status(Status.ErrCode.COMMON_ERROR, "errors while get file status " + e.getMessage()); + } + return Status.OK; + } + + @Override + public Status makeDir(String remotePath) { + if (!remotePath.endsWith("/")) { + remotePath += "/"; + } + S3URI uri = new S3URI(remotePath); + try { + PutObjectResponse response = + getClient() + .putObject( + PutObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), + RequestBody.empty()); + LOG.info("makeDir success: " + response.eTag()); + return Status.OK; + } catch (S3Exception e) { + LOG.error("makeDir failed:", e); + return new Status(Status.ErrCode.COMMON_ERROR, "makeDir failed: " + e.getMessage()); + } catch (UserException ue) { + LOG.error("connect to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } + } + + @Override + public Status checkPathExist(String remotePath) { + S3URI uri = new S3URI(remotePath); + try { + getClient() + .headObject(HeadObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); + return Status.OK; + } catch (S3Exception e) { + if (e.statusCode() == HttpStatus.SC_NOT_FOUND) { + return new Status(Status.ErrCode.NOT_FOUND, "remote path does not exist: " + remotePath); + } else { + LOG.error("headObject failed:", e); + return new Status(Status.ErrCode.COMMON_ERROR, "headObject failed: " + e.getMessage()); + } + } catch (UserException ue) { + LOG.error("connect to s3 failed: ", ue); + return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); + } + } + + @Override + public StorageBackend.StorageType getStorageType() { + return StorageBackend.StorageType.S3; + } +} diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/backup/Status.java b/fe/fe-core/src/main/java/org/apache/doris/backup/Status.java index a27a69b44ab87e..f31cd316e58ea9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/Status.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/Status.java @@ -53,6 +53,15 @@ public boolean ok() { return errCode == ErrCode.OK; } + @Override + public boolean equals(Object other) { + if (other instanceof Status) { + return errCode == ((Status) other).getErrCode(); + } else { + return this == other; + } + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 090d21349e1b83..3e1ed6d78f06b1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -392,7 +392,7 @@ public class Config extends ConfigBase { */ @ConfField(mutable = true, masterOnly = true) public static int publish_version_timeout_second = 30; // 30 seconds - + /** * minimal intervals between two publish version action */ @@ -1337,4 +1337,16 @@ public class Config extends ConfigBase { */ @ConfField(mutable = true, masterOnly = true) public static boolean enable_fe_heartbeat_by_thrift = false; + + /** + * If set to true, FE will be started in BDBJE debug mode + */ + @ConfField + public static boolean enable_bdbje_debug_mode = false; + + /** + * This config is used to try skip broker when access bos or other cloud storage via broker + */ + @ConfField(mutable = true, masterOnly = true) + public static boolean enable_access_file_without_broker = false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java index 0178b193b19805..c936e9057c0a7e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java @@ -18,6 +18,10 @@ package org.apache.doris.common.util; import org.apache.doris.analysis.BrokerDesc; +import org.apache.doris.analysis.StorageBackend; +import org.apache.doris.backup.RemoteFile; +import org.apache.doris.backup.S3Storage; +import org.apache.doris.backup.Status; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.FsBroker; import org.apache.doris.common.AnalysisException; @@ -49,6 +53,7 @@ import org.apache.doris.thrift.TBrokerVersion; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPaloBrokerService; + import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -60,6 +65,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -77,35 +83,50 @@ public class BrokerUtil { */ public static void parseFile(String path, BrokerDesc brokerDesc, List fileStatuses) throws UserException { - TNetworkAddress address = getAddress(brokerDesc); - TPaloBrokerService.Client client = borrowClient(address); - boolean failed = true; - try { - TBrokerListPathRequest request = new TBrokerListPathRequest( - TBrokerVersion.VERSION_ONE, path, false, brokerDesc.getProperties()); - TBrokerListResponse tBrokerListResponse = null; + if (brokerDesc.getStorageType() == StorageBackend.StorageType.BROKER) { + TNetworkAddress address = getAddress(brokerDesc); 
+ TPaloBrokerService.Client client = borrowClient(address); + boolean failed = true; try { - tBrokerListResponse = client.listPath(request); + TBrokerListPathRequest request = new TBrokerListPathRequest( + TBrokerVersion.VERSION_ONE, path, false, brokerDesc.getProperties()); + TBrokerListResponse tBrokerListResponse = null; + try { + tBrokerListResponse = client.listPath(request); + } catch (TException e) { + reopenClient(client); + tBrokerListResponse = client.listPath(request); + } + if (tBrokerListResponse.getOpStatus().getStatusCode() != TBrokerOperationStatusCode.OK) { + throw new UserException("Broker list path failed. path=" + path + + ",broker=" + address + ",msg=" + tBrokerListResponse.getOpStatus().getMessage()); + } + failed = false; + for (TBrokerFileStatus tBrokerFileStatus : tBrokerListResponse.getFiles()) { + if (tBrokerFileStatus.isDir) { + continue; + } + fileStatuses.add(tBrokerFileStatus); + } } catch (TException e) { - reopenClient(client); - tBrokerListResponse = client.listPath(request); + LOG.warn("Broker list path exception, path={}, address={}, exception={}", path, address, e); + throw new UserException("Broker list path exception. path=" + path + ", broker=" + address); + } finally { + returnClient(client, address, failed); } - if (tBrokerListResponse.getOpStatus().getStatusCode() != TBrokerOperationStatusCode.OK) { - throw new UserException("Broker list path failed. path=" + path - + ",broker=" + address + ",msg=" + tBrokerListResponse.getOpStatus().getMessage()); + } else if (brokerDesc.getStorageType() == StorageBackend.StorageType.S3) { + S3Storage s3 = new S3Storage(brokerDesc.getProperties()); + List rfiles = new ArrayList<>(); + Status st = s3.list(path, rfiles, false); + if (!st.ok()) { + throw new UserException("S3 list path failed. path=" + path + + ",msg=" + st.getErrMsg()); } - failed = false; - for (TBrokerFileStatus tBrokerFileStatus : tBrokerListResponse.getFiles()) { - if (tBrokerFileStatus.isDir) { - continue; + for (RemoteFile r : rfiles) { + if (r.isFile()) { + fileStatuses.add(new TBrokerFileStatus(r.getName(), !r.isFile(), r.getSize(), r.isFile())); } - fileStatuses.add(tBrokerFileStatus); } - } catch (TException e) { - LOG.warn("Broker list path exception, path={}, address={}, exception={}", path, address, e); - throw new UserException("Broker list path exception. path=" + path + ", broker=" + address); - } finally { - returnClient(client, address, failed); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java new file mode 100644 index 00000000000000..e79d395e354040 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3URI.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the + // specific language governing permissions and limitations + // under the License. + +package org.apache.doris.common.util; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; + +import org.apache.parquet.glob.GlobExpander; + +import java.util.List; +import java.util.Set; + +/** + * This class represents a fully qualified location in S3 for input/output + * operations expressed as a URI. This implementation is provided to + * ensure compatibility with Hadoop Path implementations that may introduce + * encoding issues with the native URI implementation. + */ + +public class S3URI { + private static final String SCHEME_DELIM = "://"; + private static final String PATH_DELIM = "/"; + private static final String QUERY_DELIM = "\\?"; + private static final String FRAGMENT_DELIM = "#"; + private static final Set VALID_SCHEMES = ImmutableSet.of("http", "https", "s3", "s3a", "s3n", "bos"); + + private String scheme; + private final String location; + private final String bucket; + private final String key; + + /** + * Creates a new S3URI based on the bucket and key parsed from the location as defined in: + * https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro + * <p>
+ * Supported access styles are Virtual Hosted addresses and s3://... URIs with additional + * 's3n' and 's3a' schemes supported for backwards compatibility. + * + * @param location fully qualified URI + */ + public S3URI(String location) { + Preconditions.checkNotNull(location, "Location cannot be null."); + this.location = location; + String[] schemeSplit = location.split(SCHEME_DELIM); + Preconditions.checkState(schemeSplit.length == 2, "Invalid S3 URI: %s", location); + + this.scheme = schemeSplit[0]; + Preconditions.checkState(VALID_SCHEMES.contains(scheme.toLowerCase()), "Invalid scheme: %s", scheme); + String[] authoritySplit = schemeSplit[1].split(PATH_DELIM, 2); + Preconditions.checkState(authoritySplit.length == 2, "Invalid S3 URI: %s", location); + Preconditions.checkState(!authoritySplit[1].trim().isEmpty(), "Invalid S3 key: %s", location); + this.bucket = authoritySplit[0]; + + // Strip query and fragment if they exist + String path = authoritySplit[1]; + path = path.split(QUERY_DELIM)[0]; + path = path.split(FRAGMENT_DELIM)[0]; + key = path; + } + + public List expand(String path) { + return GlobExpander.expand(path); + } + + public String getScheme() { + return this.scheme; + } + + public String getBucketScheme() { + return scheme + "://" + bucket; + } + + /** + * @return S3 bucket + */ + public String getBucket() { + return bucket; + } + + /** + * @return S3 key + */ + public String getKey() { + return key; + } + + /* + * @return original, unmodified location + */ + public String getLocation() { + return location; + } + + @Override + public String toString() { + return location; + } +} + diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java index bd36e3c229992d..47511be0113a59 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java @@ -158,7 +158,6 @@ public List getCookieValues(HttpServletRequest request, String cookieNam for (Cookie cookie : cookies) { if (cookie.getName() != null && cookie.getName().equals(cookieName)) { String sessionId = cookie.getValue(); - LOG.debug("get cookie value. 
{}: {}", cookie.getName(), sessionId); sessionIds.add(sessionId); } } @@ -173,7 +172,6 @@ public void updateCookieAge(HttpServletRequest request, String cookieName, int a cookie.setMaxAge(age); cookie.setPath("/"); response.addCookie(cookie); - LOG.debug("get update cookie: {} {}", cookie.getName(), cookie.getValue()); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java index ddf039660cd894..bf96b851fd1fd6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java @@ -27,6 +27,7 @@ import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.analysis.TableName; import org.apache.doris.analysis.TableRef; import org.apache.doris.analysis.TupleDescriptor; @@ -211,7 +212,11 @@ public void setJob(ExportStmt stmt) throws UserException { private void genExecFragment() throws UserException { registerToDesc(); - String tmpExportPathStr = getExportPath() + "/__doris_export_tmp_" + id + "/"; + String tmpExportPathStr = getExportPath(); + // broker will upload file to tp path and than rename to the final file + if (brokerDesc.getStorageType() == StorageBackend.StorageType.BROKER) { + tmpExportPathStr = tmpExportPathStr + "/__doris_export_tmp_" + id + "/"; + } try { URI uri = new URI(tmpExportPathStr); tmpExportPathStr = uri.normalize().toString(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java index ef38979aa38741..2cb32738da280d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java @@ -37,6 +37,7 @@ import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.analysis.StringLiteral; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.backup.BlobStorage; @@ -2222,7 +2223,7 @@ public void setLoadErrorHubInfo(Map properties) throws DdlExcept properties.remove("path"); // check if broker info is invalid - BlobStorage blobStorage = new BlobStorage(brokerName, properties); + BlobStorage blobStorage = BlobStorage.create(brokerName, StorageBackend.StorageType.BROKER, properties); Status st = blobStorage.checkPathExist(path); if (!st.ok()) { throw new DdlException("failed to visit path: " + path + ", err: " + st.getErrMsg()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadPendingTask.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadPendingTask.java index f82afe146cbb5d..622a1e4488ae1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadPendingTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BrokerLoadPendingTask.java @@ -18,6 +18,7 @@ package org.apache.doris.load.loadv2; import org.apache.doris.analysis.BrokerDesc; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; import org.apache.doris.common.util.BrokerUtil; @@ -120,7 +121,9 @@ private void getAllFileStatus() throws UserException { tableTotalFileNum += filteredFileStatuses.size(); LOG.info("get {} files in file 
group {} for table {}. size: {}. job: {}, broker: {} ", filteredFileStatuses.size(), groupNum, entry.getKey(), groupFileSize, - callback.getCallbackId(), BrokerUtil.getAddress(brokerDesc)); + callback.getCallbackId(), + brokerDesc.getStorageType() == StorageBackend.StorageType.BROKER ? + BrokerUtil.getAddress(brokerDesc): brokerDesc.getStorageType()); groupNum++; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java index 2a47f202982cdb..c5d0b316d0fe0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -24,6 +24,7 @@ import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.BrokerTable; import org.apache.doris.catalog.Catalog; @@ -59,6 +60,7 @@ import org.apache.logging.log4j.Logger; import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -265,13 +267,17 @@ private TScanRangeLocations newLocations(TBrokerScanRangeParams params, BrokerDe // Generate on broker scan range TBrokerScanRange brokerScanRange = new TBrokerScanRange(); brokerScanRange.setParams(params); - FsBroker broker = null; - try { - broker = Catalog.getCurrentCatalog().getBrokerMgr().getBroker(brokerDesc.getName(), selectedBackend.getHost()); - } catch (AnalysisException e) { - throw new UserException(e.getMessage()); + if (brokerDesc.getStorageType() == StorageBackend.StorageType.BROKER) { + FsBroker broker = null; + try { + broker = Catalog.getCurrentCatalog().getBrokerMgr().getBroker(brokerDesc.getName(), selectedBackend.getHost()); + } catch (AnalysisException e) { + throw new UserException(e.getMessage()); + } + brokerScanRange.addToBrokerAddresses(new TNetworkAddress(broker.ip, broker.port)); + } else { + brokerScanRange.setBrokerAddresses(new ArrayList<>()); } - brokerScanRange.addToBrokerAddresses(new TNetworkAddress(broker.ip, broker.port)); // Scan range TScanRange scanRange = new TScanRange(); @@ -408,7 +414,7 @@ private void processFileGroup( ParamCreateContext context, List fileStatuses) throws UserException { - if (fileStatuses == null || fileStatuses.isEmpty()) { + if (fileStatuses == null || fileStatuses.isEmpty()) { return; } TScanRangeLocations curLocations = newLocations(context.params, brokerDesc); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ExportSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ExportSink.java index 3d678adb6d88c7..dcddbca033cf46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ExportSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ExportSink.java @@ -64,11 +64,13 @@ public String getExplainString(String prefix, TExplainLevel explainLevel) { @Override protected TDataSink toThrift() { TDataSink result = new TDataSink(TDataSinkType.EXPORT_SINK); - TExportSink tExportSink = new TExportSink(TFileType.FILE_BROKER, exportPath, columnSeparator, lineDelimiter); + TExportSink tExportSink = new TExportSink(brokerDesc.getFileType(), exportPath, columnSeparator, lineDelimiter); - FsBroker broker = Catalog.getCurrentCatalog().getBrokerMgr().getAnyBroker(brokerDesc.getName()); - if (broker != null) { - 
tExportSink.addToBrokerAddresses(new TNetworkAddress(broker.ip, broker.port)); + if (brokerDesc.getFileType() == TFileType.FILE_BROKER) { + FsBroker broker = Catalog.getCurrentCatalog().getBrokerMgr().getAnyBroker(brokerDesc.getName()); + if (broker != null) { + tExportSink.addToBrokerAddresses(new TNetworkAddress(broker.ip, broker.port)); + } } tExportSink.setProperties(brokerDesc.getProperties()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/DownloadTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/DownloadTask.java index a2aace18b02ccc..46fd73315ca0b4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/DownloadTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/DownloadTask.java @@ -17,6 +17,7 @@ package org.apache.doris.task; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.catalog.FsBroker; import org.apache.doris.thrift.TDownloadReq; import org.apache.doris.thrift.TNetworkAddress; @@ -31,14 +32,17 @@ public class DownloadTask extends AgentTask { private Map srcToDestPath; private FsBroker brokerAddr; private Map brokerProperties; + private StorageBackend.StorageType storageType; public DownloadTask(TResourceInfo resourceInfo, long backendId, long signature, long jobId, long dbId, - Map srcToDestPath, FsBroker brokerAddr, Map brokerProperties) { + Map srcToDestPath, FsBroker brokerAddr, Map brokerProperties, + StorageBackend.StorageType storageType) { super(resourceInfo, backendId, TTaskType.DOWNLOAD, dbId, -1, -1, -1, -1, signature); this.jobId = jobId; this.srcToDestPath = srcToDestPath; this.brokerAddr = brokerAddr; this.brokerProperties = brokerProperties; + this.storageType = storageType; } public long getJobId() { @@ -61,6 +65,7 @@ public TDownloadReq toThrift() { TNetworkAddress address = new TNetworkAddress(brokerAddr.ip, brokerAddr.port); TDownloadReq req = new TDownloadReq(jobId, srcToDestPath, address); req.setBrokerProp(brokerProperties); + req.setStorageBackend(storageType.toThrift()); return req; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java index adf79e8fc65b38..ebd5e91ff16898 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/ExportExportingTask.java @@ -17,6 +17,7 @@ package org.apache.doris.task; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.FsBroker; import org.apache.doris.common.AnalysisException; @@ -145,15 +146,17 @@ protected void exec() { return; } - // move tmp file to final destination - Status mvStatus = moveTmpFiles(); - if (!mvStatus.ok()) { - String failMsg = "move tmp file to final destination fail."; - failMsg += mvStatus.getErrorMsg(); - job.cancel(ExportFailMsg.CancelType.RUN_FAIL, failMsg); - LOG.warn("move tmp file to final destination fail. job:{}", job); - registerProfile(); - return; + if (job.getBrokerDesc().getStorageType() == StorageBackend.StorageType.BROKER) { + // move tmp file to final destination + Status mvStatus = moveTmpFiles(); + if (!mvStatus.ok()) { + String failMsg = "move tmp file to final destination fail."; + failMsg += mvStatus.getErrorMsg(); + job.cancel(ExportFailMsg.CancelType.RUN_FAIL, failMsg); + LOG.warn("move tmp file to final destination fail. 
job:{}", job); + registerProfile(); + return; + } } // release snapshot diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/UploadTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/UploadTask.java index 6f3203ac9a4038..bcbb9a58d94b8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/UploadTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/UploadTask.java @@ -17,6 +17,7 @@ package org.apache.doris.task; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.catalog.FsBroker; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TResourceInfo; @@ -32,14 +33,17 @@ public class UploadTask extends AgentTask { private Map srcToDestPath; private FsBroker broker; private Map brokerProperties; + private StorageBackend.StorageType storageType; public UploadTask(TResourceInfo resourceInfo, long backendId, long signature, long jobId, Long dbId, - Map srcToDestPath, FsBroker broker, Map brokerProperties) { + Map srcToDestPath, FsBroker broker, Map brokerProperties, + StorageBackend.StorageType storageType) { super(resourceInfo, backendId, TTaskType.UPLOAD, dbId, -1, -1, -1, -1, signature); this.jobId = jobId; this.srcToDestPath = srcToDestPath; this.broker = broker; this.brokerProperties = brokerProperties; + this.storageType = storageType; } public long getJobId() { @@ -62,6 +66,7 @@ public TUploadReq toThrift() { TNetworkAddress address = new TNetworkAddress(broker.ip, broker.port); TUploadReq request = new TUploadReq(jobId, srcToDestPath, address); request.setBrokerProp(brokerProperties); + request.setStorageBackend(storageType.toThrift()); return request; } } diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index cf67a3f51db619..5abcf97f233ce1 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -109,8 +109,10 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("bitmap", new Integer(SqlParserSymbols.KW_BITMAP)); keywordMap.put("bitmap_union", new Integer(SqlParserSymbols.KW_BITMAP_UNION)); keywordMap.put("boolean", new Integer(SqlParserSymbols.KW_BOOLEAN)); - keywordMap.put("both", new Integer(SqlParserSymbols.KW_BOTH)); + // keywordMap.put("both", new Integer(SqlParserSymbols.KW_BOTH)); keywordMap.put("broker", new Integer(SqlParserSymbols.KW_BROKER)); + keywordMap.put("s3", new Integer(SqlParserSymbols.KW_S3)); + keywordMap.put("hdfs", new Integer(SqlParserSymbols.KW_HDFS)); keywordMap.put("buckets", new Integer(SqlParserSymbols.KW_BUCKETS)); keywordMap.put("builtin", new Integer(SqlParserSymbols.KW_BUILTIN)); keywordMap.put("by", new Integer(SqlParserSymbols.KW_BY)); @@ -282,7 +284,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("plugin", new Integer(SqlParserSymbols.KW_PLUGIN)); keywordMap.put("plugins", new Integer(SqlParserSymbols.KW_PLUGINS)); keywordMap.put("preceding", new Integer(SqlParserSymbols.KW_PRECEDING)); - keywordMap.put("primary", new Integer(SqlParserSymbols.KW_PRIMARY)); + // keywordMap.put("primary", new Integer(SqlParserSymbols.KW_PRIMARY)); keywordMap.put("proc", new Integer(SqlParserSymbols.KW_PROC)); keywordMap.put("procedure", new Integer(SqlParserSymbols.KW_PROCEDURE)); keywordMap.put("processlist", new Integer(SqlParserSymbols.KW_PROCESSLIST)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java index fae305c679df3c..ffc51965ce02d7 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java @@ -24,6 +24,7 @@ import org.apache.doris.analysis.DropRepositoryStmt; import org.apache.doris.analysis.LabelName; import org.apache.doris.analysis.RestoreStmt; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.analysis.TableName; import org.apache.doris.analysis.TableRef; import org.apache.doris.catalog.BrokerMgr; @@ -242,8 +243,9 @@ public Status getSnapshotInfoFile(String label, String backupTimestamp, List srcToDestPath = Maps.newHashMap(); UploadTask uploadTask = new UploadTask(null, 0, 0, backupJob.getJobId(), CatalogMocker.TEST_DB_ID, - srcToDestPath, null, null); + srcToDestPath, null, null, StorageBackend.StorageType.BROKER); request = new TFinishTaskRequest(); Map> tabletFiles = Maps.newHashMap(); request.setTabletFiles(tabletFiles); @@ -343,7 +345,7 @@ public Status getSnapshotInfoFile(String label, String backupTimestamp, List downloadedTabletIds = Lists.newArrayList(); request.setDownloadedTabletIds(downloadedTabletIds); diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java index f0dba664ca9150..0c10496b461c2a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java @@ -18,6 +18,7 @@ package org.apache.doris.backup; import org.apache.doris.analysis.BackupStmt; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.analysis.TableName; import org.apache.doris.analysis.TableRef; import org.apache.doris.backup.BackupJob.BackupJobState; @@ -116,7 +117,7 @@ public Repository getRepo(long repoId) { private EditLog editLog; private Repository repo = new Repository(repoId, "repo", false, "my_repo", - new BlobStorage("broker", Maps.newHashMap())); + BlobStorage.create("broker", StorageBackend.StorageType.BROKER, Maps.newHashMap())); @BeforeClass public static void start() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/BrokerStorageTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/BrokerStorageTest.java new file mode 100644 index 00000000000000..b83f76ecd2a374 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/BrokerStorageTest.java @@ -0,0 +1,174 @@ +package org.apache.doris.backup; + +import org.apache.doris.common.ClientPool; +import org.apache.doris.common.GenericPool; +import org.apache.doris.common.Pair; +import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.thrift.TNetworkAddress; +import org.apache.doris.thrift.TPaloBrokerService; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.pool2.impl.GenericKeyedObjectPoolConfig; +import org.apache.thrift.TServiceClient; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TSocket; +import org.apache.thrift.transport.TTransport; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.UUID; + +import 
mockit.Expectations; +import mockit.Mock; +import mockit.MockUp; +import mockit.Mocked; +import mockit.Tested; + +@Ignore +public class BrokerStorageTest { + private static String basePath; + private final String bucket = "bos://yang-repo/"; + private final String brokerHost = "xafj-palo-rpm64.xafj.baidu.com"; + private Map properties; + + @Tested + private BrokerStorage storage; + private String testFile; + private String content; + private Pair pair; + @Mocked + GenericPool pool; + + @BeforeClass + public static void init() { + basePath = "broker/" + UUID.randomUUID().toString(); + } + + @Before + public void setUp() throws Exception { + pair = new Pair<>(null, null); + TTransport transport = new TSocket(brokerHost, 8111); + transport.open(); + TProtocol protocol = new TBinaryProtocol(transport); + pair.first = new TPaloBrokerService.Client(protocol); + pair.second = new TNetworkAddress(brokerHost, 8111); + properties = new HashMap<>(); + properties.put("bos_accesskey", System.getenv().getOrDefault("AWS_AK", "")); + properties.put("bos_secret_accesskey", System.getenv().getOrDefault("AWS_SK", "")); + properties.put("bos_endpoint", "http://bj.bcebos.com"); + storage = new BrokerStorage("bos_broker", properties); + testFile = bucket + basePath + "/Ode_to_the_West_Wind"; + content = + "O wild West Wind, thou breath of Autumn's being\n" + + "Thou, from whose unseen presence the leaves dead\n" + + "Are driven, like ghosts from an enchanter fleeing,\n" + + "Yellow, and black, and pale, and hectic red,\n" + + "Pestilence-stricken multitudes:O thou\n" + + "Who chariotest to their dark wintry bed\n" + + "The winged seeds, where they lie cold and low,\n" + + "Each like a corpse within its grave, until\n" + + "Thine azure sister of the Spring shall blow\n" + + "Her clarion o'er the dreaming earth, and fill\n" + + "(Driving sweet buds like flocks to feed in air)\n" + + "With living hues and odors plain and hill:\n" + + "Wild Spirit, which art moving everywhere;\n" + + "Destroyer and preserver; hear, oh, hear!"; + new MockUp() { + @Mock + private Pair getBroker() { + return pair; + } + }; + GenericKeyedObjectPoolConfig brokerPoolConfig = new GenericKeyedObjectPoolConfig(); + new Expectations() { + { + pool.returnObject(withInstanceOf(TNetworkAddress.class), withInstanceOf(TServiceClient.class)); + minTimes =0; + } + }; + Deencapsulation.setField(ClientPool.class, "brokerPool", pool); + Assert.assertEquals(Status.OK, storage.directUpload(content, testFile)); + } + + @Test + public void downloadWithFileSize() throws IOException { + File localFile = File.createTempFile("brokerunittest", ".dat"); + localFile.deleteOnExit(); + Status status = storage.downloadWithFileSize(testFile, localFile.getAbsolutePath(), content.getBytes().length); + Assert.assertEquals(Status.OK, status); + Assert.assertEquals(DigestUtils.md5Hex(content.getBytes()), DigestUtils.md5Hex(new FileInputStream(localFile))); + status = storage.downloadWithFileSize(bucket + basePath + "/Ode_to_the_West_Wind", localFile.getAbsolutePath(), content.getBytes().length + 1); + Assert.assertNotEquals(Status.OK, status); + } + + @Test + public void upload() throws IOException { + File localFile = File.createTempFile("brokerunittest", ".dat"); + localFile.deleteOnExit(); + OutputStream os = new FileOutputStream(localFile); + byte[] buf = new byte[1024 * 1024]; + Random r = new Random(); + r.nextBytes(buf); + os.write(buf); + os.close(); + String remote = bucket + basePath + "/" + localFile.getName(); + Status status = 
storage.upload(localFile.getAbsolutePath(), remote); + Assert.assertEquals(Status.OK, status); + File localFile2 = File.createTempFile("brokerunittest", ".dat"); + localFile2.deleteOnExit(); + status = storage.downloadWithFileSize(remote, localFile2.getAbsolutePath(), 1024 * 1024); + Assert.assertEquals(Status.OK, status); + Assert.assertEquals(DigestUtils.md5Hex(new FileInputStream(localFile)), + DigestUtils.md5Hex(new FileInputStream(localFile2))); + } + + @Test + public void rename() { + Assert.assertEquals(Status.OK, storage.directUpload(content, testFile + ".bak")); + storage.rename(testFile + ".bak", testFile + ".bak1"); + Assert.assertEquals(Status.OK, storage.checkPathExist(testFile + ".bak1")); + } + + @Test + public void delete() { + String deleteFile = testFile + ".to_be_delete"; + Assert.assertEquals(Status.OK, storage.delete(deleteFile + "xxxx")); + Assert.assertEquals(Status.OK, storage.directUpload(content, deleteFile)); + Assert.assertEquals(Status.OK, storage.delete(deleteFile)); + Assert.assertEquals(Status.ErrCode.NOT_FOUND, storage.checkPathExist(deleteFile).getErrCode()); + Assert.assertEquals(Status.OK, storage.delete(deleteFile + "xxxx")); + } + + @Test + public void list() { + List result = new ArrayList<>(); + String listPath = bucket + basePath + "_list" + "/Ode_to_the_West_Wind"; + Assert.assertEquals(Status.OK, storage.directUpload(content, listPath + ".1")); + Assert.assertEquals(Status.OK, storage.directUpload(content, listPath + ".2")); + Assert.assertEquals(Status.OK, storage.directUpload(content, listPath + ".3")); + Assert.assertEquals(Status.OK, storage.list(bucket + basePath + "_list/*", result)); + Assert.assertEquals(3, result.size()); + } + + @Test + public void checkPathExist() { + Status status = storage.checkPathExist(testFile); + Assert.assertEquals(Status.OK, status); + status = storage.checkPathExist(testFile + ".NOT_EXIST"); + Assert.assertEquals(Status.ErrCode.NOT_FOUND, status.getErrCode()); + } +} \ No newline at end of file diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java index 026bdd63890ddc..7c83f143b6c29e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java @@ -19,6 +19,7 @@ import mockit.*; import org.apache.doris.analysis.ShowRepositoriesStmt; +import org.apache.doris.analysis.StorageBackend; import org.apache.doris.catalog.BrokerMgr; import org.apache.doris.catalog.FsBroker; import org.apache.doris.common.AnalysisException; @@ -309,7 +310,7 @@ public void testPersist() { properties.put("bos_endpoint", "http://gz.bcebos.com"); properties.put("bos_accesskey", "a"); properties.put("bos_secret_accesskey", "b"); - BlobStorage storage = new BlobStorage(brokerName, properties); + BlobStorage storage = BlobStorage.create(brokerName, StorageBackend.StorageType.BROKER, properties); repo = new Repository(10000, "repo", false, location, storage); File file = new File("./Repository"); diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index 87911f156f14c9..bf7fdf6c2e06c8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -17,6 +17,7 @@ package org.apache.doris.backup; +import org.apache.doris.analysis.StorageBackend; import 
org.apache.doris.backup.BackupJobInfo.BackupIndexInfo; import org.apache.doris.backup.BackupJobInfo.BackupPartitionInfo; import org.apache.doris.backup.BackupJobInfo.BackupOlapTableInfo; @@ -119,7 +120,7 @@ public Repository getRepo(long repoId) { @Injectable private Repository repo = new Repository(repoId, "repo", false, "bos://my_repo", - new BlobStorage("broker", Maps.newHashMap())); + BlobStorage.create("broker", StorageBackend.StorageType.BROKER, Maps.newHashMap())); private BackupMeta backupMeta; diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/S3StorageTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/S3StorageTest.java new file mode 100644 index 00000000000000..f20b697dc4f2b8 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/S3StorageTest.java @@ -0,0 +1,150 @@ +package org.apache.doris.backup; + +import org.apache.commons.codec.digest.DigestUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.UUID; + +@Ignore +public class S3StorageTest { + private static String basePath; + private final String bucket = "s3://yang-repo/"; + private Map properties; + private S3Storage storage; + private String testFile; + private String content; + + @BeforeClass + public static void init() { + basePath = "s3/" + UUID.randomUUID().toString(); + } + + @Before + public void setUp() throws Exception { + properties = new HashMap<>(); + properties.put("AWS_ACCESS_KEY", System.getenv().getOrDefault("AWS_AK", "")); + properties.put("AWS_SECRET_KEY", System.getenv().getOrDefault("AWS_SK", "")); + properties.put("AWS_ENDPOINT", "http://s3.bj.bcebos.com"); + properties.put("AWS_REGION", "bj"); + storage = new S3Storage(properties); + testFile = bucket + basePath + "/Ode_to_the_West_Wind"; + + content = + "O wild West Wind, thou breath of Autumn's being\n" + + "Thou, from whose unseen presence the leaves dead\n" + + "Are driven, like ghosts from an enchanter fleeing,\n" + + "Yellow, and black, and pale, and hectic red,\n" + + "Pestilence-stricken multitudes:O thou\n" + + "Who chariotest to their dark wintry bed\n" + + "The winged seeds, where they lie cold and low,\n" + + "Each like a corpse within its grave, until\n" + + "Thine azure sister of the Spring shall blow\n" + + "Her clarion o'er the dreaming earth, and fill\n" + + "(Driving sweet buds like flocks to feed in air)\n" + + "With living hues and odors plain and hill:\n" + + "Wild Spirit, which art moving everywhere;\n" + + "Destroyer and preserver; hear, oh, hear!"; + Assert.assertEquals(Status.OK, storage.directUpload(content, testFile)); + } + + @Test + public void downloadWithFileSize() throws IOException { + File localFile = File.createTempFile("s3unittest", ".dat"); + localFile.deleteOnExit(); + Status status = storage.downloadWithFileSize(testFile, localFile.getAbsolutePath(), content.getBytes().length); + Assert.assertEquals(Status.OK, status); + Assert.assertEquals(DigestUtils.md5Hex(content.getBytes()), DigestUtils.md5Hex(new FileInputStream(localFile))); + status = storage.downloadWithFileSize(bucket + basePath + "/Ode_to_the_West_Wind", localFile.getAbsolutePath(), content.getBytes().length + 1); + 
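// the expected size is one byte larger than the object, so the download should fail +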
Assert.assertNotEquals(Status.OK, status); + } + + @Test + public void upload() throws IOException { + File localFile = File.createTempFile("s3unittest", ".dat"); + localFile.deleteOnExit(); + OutputStream os = new FileOutputStream(localFile); + byte[] buf = new byte[1024 * 1024]; + Random r = new Random(); + r.nextBytes(buf); + os.write(buf); + os.close(); + String remote = bucket + basePath + "/" + localFile.getName(); + Status status = storage.upload(localFile.getAbsolutePath(), remote); + Assert.assertEquals(Status.OK, status); + File localFile2 = File.createTempFile("s3unittest", ".dat"); + localFile2.deleteOnExit(); + status = storage.downloadWithFileSize(remote, localFile2.getAbsolutePath(), 1024 * 1024); + Assert.assertEquals(Status.OK, status); + Assert.assertEquals(DigestUtils.md5Hex(new FileInputStream(localFile)), + DigestUtils.md5Hex(new FileInputStream(localFile2))); + } + + @Test + public void copy() { + Assert.assertEquals(Status.OK, storage.copy(testFile, testFile + ".bak")); + Assert.assertEquals(Status.OK, storage.checkPathExist(testFile + ".bak")); + Assert.assertNotEquals(Status.OK, storage.copy(testFile + ".bakxxx", testFile + ".bak")); + } + + @Test + public void rename() { + Assert.assertEquals(Status.OK, storage.directUpload(content, testFile + ".bak")); + storage.rename(testFile + ".bak", testFile + ".bak1"); + Assert.assertEquals(Status.ErrCode.NOT_FOUND, storage.checkPathExist(testFile + ".bak").getErrCode()); + Assert.assertEquals(Status.OK, storage.checkPathExist(testFile + ".bak1")); + + } + + @Test + public void delete() { + String deleteFile = testFile + ".to_be_delete"; + Assert.assertEquals(Status.OK, storage.directUpload(content, deleteFile)); + Assert.assertEquals(Status.OK, storage.delete(deleteFile)); + Assert.assertEquals(Status.ErrCode.NOT_FOUND, storage.checkPathExist(deleteFile).getErrCode()); + Assert.assertEquals(Status.OK, storage.delete(deleteFile + "xxxx")); + + } + + @Test + public void list() { + List result = new ArrayList<>(); + String listPath = bucket + basePath + "_list" + "/Ode_to_the_West_Wind"; + Assert.assertEquals(Status.OK, storage.directUpload(content, listPath + ".1")); + Assert.assertEquals(Status.OK, storage.directUpload(content, listPath + ".2")); + Assert.assertEquals(Status.OK, storage.directUpload(content, listPath + ".3")); + Assert.assertEquals(Status.OK, storage.list(bucket + basePath + "_list/*", result)); + Assert.assertEquals(3, result.size()); + } + + @Test + public void makeDir() { + String path = bucket + basePath + "/test_path"; + Assert.assertEquals(Status.OK, storage.makeDir(path)); + Assert.assertNotEquals(Status.OK, storage.checkPathExist(path)); + String path1 = bucket + basePath + "/test_path1/"; + Assert.assertEquals(Status.OK, storage.makeDir(path1)); + Assert.assertEquals(Status.OK, storage.checkPathExist(path1)); + } + + @Test + public void checkPathExist() { + Status status = storage.checkPathExist(testFile); + Assert.assertEquals(Status.OK, status); + status = storage.checkPathExist(testFile + ".NOT_EXIST"); + Assert.assertEquals(Status.ErrCode.NOT_FOUND, status.getErrCode()); + } +} \ No newline at end of file diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java new file mode 100644 index 00000000000000..0d87235f585475 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3URITest.java @@ -0,0 +1,61 @@ +package org.apache.doris.common.util; + +import org.junit.Test; + +import 
org.junit.Assert; + +public class S3URITest { + @Test + public void testLocationParsing() { + String p1 = "s3://bucket/path/to/file"; + S3URI uri1 = new S3URI(p1); + + Assert.assertEquals("bucket", uri1.getBucket()); + Assert.assertEquals("path/to/file", uri1.getKey()); + Assert.assertEquals(p1, uri1.toString()); + } + @Test + public void testPathLocationParsing() { + String p1 = "s3://bucket/path/"; + S3URI uri1 = new S3URI(p1); + + Assert.assertEquals("bucket", uri1.getBucket()); + Assert.assertEquals("path/", uri1.getKey()); + Assert.assertEquals(p1, uri1.toString()); + } + + @Test + public void testEncodedString() { + String p1 = "s3://bucket/path%20to%20file"; + S3URI uri1 = new S3URI(p1); + + Assert.assertEquals("bucket", uri1.getBucket()); + Assert.assertEquals("path%20to%20file", uri1.getKey()); + Assert.assertEquals(p1, uri1.toString()); + } + + @Test(expected = IllegalStateException.class) + public void missingKey() { + new S3URI("https://bucket/"); + } + + @Test(expected = IllegalStateException.class) + public void relativePathing() { + new S3URI("/path/to/file"); + } + + @Test(expected = IllegalStateException.class) + public void invalidScheme() { + new S3URI("ftp://bucket/"); + } + + @Test + public void testQueryAndFragment() { + String p1 = "s3://bucket/path/to/file?query=foo#bar"; + S3URI uri1 = new S3URI(p1); + + Assert.assertEquals("bucket", uri1.getBucket()); + Assert.assertEquals("path/to/file", uri1.getKey()); + Assert.assertEquals(p1, uri1.toString()); + } +} \ No newline at end of file diff --git a/fe/pom.xml b/fe/pom.xml index 7cd488f2c381c0..91288340d7bb53 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -604,8 +604,14 @@ under the License. org.apache.hadoop hadoop-common - 2.6.5 + 2.7.3 provided + + + jdk.tools + jdk.tools + + @@ -656,6 +662,15 @@ under the License. 
2.4.5 provided + + + org.projectlombok + lombok + 1.18.16 + provided + + diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index f7cf3186e75ec2..10ee99533aac70 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -186,6 +186,8 @@ struct TUploadReq { 2: required map src_dest_map 3: required Types.TNetworkAddress broker_addr 4: optional map broker_prop + 5: optional Types.TStorageBackendType storage_backend = Types.TStorageBackendType.BROKER + } struct TDownloadReq { @@ -193,6 +195,7 @@ struct TDownloadReq { 2: required map src_dest_map 3: required Types.TNetworkAddress broker_addr 4: optional map broker_prop + 5: optional Types.TStorageBackendType storage_backend = Types.TStorageBackendType.BROKER } struct TSnapshotRequest { diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index 194cf30bdeec12..b872eb255c33cb 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -84,6 +84,13 @@ enum TTypeNodeType { STRUCT } +enum TStorageBackendType { + BROKER, + S3, + HDFS, + LOCAL +} + struct TScalarType { 1: required TPrimitiveType type @@ -369,6 +376,8 @@ enum TFileType { FILE_LOCAL, FILE_BROKER, FILE_STREAM, // file content is streaming in the buffer + FILE_S3, + FILE_HDFS, } struct TTabletCommitInfo { diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index a13ce3fe60d6b5..7ce36d7ea76a0f 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -813,13 +813,13 @@ build_croaringbitmap build_orc build_cctz build_tsan_header -# build_aws_c_common -# build_aws_s2n -# build_aws_c_cal -# build_aws_c_io -# build_aws_checksums -# build_aws_c_event_stream -# build_aws_sdk +build_aws_c_common +build_aws_s2n +build_aws_c_cal +build_aws_c_io +build_aws_checksums +build_aws_c_event_stream +build_aws_sdk build_js_and_css echo "Finished building all thirdparties" diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index 7b7995879ddd51..f2799659570ce3 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -200,7 +200,7 @@ do exit 1 fi elif [[ "${!NAME}" =~ $SUFFIX_ZIP ]]; then - if ! $UNZIP_CMD "$TP_SOURCE_DIR/${!NAME}" -d "$TP_SOURCE_DIR/"; then + if ! $UNZIP_CMD -qq "$TP_SOURCE_DIR/${!NAME}" -d "$TP_SOURCE_DIR/"; then echo "Failed to unzip ${!NAME}" exit 1 fi