diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 4336b783307..70b66938fe6 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -91,6 +91,7 @@
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/optional.h"
+#include "arrow/util/string.h"
 #include "arrow/util/task_group.h"
 #include "arrow/util/thread_pool.h"
@@ -353,6 +354,12 @@ Result<S3Options> S3Options::FromUri(const Uri& uri, std::string* out_path) {
       options.scheme = kv.second;
     } else if (kv.first == "endpoint_override") {
       options.endpoint_override = kv.second;
+    } else if (kv.first == "allow_bucket_creation") {
+      ARROW_ASSIGN_OR_RAISE(options.allow_bucket_creation,
+                            ::arrow::internal::ParseBoolean(kv.second));
+    } else if (kv.first == "allow_bucket_deletion") {
+      ARROW_ASSIGN_OR_RAISE(options.allow_bucket_deletion,
+                            ::arrow::internal::ParseBoolean(kv.second));
     } else {
       return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'");
     }
@@ -376,6 +383,8 @@ Result<S3Options> S3Options::FromUri(const std::string& uri_string,
 bool S3Options::Equals(const S3Options& other) const {
   return (region == other.region && endpoint_override == other.endpoint_override &&
           scheme == other.scheme && background_writes == other.background_writes &&
+          allow_bucket_creation == other.allow_bucket_creation &&
+          allow_bucket_deletion == other.allow_bucket_deletion &&
           credentials_kind == other.credentials_kind &&
           proxy_options.Equals(other.proxy_options) &&
           GetAccessKey() == other.GetAccessKey() &&
@@ -1677,6 +1686,27 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Impl> {
   Status CreateBucket(const std::string& bucket) {
+    // Check that the bucket exists before trying to create it
+    {
+      S3Model::HeadBucketRequest req;
+      req.SetBucket(ToAwsString(bucket));
+      auto outcome = client_->HeadBucket(req);
+
+      if (outcome.IsSuccess()) {
+        return Status::OK();
+      } else if (!IsNotFound(outcome.GetError())) {
+        return ErrorToStatus(
+            std::forward_as_tuple("When creating bucket '", bucket, "': "),
+            outcome.GetError());
+      }
+
+      if (!options().allow_bucket_creation) {
+        return Status::IOError(
+            "Bucket '", bucket, "' not found. ",
+            "To create buckets, enable the allow_bucket_creation option.");
+      }
+    }
+
     S3Model::CreateBucketConfiguration config;
     S3Model::CreateBucketRequest req;
     auto _region = region();
@@ -2373,13 +2403,16 @@ Status S3FileSystem::DeleteDir(const std::string& s) {
     return Status::NotImplemented("Cannot delete all S3 buckets");
   }
   RETURN_NOT_OK(impl_->DeleteDirContentsAsync(path.bucket, path.key).status());
-  if (path.key.empty()) {
+  if (path.key.empty() && options().allow_bucket_deletion) {
     // Delete bucket
     S3Model::DeleteBucketRequest req;
     req.SetBucket(ToAwsString(path.bucket));
     return OutcomeToStatus(
         std::forward_as_tuple("When deleting bucket '", path.bucket, "': "),
         impl_->client_->DeleteBucket(req));
+  } else if (path.key.empty()) {
+    return Status::IOError("Would delete bucket '", path.bucket, "'. ",
+                           "To delete buckets, enable the allow_bucket_deletion option.");
   } else {
     // Delete "directory"
     RETURN_NOT_OK(impl_->DeleteObject(path.bucket, path.key + kSep));
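For context beyond the diff itself: the net effect of the s3fs.cc change is that bucket-level CreateDir/DeleteDir now fail fast unless explicitly opted in. A rough Python sketch of the new behavior, assuming a local S3-compatible endpoint (the localhost:9000 MinIO-style endpoint and minioadmin credentials are placeholders, not part of this patch):

```python
from pyarrow.fs import S3FileSystem

# Defaults after this patch: allow_bucket_creation=False, allow_bucket_deletion=False
fs = S3FileSystem(access_key="minioadmin", secret_key="minioadmin",
                  endpoint_override="localhost:9000", scheme="http")

try:
    fs.create_dir("brand-new-bucket")  # HeadBucket says "not found" -> IOError
except OSError as exc:
    print(exc)  # Bucket 'brand-new-bucket' not found. To create buckets, ...

# Opting in restores the previous behavior:
fs = S3FileSystem(access_key="minioadmin", secret_key="minioadmin",
                  endpoint_override="localhost:9000", scheme="http",
                  allow_bucket_creation=True, allow_bucket_deletion=True)
fs.create_dir("brand-new-bucket")   # creates the bucket
fs.delete_dir("brand-new-bucket")   # deletes the bucket
```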
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index c2c99e964e6..fa1fdb96716 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -130,6 +130,17 @@ struct ARROW_EXPORT S3Options {
   /// Whether OutputStream writes will be issued in the background, without blocking.
   bool background_writes = true;
 
+  /// Whether to allow creation of buckets
+  ///
+  /// When S3FileSystem creates new buckets, it does not pass any non-default settings.
+  /// In AWS S3, the bucket and all objects will not be publicly visible, and there
+  /// will be no bucket policies and no resource tags. To have more control over how
+  /// buckets are created, use a different API to create them.
+  bool allow_bucket_creation = false;
+
+  /// Whether to allow deletion of buckets
+  bool allow_bucket_deletion = false;
+
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index b44bc7f50cd..7216af297a0 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -405,6 +405,9 @@ class TestS3FS : public S3TestMixin {
  public:
   void SetUp() override {
     S3TestMixin::SetUp();
+    // Most tests will create buckets
+    options_.allow_bucket_creation = true;
+    options_.allow_bucket_deletion = true;
     MakeFileSystem();
     // Set up test bucket
     {
@@ -1124,6 +1127,25 @@ TEST_F(TestS3FS, FileSystemFromUri) {
   AssertFileInfo(fs.get(), path, FileType::File, 8);
 }
 
+TEST_F(TestS3FS, NoCreateDeleteBucket) {
+  // Create a bucket to try deleting
+  ASSERT_OK(fs_->CreateDir("test-no-delete"));
+
+  options_.allow_bucket_creation = false;
+  options_.allow_bucket_deletion = false;
+  MakeFileSystem();
+
+  auto maybe_create_dir = fs_->CreateDir("test-no-create");
+  ASSERT_RAISES(IOError, maybe_create_dir);
+  ASSERT_THAT(maybe_create_dir.message(),
+              ::testing::HasSubstr("Bucket 'test-no-create' not found"));
+
+  auto maybe_delete_dir = fs_->DeleteDir("test-no-delete");
+  ASSERT_RAISES(IOError, maybe_delete_dir);
+  ASSERT_THAT(maybe_delete_dir.message(),
+              ::testing::HasSubstr("Would delete bucket 'test-no-delete'"));
+}
+
 // Simple retry strategy that records errors encountered and its emitted retry delays
 class TestRetryStrategy : public S3RetryStrategy {
  public:
diff --git a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc
index d922311df1c..3a158600552 100644
--- a/cpp/src/arrow/util/string.cc
+++ b/cpp/src/arrow/util/string.cc
@@ -187,5 +187,15 @@ util::optional<std::string> Replace(util::string_view s, util::string_view token,
          s.substr(token_start + token.size()).to_string();
 }
 
+Result<bool> ParseBoolean(util::string_view value) {
+  if (AsciiEqualsCaseInsensitive(value, "true") || value == "1") {
+    return true;
+  } else if (AsciiEqualsCaseInsensitive(value, "false") || value == "0") {
+    return false;
+  } else {
+    return Status::Invalid("String is not a valid boolean value: '", value, "'.");
+  }
+}
+
 }  // namespace internal
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index 68b8a54e313..d2c8ac38eec 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <string>
 
+#include "arrow/result.h"
 #include "arrow/util/optional.h"
 #include "arrow/util/string_view.h"
 #include "arrow/util/visibility.h"
@@ -75,5 +76,12 @@
 ARROW_EXPORT
 util::optional<std::string> Replace(util::string_view s, util::string_view token,
                                     util::string_view replacement);
 
+/// \brief Get boolean value from string
+///
+/// If "1", "true" (case-insensitive), returns true
+/// If "0", "false" (case-insensitive), returns false
+/// Otherwise, returns Status::Invalid
+ARROW_EXPORT
+arrow::Result<bool> ParseBoolean(util::string_view value);
 }  // namespace internal
 }  // namespace arrow
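Note on accepted values: the URI-level parsing for the two new options goes through the `ParseBoolean` helper added above, so `true`/`false` in any letter case plus the literals `1`/`0` are accepted, and anything else is rejected as `Status::Invalid`. An illustrative Python equivalent of that rule (a sketch for readers, not the shipped C++):

```python
def parse_boolean(value: str) -> bool:
    """Mirrors arrow::internal::ParseBoolean from the hunk above."""
    if value.lower() == "true" or value == "1":
        return True
    if value.lower() == "false" or value == "0":
        return False
    raise ValueError(f"String is not a valid boolean value: '{value}'.")

# All of these are valid in an S3 URI query string:
assert all(parse_boolean(v) for v in ("true", "True", "TRUE", "1"))
assert not any(parse_boolean(v) for v in ("false", "False", "0"))
```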
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 4d919c8b9d6..c71acc47840 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -99,6 +99,12 @@ cdef class S3FileSystem(FileSystem):
    Note: S3 buckets are special and the operations available on them may be
    limited or more expensive than desired.

+    When S3FileSystem creates new buckets (assuming allow_bucket_creation is
+    True), it does not pass any non-default settings. In AWS S3, the bucket and
+    all objects will not be publicly visible, and will have no bucket policies
+    and no resource tags. To have more control over how buckets are created,
+    use a different API to create them.
+
    Parameters
    ----------
    access_key : str, default None
@@ -150,6 +156,12 @@ cdef class S3FileSystem(FileSystem):
        S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost',
                                    'port': 8020, 'username': 'username',
                                    'password': 'password'})
+    allow_bucket_creation : bool, default False
+        Whether to allow CreateDir at the bucket-level. This option may also be
+        passed in a URI query parameter.
+    allow_bucket_deletion : bool, default False
+        Whether to allow DeleteDir at the bucket-level. This option may also be
+        passed in a URI query parameter.
    """

    cdef:
@@ -159,7 +171,8 @@ cdef class S3FileSystem(FileSystem):
                 bint anonymous=False, region=None, scheme=None,
                 endpoint_override=None, bint background_writes=True,
                 default_metadata=None, role_arn=None, session_name=None,
-                 external_id=None, load_frequency=900, proxy_options=None):
+                 external_id=None, load_frequency=900, proxy_options=None,
+                 allow_bucket_creation=False, allow_bucket_deletion=False):
        cdef:
            CS3Options options
            shared_ptr[CS3FileSystem] wrapped
@@ -253,6 +266,9 @@ cdef class S3FileSystem(FileSystem):
                "'proxy_options': expected 'dict' or 'str', "
                f"got {type(proxy_options)} instead.")

+        options.allow_bucket_creation = allow_bucket_creation
+        options.allow_bucket_deletion = allow_bucket_deletion
+
        with nogil:
            wrapped = GetResultValue(CS3FileSystem.Make(options))

@@ -295,6 +311,8 @@ cdef class S3FileSystem(FileSystem):
                external_id=frombytes(opts.external_id),
                load_frequency=opts.load_frequency,
                background_writes=opts.background_writes,
+                allow_bucket_creation=opts.allow_bucket_creation,
+                allow_bucket_deletion=opts.allow_bucket_deletion,
                default_metadata=pyarrow_wrap_metadata(opts.default_metadata),
                proxy_options={'scheme': frombytes(opts.proxy_options.scheme),
                               'host': frombytes(opts.proxy_options.host),
@@ -302,7 +320,7 @@ cdef class S3FileSystem(FileSystem):
                               'port': opts.proxy_options.port,
                               'username': frombytes(
                                   opts.proxy_options.username),
                               'password': frombytes(
-                                   opts.proxy_options.password)}
+                                   opts.proxy_options.password)},
            ),)
        )
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index e491233e88f..1498e413094 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -155,6 +155,8 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
        c_string endpoint_override
        c_string scheme
        c_bool background_writes
+        c_bool allow_bucket_creation
+        c_bool allow_bucket_deletion
        shared_ptr[const CKeyValueMetadata] default_metadata
        c_string role_arn
        c_string session_name
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index d2210c4b6c1..8873918c012 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2495,6 +2495,7 @@ def s3_example_simple(s3_server):
    host, port, access_key, secret_key = s3_server['connection']
    uri = (
        "s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}"
+        "&allow_bucket_creation=True"
        .format(access_key, secret_key, host, port)
    )

@@ -2557,9 +2558,10 @@ def test_open_dataset_from_s3_with_filesystem_uri(s3_server):
    host, port, access_key, secret_key = s3_server['connection']
    bucket = 'theirbucket'
    path = 'nested/folder/data.parquet'
-    uri = "s3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}".format(
-        access_key, secret_key, bucket, path, host, port
-    )
+    uri = "s3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}"\
+          "&allow_bucket_creation=true".format(
+              access_key, secret_key, bucket, path, host, port
+          )

    fs, path = FileSystem.from_uri(uri)
    assert path == 'theirbucket/nested/folder/data.parquet'
@@ -4529,9 +4531,38 @@ def test_write_dataset_s3_put_only(s3_server):
    ).to_table()
    assert result.equals(table)

+    # Passing create_dir is fine if the bucket already exists
+    ds.write_dataset(
+        table, "existing-bucket", filesystem=fs,
+        format="feather", create_dir=True, partitioning=part,
+        existing_data_behavior='overwrite_or_ignore'
+    )
+    # check roundtrip
+    result = ds.dataset(
+        "existing-bucket", filesystem=fs, format="ipc", partitioning="hive"
+    ).to_table()
+    assert result.equals(table)
+
+    # Error enforced by filesystem
+    with pytest.raises(OSError,
+                       match="Bucket 'non-existing-bucket' not found"):
+        ds.write_dataset(
+            table, "non-existing-bucket", filesystem=fs,
+            format="feather", create_dir=True,
+            existing_data_behavior='overwrite_or_ignore'
+        )
+
+    # Error enforced by minio / S3 service
+    fs = S3FileSystem(
+        access_key='limited',
+        secret_key='limited123',
+        endpoint_override='{}:{}'.format(host, port),
+        scheme='http',
+        allow_bucket_creation=True,
+    )
    with pytest.raises(OSError, match="Access Denied"):
        ds.write_dataset(
-            table, "existing-bucket", filesystem=fs,
+            table, "non-existing-bucket", filesystem=fs,
            format="feather", create_dir=True,
            existing_data_behavior='overwrite_or_ignore'
        )
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 4fd72704a71..eb0617d350c 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -212,7 +212,9 @@ def s3fs(request, s3_server):
        access_key=access_key,
        secret_key=secret_key,
        endpoint_override='{}:{}'.format(host, port),
-        scheme='http'
+        scheme='http',
+        allow_bucket_creation=True,
+        allow_bucket_deletion=True
    )
    fs.create_dir(bucket)

@@ -443,6 +445,12 @@ def test_s3fs_limited_permissions_create_bucket(s3_server):
    )
    fs.create_dir('existing-bucket/test')

+    with pytest.raises(pa.ArrowIOError, match="Bucket 'new-bucket' not found"):
+        fs.create_dir('new-bucket')
+
+    with pytest.raises(pa.ArrowIOError, match="Would delete bucket"):
+        fs.delete_dir('existing-bucket')
+

def test_file_info_constructor():
    dt = datetime.fromtimestamp(1568799826, timezone.utc)
@@ -1036,6 +1044,10 @@ def test_s3_options():
    assert isinstance(fs, S3FileSystem)
    assert pickle.loads(pickle.dumps(fs)) == fs

+    fs = S3FileSystem(allow_bucket_creation=True, allow_bucket_deletion=True)
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
    with pytest.raises(ValueError):
        S3FileSystem(access_key='access')
    with pytest.raises(ValueError):
@@ -1308,8 +1320,9 @@ def test_filesystem_from_uri_s3(s3_server):
    host, port, access_key, secret_key = s3_server['connection']

-    uri = "s3://{}:{}@mybucket/foo/bar?scheme=http&endpoint_override={}:{}" \
-        .format(access_key, secret_key, host, port)
+    uri = "s3://{}:{}@mybucket/foo/bar?scheme=http&endpoint_override={}:{}"\
+        "&allow_bucket_creation=True" \
+        .format(access_key, secret_key, host, port)

    fs, path = FileSystem.from_uri(uri)
    assert isinstance(fs, S3FileSystem)
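The Python test changes above also exercise the second way to opt in: through the URI handed to `FileSystem.from_uri`. A condensed sketch of that path (the credentials and localhost:9000 endpoint are placeholders):

```python
from pyarrow.fs import FileSystem, S3FileSystem

uri = ("s3://access:secret@mybucket/foo/bar"
       "?scheme=http&endpoint_override=localhost:9000"
       "&allow_bucket_creation=true&allow_bucket_deletion=1")
fs, path = FileSystem.from_uri(uri)
assert isinstance(fs, S3FileSystem)
assert path == "mybucket/foo/bar"

# A value ParseBoolean cannot handle (e.g. "yes") raises ArrowInvalid:
# "String is not a valid boolean value: 'yes'."
```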
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 8ad56f227fa..4c579840e49 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1280,8 +1280,8 @@ fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_ba
  invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
}

-fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, proxy_options, background_writes) {
-  .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, proxy_options, background_writes)
+fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, proxy_options, background_writes, allow_bucket_creation, allow_bucket_deletion) {
+  .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, proxy_options, background_writes, allow_bucket_creation, allow_bucket_deletion)
}

fs___S3FileSystem__region <- function(fs) {
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index a6d845d4c91..b035430ff65 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -150,6 +150,10 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
#' - `scheme`: S3 connection transport (default "https")
#' - `background_writes`: logical, whether `OutputStream` writes will be issued
#'    in the background, without blocking (default `TRUE`)
+#' - `allow_bucket_creation`: logical, if TRUE, the filesystem will create
+#'    buckets if `$CreateDir()` is called on the bucket level (default `FALSE`).
+#' - `allow_bucket_deletion`: logical, if TRUE, the filesystem will delete
+#'    buckets if `$DeleteDir()` is called on the bucket level (default `FALSE`).
#'
#' @section Methods:
#'
@@ -189,6 +193,15 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F
#' - `$base_path`: for `SubTreeFileSystem`, the path in `$base_fs` which is considered
#'   root in this `SubTreeFileSystem`.
#'
+#' @section Notes:
+#'
+#' On S3FileSystem, `$CreateDir()` on a top-level directory creates a new bucket.
+#' When S3FileSystem creates new buckets (assuming allow_bucket_creation is TRUE),
+#' it does not pass any non-default settings. In AWS S3, the bucket and all
+#' objects will not be publicly visible, and will have no bucket policies
+#' and no resource tags. To have more control over how buckets are created,
+#' use a different API to create them.
+#'
#' @usage NULL
#' @format NULL
#' @docType class
@@ -338,7 +351,7 @@ S3FileSystem$create <- function(anonymous = FALSE, ...) {
  invalid_args <- intersect(
    c(
      "access_key", "secret_key", "session_token", "role_arn", "session_name",
-      "external_id", "load_frequency"
+      "external_id", "load_frequency", "allow_bucket_creation", "allow_bucket_deletion"
    ),
    names(args)
  )
@@ -383,7 +396,9 @@ default_s3_options <- list(
  endpoint_override = "",
  scheme = "",
  proxy_options = "",
-  background_writes = TRUE
+  background_writes = TRUE,
+  allow_bucket_creation = FALSE,
+  allow_bucket_deletion = FALSE
)

#' Connect to an AWS S3 bucket
diff --git a/r/man/FileSystem.Rd b/r/man/FileSystem.Rd
index 2f3dcff670b..1ed01644650 100644
--- a/r/man/FileSystem.Rd
+++ b/r/man/FileSystem.Rd
@@ -50,6 +50,10 @@ that emulate S3.
\item \code{scheme}: S3 connection transport (default "https")
\item \code{background_writes}: logical, whether \code{OutputStream} writes will be issued
in the background, without blocking (default \code{TRUE})
+\item \code{allow_bucket_creation}: logical, if TRUE, the filesystem will create
+buckets if \verb{$CreateDir()} is called on the bucket level (default \code{FALSE}).
+\item \code{allow_bucket_deletion}: logical, if TRUE, the filesystem will delete
+buckets if \verb{$DeleteDir()} is called on the bucket level (default \code{FALSE}).
}
}

@@ -97,3 +101,14 @@ root in this \code{SubTreeFileSystem}.
}
}

+\section{Notes}{
+
+On S3FileSystem, \verb{$CreateDir()} on a top-level directory creates a new bucket.
+When S3FileSystem creates new buckets (assuming allow_bucket_creation is TRUE),
+it does not pass any non-default settings. In AWS S3, the bucket and all
+objects will not be publicly visible, and will have no bucket policies
+and no resource tags. To have more control over how buckets are created,
+use a different API to create them.
+}
+
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 2e3cf0f6055..887327d48f9 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3180,8 +3180,8 @@ END_CPP11
}
// filesystem.cpp
#if defined(ARROW_R_WITH_S3)
-std::shared_ptr<fs::S3FileSystem> fs___S3FileSystem__create(bool anonymous, std::string access_key, std::string secret_key, std::string session_token, std::string role_arn, std::string session_name, std::string external_id, int load_frequency, std::string region, std::string endpoint_override, std::string scheme, std::string proxy_options, bool background_writes);
-extern "C" SEXP _arrow_fs___S3FileSystem__create(SEXP anonymous_sexp, SEXP access_key_sexp, SEXP secret_key_sexp, SEXP session_token_sexp, SEXP role_arn_sexp, SEXP session_name_sexp, SEXP external_id_sexp, SEXP load_frequency_sexp, SEXP region_sexp, SEXP endpoint_override_sexp, SEXP scheme_sexp, SEXP proxy_options_sexp, SEXP background_writes_sexp){
+std::shared_ptr<fs::S3FileSystem> fs___S3FileSystem__create(bool anonymous, std::string access_key, std::string secret_key, std::string session_token, std::string role_arn, std::string session_name, std::string external_id, int load_frequency, std::string region, std::string endpoint_override, std::string scheme, std::string proxy_options, bool background_writes, bool allow_bucket_creation, bool allow_bucket_deletion);
+extern "C" SEXP _arrow_fs___S3FileSystem__create(SEXP anonymous_sexp, SEXP access_key_sexp, SEXP secret_key_sexp, SEXP session_token_sexp, SEXP role_arn_sexp, SEXP session_name_sexp, SEXP external_id_sexp, SEXP load_frequency_sexp, SEXP region_sexp, SEXP endpoint_override_sexp, SEXP scheme_sexp, SEXP proxy_options_sexp, SEXP background_writes_sexp, SEXP allow_bucket_creation_sexp, SEXP allow_bucket_deletion_sexp){
BEGIN_CPP11
	arrow::r::Input<bool>::type anonymous(anonymous_sexp);
	arrow::r::Input<std::string>::type access_key(access_key_sexp);
@@ -3196,11 +3196,13 @@ BEGIN_CPP11
	arrow::r::Input<std::string>::type scheme(scheme_sexp);
	arrow::r::Input<std::string>::type proxy_options(proxy_options_sexp);
	arrow::r::Input<bool>::type background_writes(background_writes_sexp);
-	return cpp11::as_sexp(fs___S3FileSystem__create(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, proxy_options, background_writes));
+	arrow::r::Input<bool>::type allow_bucket_creation(allow_bucket_creation_sexp);
+	arrow::r::Input<bool>::type allow_bucket_deletion(allow_bucket_deletion_sexp);
+	return cpp11::as_sexp(fs___S3FileSystem__create(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, proxy_options, background_writes, allow_bucket_creation, allow_bucket_deletion));
END_CPP11
}
#else
-extern "C" SEXP _arrow_fs___S3FileSystem__create(SEXP anonymous_sexp, SEXP access_key_sexp, SEXP secret_key_sexp, SEXP session_token_sexp, SEXP role_arn_sexp, SEXP session_name_sexp, SEXP external_id_sexp, SEXP load_frequency_sexp, SEXP region_sexp, SEXP endpoint_override_sexp, SEXP scheme_sexp, SEXP proxy_options_sexp, SEXP background_writes_sexp){
+extern "C" SEXP _arrow_fs___S3FileSystem__create(SEXP anonymous_sexp, SEXP access_key_sexp, SEXP secret_key_sexp, SEXP session_token_sexp, SEXP role_arn_sexp, SEXP session_name_sexp, SEXP external_id_sexp, SEXP load_frequency_sexp, SEXP region_sexp, SEXP endpoint_override_sexp, SEXP scheme_sexp, SEXP proxy_options_sexp, SEXP background_writes_sexp, SEXP allow_bucket_creation_sexp, SEXP allow_bucket_deletion_sexp){
"); } #endif @@ -5432,7 +5434,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, - { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 13}, + { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 15}, { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp index c28f52631a2..bcafef34e41 100644 --- a/r/src/filesystem.cpp +++ b/r/src/filesystem.cpp @@ -282,7 +282,8 @@ std::shared_ptr fs___S3FileSystem__create( std::string session_token = "", std::string role_arn = "", std::string session_name = "", std::string external_id = "", int load_frequency = 900, std::string region = "", std::string endpoint_override = "", std::string scheme = "", - std::string proxy_options = "", bool background_writes = true) { + std::string proxy_options = "", bool background_writes = true, + bool allow_bucket_creation = false, bool allow_bucket_deletion = false) { // We need to ensure that S3 is initialized before we start messing with the // options StopIfNotOk(fs::EnsureS3Initialized()); @@ -321,6 +322,9 @@ std::shared_ptr fs___S3FileSystem__create( /// default true s3_opts.background_writes = background_writes; + s3_opts.allow_bucket_creation = allow_bucket_creation; + s3_opts.allow_bucket_deletion = allow_bucket_deletion; + auto io_context = arrow::io::IOContext(gc_memory_pool()); return ValueOrStop(fs::S3FileSystem::Make(s3_opts, io_context)); } diff --git a/r/tests/testthat/test-s3-minio.R b/r/tests/testthat/test-s3-minio.R index 51db355783b..ad11d04d5e9 100644 --- a/r/tests/testthat/test-s3-minio.R +++ b/r/tests/testthat/test-s3-minio.R @@ -34,7 +34,17 @@ if (arrow_with_s3() && process_is_running("minio server")) { access_key = minio_key, secret_key = minio_secret, scheme = "http", - endpoint_override = paste0("localhost:", minio_port) + endpoint_override = paste0("localhost:", minio_port), + allow_bucket_creation = TRUE, + allow_bucket_deletion = TRUE + ) + limited_fs <- S3FileSystem$create( + access_key = minio_key, + secret_key = minio_secret, + scheme = "http", + endpoint_override = paste0("localhost:", minio_port), + allow_bucket_creation = FALSE, + allow_bucket_deletion = FALSE ) now <- as.character(as.numeric(Sys.time())) fs$CreateDir(now) @@ -180,6 +190,14 @@ if (arrow_with_s3() && process_is_running("minio server")) { expect_length(fs$ls(minio_path("new_dataset_dir")), 1) }) + test_that("CreateDir fails on bucket if allow_bucket_creation=False", { + now_tmp <- paste0(now, "-test-fail-delete") + fs$CreateDir(now_tmp) + + expect_error(limited_fs$CreateDir("should-fail")) + expect_error(limited_fs$DeleteDir(now_tmp)) + }) + test_that("Let's test copy_files too", { td <- make_temp_dir() copy_files(minio_uri("hive_dir"), td)