From c594032fe15ae64622ce37157a6e3b574c5aceba Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Thu, 30 Sep 2021 13:13:22 +0000 Subject: [PATCH 01/10] ARROW-14236: [C++] add GCS testbench for testing --- ci/docker/conda.dockerfile | 2 + ci/docker/debian-10-cpp.dockerfile | 3 + ci/docker/debian-11-cpp.dockerfile | 3 + ci/docker/fedora-33-cpp.dockerfile | 3 + ci/docker/linux-apt-r.dockerfile | 2 + ci/docker/linux-r.dockerfile | 1 + ci/docker/ubuntu-20.04-cpp.dockerfile | 3 + ci/docker/ubuntu-20.10-cpp.dockerfile | 3 + ci/docker/ubuntu-21.04-cpp.dockerfile | 3 + ci/scripts/install_gcs_testbench.sh | 32 ++++++++ ci/scripts/r_docker_configure.sh | 4 + cpp/src/arrow/filesystem/CMakeLists.txt | 1 - cpp/src/arrow/filesystem/gcsfs.cc | 103 +++++++++++++++++++++++- cpp/src/arrow/filesystem/gcsfs_test.cc | 57 +++++++++++++ cpp/vcpkg.json | 1 + docs/source/developers/docker.rst | 1 + 16 files changed, 220 insertions(+), 2 deletions(-) create mode 100755 ci/scripts/install_gcs_testbench.sh diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile index 2e773b5437e..a27b90bfbfc 100644 --- a/ci/docker/conda.dockerfile +++ b/ci/docker/conda.dockerfile @@ -33,9 +33,11 @@ ENV PATH=${prefix}/bin:$PATH # install conda and minio COPY ci/scripts/install_conda.sh \ ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix} RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix} +RUN /arrow/ci/scripts/install_gcs_testbench.sh default # create a conda environment ADD ci/conda_env_unix.txt /arrow/ci/ diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index d99a2c161bd..459c06710e4 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -65,6 +65,7 @@ RUN apt-get update -y -q && \ ninja-build \ pkg-config \ protobuf-compiler \ + python3-pip \ rapidjson-dev \ tzdata \ zlib1g-dev && \ @@ -72,8 +73,10 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists/* COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile index 96c1a1738f7..750be3422bd 100644 --- a/ci/docker/debian-11-cpp.dockerfile +++ b/ci/docker/debian-11-cpp.dockerfile @@ -63,6 +63,7 @@ RUN apt-get update -y -q && \ ninja-build \ pkg-config \ protobuf-compiler-grpc \ + python3-pip \ rapidjson-dev \ tzdata \ zlib1g-dev && \ @@ -70,8 +71,10 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists/* COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ diff --git a/ci/docker/fedora-33-cpp.dockerfile b/ci/docker/fedora-33-cpp.dockerfile index 9dde6999510..faf7d407ea1 100644 --- a/ci/docker/fedora-33-cpp.dockerfile +++ b/ci/docker/fedora-33-cpp.dockerfile @@ -53,6 +53,7 @@ RUN dnf update -y && \ openssl-devel \ protobuf-devel \ python \ + python-pip \ rapidjson-devel \ re2-devel \ snappy-devel \ @@ -63,8 +64,10 @@ RUN dnf update -y && \ zlib-devel COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 3a84a11c9b6..4c192cb8309 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -86,8 +86,10 @@ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Set up Python 3 and its dependencies RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index a501d69955c..568b90c227f 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -40,6 +40,7 @@ ENV PATH "${RPREFIX}/bin:${PATH}" COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ COPY ci/etc/rprofile /arrow/ci/etc/ COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/r_docker_configure.sh COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 35658b28a86..55568ec264f 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -88,6 +88,7 @@ RUN apt-get update -y -q && \ ninja-build \ pkg-config \ protobuf-compiler \ + python3-pip \ rapidjson-dev \ tzdata \ wget && \ @@ -95,8 +96,10 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile index 6cefecfd678..680f2abe16b 100644 --- a/ci/docker/ubuntu-20.10-cpp.dockerfile +++ b/ci/docker/ubuntu-20.10-cpp.dockerfile @@ -90,6 +90,7 @@ RUN apt-get update -y -q && \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ + python3-pip \ rapidjson-dev \ tzdata \ wget && \ @@ -97,8 +98,10 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile index 18c377811bc..5e91440eb6f 100644 --- a/ci/docker/ubuntu-21.04-cpp.dockerfile +++ b/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -88,6 +88,7 @@ RUN apt-get update -y -q && \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ + python3-pip \ rapidjson-dev \ tzdata \ wget && \ @@ -95,8 +96,10 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh \ + ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh new file mode 100755 index 00000000000..40d3178b4a2 --- /dev/null +++ b/ci/scripts/install_gcs_testbench.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +version=$1 +if [[ "${version}" -eq "default" ]]; then + version="v0.7.0" +fi + +pip install "git+https://github.com/googleapis/storage-testbench@${version}" diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index d138d030eca..34a9eb84726 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -71,6 +71,10 @@ if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then if [ -f "/arrow/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local fi + + if [ -f "/arrow/ci/scripts/install_gcs_testbench.sh"] && [ "`which pip`" ]; then + /arrow/ci/scripts/install_gcs_testbench.sh default + fi fi # Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786 diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index 67ebe54895c..c6e50ed59c6 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -31,7 +31,6 @@ add_arrow_test(filesystem-test if(ARROW_GCS) add_arrow_test(gcsfs_test EXTRA_LABELS filesystem) endif() - if(ARROW_S3) add_arrow_test(s3fs_test EXTRA_LABELS filesystem) if(TARGET arrow-s3fs-test) diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index 58bbbbfd06c..5eb57c14056 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -27,6 +27,39 @@ namespace arrow { namespace fs { +namespace { + +auto constexpr kSep = '/'; + +struct GcsPath { + std::string full_path; + std::string bucket; + std::string object; + + static Result FromString(const std::string& s) { + const auto src = internal::RemoveTrailingSlash(s); + auto const first_sep = src.find_first_of(kSep); + if (first_sep == 0) { + return Status::Invalid("Path cannot start with a separator ('", s, "')"); + } + if (first_sep == std::string::npos) { + return GcsPath{std::string(src), std::string(src), ""}; + } + GcsPath path; + path.full_path = std::string(src); + path.bucket = std::string(src.substr(0, first_sep)); + path.object = std::string(src.substr(first_sep + 1)); + return path; + } + + bool empty() const { return bucket.empty() && object.empty(); } + + bool operator==(const GcsPath& other) const { + return bucket == other.bucket && object == other.object; + } +}; + +} // namespace namespace gcs = google::cloud::storage; @@ -35,11 +68,56 @@ google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) { if (!o.endpoint_override.empty()) { std::string scheme = o.scheme; if (scheme.empty()) scheme = "https"; + if (scheme == "https") { + options.set( + google::cloud::MakeGoogleDefaultCredentials()); + } else { + options.set( + google::cloud::MakeInsecureCredentials()); + } options.set(scheme + "://" + o.endpoint_override); } return options; } +Status ToArrowStatus(google::cloud::Status const& s) { + std::ostringstream os; + os << "google::cloud::Status(" << s << ")"; + switch (s.code()) { + case google::cloud::StatusCode::kOk: + break; + case google::cloud::StatusCode::kCancelled: + return Status::Cancelled(os.str()); + case google::cloud::StatusCode::kUnknown: + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kInvalidArgument: + return Status::Invalid(os.str()); + case google::cloud::StatusCode::kDeadlineExceeded: + case google::cloud::StatusCode::kNotFound: + // TODO: it is unclear if a better mapping would be possible. + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kAlreadyExists: + return Status::AlreadyExists(os.str()); + case google::cloud::StatusCode::kPermissionDenied: + case google::cloud::StatusCode::kUnauthenticated: + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kResourceExhausted: + return Status::CapacityError(os.str()); + case google::cloud::StatusCode::kFailedPrecondition: + case google::cloud::StatusCode::kAborted: + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kOutOfRange: + return Status::Invalid(os.str()); + case google::cloud::StatusCode::kUnimplemented: + return Status::NotImplemented(os.str()); + case google::cloud::StatusCode::kInternal: + case google::cloud::StatusCode::kUnavailable: + case google::cloud::StatusCode::kDataLoss: + return Status::UnknownError(os.str()); + } + return Status::OK(); +} + class GcsFileSystem::Impl { public: explicit Impl(GcsOptions o) @@ -47,7 +125,28 @@ class GcsFileSystem::Impl { GcsOptions const& options() const { return options_; } + Result GetFileInfo(GcsPath const& path) { + if (!path.object.empty()) { + auto meta = client_.GetObjectMetadata(path.bucket, path.object); + return GetFileInfoImpl(path, std::move(meta).status(), FileType::File); + } + auto meta = client_.GetBucketMetadata(path.bucket); + return GetFileInfoImpl(path, std::move(meta).status(), FileType::Directory); + } + private: + Result GetFileInfoImpl(GcsPath const& path, google::cloud::Status status, + FileType type) { + if (status.ok()) { + return FileInfo(path.full_path, FileType::File); + } + using ::google::cloud::StatusCode; + if (status.code() == StatusCode::kNotFound) { + return FileInfo(path.full_path, FileType::NotFound); + } + return ToArrowStatus(status); + } + GcsOptions options_; gcs::Client client_; }; @@ -70,7 +169,9 @@ bool GcsFileSystem::Equals(const FileSystem& other) const { } Result GcsFileSystem::GetFileInfo(const std::string& path) { - return Status::NotImplemented("The GCS FileSystem is not fully implemented"); + auto p = GcsPath::FromString(path); + if (!p.ok()) return std::move(p).status(); + return impl_->GetFileInfo(*p); } Result GcsFileSystem::GetFileInfo(const FileSelector& select) { diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 5d8c7b5a40a..7b2317a72a4 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -19,8 +19,12 @@ #include #include +#include +#include +#include #include +#include #include #include "arrow/testing/gtest_util.h" @@ -30,10 +34,58 @@ namespace arrow { namespace fs { namespace { +namespace bp = boost::process; +namespace gc = google::cloud; +namespace gcs = google::cloud::storage; + using ::testing::IsEmpty; using ::testing::Not; using ::testing::NotNull; +auto const* kPreexistingBucket = "test-bucket-name"; + +class GcsIntegrationTest : public ::testing::Test { + public: + ~GcsIntegrationTest() override { + if (server_process_.valid()) { + // Brutal shutdown + server_process_.terminate(); + server_process_.wait(); + } + } + + protected: + void SetUp() override { + port_ = std::to_string(GetListenPort()); + auto exe_path = bp::search_path("python3"); + ASSERT_THAT(exe_path, Not(IsEmpty())); + + server_process_ = bp::child(boost::this_process::environment(), exe_path, "-m", + "testbench", "--port", port_); + + // Create a bucket in the testbench so additional + auto client = gcs::Client( + google::cloud::Options{} + .set("http://127.0.0.1:" + port_) + .set(gc::MakeInsecureCredentials())); + auto metadata = client.CreateBucketForProject( + kPreexistingBucket, "ignored-by-testbench", gcs::BucketMetadata{}); + EXPECT_TRUE(metadata.ok()) << "Failed to create bucket <" << kPreexistingBucket + << ">, status=" << metadata.status(); + } + + GcsOptions TestGcsOptions() { + GcsOptions options; + options.endpoint_override = "127.0.0.1:" + port_; + options.scheme = "http"; + return options; + } + + private: + std::string port_; + bp::child server_process_; +}; + TEST(GcsFileSystem, OptionsCompare) { GcsOptions a; GcsOptions b; @@ -59,6 +111,11 @@ TEST(GcsFileSystem, FileSystemCompare) { EXPECT_FALSE(a->Equals(*b)); } +TEST_F(GcsIntegrationTest, MakeBucket) { + auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions()); + ASSERT_OK(fs->GetFileInfo(kPreexistingBucket)); +} + } // namespace } // namespace fs } // namespace arrow diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 723f3a46e78..0ad6900f002 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -17,6 +17,7 @@ "benchmark", "boost-filesystem", "boost-multiprecision", + "boost-process", "boost-system", "brotli", "bzip2", diff --git a/docs/source/developers/docker.rst b/docs/source/developers/docker.rst index eaabad90df1..36b4687526e 100644 --- a/docs/source/developers/docker.rst +++ b/docs/source/developers/docker.rst @@ -210,6 +210,7 @@ responsible for. Like: - ``integration_pandas.sh``: execute the pandas integration tests. - ``install_minio.sh``: install minio server for multiple platforms. - ``install_conda.sh``: install miniconda for multiple platforms. +- ``install_gcs_testbench.sh``: install the GCS testbench for multiple platforms. The parametrization (like the C++ CMake options) is achieved via environment variables with useful defaults to keep the build configurations declarative. From 085832bd9e30e27fbeb0baf920424e35496d9f8f Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Sat, 9 Oct 2021 19:19:52 +0000 Subject: [PATCH 02/10] Fix builds where git(1) is not installed ci --- ci/scripts/install_gcs_testbench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 40d3178b4a2..d7a44906ebb 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -29,4 +29,4 @@ if [[ "${version}" -eq "default" ]]; then version="v0.7.0" fi -pip install "git+https://github.com/googleapis/storage-testbench@${version}" +pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" From fee487bb80fab6f4215bdad1e9100195e6057abd Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Mon, 11 Oct 2021 13:46:47 +0000 Subject: [PATCH 03/10] Address review comments --- ci/docker/debian-10-cpp.dockerfile | 3 +- ci/docker/debian-11-cpp.dockerfile | 3 +- ci/docker/fedora-33-cpp.dockerfile | 3 +- ci/docker/linux-apt-r.dockerfile | 3 +- ci/docker/ubuntu-20.04-cpp.dockerfile | 3 +- ci/docker/ubuntu-20.10-cpp.dockerfile | 3 +- ci/docker/ubuntu-21.04-cpp.dockerfile | 3 +- cpp/src/arrow/CMakeLists.txt | 4 +- cpp/src/arrow/filesystem/CMakeLists.txt | 1 + cpp/src/arrow/filesystem/gcsfs.cc | 54 +++-------------- cpp/src/arrow/filesystem/gcsfs_internal.cc | 68 ++++++++++++++++++++++ cpp/src/arrow/filesystem/gcsfs_internal.h | 36 ++++++++++++ cpp/src/arrow/filesystem/gcsfs_test.cc | 48 ++++++++++++++- 13 files changed, 175 insertions(+), 57 deletions(-) create mode 100644 cpp/src/arrow/filesystem/gcsfs_internal.cc create mode 100644 cpp/src/arrow/filesystem/gcsfs_internal.h diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index 459c06710e4..85206fb3478 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -73,7 +73,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists/* COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile index 750be3422bd..8874d5a16d3 100644 --- a/ci/docker/debian-11-cpp.dockerfile +++ b/ci/docker/debian-11-cpp.dockerfile @@ -71,7 +71,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists/* COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/fedora-33-cpp.dockerfile b/ci/docker/fedora-33-cpp.dockerfile index faf7d407ea1..0487ce52087 100644 --- a/ci/docker/fedora-33-cpp.dockerfile +++ b/ci/docker/fedora-33-cpp.dockerfile @@ -64,7 +64,8 @@ RUN dnf update -y && \ zlib-devel COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 4c192cb8309..89d3edfdc8c 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -86,7 +86,8 @@ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 55568ec264f..59f134f4318 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -96,7 +96,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile index 680f2abe16b..733c22d13b6 100644 --- a/ci/docker/ubuntu-20.10-cpp.dockerfile +++ b/ci/docker/ubuntu-20.10-cpp.dockerfile @@ -98,7 +98,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile index 5e91440eb6f..fc1fee95cef 100644 --- a/ci/docker/ubuntu-21.04-cpp.dockerfile +++ b/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -96,7 +96,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ + /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index b82cccf24df..d7e433f4844 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -452,8 +452,8 @@ if(ARROW_FILESYSTEM) filesystem/util_internal.cc) if(ARROW_GCS) - list(APPEND ARROW_SRCS filesystem/gcsfs.cc) - set_source_files_properties(filesystem/gcsfs.cc + list(APPEND ARROW_SRCS filesystem/gcsfs.cc filesystem/gcsfs_internal.cc) + set_source_files_properties(filesystem/gcsfs.cc filesystem/gcsfs_internal.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) endif() diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index c6e50ed59c6..67ebe54895c 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -31,6 +31,7 @@ add_arrow_test(filesystem-test if(ARROW_GCS) add_arrow_test(gcsfs_test EXTRA_LABELS filesystem) endif() + if(ARROW_S3) add_arrow_test(s3fs_test EXTRA_LABELS filesystem) if(TARGET arrow-s3fs-test) diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index 5eb57c14056..849fd3aefa4 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -19,8 +19,7 @@ #include -#include - +#include "arrow/filesystem/gcsfs_internal.h" #include "arrow/filesystem/path_util.h" #include "arrow/result.h" #include "arrow/util/checked_cast.h" @@ -80,52 +79,14 @@ google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) { return options; } -Status ToArrowStatus(google::cloud::Status const& s) { - std::ostringstream os; - os << "google::cloud::Status(" << s << ")"; - switch (s.code()) { - case google::cloud::StatusCode::kOk: - break; - case google::cloud::StatusCode::kCancelled: - return Status::Cancelled(os.str()); - case google::cloud::StatusCode::kUnknown: - return Status::UnknownError(os.str()); - case google::cloud::StatusCode::kInvalidArgument: - return Status::Invalid(os.str()); - case google::cloud::StatusCode::kDeadlineExceeded: - case google::cloud::StatusCode::kNotFound: - // TODO: it is unclear if a better mapping would be possible. - return Status::UnknownError(os.str()); - case google::cloud::StatusCode::kAlreadyExists: - return Status::AlreadyExists(os.str()); - case google::cloud::StatusCode::kPermissionDenied: - case google::cloud::StatusCode::kUnauthenticated: - return Status::UnknownError(os.str()); - case google::cloud::StatusCode::kResourceExhausted: - return Status::CapacityError(os.str()); - case google::cloud::StatusCode::kFailedPrecondition: - case google::cloud::StatusCode::kAborted: - return Status::UnknownError(os.str()); - case google::cloud::StatusCode::kOutOfRange: - return Status::Invalid(os.str()); - case google::cloud::StatusCode::kUnimplemented: - return Status::NotImplemented(os.str()); - case google::cloud::StatusCode::kInternal: - case google::cloud::StatusCode::kUnavailable: - case google::cloud::StatusCode::kDataLoss: - return Status::UnknownError(os.str()); - } - return Status::OK(); -} - class GcsFileSystem::Impl { public: explicit Impl(GcsOptions o) : options_(std::move(o)), client_(AsGoogleCloudOptions(options_)) {} - GcsOptions const& options() const { return options_; } + const GcsOptions& options() const { return options_; } - Result GetFileInfo(GcsPath const& path) { + Result GetFileInfo(const GcsPath& path) { if (!path.object.empty()) { auto meta = client_.GetObjectMetadata(path.bucket, path.object); return GetFileInfoImpl(path, std::move(meta).status(), FileType::File); @@ -135,16 +96,17 @@ class GcsFileSystem::Impl { } private: - Result GetFileInfoImpl(GcsPath const& path, google::cloud::Status status, - FileType type) { + static Result GetFileInfoImpl(const GcsPath& path, + const google::cloud::Status& status, + FileType type) { if (status.ok()) { - return FileInfo(path.full_path, FileType::File); + return FileInfo(path.full_path, type); } using ::google::cloud::StatusCode; if (status.code() == StatusCode::kNotFound) { return FileInfo(path.full_path, FileType::NotFound); } - return ToArrowStatus(status); + return internal::ToArrowStatus(status); } GcsOptions options_; diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.cc b/cpp/src/arrow/filesystem/gcsfs_internal.cc new file mode 100644 index 00000000000..6204ceccd9b --- /dev/null +++ b/cpp/src/arrow/filesystem/gcsfs_internal.cc @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/filesystem/gcsfs_internal.h" + +#include + +#include + +namespace arrow { +namespace fs { +namespace internal { + +Status ToArrowStatus(const google::cloud::Status& s) { + std::ostringstream os; + os << "google::cloud::Status(" << s << ")"; + switch (s.code()) { + case google::cloud::StatusCode::kOk: + break; + case google::cloud::StatusCode::kCancelled: + return Status::Cancelled(os.str()); + case google::cloud::StatusCode::kUnknown: + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kInvalidArgument: + return Status::Invalid(os.str()); + case google::cloud::StatusCode::kDeadlineExceeded: + case google::cloud::StatusCode::kNotFound: + // TODO: it is unclear if a better mapping would be possible. + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kAlreadyExists: + return Status::AlreadyExists(os.str()); + case google::cloud::StatusCode::kPermissionDenied: + case google::cloud::StatusCode::kUnauthenticated: + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kResourceExhausted: + return Status::CapacityError(os.str()); + case google::cloud::StatusCode::kFailedPrecondition: + case google::cloud::StatusCode::kAborted: + return Status::UnknownError(os.str()); + case google::cloud::StatusCode::kOutOfRange: + return Status::Invalid(os.str()); + case google::cloud::StatusCode::kUnimplemented: + return Status::NotImplemented(os.str()); + case google::cloud::StatusCode::kInternal: + case google::cloud::StatusCode::kUnavailable: + case google::cloud::StatusCode::kDataLoss: + return Status::UnknownError(os.str()); + } + return Status::OK(); +} + +} // namespace internal +} // namespace fs +} // namespace arrow diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.h b/cpp/src/arrow/filesystem/gcsfs_internal.h new file mode 100644 index 00000000000..8d568701ed5 --- /dev/null +++ b/cpp/src/arrow/filesystem/gcsfs_internal.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include + +#include "arrow/filesystem/filesystem.h" + +namespace arrow { +namespace fs { +namespace internal { + +Status ToArrowStatus(const google::cloud::Status& s); + +} // namespace internal +} // namespace fs +} // namespace arrow diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 7b2317a72a4..573aa1fa3eb 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -27,6 +27,7 @@ #include #include +#include "arrow/filesystem/gcsfs_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" @@ -38,6 +39,7 @@ namespace bp = boost::process; namespace gc = google::cloud; namespace gcs = google::cloud::storage; +using ::testing::HasSubstr; using ::testing::IsEmpty; using ::testing::Not; using ::testing::NotNull; @@ -68,9 +70,9 @@ class GcsIntegrationTest : public ::testing::Test { google::cloud::Options{} .set("http://127.0.0.1:" + port_) .set(gc::MakeInsecureCredentials())); - auto metadata = client.CreateBucketForProject( + google::cloud::StatusOr metadata = client.CreateBucketForProject( kPreexistingBucket, "ignored-by-testbench", gcs::BucketMetadata{}); - EXPECT_TRUE(metadata.ok()) << "Failed to create bucket <" << kPreexistingBucket + ASSERT_TRUE(metadata.ok()) << "Failed to create bucket <" << kPreexistingBucket << ">, status=" << metadata.status(); } @@ -97,6 +99,48 @@ TEST(GcsFileSystem, OptionsCompare) { EXPECT_FALSE(b.Equals(c)); } +TEST(GcsFileSystem, ToArrowStatusOK) { + Status actual = internal::ToArrowStatus(google::cloud::Status()); + EXPECT_TRUE(actual.ok()); +} + +TEST(GcsFileSystem, ToArrowStatus) { + struct { + google::cloud::StatusCode input; + arrow::StatusCode expected; + } cases[] = { + {google::cloud::StatusCode::kCancelled, StatusCode::Cancelled}, + {google::cloud::StatusCode::kUnknown, StatusCode::UnknownError}, + {google::cloud::StatusCode::kInvalidArgument, StatusCode::Invalid}, + {google::cloud::StatusCode::kDeadlineExceeded, StatusCode::UnknownError}, + {google::cloud::StatusCode::kNotFound, StatusCode::UnknownError}, + {google::cloud::StatusCode::kAlreadyExists, StatusCode::AlreadyExists}, + {google::cloud::StatusCode::kPermissionDenied, StatusCode::UnknownError}, + {google::cloud::StatusCode::kUnauthenticated, StatusCode::UnknownError}, + {google::cloud::StatusCode::kResourceExhausted, StatusCode::CapacityError}, + {google::cloud::StatusCode::kFailedPrecondition, StatusCode::UnknownError}, + {google::cloud::StatusCode::kAborted, StatusCode::UnknownError}, + {google::cloud::StatusCode::kOutOfRange, StatusCode::Invalid}, + {google::cloud::StatusCode::kUnimplemented, StatusCode::NotImplemented}, + {google::cloud::StatusCode::kInternal, StatusCode::UnknownError}, + {google::cloud::StatusCode::kUnavailable, StatusCode::UnknownError}, + {google::cloud::StatusCode::kDataLoss, StatusCode::UnknownError}, + }; + + for (const auto& test : cases) { + auto status = google::cloud::Status(test.input, "test-message"); + auto message = [&] { + std::ostringstream os; + os << status; + return os.str(); + }(); + SCOPED_TRACE("Testing with status=" + message); + const auto actual = arrow::fs::internal::ToArrowStatus(status); + EXPECT_EQ(actual.code(), test.expected); + EXPECT_THAT(actual.message(), HasSubstr(message)); + } +} + TEST(GcsFileSystem, FileSystemCompare) { auto a = internal::MakeGcsFileSystemForTest(GcsOptions{}); EXPECT_THAT(a, NotNull()); From df02bfed88df991d53f704fa48f0bf2ee8ac5e9d Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Mon, 11 Oct 2021 21:03:56 +0000 Subject: [PATCH 04/10] Address review comments --- cpp/vcpkg.json | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 0ad6900f002..723f3a46e78 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -17,7 +17,6 @@ "benchmark", "boost-filesystem", "boost-multiprecision", - "boost-process", "boost-system", "brotli", "bzip2", From 8f183065407cc85d8d09c62d924350cf26617d6c Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 12 Oct 2021 14:09:21 +0000 Subject: [PATCH 05/10] Address review comments --- ci/docker/debian-10-cpp.dockerfile | 6 ++---- ci/docker/debian-11-cpp.dockerfile | 6 ++---- ci/docker/fedora-33-cpp.dockerfile | 6 ++---- ci/docker/linux-apt-r.dockerfile | 6 ++---- ci/docker/ubuntu-20.04-cpp.dockerfile | 6 ++---- ci/docker/ubuntu-20.10-cpp.dockerfile | 6 ++---- ci/docker/ubuntu-21.04-cpp.dockerfile | 6 ++---- 7 files changed, 14 insertions(+), 28 deletions(-) diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index 85206fb3478..107b44c692e 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -72,10 +72,8 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile index 8874d5a16d3..b8b8c0d463c 100644 --- a/ci/docker/debian-11-cpp.dockerfile +++ b/ci/docker/debian-11-cpp.dockerfile @@ -70,10 +70,8 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/fedora-33-cpp.dockerfile b/ci/docker/fedora-33-cpp.dockerfile index 0487ce52087..b9085a470b0 100644 --- a/ci/docker/fedora-33-cpp.dockerfile +++ b/ci/docker/fedora-33-cpp.dockerfile @@ -63,10 +63,8 @@ RUN dnf update -y && \ which \ zlib-devel -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 89d3edfdc8c..afe330594b9 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -85,10 +85,8 @@ COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 59f134f4318..e9b8266d2b9 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -95,10 +95,8 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists* -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile index 733c22d13b6..fbfcf306769 100644 --- a/ci/docker/ubuntu-20.10-cpp.dockerfile +++ b/ci/docker/ubuntu-20.10-cpp.dockerfile @@ -97,10 +97,8 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists* -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile index fc1fee95cef..0d9d9e504f9 100644 --- a/ci/docker/ubuntu-21.04-cpp.dockerfile +++ b/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -95,10 +95,8 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists* -COPY ci/scripts/install_minio.sh \ - /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh \ - /arrow/ci/scripts/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local RUN /arrow/ci/scripts/install_gcs_testbench.sh default From 41ab3d70c1dd239a4bd7046ba4133b0363714bdc Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 12 Oct 2021 14:20:38 +0000 Subject: [PATCH 06/10] Address review comments --- cpp/src/arrow/filesystem/gcsfs.cc | 5 ++--- cpp/src/arrow/filesystem/gcsfs_internal.cc | 7 ++++--- cpp/src/arrow/filesystem/gcsfs_test.cc | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index 849fd3aefa4..ff911d02ab3 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -131,9 +131,8 @@ bool GcsFileSystem::Equals(const FileSystem& other) const { } Result GcsFileSystem::GetFileInfo(const std::string& path) { - auto p = GcsPath::FromString(path); - if (!p.ok()) return std::move(p).status(); - return impl_->GetFileInfo(*p); + ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(path)); + return impl_->GetFileInfo(p); } Result GcsFileSystem::GetFileInfo(const FileSelector& select) { diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.cc b/cpp/src/arrow/filesystem/gcsfs_internal.cc index 6204ceccd9b..22df5cebf67 100644 --- a/cpp/src/arrow/filesystem/gcsfs_internal.cc +++ b/cpp/src/arrow/filesystem/gcsfs_internal.cc @@ -38,6 +38,7 @@ Status ToArrowStatus(const google::cloud::Status& s) { case google::cloud::StatusCode::kInvalidArgument: return Status::Invalid(os.str()); case google::cloud::StatusCode::kDeadlineExceeded: + return Status::IOError(os.str()); case google::cloud::StatusCode::kNotFound: // TODO: it is unclear if a better mapping would be possible. return Status::UnknownError(os.str()); @@ -45,12 +46,12 @@ Status ToArrowStatus(const google::cloud::Status& s) { return Status::AlreadyExists(os.str()); case google::cloud::StatusCode::kPermissionDenied: case google::cloud::StatusCode::kUnauthenticated: - return Status::UnknownError(os.str()); + return Status::IOError(os.str()); case google::cloud::StatusCode::kResourceExhausted: return Status::CapacityError(os.str()); case google::cloud::StatusCode::kFailedPrecondition: case google::cloud::StatusCode::kAborted: - return Status::UnknownError(os.str()); + return Status::IOError(os.str()); case google::cloud::StatusCode::kOutOfRange: return Status::Invalid(os.str()); case google::cloud::StatusCode::kUnimplemented: @@ -58,7 +59,7 @@ Status ToArrowStatus(const google::cloud::Status& s) { case google::cloud::StatusCode::kInternal: case google::cloud::StatusCode::kUnavailable: case google::cloud::StatusCode::kDataLoss: - return Status::UnknownError(os.str()); + return Status::IOError(os.str()); } return Status::OK(); } diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 573aa1fa3eb..cfbba8cfc45 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -112,19 +112,19 @@ TEST(GcsFileSystem, ToArrowStatus) { {google::cloud::StatusCode::kCancelled, StatusCode::Cancelled}, {google::cloud::StatusCode::kUnknown, StatusCode::UnknownError}, {google::cloud::StatusCode::kInvalidArgument, StatusCode::Invalid}, - {google::cloud::StatusCode::kDeadlineExceeded, StatusCode::UnknownError}, + {google::cloud::StatusCode::kDeadlineExceeded, StatusCode::IOError}, {google::cloud::StatusCode::kNotFound, StatusCode::UnknownError}, {google::cloud::StatusCode::kAlreadyExists, StatusCode::AlreadyExists}, - {google::cloud::StatusCode::kPermissionDenied, StatusCode::UnknownError}, - {google::cloud::StatusCode::kUnauthenticated, StatusCode::UnknownError}, + {google::cloud::StatusCode::kPermissionDenied, StatusCode::IOError}, + {google::cloud::StatusCode::kUnauthenticated, StatusCode::IOError}, {google::cloud::StatusCode::kResourceExhausted, StatusCode::CapacityError}, - {google::cloud::StatusCode::kFailedPrecondition, StatusCode::UnknownError}, - {google::cloud::StatusCode::kAborted, StatusCode::UnknownError}, + {google::cloud::StatusCode::kFailedPrecondition, StatusCode::IOError}, + {google::cloud::StatusCode::kAborted, StatusCode::IOError}, {google::cloud::StatusCode::kOutOfRange, StatusCode::Invalid}, {google::cloud::StatusCode::kUnimplemented, StatusCode::NotImplemented}, - {google::cloud::StatusCode::kInternal, StatusCode::UnknownError}, - {google::cloud::StatusCode::kUnavailable, StatusCode::UnknownError}, - {google::cloud::StatusCode::kDataLoss, StatusCode::UnknownError}, + {google::cloud::StatusCode::kInternal, StatusCode::IOError}, + {google::cloud::StatusCode::kUnavailable, StatusCode::IOError}, + {google::cloud::StatusCode::kDataLoss, StatusCode::IOError}, }; for (const auto& test : cases) { From 184af32742c5d7d9143f8dfa81d14e7241f2f0cf Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 12 Oct 2021 15:00:38 +0000 Subject: [PATCH 07/10] Address review comments --- cpp/src/arrow/filesystem/gcsfs_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index cfbba8cfc45..2e56d073892 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -28,7 +28,7 @@ #include #include "arrow/filesystem/gcsfs_internal.h" -#include "arrow/testing/gtest_util.h" +#include "arrow/filesystem/test_util.h" #include "arrow/testing/util.h" namespace arrow { @@ -65,7 +65,7 @@ class GcsIntegrationTest : public ::testing::Test { server_process_ = bp::child(boost::this_process::environment(), exe_path, "-m", "testbench", "--port", port_); - // Create a bucket in the testbench so additional + // Create a bucket in the testbench, simplifying some of the tests. auto client = gcs::Client( google::cloud::Options{} .set("http://127.0.0.1:" + port_) @@ -157,7 +157,7 @@ TEST(GcsFileSystem, FileSystemCompare) { TEST_F(GcsIntegrationTest, MakeBucket) { auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions()); - ASSERT_OK(fs->GetFileInfo(kPreexistingBucket)); + arrow::fs::AssertFileInfo(fs.get(), kPreexistingBucket, FileType::Directory); } } // namespace From 9712d7aa98914f8d03e5a6b1f7beb7447c059b22 Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 12 Oct 2021 15:54:10 +0000 Subject: [PATCH 08/10] Avoid using production endpoints in unit tests --- cpp/src/arrow/filesystem/gcsfs_test.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 2e56d073892..3a136b656e7 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -65,7 +65,7 @@ class GcsIntegrationTest : public ::testing::Test { server_process_ = bp::child(boost::this_process::environment(), exe_path, "-m", "testbench", "--port", port_); - // Create a bucket in the testbench, simplifying some of the tests. + // Create a bucket in the testbench. This makes it easier to bootstrap GcsFileSystem and its tests. auto client = gcs::Client( google::cloud::Options{} .set("http://127.0.0.1:" + port_) @@ -142,13 +142,16 @@ TEST(GcsFileSystem, ToArrowStatus) { } TEST(GcsFileSystem, FileSystemCompare) { - auto a = internal::MakeGcsFileSystemForTest(GcsOptions{}); + GcsOptions a_options; + a_options.scheme = "http"; + auto a = internal::MakeGcsFileSystemForTest(a_options); EXPECT_THAT(a, NotNull()); EXPECT_TRUE(a->Equals(*a)); - GcsOptions options; - options.endpoint_override = "localhost:1234"; - auto b = internal::MakeGcsFileSystemForTest(options); + GcsOptions b_options; + b_options.scheme = "http"; + b_options.endpoint_override = "localhost:1234"; + auto b = internal::MakeGcsFileSystemForTest(b_options); EXPECT_THAT(b, NotNull()); EXPECT_TRUE(b->Equals(*b)); From 584cb2f49c6ffe7f7917ec902f3e19ae8ac273ba Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 12 Oct 2021 16:00:38 +0000 Subject: [PATCH 09/10] Preserve minio installs --- ci/docker/conda.dockerfile | 2 +- ci/docker/debian-10-cpp.dockerfile | 2 +- ci/docker/debian-11-cpp.dockerfile | 2 +- ci/docker/fedora-33-cpp.dockerfile | 2 +- ci/docker/linux-apt-r.dockerfile | 2 +- ci/docker/ubuntu-20.04-cpp.dockerfile | 2 +- ci/docker/ubuntu-20.10-cpp.dockerfile | 2 +- ci/docker/ubuntu-21.04-cpp.dockerfile | 2 +- ci/scripts/r_docker_configure.sh | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/docker/conda.dockerfile b/ci/docker/conda.dockerfile index a27b90bfbfc..13bf902e9d9 100644 --- a/ci/docker/conda.dockerfile +++ b/ci/docker/conda.dockerfile @@ -33,10 +33,10 @@ ENV PATH=${prefix}/bin:$PATH # install conda and minio COPY ci/scripts/install_conda.sh \ ci/scripts/install_minio.sh \ - ci/scripts/install_gcs_testbench.sh \ /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix} RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix} +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default # create a conda environment diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index 107b44c692e..d4a78707bf8 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -73,8 +73,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists/* COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_BUILD_TESTS=ON \ diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile index b8b8c0d463c..b809c09c99f 100644 --- a/ci/docker/debian-11-cpp.dockerfile +++ b/ci/docker/debian-11-cpp.dockerfile @@ -71,8 +71,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists/* COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_BUILD_TESTS=ON \ diff --git a/ci/docker/fedora-33-cpp.dockerfile b/ci/docker/fedora-33-cpp.dockerfile index b9085a470b0..5de210b3fa5 100644 --- a/ci/docker/fedora-33-cpp.dockerfile +++ b/ci/docker/fedora-33-cpp.dockerfile @@ -64,8 +64,8 @@ RUN dnf update -y && \ zlib-devel COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default ENV ARROW_BUILD_TESTS=ON \ diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index afe330594b9..fe0a5212b93 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -86,8 +86,8 @@ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Set up Python 3 and its dependencies diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index e9b8266d2b9..577b2d48c6b 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -96,8 +96,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation diff --git a/ci/docker/ubuntu-20.10-cpp.dockerfile b/ci/docker/ubuntu-20.10-cpp.dockerfile index fbfcf306769..f34005d6845 100644 --- a/ci/docker/ubuntu-20.10-cpp.dockerfile +++ b/ci/docker/ubuntu-20.10-cpp.dockerfile @@ -98,8 +98,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation diff --git a/ci/docker/ubuntu-21.04-cpp.dockerfile b/ci/docker/ubuntu-21.04-cpp.dockerfile index 0d9d9e504f9..d6ec48334a6 100644 --- a/ci/docker/ubuntu-21.04-cpp.dockerfile +++ b/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -96,8 +96,8 @@ RUN apt-get update -y -q && \ rm -rf /var/lib/apt/lists* COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index 34a9eb84726..befdcf77ab5 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -72,7 +72,7 @@ if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local fi - if [ -f "/arrow/ci/scripts/install_gcs_testbench.sh"] && [ "`which pip`" ]; then + if [ -f "/arrow/ci/scripts/install_gcs_testbench.sh" ] && [ "`which pip`" ]; then /arrow/ci/scripts/install_gcs_testbench.sh default fi fi From f3edc48698f0f803e8ed750482268a18e3bf2bcb Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 12 Oct 2021 18:20:41 +0000 Subject: [PATCH 10/10] Fix formatting --- cpp/src/arrow/filesystem/gcsfs_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 3a136b656e7..0776872e3ac 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -65,7 +65,8 @@ class GcsIntegrationTest : public ::testing::Test { server_process_ = bp::child(boost::this_process::environment(), exe_path, "-m", "testbench", "--port", port_); - // Create a bucket in the testbench. This makes it easier to bootstrap GcsFileSystem and its tests. + // Create a bucket in the testbench. This makes it easier to bootstrap GcsFileSystem + // and its tests. auto client = gcs::Client( google::cloud::Options{} .set("http://127.0.0.1:" + port_)