Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions cpp/src/arrow/filesystem/gcsfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@

namespace arrow {
namespace fs {
// Thin wrapper that owns a google-cloud-cpp credentials handle.
//
// The type is only forward-declared in the public header, so the google-cloud-cpp
// credential types stay out of the public interface.
struct GcsCredentials {
  // Takes shared ownership of `creds`; the pointer is moved into the member.
  explicit GcsCredentials(std::shared_ptr<google::cloud::Credentials> creds)
      : credentials{std::move(creds)} {}

  // The wrapped credentials object.
  std::shared_ptr<google::cloud::Credentials> credentials;
};

namespace {

namespace gcs = google::cloud::storage;
Expand Down Expand Up @@ -247,8 +254,6 @@ class GcsRandomAccessFile : public arrow::io::RandomAccessFile {
std::shared_ptr<io::InputStream> stream_;
};

} // namespace

google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) {
auto options = google::cloud::Options{};
std::string scheme = o.scheme;
Expand All @@ -264,9 +269,14 @@ google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) {
if (!o.endpoint_override.empty()) {
options.set<gcs::RestEndpointOption>(scheme + "://" + o.endpoint_override);
}
if (o.credentials && o.credentials->credentials) {
options.set<google::cloud::UnifiedCredentialsOption>(o.credentials->credentials);
}
return options;
}

} // namespace

class GcsFileSystem::Impl {
public:
explicit Impl(GcsOptions o)
Expand Down Expand Up @@ -456,7 +466,47 @@ class GcsFileSystem::Impl {
};

// Compare two option sets member by member.
//
// `credentials` is a std::shared_ptr, so it is compared by pointer identity: two
// options are equal only if they share the same GcsCredentials object (or both hold
// none). `endpoint_override` and `scheme` are compared by value.
bool GcsOptions::Equals(const GcsOptions& other) const {
  return credentials == other.credentials &&
         endpoint_override == other.endpoint_override && scheme == other.scheme;
}

// Options that authenticate with Google's default credentials over "https".
// `endpoint_override` is left empty.
GcsOptions GcsOptions::Defaults() {
  GcsOptions options;
  options.credentials =
      std::make_shared<GcsCredentials>(google::cloud::MakeGoogleDefaultCredentials());
  options.scheme = "https";
  return options;
}

// Options that use insecure (anonymous) credentials and the "http" scheme.
// `endpoint_override` is left empty.
GcsOptions GcsOptions::Anonymous() {
  GcsOptions options;
  options.credentials =
      std::make_shared<GcsCredentials>(google::cloud::MakeInsecureCredentials());
  options.scheme = "http";
  return options;
}

// Options that authenticate with a caller-supplied access token over "https".
// Both arguments are forwarded unchanged to MakeAccessTokenCredentials().
GcsOptions GcsOptions::FromAccessToken(const std::string& access_token,
                                       std::chrono::system_clock::time_point expiration) {
  auto token_credentials =
      google::cloud::MakeAccessTokenCredentials(access_token, expiration);

  GcsOptions options;
  options.credentials = std::make_shared<GcsCredentials>(token_credentials);
  options.scheme = "https";
  return options;
}

// Options that impersonate `target_service_account` over "https", using the
// credentials wrapped by `base_credentials` as the base credentials.
GcsOptions GcsOptions::FromImpersonatedServiceAccount(
    const GcsCredentials& base_credentials, const std::string& target_service_account) {
  auto impersonated = google::cloud::MakeImpersonateServiceAccountCredentials(
      base_credentials.credentials, target_service_account);

  GcsOptions options;
  options.credentials = std::make_shared<GcsCredentials>(impersonated);
  options.scheme = "https";
  return options;
}

// Options that authenticate as the service account described by `json_object`
// (a key file's contents in string form) over "https".
GcsOptions GcsOptions::FromServiceAccountCredentials(const std::string& json_object) {
  auto service_account = google::cloud::MakeServiceAccountCredentials(json_object);

  GcsOptions options;
  options.credentials = std::make_shared<GcsCredentials>(service_account);
  options.scheme = "https";
  return options;
}

std::string GcsFileSystem::type_name() const { return "gcs"; }
Expand Down
56 changes: 56 additions & 0 deletions cpp/src/arrow/filesystem/gcsfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,73 @@ namespace arrow {
namespace fs {
class GcsFileSystem;
struct GcsOptions;
struct GcsCredentials;
namespace internal {
// TODO(ARROW-1231) - remove, and provide a public API (static GcsFileSystem::Make()).
std::shared_ptr<GcsFileSystem> MakeGcsFileSystemForTest(const GcsOptions& options);
} // namespace internal

/// Options for the GcsFileSystem implementation.
///
/// The member order matters: the factory functions initialize this struct
/// positionally as `{credentials, endpoint_override, scheme}`.
struct ARROW_EXPORT GcsOptions {
/// Credentials used to authenticate requests. When unset, no credentials option is
/// forwarded to the underlying client.
std::shared_ptr<GcsCredentials> credentials;

/// When non-empty, overrides the service endpoint. The value is combined with
/// `scheme` as `scheme://endpoint_override`, so it must not include a scheme itself.
std::string endpoint_override;
/// URL scheme used to reach the service. The factory functions set "https", except
/// Anonymous(), which sets "http".
std::string scheme;

/// \brief Compare this set of options with \p other.
bool Equals(const GcsOptions& other) const;

/// \brief Initialize with Google Default Credentials
///
/// Create options configured to use [Application Default Credentials][aip/4110]. The
/// details of this mechanism are too involved to describe here, but suffice it to say
/// that applications can override any defaults using an environment variable
/// (`GOOGLE_APPLICATION_CREDENTIALS`), that the defaults work with most Google
/// Cloud Platform deployment environments (GCE, GKE, Cloud Run, etc.), and that they
/// have the same behavior as the `gcloud` CLI tool on your workstation.
///
/// \see https://cloud.google.com/docs/authentication
///
/// [aip/4110]: https://google.aip.dev/auth/4110
static GcsOptions Defaults();

/// \brief Initialize with anonymous credentials
///
/// The returned options use the "http" scheme.
static GcsOptions Anonymous();

/// \brief Initialize with access token
///
/// These credentials are useful when using an out-of-band mechanism to fetch access
/// tokens. Note that access tokens are time limited; you will need to manually refresh
/// the tokens created by the out-of-band mechanism.
///
/// \param access_token the access token obtained out-of-band
/// \param expiration the expiration time associated with \p access_token
static GcsOptions FromAccessToken(const std::string& access_token,
std::chrono::system_clock::time_point expiration);

/// \brief Initialize with service account impersonation
///
/// Service account impersonation allows one principal (a user or service account) to
/// impersonate a service account. It requires that the calling principal has the
/// necessary permissions *on* the service account.
///
/// \param base_credentials credentials of the calling principal
/// \param target_service_account the service account to impersonate
static GcsOptions FromImpersonatedServiceAccount(
const GcsCredentials& base_credentials, const std::string& target_service_account);

/// \brief Initialize with service account credentials from a JSON object in string
/// form
///
/// The \p json_object is expected to be in the format described by [aip/4112]. Such an
/// object contains the identity of a service account, as well as a private key that can
/// be used to sign tokens, showing the caller was holding the private key.
///
/// In GCP one can create several "keys" for each service account, and these keys are
/// downloaded as a JSON "key file". The contents of such a file are in the format
/// required by this function. Remember that key files and their contents should be
/// treated as any other secret with security implications. Think of them as passwords
/// (because they are!); don't store them or output them where unauthorized persons may
/// read them.
///
/// Most applications should probably use default credentials, maybe pointing them to a
/// file with these contents. Using this function may be useful when the JSON object is
/// obtained from a Cloud Secret Manager or a similar service.
///
/// [aip/4112]: https://google.aip.dev/auth/4112
static GcsOptions FromServiceAccountCredentials(const std::string& json_object);
};

// - TODO(ARROW-1231) - review this documentation before closing the bug.
Expand Down
58 changes: 52 additions & 6 deletions cpp/src/arrow/filesystem/gcsfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class GcsTestbench : public ::testing::Environment {
std::string error_;
};

// Return the shared GcsTestbench environment.
//
// The object is allocated with `new` the first time this function runs and the same
// pointer is returned on every later call; this function never deletes it.
GcsTestbench* Testbench() {
  static auto* const environment = [] { return new GcsTestbench; }();
  return environment;
}
Expand Down Expand Up @@ -159,9 +159,8 @@ class GcsIntegrationTest : public ::testing::Test {
std::string NotFoundObjectPath() { return PreexistingBucketPath() + "not-found"; }

// Options pointing at the local testbench: anonymous credentials, with the endpoint
// overridden to the testbench's port on 127.0.0.1. The scheme is not set here because
// GcsOptions::Anonymous() already selects "http".
GcsOptions TestGcsOptions() {
  auto options = GcsOptions::Anonymous();
  options.endpoint_override = "127.0.0.1:" + Testbench()->port();
  return options;
}

Expand Down Expand Up @@ -201,16 +200,63 @@ class GcsIntegrationTest : public ::testing::Test {
};

// Equals() must be reflexive and must notice a change in either the endpoint
// override or the scheme.
TEST(GcsFileSystem, OptionsCompare) {
  auto a = GcsOptions::Anonymous();
  auto b = a;
  b.endpoint_override = "localhost:1234";
  EXPECT_TRUE(a.Equals(a));
  EXPECT_TRUE(b.Equals(b));
  // `a` and `b` differ only in endpoint_override.
  EXPECT_FALSE(a.Equals(b));
  auto c = b;
  // Anonymous() uses "http", so switching to "https" must break equality.
  c.scheme = "https";
  EXPECT_FALSE(b.Equals(c));
}

// Anonymous options must carry a credentials object and use the "http" scheme.
TEST(GcsFileSystem, OptionsAnonymous) {
  const auto options = GcsOptions::Anonymous();
  EXPECT_THAT(options.credentials, NotNull());
  EXPECT_EQ(options.scheme, "http");
}

// Access-token options must carry a credentials object and use the "https" scheme.
TEST(GcsFileSystem, OptionsAccessToken) {
  const auto expiration = std::chrono::system_clock::now() + std::chrono::minutes(5);
  const auto options =
      GcsOptions::FromAccessToken("invalid-access-token-test-only", expiration);
  EXPECT_THAT(options.credentials, NotNull());
  EXPECT_EQ(options.scheme, "https");
}

// Impersonation options built on top of access-token credentials must carry a
// credentials object and use the "https" scheme.
TEST(GcsFileSystem, OptionsImpersonateServiceAccount) {
  const auto expiration = std::chrono::system_clock::now() + std::chrono::minutes(5);
  const auto base_options =
      GcsOptions::FromAccessToken("invalid-access-token-test-only", expiration);
  const auto impersonated = GcsOptions::FromImpersonatedServiceAccount(
      *base_options.credentials,
      "invalid-sa-test-only@my-project.iam.gserviceaccount.com");
  EXPECT_THAT(impersonated.credentials, NotNull());
  EXPECT_EQ(impersonated.scheme, "https");
}

TEST(GcsFileSystem, OptionsServiceAccountCredentials) {
// While this service account key has the correct format, it cannot be used for
// authentication because the key has been deactivated on the server-side, *and* the
// account(s) involved have been deleted, *and* the accounts and projects do not
// match its contents.
constexpr char kJsonKeyfileContents[] = R"""({
"type": "service_account",
"project_id": "foo-project",
"private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCltiF2oP3KJJ+S\ntTc1McylY+TuAi3AdohX7mmqIjd8a3eBYDHs7FlnUrFC4CRijCr0rUqYfg2pmk4a\n6TaKbQRAhWDJ7XD931g7EBvCtd8+JQBNWVKnP9ByJUaO0hWVniM50KTsWtyX3up/\nfS0W2R8Cyx4yvasE8QHH8gnNGtr94iiORDC7De2BwHi/iU8FxMVJAIyDLNfyk0hN\neheYKfIDBgJV2v6VaCOGWaZyEuD0FJ6wFeLybFBwibrLIBE5Y/StCrZoVZ5LocFP\nT4o8kT7bU6yonudSCyNMedYmqHj/iF8B2UN1WrYx8zvoDqZk0nxIglmEYKn/6U7U\ngyETGcW9AgMBAAECggEAC231vmkpwA7JG9UYbviVmSW79UecsLzsOAZnbtbn1VLT\nPg7sup7tprD/LXHoyIxK7S/jqINvPU65iuUhgCg3Rhz8+UiBhd0pCH/arlIdiPuD\n2xHpX8RIxAq6pGCsoPJ0kwkHSw8UTnxPV8ZCPSRyHV71oQHQgSl/WjNhRi6PQroB\nSqc/pS1m09cTwyKQIopBBVayRzmI2BtBxyhQp9I8t5b7PYkEZDQlbdq0j5Xipoov\n9EW0+Zvkh1FGNig8IJ9Wp+SZi3rd7KLpkyKPY7BK/g0nXBkDxn019cET0SdJOHQG\nDiHiv4yTRsDCHZhtEbAMKZEpku4WxtQ+JjR31l8ueQKBgQDkO2oC8gi6vQDcx/CX\nZ23x2ZUyar6i0BQ8eJFAEN+IiUapEeCVazuxJSt4RjYfwSa/p117jdZGEWD0GxMC\n+iAXlc5LlrrWs4MWUc0AHTgXna28/vii3ltcsI0AjWMqaybhBTTNbMFa2/fV2OX2\nUimuFyBWbzVc3Zb9KAG4Y7OmJQKBgQC5324IjXPq5oH8UWZTdJPuO2cgRsvKmR/r\n9zl4loRjkS7FiOMfzAgUiXfH9XCnvwXMqJpuMw2PEUjUT+OyWjJONEK4qGFJkbN5\n3ykc7p5V7iPPc7Zxj4mFvJ1xjkcj+i5LY8Me+gL5mGIrJ2j8hbuv7f+PWIauyjnp\nNx/0GVFRuQKBgGNT4D1L7LSokPmFIpYh811wHliE0Fa3TDdNGZnSPhaD9/aYyy78\nLkxYKuT7WY7UVvLN+gdNoVV5NsLGDa4cAV+CWPfYr5PFKGXMT/Wewcy1WOmJ5des\nAgMC6zq0TdYmMBN6WpKUpEnQtbmh3eMnuvADLJWxbH3wCkg+4xDGg2bpAoGAYRNk\nMGtQQzqoYNNSkfus1xuHPMA8508Z8O9pwKU795R3zQs1NAInpjI1sOVrNPD7Ymwc\nW7mmNzZbxycCUL/yzg1VW4P1a6sBBYGbw1SMtWxun4ZbnuvMc2CTCh+43/1l+FHe\nMmt46kq/2rH2jwx5feTbOE6P6PINVNRJh/9BDWECgYEAsCWcH9D3cI/QDeLG1ao7\nrE2NcknP8N783edM07Z/zxWsIsXhBPY3gjHVz2LDl+QHgPWhGML62M0ja/6SsJW3\nYvLLIc82V7eqcVJTZtaFkuht68qu/Jn1ezbzJMJ4YXDYo1+KFi+2CAGR06QILb+I\nlUtj+/nH3HDQjM4ltYfTPUg=\n-----END PRIVATE KEY-----\n",
"client_email": "foo-email@foo-project.iam.gserviceaccount.com",
"client_id": "100000000000000000001",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com"
})""";

auto a = GcsOptions::FromServiceAccountCredentials(kJsonKeyfileContents);
EXPECT_THAT(a.credentials, NotNull());
EXPECT_EQ(a.scheme, "https");
}

TEST(GcsFileSystem, ToArrowStatusOK) {
Status actual = internal::ToArrowStatus(google::cloud::Status());
EXPECT_TRUE(actual.ok());
Expand Down