From 7f0aee5b565a3450a151bed4a6e98e904e5ad826 Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Mon, 13 Dec 2021 22:43:42 +0000 Subject: [PATCH 1/2] ARROW-15085: [C++] support credential types in GcsFileSystem --- cpp/src/arrow/filesystem/gcsfs.cc | 55 +++++++++++++++++++++++- cpp/src/arrow/filesystem/gcsfs.h | 57 +++++++++++++++++++++++++ cpp/src/arrow/filesystem/gcsfs_test.cc | 58 +++++++++++++++++++++++--- 3 files changed, 163 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index c45d58b5fdb..d55fed97d0e 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -267,6 +267,18 @@ google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) { return options; } +class GcsCredentialsProvider { + public: + explicit GcsCredentialsProvider(std::shared_ptr credentials) + : credentials_(std::move(credentials)) {} + ~GcsCredentialsProvider() = default; + + std::shared_ptr credentials() const { return credentials_; } + + private: + std::shared_ptr credentials_; +}; + class GcsFileSystem::Impl { public: explicit Impl(GcsOptions o) @@ -456,7 +468,48 @@ class GcsFileSystem::Impl { }; bool GcsOptions::Equals(const GcsOptions& other) const { - return endpoint_override == other.endpoint_override && scheme == other.scheme; + return credentials == other.credentials && + endpoint_override == other.endpoint_override && scheme == other.scheme; +} + +GcsOptions GcsOptions::Defaults() { + return GcsOptions{std::make_shared( + google::cloud::MakeGoogleDefaultCredentials()), + {}, + "https"}; +} + +GcsOptions GcsOptions::Anonymous() { + return GcsOptions{ + std::make_shared(google::cloud::MakeInsecureCredentials()), + {}, + "http"}; +} + +GcsOptions GcsOptions::AccessToken(const std::string& access_token, + std::chrono::system_clock::time_point expiration) { + return GcsOptions{ + std::make_shared( + google::cloud::MakeAccessTokenCredentials(access_token, expiration)), + {}, + "https"}; +} + +GcsOptions GcsOptions::ImpersonateServiceAccount( + const GcsCredentialsProvider& base_credentials, + const std::string& target_service_account) { + return GcsOptions{std::make_shared( + google::cloud::MakeImpersonateServiceAccountCredentials( + base_credentials.credentials(), target_service_account)), + {}, + "https"}; +} + +GcsOptions GcsOptions::ServiceAccountCredentials(const std::string& json_object) { + return GcsOptions{std::make_shared( + google::cloud::MakeServiceAccountCredentials(json_object)), + {}, + "https"}; } std::string GcsFileSystem::type_name() const { return "gcs"; } diff --git a/cpp/src/arrow/filesystem/gcsfs.h b/cpp/src/arrow/filesystem/gcsfs.h index 4e5173e6e3a..8694310c8a4 100644 --- a/cpp/src/arrow/filesystem/gcsfs.h +++ b/cpp/src/arrow/filesystem/gcsfs.h @@ -27,6 +27,7 @@ namespace arrow { namespace fs { class GcsFileSystem; struct GcsOptions; +class GcsCredentialsProvider; namespace internal { // TODO(ARROW-1231) - remove, and provide a public API (static GcsFileSystem::Make()). std::shared_ptr MakeGcsFileSystemForTest(const GcsOptions& options); @@ -34,10 +35,66 @@ std::shared_ptr MakeGcsFileSystemForTest(const GcsOptions& option /// Options for the GcsFileSystem implementation. struct ARROW_EXPORT GcsOptions { + std::shared_ptr credentials; + std::string endpoint_override; std::string scheme; bool Equals(const GcsOptions& other) const; + + /// \brief Initialize with Google Default Credentials + /// + /// Create options configured to use [Application Default Credentials][aip/4110]. The + /// details of this mechanism are too involved to describe here, but suffice is to say + /// that applications can override any defaults using an environment variable + /// (`GOOGLE_APPLICATION_CREDENTIALS`), and that the defaults work with most Google + /// Cloud Platform deployment environments (GCE, GKE, Cloud Run, etc.), and that have + /// the same behavior as the `gcloud` CLI tool on your workstation. + /// + /// \see https://cloud.google.com/docs/authentication + /// + /// [aip/4110]: https://google.aip.dev/auth/4110 + static GcsOptions Defaults(); + + /// \brief Initialize with anonymous credentials + static GcsOptions Anonymous(); + + /// \brief Initialize with access token + /// + /// These credentials are useful when using an out-of-band mechanism to fetch access + /// tokens. Note that access tokens are time limited, you will need to manually refresh + /// the tokens created by the out-of-band mechanism. + static GcsOptions AccessToken(const std::string& access_token, + std::chrono::system_clock::time_point expiration); + + /// \brief Initialize with service account impersonation + /// + /// Service account impersonation allows one principal (a user or service account) to + /// impersonate a service account. It requires that the calling principal has the + /// necessary permissions *on* the service account. + static GcsOptions ImpersonateServiceAccount( + const GcsCredentialsProvider& base_credentials, + const std::string& target_service_account); + + /// Creates service account credentials from a JSON object in string form. + /// + /// The @p json_object is expected to be in the format described by [aip/4112]. Such an + /// object contains the identity of a service account, as well as a private key that can + /// be used to sign tokens, showing the caller was holding the private key. + /// + /// In GCP one can create several "keys" for each service account, and these keys are + /// downloaded as a JSON "key file". The contents of such a file are in the format + /// required by this function. Remember that key files and their contents should be + /// treated as any other secret with security implications, think of them as passwords + /// (because they are!), don't store them or output them where unauthorized persons may + /// read them. + /// + /// Most applications should probably use default credentials, maybe pointing them to a + /// file with these contents. Using this function may be useful when the json object is + /// obtained from a Cloud Secret Manager or a similar service. + /// + /// [aip/4112]: https://google.aip.dev/auth/4112 + static GcsOptions ServiceAccountCredentials(const std::string& json_object); }; // - TODO(ARROW-1231) - review this documentation before closing the bug. diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 3e4431cde81..f896ab918b9 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -112,7 +112,7 @@ class GcsTestbench : public ::testing::Environment { std::string error_; }; -static GcsTestbench* Testbench() { +GcsTestbench* Testbench() { static auto* const environment = [] { return new GcsTestbench; }(); return environment; } @@ -159,9 +159,8 @@ class GcsIntegrationTest : public ::testing::Test { std::string NotFoundObjectPath() { return PreexistingBucketPath() + "not-found"; } GcsOptions TestGcsOptions() { - GcsOptions options; + auto options = GcsOptions::Anonymous(); options.endpoint_override = "127.0.0.1:" + Testbench()->port(); - options.scheme = "http"; return options; } @@ -201,16 +200,63 @@ class GcsIntegrationTest : public ::testing::Test { }; TEST(GcsFileSystem, OptionsCompare) { - GcsOptions a; - GcsOptions b; + auto a = GcsOptions::Anonymous(); + auto b = a; b.endpoint_override = "localhost:1234"; EXPECT_TRUE(a.Equals(a)); EXPECT_TRUE(b.Equals(b)); auto c = b; - c.scheme = "http"; + c.scheme = "https"; EXPECT_FALSE(b.Equals(c)); } +TEST(GcsFileSystem, OptionsAnonymous) { + GcsOptions a = GcsOptions::Anonymous(); + EXPECT_THAT(a.credentials, NotNull()); + EXPECT_EQ(a.scheme, "http"); +} + +TEST(GcsFileSystem, OptionsAccessToken) { + auto a = + GcsOptions::AccessToken("invalid-access-token-test-only", + std::chrono::system_clock::now() + std::chrono::minutes(5)); + EXPECT_THAT(a.credentials, NotNull()); + EXPECT_EQ(a.scheme, "https"); +} + +TEST(GcsFileSystem, OptionsImpersonateServiceAccount) { + auto base = + GcsOptions::AccessToken("invalid-access-token-test-only", + std::chrono::system_clock::now() + std::chrono::minutes(5)); + auto a = GcsOptions::ImpersonateServiceAccount( + *base.credentials, "invalid-sa-test-only@my-project.iam.gserviceaccount.com"); + EXPECT_THAT(a.credentials, NotNull()); + EXPECT_EQ(a.scheme, "https"); +} + +TEST(GcsFileSystem, OptionsServiceAccountCredentials) { + // While this service account key has the correct format, it cannot be used for + // authentication because the key has been deactivated on the server-side, *and* the + // account(s) involved are deleted *and* they are not the accounts or projects do not + // match its contents. + constexpr char kJsonKeyfileContents[] = R"""({ + "type": "service_account", + "project_id": "foo-project", + "private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCltiF2oP3KJJ+S\ntTc1McylY+TuAi3AdohX7mmqIjd8a3eBYDHs7FlnUrFC4CRijCr0rUqYfg2pmk4a\n6TaKbQRAhWDJ7XD931g7EBvCtd8+JQBNWVKnP9ByJUaO0hWVniM50KTsWtyX3up/\nfS0W2R8Cyx4yvasE8QHH8gnNGtr94iiORDC7De2BwHi/iU8FxMVJAIyDLNfyk0hN\neheYKfIDBgJV2v6VaCOGWaZyEuD0FJ6wFeLybFBwibrLIBE5Y/StCrZoVZ5LocFP\nT4o8kT7bU6yonudSCyNMedYmqHj/iF8B2UN1WrYx8zvoDqZk0nxIglmEYKn/6U7U\ngyETGcW9AgMBAAECggEAC231vmkpwA7JG9UYbviVmSW79UecsLzsOAZnbtbn1VLT\nPg7sup7tprD/LXHoyIxK7S/jqINvPU65iuUhgCg3Rhz8+UiBhd0pCH/arlIdiPuD\n2xHpX8RIxAq6pGCsoPJ0kwkHSw8UTnxPV8ZCPSRyHV71oQHQgSl/WjNhRi6PQroB\nSqc/pS1m09cTwyKQIopBBVayRzmI2BtBxyhQp9I8t5b7PYkEZDQlbdq0j5Xipoov\n9EW0+Zvkh1FGNig8IJ9Wp+SZi3rd7KLpkyKPY7BK/g0nXBkDxn019cET0SdJOHQG\nDiHiv4yTRsDCHZhtEbAMKZEpku4WxtQ+JjR31l8ueQKBgQDkO2oC8gi6vQDcx/CX\nZ23x2ZUyar6i0BQ8eJFAEN+IiUapEeCVazuxJSt4RjYfwSa/p117jdZGEWD0GxMC\n+iAXlc5LlrrWs4MWUc0AHTgXna28/vii3ltcsI0AjWMqaybhBTTNbMFa2/fV2OX2\nUimuFyBWbzVc3Zb9KAG4Y7OmJQKBgQC5324IjXPq5oH8UWZTdJPuO2cgRsvKmR/r\n9zl4loRjkS7FiOMfzAgUiXfH9XCnvwXMqJpuMw2PEUjUT+OyWjJONEK4qGFJkbN5\n3ykc7p5V7iPPc7Zxj4mFvJ1xjkcj+i5LY8Me+gL5mGIrJ2j8hbuv7f+PWIauyjnp\nNx/0GVFRuQKBgGNT4D1L7LSokPmFIpYh811wHliE0Fa3TDdNGZnSPhaD9/aYyy78\nLkxYKuT7WY7UVvLN+gdNoVV5NsLGDa4cAV+CWPfYr5PFKGXMT/Wewcy1WOmJ5des\nAgMC6zq0TdYmMBN6WpKUpEnQtbmh3eMnuvADLJWxbH3wCkg+4xDGg2bpAoGAYRNk\nMGtQQzqoYNNSkfus1xuHPMA8508Z8O9pwKU795R3zQs1NAInpjI1sOVrNPD7Ymwc\nW7mmNzZbxycCUL/yzg1VW4P1a6sBBYGbw1SMtWxun4ZbnuvMc2CTCh+43/1l+FHe\nMmt46kq/2rH2jwx5feTbOE6P6PINVNRJh/9BDWECgYEAsCWcH9D3cI/QDeLG1ao7\nrE2NcknP8N783edM07Z/zxWsIsXhBPY3gjHVz2LDl+QHgPWhGML62M0ja/6SsJW3\nYvLLIc82V7eqcVJTZtaFkuht68qu/Jn1ezbzJMJ4YXDYo1+KFi+2CAGR06QILb+I\nlUtj+/nH3HDQjM4ltYfTPUg=\n-----END PRIVATE KEY-----\n", + "client_email": "foo-email@foo-project.iam.gserviceaccount.com", + "client_id": "100000000000000000001", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com" + })"""; + + auto a = GcsOptions::ServiceAccountCredentials(kJsonKeyfileContents); + EXPECT_THAT(a.credentials, NotNull()); + EXPECT_EQ(a.scheme, "https"); +} + TEST(GcsFileSystem, ToArrowStatusOK) { Status actual = internal::ToArrowStatus(google::cloud::Status()); EXPECT_TRUE(actual.ok()); From d0b3a3532039337efbdbfc9c8cc21a955cfa7bc2 Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Tue, 14 Dec 2021 13:49:00 +0000 Subject: [PATCH 2/2] Address review comments --- cpp/src/arrow/filesystem/gcsfs.cc | 53 ++++++++++++-------------- cpp/src/arrow/filesystem/gcsfs.h | 15 ++++---- cpp/src/arrow/filesystem/gcsfs_test.cc | 16 ++++---- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index d55fed97d0e..b13b65ac48c 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -32,6 +32,13 @@ namespace arrow { namespace fs { +struct GcsCredentials { + explicit GcsCredentials(std::shared_ptr c) + : credentials(std::move(c)) {} + + std::shared_ptr credentials; +}; + namespace { namespace gcs = google::cloud::storage; @@ -247,8 +254,6 @@ class GcsRandomAccessFile : public arrow::io::RandomAccessFile { std::shared_ptr stream_; }; -} // namespace - google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) { auto options = google::cloud::Options{}; std::string scheme = o.scheme; @@ -264,20 +269,13 @@ google::cloud::Options AsGoogleCloudOptions(const GcsOptions& o) { if (!o.endpoint_override.empty()) { options.set(scheme + "://" + o.endpoint_override); } + if (o.credentials && o.credentials->credentials) { + options.set(o.credentials->credentials); + } return options; } -class GcsCredentialsProvider { - public: - explicit GcsCredentialsProvider(std::shared_ptr credentials) - : credentials_(std::move(credentials)) {} - ~GcsCredentialsProvider() = default; - - std::shared_ptr credentials() const { return credentials_; } - - private: - std::shared_ptr credentials_; -}; +} // namespace class GcsFileSystem::Impl { public: @@ -473,40 +471,39 @@ bool GcsOptions::Equals(const GcsOptions& other) const { } GcsOptions GcsOptions::Defaults() { - return GcsOptions{std::make_shared( - google::cloud::MakeGoogleDefaultCredentials()), - {}, - "https"}; + return GcsOptions{ + std::make_shared(google::cloud::MakeGoogleDefaultCredentials()), + {}, + "https"}; } GcsOptions GcsOptions::Anonymous() { return GcsOptions{ - std::make_shared(google::cloud::MakeInsecureCredentials()), + std::make_shared(google::cloud::MakeInsecureCredentials()), {}, "http"}; } -GcsOptions GcsOptions::AccessToken(const std::string& access_token, - std::chrono::system_clock::time_point expiration) { +GcsOptions GcsOptions::FromAccessToken(const std::string& access_token, + std::chrono::system_clock::time_point expiration) { return GcsOptions{ - std::make_shared( + std::make_shared( google::cloud::MakeAccessTokenCredentials(access_token, expiration)), {}, "https"}; } -GcsOptions GcsOptions::ImpersonateServiceAccount( - const GcsCredentialsProvider& base_credentials, - const std::string& target_service_account) { - return GcsOptions{std::make_shared( +GcsOptions GcsOptions::FromImpersonatedServiceAccount( + const GcsCredentials& base_credentials, const std::string& target_service_account) { + return GcsOptions{std::make_shared( google::cloud::MakeImpersonateServiceAccountCredentials( - base_credentials.credentials(), target_service_account)), + base_credentials.credentials, target_service_account)), {}, "https"}; } -GcsOptions GcsOptions::ServiceAccountCredentials(const std::string& json_object) { - return GcsOptions{std::make_shared( +GcsOptions GcsOptions::FromServiceAccountCredentials(const std::string& json_object) { + return GcsOptions{std::make_shared( google::cloud::MakeServiceAccountCredentials(json_object)), {}, "https"}; diff --git a/cpp/src/arrow/filesystem/gcsfs.h b/cpp/src/arrow/filesystem/gcsfs.h index 8694310c8a4..029660e512e 100644 --- a/cpp/src/arrow/filesystem/gcsfs.h +++ b/cpp/src/arrow/filesystem/gcsfs.h @@ -27,7 +27,7 @@ namespace arrow { namespace fs { class GcsFileSystem; struct GcsOptions; -class GcsCredentialsProvider; +struct GcsCredentials; namespace internal { // TODO(ARROW-1231) - remove, and provide a public API (static GcsFileSystem::Make()). std::shared_ptr MakeGcsFileSystemForTest(const GcsOptions& options); @@ -35,7 +35,7 @@ std::shared_ptr MakeGcsFileSystemForTest(const GcsOptions& option /// Options for the GcsFileSystem implementation. struct ARROW_EXPORT GcsOptions { - std::shared_ptr credentials; + std::shared_ptr credentials; std::string endpoint_override; std::string scheme; @@ -64,17 +64,16 @@ struct ARROW_EXPORT GcsOptions { /// These credentials are useful when using an out-of-band mechanism to fetch access /// tokens. Note that access tokens are time limited, you will need to manually refresh /// the tokens created by the out-of-band mechanism. - static GcsOptions AccessToken(const std::string& access_token, - std::chrono::system_clock::time_point expiration); + static GcsOptions FromAccessToken(const std::string& access_token, + std::chrono::system_clock::time_point expiration); /// \brief Initialize with service account impersonation /// /// Service account impersonation allows one principal (a user or service account) to /// impersonate a service account. It requires that the calling principal has the /// necessary permissions *on* the service account. - static GcsOptions ImpersonateServiceAccount( - const GcsCredentialsProvider& base_credentials, - const std::string& target_service_account); + static GcsOptions FromImpersonatedServiceAccount( + const GcsCredentials& base_credentials, const std::string& target_service_account); /// Creates service account credentials from a JSON object in string form. /// @@ -94,7 +93,7 @@ struct ARROW_EXPORT GcsOptions { /// obtained from a Cloud Secret Manager or a similar service. /// /// [aip/4112]: https://google.aip.dev/auth/4112 - static GcsOptions ServiceAccountCredentials(const std::string& json_object); + static GcsOptions FromServiceAccountCredentials(const std::string& json_object); }; // - TODO(ARROW-1231) - review this documentation before closing the bug. diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index f896ab918b9..85ae43aa00a 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -217,18 +217,18 @@ TEST(GcsFileSystem, OptionsAnonymous) { } TEST(GcsFileSystem, OptionsAccessToken) { - auto a = - GcsOptions::AccessToken("invalid-access-token-test-only", - std::chrono::system_clock::now() + std::chrono::minutes(5)); + auto a = GcsOptions::FromAccessToken( + "invalid-access-token-test-only", + std::chrono::system_clock::now() + std::chrono::minutes(5)); EXPECT_THAT(a.credentials, NotNull()); EXPECT_EQ(a.scheme, "https"); } TEST(GcsFileSystem, OptionsImpersonateServiceAccount) { - auto base = - GcsOptions::AccessToken("invalid-access-token-test-only", - std::chrono::system_clock::now() + std::chrono::minutes(5)); - auto a = GcsOptions::ImpersonateServiceAccount( + auto base = GcsOptions::FromAccessToken( + "invalid-access-token-test-only", + std::chrono::system_clock::now() + std::chrono::minutes(5)); + auto a = GcsOptions::FromImpersonatedServiceAccount( *base.credentials, "invalid-sa-test-only@my-project.iam.gserviceaccount.com"); EXPECT_THAT(a.credentials, NotNull()); EXPECT_EQ(a.scheme, "https"); @@ -252,7 +252,7 @@ TEST(GcsFileSystem, OptionsServiceAccountCredentials) { "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com" })"""; - auto a = GcsOptions::ServiceAccountCredentials(kJsonKeyfileContents); + auto a = GcsOptions::FromServiceAccountCredentials(kJsonKeyfileContents); EXPECT_THAT(a.credentials, NotNull()); EXPECT_EQ(a.scheme, "https"); }