diff --git a/google/cloud/storage/BUILD.bazel b/google/cloud/storage/BUILD.bazel index c4edeae27123c..949c3f6286c15 100644 --- a/google/cloud/storage/BUILD.bazel +++ b/google/cloud/storage/BUILD.bazel @@ -76,6 +76,7 @@ cc_library( "@com_github_google_crc32c//:crc32c", "@com_github_nlohmann_json//:nlohmann_json", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", @@ -165,3 +166,16 @@ load(":storage_client_grpc_unit_tests.bzl", "storage_client_grpc_unit_tests") "@com_google_googletest//:gtest_main", ], ) for test in storage_client_grpc_unit_tests] + +load(":storage_client_benchmarks.bzl", "storage_client_benchmarks") + +[cc_binary( + name = benchmark.replace("/", "_").replace(".cc", ""), + srcs = [benchmark], + tags = ["benchmark"], + deps = [ + ":google_cloud_cpp_storage", + "//:common", + "@com_google_benchmark//:benchmark_main", + ], +) for benchmark in storage_client_benchmarks] diff --git a/google/cloud/storage/google_cloud_cpp_storage.bzl b/google/cloud/storage/google_cloud_cpp_storage.bzl index 709f7ba86e9e2..7dd33c1735cb9 100644 --- a/google/cloud/storage/google_cloud_cpp_storage.bzl +++ b/google/cloud/storage/google_cloud_cpp_storage.bzl @@ -54,6 +54,7 @@ google_cloud_cpp_storage_hdrs = [ "internal/complex_option.h", "internal/compute_engine_util.h", "internal/const_buffer.h", + "internal/crc32c.h", "internal/curl_client.h", "internal/curl_download_request.h", "internal/curl_handle.h", @@ -176,6 +177,7 @@ google_cloud_cpp_storage_srcs = [ "internal/bucket_requests.cc", "internal/compute_engine_util.cc", "internal/const_buffer.cc", + "internal/crc32c.cc", "internal/curl_client.cc", "internal/curl_download_request.cc", "internal/curl_handle.cc", diff --git a/google/cloud/storage/google_cloud_cpp_storage.cmake b/google/cloud/storage/google_cloud_cpp_storage.cmake index 2743a21102e0f..c754a9ab9bc89 100644 --- a/google/cloud/storage/google_cloud_cpp_storage.cmake +++ b/google/cloud/storage/google_cloud_cpp_storage.cmake @@ -81,6 +81,8 @@ add_library( internal/compute_engine_util.h internal/const_buffer.cc internal/const_buffer.h + internal/crc32c.cc + internal/crc32c.h internal/curl_client.cc internal/curl_client.h internal/curl_download_request.cc @@ -254,7 +256,8 @@ add_library( well_known_parameters.h) target_link_libraries( google_cloud_cpp_storage - PUBLIC absl::memory + PUBLIC absl::cord + absl::memory absl::strings absl::str_format absl::time @@ -353,6 +356,7 @@ string( "google_cloud_cpp_common" " google_cloud_cpp_rest_internal" " libcurl openssl" + " absl_cord" " absl_memory" " absl_strings" " absl_str_format" @@ -469,6 +473,7 @@ if (BUILD_TESTING) internal/complex_option_test.cc internal/compute_engine_util_test.cc internal/const_buffer_test.cc + internal/crc32c_test.cc internal/curl_client_test.cc internal/curl_download_request_test.cc internal/curl_handle_test.cc @@ -559,6 +564,26 @@ if (BUILD_TESTING) export_list_to_bazel("storage_client_unit_tests.bzl" "storage_client_unit_tests" YEAR "2018") + include(FindBenchmarkWithWorkarounds) + + set(storage_client_benchmarks # cmake-format: sort + internal/crc32c_benchmark.cc) + + # Export the list of benchmarks to a .bzl file so we do not need to maintain + # the list in two places. + export_list_to_bazel("storage_client_benchmarks.bzl" + "storage_client_benchmarks" YEAR "2023") + + # Generate a target for each benchmark. + foreach (fname IN LISTS storage_client_benchmarks) + google_cloud_cpp_add_executable(target "storage" "${fname}") + add_test(NAME ${target} COMMAND ${target}) + target_link_libraries( + ${target} PRIVATE google-cloud-cpp::storage storage_client_testing + benchmark::benchmark_main) + google_cloud_cpp_add_common_options(${target}) + endforeach () + add_subdirectory(tests) add_subdirectory(benchmarks) endif () diff --git a/google/cloud/storage/internal/crc32c.cc b/google/cloud/storage/internal/crc32c.cc new file mode 100644 index 0000000000000..57c16dfe38e20 --- /dev/null +++ b/google/cloud/storage/internal/crc32c.cc @@ -0,0 +1,95 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/crc32c.h" +#include "absl/base/config.h" +#if defined(ABSL_LTS_RELEASE_VERSION) && ABSL_LTS_RELEASE_VERSION >= 20230125 +#include "absl/crc/crc32c.h" +#define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 1 +#else +#define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 0 +#endif // ABSL_LTS_RELEASE_VERSION +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data) { + return crc32c::Extend(crc, reinterpret_cast(data.data()), + data.size()); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data) { + for (auto const& b : data) { + crc = ExtendCrc32c(crc, absl::string_view{b.data(), b.size()}); + } + return crc; +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data) { + for (auto i = data.chunk_begin(); i != data.chunk_end(); ++i) { + crc = ExtendCrc32c(crc, *i); + } + return crc; +} + +#if GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, + std::uint32_t data_crc) { + return static_cast(absl::ConcatCrc32c( + absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, data.size())); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data, + std::uint32_t data_crc) { + auto const size = storage::internal::TotalBytes(data); + return static_cast( + absl::ConcatCrc32c(absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, size)); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, + std::uint32_t data_crc) { + return static_cast(absl::ConcatCrc32c( + absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, data.size())); +} + +#else + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, + std::uint32_t /*data_crc*/) { + return ExtendCrc32c(crc, data); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data, + std::uint32_t /*data_crc*/) { + return ExtendCrc32c(crc, data); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, + std::uint32_t /*data_crc*/) { + return ExtendCrc32c(crc, data); +} + +#endif // GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C + +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/internal/crc32c.h b/google/cloud/storage/internal/crc32c.h new file mode 100644 index 0000000000000..f4ca5ecf5622a --- /dev/null +++ b/google/cloud/storage/internal/crc32c.h @@ -0,0 +1,60 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_CRC32C_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_CRC32C_H + +#include "google/cloud/storage/internal/const_buffer.h" +#include "google/cloud/storage/version.h" +#include "absl/strings/cord.h" +#include "absl/strings/string_view.h" +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data); +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data); +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data); + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, + std::uint32_t data_crc); +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data, + std::uint32_t data_crc); +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, + std::uint32_t data_crc); + +inline std::uint32_t Crc32c(absl::string_view data) { + return ExtendCrc32c(0, data); +} + +inline std::uint32_t Crc32c( + storage::internal::ConstBufferSequence const& data) { + return ExtendCrc32c(0, data); +} + +inline std::uint32_t Crc32c(absl::Cord const& data) { + return ExtendCrc32c(0, data); +} + +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_CRC32C_H diff --git a/google/cloud/storage/internal/crc32c_benchmark.cc b/google/cloud/storage/internal/crc32c_benchmark.cc new file mode 100644 index 0000000000000..7d04867259492 --- /dev/null +++ b/google/cloud/storage/internal/crc32c_benchmark.cc @@ -0,0 +1,99 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/crc32c.h" +#include +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace { + +// Run on (128 X 2250 MHz CPU s) +// CPU Caches: +// L1 Data 32 KiB (x64) +// L1 Instruction 32 KiB (x64) +// L2 Unified 512 KiB (x64) +// L3 Unified 16384 KiB (x16) +// Load Average: 1.88, 2.61, 6.87 +// ---------------------------------------------------------------------- +// Benchmark Time CPU Iterations +// ---------------------------------------------------------------------- +// BM_Crc32cDuplicateNonAbseil 25520759 ns 25520833 ns 28 +// BM_Crc32cDuplicate 24168074 ns 24168122 ns 28 +// BM_Crc32cConcat 12213494 ns 12213077 ns 57 + +auto constexpr kMessage = 2 * 1024 * std::size_t{1024}; +auto constexpr kWriteSize = 16 * kMessage; +auto constexpr kUploadSize = 8 * kWriteSize; + +void BM_Crc32cDuplicateNonAbseil(benchmark::State& state) { + auto buffer = std::string(kWriteSize, '0'); + auto crc = std::uint32_t{0}; + for (auto _ : state) { + for (std::size_t offset = 0; offset < kUploadSize; offset += kWriteSize) { + for (std::size_t m = 0; m < kWriteSize; m += kMessage) { + auto w = absl::string_view{buffer}.substr(m, kMessage); + auto c = crc32c::Crc32c(w.data(), w.size()); + benchmark::DoNotOptimize(c); + } + crc = crc32c::Extend(crc, + reinterpret_cast(buffer.data()), + buffer.size()); + } + } + benchmark::DoNotOptimize(crc); +} +BENCHMARK(BM_Crc32cDuplicateNonAbseil); + +void BM_Crc32cDuplicate(benchmark::State& state) { + auto buffer = std::string(kWriteSize, '0'); + auto crc = std::uint32_t{0}; + for (auto _ : state) { + for (std::size_t offset = 0; offset < kUploadSize; offset += kWriteSize) { + for (std::size_t m = 0; m < kWriteSize; m += kMessage) { + auto w = absl::string_view{buffer}.substr(m, kMessage); + auto c = Crc32c(w); + benchmark::DoNotOptimize(c); + } + crc = ExtendCrc32c(crc, buffer); + } + } + benchmark::DoNotOptimize(crc); +} +BENCHMARK(BM_Crc32cDuplicate); + +void BM_Crc32cConcat(benchmark::State& state) { + auto buffer = std::string(kWriteSize, '0'); + auto crc = std::uint32_t{0}; + for (auto _ : state) { + for (std::size_t offset = 0; offset < kUploadSize; offset += kWriteSize) { + for (std::size_t m = 0; m < kWriteSize; m += kMessage) { + auto w = absl::string_view{buffer}.substr(m, kMessage); + auto c = Crc32c(w); + crc = ExtendCrc32c(crc, w, c); + } + } + } + benchmark::DoNotOptimize(crc); +} +BENCHMARK(BM_Crc32cConcat); + +} // namespace +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/internal/crc32c_test.cc b/google/cloud/storage/internal/crc32c_test.cc new file mode 100644 index 0000000000000..903b08339c283 --- /dev/null +++ b/google/cloud/storage/internal/crc32c_test.cc @@ -0,0 +1,150 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/crc32c.h" +#include +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace { + +using ::google::cloud::storage::internal::ConstBuffer; +using ::google::cloud::storage::internal::ConstBufferSequence; + +TEST(Crc32c, Empty) { + auto const expected = std::uint32_t{0}; + EXPECT_EQ(expected, Crc32c(absl::string_view{})); + EXPECT_EQ(expected, Crc32c(absl::Cord{})); + EXPECT_EQ(expected, Crc32c(ConstBufferSequence{})); + EXPECT_EQ(expected, + Crc32c(ConstBufferSequence{ConstBuffer{}, ConstBuffer{}})); +} + +TEST(Crc32c, Quick) { + auto const expected = std::uint32_t{0x22620404}; + auto const input = std::string("The quick brown fox jumps over the lazy dog"); + EXPECT_EQ(expected, Crc32c(absl::string_view(input))); + EXPECT_EQ(expected, Crc32c(absl::Cord(input))); + EXPECT_EQ(expected, Crc32c(ConstBufferSequence{ConstBuffer(input)})); +} + +TEST(Crc32c, ExtendNotPrecomputedStringView) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + absl::Cord data; + for (auto const& input : inputs) { + data.Append(input); + } + EXPECT_EQ(expected, Crc32c(data)); +} + +TEST(Crc32c, ExtendNotPrecomputedCord) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + crc = ExtendCrc32c(crc, absl::Cord{input}); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendNotPrecomputedConstBuffer) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + crc = ExtendCrc32c(crc, {ConstBuffer{input}}); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendNotPrecomputedConstBufferFull) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + ConstBufferSequence payload(inputs.size()); + std::transform(inputs.begin(), inputs.end(), payload.begin(), + [](auto const& s) { return ConstBuffer(s); }); + auto crc = std::uint32_t{0}; + crc = ExtendCrc32c(crc, payload); + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedStringView) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + auto const input_crc = Crc32c(input); + crc = ExtendCrc32c(crc, absl::string_view{input}, input_crc); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedConstBuffer) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + auto const input_crc = Crc32c(input); + crc = ExtendCrc32c(crc, {ConstBuffer{input}}, input_crc); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedConstBufferFull) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + ConstBufferSequence payload(inputs.size()); + std::transform(inputs.begin(), inputs.end(), payload.begin(), + [](auto const& s) { return ConstBuffer(s); }); + auto crc = std::uint32_t{0}; + crc = ExtendCrc32c(crc, payload, expected); + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedCord) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + auto const input_crc = Crc32c(input); + crc = ExtendCrc32c(crc, absl::Cord{input}, input_crc); + } + EXPECT_EQ(expected, crc); +} + +} // namespace +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/storage_client_benchmarks.bzl b/google/cloud/storage/storage_client_benchmarks.bzl new file mode 100644 index 0000000000000..ed6c9a6c85b0d --- /dev/null +++ b/google/cloud/storage/storage_client_benchmarks.bzl @@ -0,0 +1,21 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +storage_client_benchmarks = [ + "internal/crc32c_benchmark.cc", +] diff --git a/google/cloud/storage/storage_client_unit_tests.bzl b/google/cloud/storage/storage_client_unit_tests.bzl index dff1c990efb7d..7c6d2422f4974 100644 --- a/google/cloud/storage/storage_client_unit_tests.bzl +++ b/google/cloud/storage/storage_client_unit_tests.bzl @@ -48,6 +48,7 @@ storage_client_unit_tests = [ "internal/complex_option_test.cc", "internal/compute_engine_util_test.cc", "internal/const_buffer_test.cc", + "internal/crc32c_test.cc", "internal/curl_client_test.cc", "internal/curl_download_request_test.cc", "internal/curl_handle_test.cc",