From 541c1d6173f974d8354991d9d4d9cf9ee3d7117a Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Fri, 17 Mar 2023 13:36:48 +0000 Subject: [PATCH 1/3] impl(storage): new helpers for CRC32C computation I am adding new functions to compute CRC32C checksums. When using recent versions of Abseil (>= 20230125) these functions can concatenate the checksum of two buffers. I am planning to use this in the GCS+gRPC plugin, where each `Write()` message includes a CRC32C and we also need to send the end-to-end checksum of all the `Write()` messages. --- google/cloud/storage/BUILD.bazel | 14 ++ .../storage/google_cloud_cpp_storage.bzl | 2 + .../storage/google_cloud_cpp_storage.cmake | 27 +++- google/cloud/storage/internal/crc32c.cc | 117 ++++++++++++++ google/cloud/storage/internal/crc32c.h | 60 +++++++ .../storage/internal/crc32c_benchmark.cc | 78 +++++++++ google/cloud/storage/internal/crc32c_test.cc | 150 ++++++++++++++++++ .../storage/storage_client_benchmarks.bzl | 21 +++ .../storage/storage_client_unit_tests.bzl | 1 + 9 files changed, 469 insertions(+), 1 deletion(-) create mode 100644 google/cloud/storage/internal/crc32c.cc create mode 100644 google/cloud/storage/internal/crc32c.h create mode 100644 google/cloud/storage/internal/crc32c_benchmark.cc create mode 100644 google/cloud/storage/internal/crc32c_test.cc create mode 100644 google/cloud/storage/storage_client_benchmarks.bzl diff --git a/google/cloud/storage/BUILD.bazel b/google/cloud/storage/BUILD.bazel index c4edeae27123c..949c3f6286c15 100644 --- a/google/cloud/storage/BUILD.bazel +++ b/google/cloud/storage/BUILD.bazel @@ -76,6 +76,7 @@ cc_library( "@com_github_google_crc32c//:crc32c", "@com_github_nlohmann_json//:nlohmann_json", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", @@ -165,3 +166,16 @@ load(":storage_client_grpc_unit_tests.bzl", "storage_client_grpc_unit_tests") "@com_google_googletest//:gtest_main", ], ) for test in storage_client_grpc_unit_tests] + +load(":storage_client_benchmarks.bzl", "storage_client_benchmarks") + +[cc_binary( + name = benchmark.replace("/", "_").replace(".cc", ""), + srcs = [benchmark], + tags = ["benchmark"], + deps = [ + ":google_cloud_cpp_storage", + "//:common", + "@com_google_benchmark//:benchmark_main", + ], +) for benchmark in storage_client_benchmarks] diff --git a/google/cloud/storage/google_cloud_cpp_storage.bzl b/google/cloud/storage/google_cloud_cpp_storage.bzl index 709f7ba86e9e2..7dd33c1735cb9 100644 --- a/google/cloud/storage/google_cloud_cpp_storage.bzl +++ b/google/cloud/storage/google_cloud_cpp_storage.bzl @@ -54,6 +54,7 @@ google_cloud_cpp_storage_hdrs = [ "internal/complex_option.h", "internal/compute_engine_util.h", "internal/const_buffer.h", + "internal/crc32c.h", "internal/curl_client.h", "internal/curl_download_request.h", "internal/curl_handle.h", @@ -176,6 +177,7 @@ google_cloud_cpp_storage_srcs = [ "internal/bucket_requests.cc", "internal/compute_engine_util.cc", "internal/const_buffer.cc", + "internal/crc32c.cc", "internal/curl_client.cc", "internal/curl_download_request.cc", "internal/curl_handle.cc", diff --git a/google/cloud/storage/google_cloud_cpp_storage.cmake b/google/cloud/storage/google_cloud_cpp_storage.cmake index 2743a21102e0f..c754a9ab9bc89 100644 --- a/google/cloud/storage/google_cloud_cpp_storage.cmake +++ b/google/cloud/storage/google_cloud_cpp_storage.cmake @@ -81,6 +81,8 @@ add_library( internal/compute_engine_util.h internal/const_buffer.cc internal/const_buffer.h + internal/crc32c.cc + internal/crc32c.h internal/curl_client.cc internal/curl_client.h internal/curl_download_request.cc @@ -254,7 +256,8 @@ add_library( well_known_parameters.h) target_link_libraries( google_cloud_cpp_storage - PUBLIC absl::memory + PUBLIC absl::cord + absl::memory absl::strings absl::str_format absl::time @@ -353,6 +356,7 @@ string( "google_cloud_cpp_common" " google_cloud_cpp_rest_internal" " libcurl openssl" + " absl_cord" " absl_memory" " absl_strings" " absl_str_format" @@ -469,6 +473,7 @@ if (BUILD_TESTING) internal/complex_option_test.cc internal/compute_engine_util_test.cc internal/const_buffer_test.cc + internal/crc32c_test.cc internal/curl_client_test.cc internal/curl_download_request_test.cc internal/curl_handle_test.cc @@ -559,6 +564,26 @@ if (BUILD_TESTING) export_list_to_bazel("storage_client_unit_tests.bzl" "storage_client_unit_tests" YEAR "2018") + include(FindBenchmarkWithWorkarounds) + + set(storage_client_benchmarks # cmake-format: sort + internal/crc32c_benchmark.cc) + + # Export the list of benchmarks to a .bzl file so we do not need to maintain + # the list in two places. + export_list_to_bazel("storage_client_benchmarks.bzl" + "storage_client_benchmarks" YEAR "2023") + + # Generate a target for each benchmark. + foreach (fname IN LISTS storage_client_benchmarks) + google_cloud_cpp_add_executable(target "storage" "${fname}") + add_test(NAME ${target} COMMAND ${target}) + target_link_libraries( + ${target} PRIVATE google-cloud-cpp::storage storage_client_testing + benchmark::benchmark_main) + google_cloud_cpp_add_common_options(${target}) + endforeach () + add_subdirectory(tests) add_subdirectory(benchmarks) endif () diff --git a/google/cloud/storage/internal/crc32c.cc b/google/cloud/storage/internal/crc32c.cc new file mode 100644 index 0000000000000..905fe4c4535bd --- /dev/null +++ b/google/cloud/storage/internal/crc32c.cc @@ -0,0 +1,117 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/crc32c.h" +#include "absl/base/config.h" +#if defined(ABSL_LTS_RELEASE_VERSION) && ABSL_LTS_RELEASE_VERSION >= 20230125 +#include "absl/crc/crc32c.h" +#define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 1 +#else +#include +#define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 0 +#endif // ABSL_LTS_RELEASE_VERSION + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN + +#if GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data) { + return static_cast( + absl::ExtendCrc32c(absl::crc32c_t{crc}, data)); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data) { + auto tmp = absl::crc32c_t{crc}; + for (auto const& b : data) { + tmp = absl::ExtendCrc32c(tmp, absl::string_view{b.data(), b.size()}); + } + return static_cast(tmp); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data) { + auto tmp = absl::crc32c_t{crc}; + for (auto i = data.chunk_begin(); i != data.chunk_end(); ++i) { + tmp = absl::ExtendCrc32c(tmp, *i); + } + return static_cast(tmp); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, + std::uint32_t data_crc) { + return static_cast(absl::ConcatCrc32c( + absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, data.size())); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data, + std::uint32_t data_crc) { + auto const size = storage::internal::TotalBytes(data); + return static_cast( + absl::ConcatCrc32c(absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, size)); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, + std::uint32_t data_crc) { + return static_cast(absl::ConcatCrc32c( + absl::crc32c_t{crc}, absl::crc32c_t{data_crc}, data.size())); +} + +#else + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data) { + return crc32c::Extend(crc, reinterpret_cast(data.data()), + data.size()); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data) { + for (auto const& b : data) { + crc = ExtendCrc32c(crc, absl::string_view{b.data(), b.size()}); + } + return crc; +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data) { + for (auto i = data.chunk_begin(); i != data.chunk_end(); ++i) { + crc = ExtendCrc32c(crc, *i); + } + return crc; +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, + std::uint32_t /*data_crc*/) { + return ExtendCrc32c(crc, data); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data, + std::uint32_t /*data_crc*/) { + return ExtendCrc32c(crc, data); +} + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, + std::uint32_t /*data_crc*/) { + return ExtendCrc32c(crc, data); +} + +#endif // GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C + +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/internal/crc32c.h b/google/cloud/storage/internal/crc32c.h new file mode 100644 index 0000000000000..f4ca5ecf5622a --- /dev/null +++ b/google/cloud/storage/internal/crc32c.h @@ -0,0 +1,60 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_CRC32C_H +#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_CRC32C_H + +#include "google/cloud/storage/internal/const_buffer.h" +#include "google/cloud/storage/version.h" +#include "absl/strings/cord.h" +#include "absl/strings/string_view.h" +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data); +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data); +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data); + +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, + std::uint32_t data_crc); +std::uint32_t ExtendCrc32c(std::uint32_t crc, + storage::internal::ConstBufferSequence const& data, + std::uint32_t data_crc); +std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, + std::uint32_t data_crc); + +inline std::uint32_t Crc32c(absl::string_view data) { + return ExtendCrc32c(0, data); +} + +inline std::uint32_t Crc32c( + storage::internal::ConstBufferSequence const& data) { + return ExtendCrc32c(0, data); +} + +inline std::uint32_t Crc32c(absl::Cord const& data) { + return ExtendCrc32c(0, data); +} + +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google + +#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_STORAGE_INTERNAL_CRC32C_H diff --git a/google/cloud/storage/internal/crc32c_benchmark.cc b/google/cloud/storage/internal/crc32c_benchmark.cc new file mode 100644 index 0000000000000..d404e59a6e443 --- /dev/null +++ b/google/cloud/storage/internal/crc32c_benchmark.cc @@ -0,0 +1,78 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/crc32c.h" +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace { + +// Run on (128 X 2250 MHz CPU s) +// CPU Caches: +// L1 Data 32 KiB (x64) +// L1 Instruction 32 KiB (x64) +// L2 Unified 512 KiB (x64) +// L3 Unified 16384 KiB (x16) +// Load Average: 11.17, 4.97, 7.43 +// ------------------------------------------------------------- +// Benchmark Time CPU Iterations +// ------------------------------------------------------------- +// BM_Crc32cDuplicate 192261696 ns 192256154 ns 4 +// BM_Crc32cConcat 95538105 ns 95538296 ns 7 + +auto constexpr kMessage = 2 * 1024 * std::size_t{1024}; +auto constexpr kWriteSize = 16 * kMessage; +auto constexpr kUploadSize = 8 * kWriteSize; + +void BM_Crc32cDuplicate(benchmark::State& state) { + auto buffer = std::string(kWriteSize, '0'); + auto crc = std::uint32_t{0}; + for (auto _ : state) { + for (std::size_t offset = 0; offset < kUploadSize; offset += kWriteSize) { + for (std::size_t m = 0; m < kWriteSize; m += kMessage) { + auto w = absl::string_view{buffer}.substr(m, kMessage); + auto c = Crc32c(w); + benchmark::DoNotOptimize(c); + } + crc = ExtendCrc32c(crc, buffer); + } + } + benchmark::DoNotOptimize(crc); +} +BENCHMARK(BM_Crc32cDuplicate); + +void BM_Crc32cConcat(benchmark::State& state) { + auto buffer = std::string(kWriteSize, '0'); + auto crc = std::uint32_t{0}; + for (auto _ : state) { + for (std::size_t offset = 0; offset < kUploadSize; offset += kWriteSize) { + for (std::size_t m = 0; m < kWriteSize; m += kMessage) { + auto w = absl::string_view{buffer}.substr(m, kMessage); + auto c = Crc32c(w); + crc = ExtendCrc32c(crc, w, c); + } + } + } + benchmark::DoNotOptimize(crc); +} +BENCHMARK(BM_Crc32cConcat); + +} // namespace +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/internal/crc32c_test.cc b/google/cloud/storage/internal/crc32c_test.cc new file mode 100644 index 0000000000000..903b08339c283 --- /dev/null +++ b/google/cloud/storage/internal/crc32c_test.cc @@ -0,0 +1,150 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/crc32c.h" +#include +#include + +namespace google { +namespace cloud { +namespace storage_internal { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace { + +using ::google::cloud::storage::internal::ConstBuffer; +using ::google::cloud::storage::internal::ConstBufferSequence; + +TEST(Crc32c, Empty) { + auto const expected = std::uint32_t{0}; + EXPECT_EQ(expected, Crc32c(absl::string_view{})); + EXPECT_EQ(expected, Crc32c(absl::Cord{})); + EXPECT_EQ(expected, Crc32c(ConstBufferSequence{})); + EXPECT_EQ(expected, + Crc32c(ConstBufferSequence{ConstBuffer{}, ConstBuffer{}})); +} + +TEST(Crc32c, Quick) { + auto const expected = std::uint32_t{0x22620404}; + auto const input = std::string("The quick brown fox jumps over the lazy dog"); + EXPECT_EQ(expected, Crc32c(absl::string_view(input))); + EXPECT_EQ(expected, Crc32c(absl::Cord(input))); + EXPECT_EQ(expected, Crc32c(ConstBufferSequence{ConstBuffer(input)})); +} + +TEST(Crc32c, ExtendNotPrecomputedStringView) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + absl::Cord data; + for (auto const& input : inputs) { + data.Append(input); + } + EXPECT_EQ(expected, Crc32c(data)); +} + +TEST(Crc32c, ExtendNotPrecomputedCord) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + crc = ExtendCrc32c(crc, absl::Cord{input}); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendNotPrecomputedConstBuffer) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + crc = ExtendCrc32c(crc, {ConstBuffer{input}}); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendNotPrecomputedConstBufferFull) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + ConstBufferSequence payload(inputs.size()); + std::transform(inputs.begin(), inputs.end(), payload.begin(), + [](auto const& s) { return ConstBuffer(s); }); + auto crc = std::uint32_t{0}; + crc = ExtendCrc32c(crc, payload); + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedStringView) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + auto const input_crc = Crc32c(input); + crc = ExtendCrc32c(crc, absl::string_view{input}, input_crc); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedConstBuffer) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + auto const input_crc = Crc32c(input); + crc = ExtendCrc32c(crc, {ConstBuffer{input}}, input_crc); + } + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedConstBufferFull) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + ConstBufferSequence payload(inputs.size()); + std::transform(inputs.begin(), inputs.end(), payload.begin(), + [](auto const& s) { return ConstBuffer(s); }); + auto crc = std::uint32_t{0}; + crc = ExtendCrc32c(crc, payload, expected); + EXPECT_EQ(expected, crc); +} + +TEST(Crc32c, ExtendPrecomputedCord) { + auto const expected = std::uint32_t{0x22620404}; + std::vector const inputs{"The", " quick", " brown", + " fox", " jumps", " over", + " the", " lazy", " dog"}; + auto crc = std::uint32_t{0}; + for (auto const& input : inputs) { + auto const input_crc = Crc32c(input); + crc = ExtendCrc32c(crc, absl::Cord{input}, input_crc); + } + EXPECT_EQ(expected, crc); +} + +} // namespace +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage_internal +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/storage_client_benchmarks.bzl b/google/cloud/storage/storage_client_benchmarks.bzl new file mode 100644 index 0000000000000..ed6c9a6c85b0d --- /dev/null +++ b/google/cloud/storage/storage_client_benchmarks.bzl @@ -0,0 +1,21 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +storage_client_benchmarks = [ + "internal/crc32c_benchmark.cc", +] diff --git a/google/cloud/storage/storage_client_unit_tests.bzl b/google/cloud/storage/storage_client_unit_tests.bzl index dff1c990efb7d..7c6d2422f4974 100644 --- a/google/cloud/storage/storage_client_unit_tests.bzl +++ b/google/cloud/storage/storage_client_unit_tests.bzl @@ -48,6 +48,7 @@ storage_client_unit_tests = [ "internal/complex_option_test.cc", "internal/compute_engine_util_test.cc", "internal/const_buffer_test.cc", + "internal/crc32c_test.cc", "internal/curl_client_test.cc", "internal/curl_download_request_test.cc", "internal/curl_handle_test.cc", From 645110870f153bfd90e0fe7f69e919402c20de31 Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Fri, 17 Mar 2023 18:09:00 +0000 Subject: [PATCH 2/3] Use Abseil only for concat --- google/cloud/storage/internal/crc32c.cc | 40 +++++-------------- .../storage/internal/crc32c_benchmark.cc | 31 +++++++++++--- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/google/cloud/storage/internal/crc32c.cc b/google/cloud/storage/internal/crc32c.cc index 905fe4c4535bd..57c16dfe38e20 100644 --- a/google/cloud/storage/internal/crc32c.cc +++ b/google/cloud/storage/internal/crc32c.cc @@ -18,39 +18,37 @@ #include "absl/crc/crc32c.h" #define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 1 #else -#include #define GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C 0 #endif // ABSL_LTS_RELEASE_VERSION +#include namespace google { namespace cloud { namespace storage_internal { GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN -#if GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C - std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data) { - return static_cast( - absl::ExtendCrc32c(absl::crc32c_t{crc}, data)); + return crc32c::Extend(crc, reinterpret_cast(data.data()), + data.size()); } std::uint32_t ExtendCrc32c(std::uint32_t crc, storage::internal::ConstBufferSequence const& data) { - auto tmp = absl::crc32c_t{crc}; for (auto const& b : data) { - tmp = absl::ExtendCrc32c(tmp, absl::string_view{b.data(), b.size()}); + crc = ExtendCrc32c(crc, absl::string_view{b.data(), b.size()}); } - return static_cast(tmp); + return crc; } std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data) { - auto tmp = absl::crc32c_t{crc}; for (auto i = data.chunk_begin(); i != data.chunk_end(); ++i) { - tmp = absl::ExtendCrc32c(tmp, *i); + crc = ExtendCrc32c(crc, *i); } - return static_cast(tmp); + return crc; } +#if GOOGLE_CLOUD_CPP_USE_ABSL_CRC32C + std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, std::uint32_t data_crc) { return static_cast(absl::ConcatCrc32c( @@ -73,26 +71,6 @@ std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data, #else -std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data) { - return crc32c::Extend(crc, reinterpret_cast(data.data()), - data.size()); -} - -std::uint32_t ExtendCrc32c(std::uint32_t crc, - storage::internal::ConstBufferSequence const& data) { - for (auto const& b : data) { - crc = ExtendCrc32c(crc, absl::string_view{b.data(), b.size()}); - } - return crc; -} - -std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::Cord const& data) { - for (auto i = data.chunk_begin(); i != data.chunk_end(); ++i) { - crc = ExtendCrc32c(crc, *i); - } - return crc; -} - std::uint32_t ExtendCrc32c(std::uint32_t crc, absl::string_view data, std::uint32_t /*data_crc*/) { return ExtendCrc32c(crc, data); diff --git a/google/cloud/storage/internal/crc32c_benchmark.cc b/google/cloud/storage/internal/crc32c_benchmark.cc index d404e59a6e443..0aad445bd2917 100644 --- a/google/cloud/storage/internal/crc32c_benchmark.cc +++ b/google/cloud/storage/internal/crc32c_benchmark.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "google/cloud/storage/internal/crc32c.h" +#include #include namespace google { @@ -27,17 +28,35 @@ namespace { // L1 Instruction 32 KiB (x64) // L2 Unified 512 KiB (x64) // L3 Unified 16384 KiB (x16) -// Load Average: 11.17, 4.97, 7.43 -// ------------------------------------------------------------- -// Benchmark Time CPU Iterations -// ------------------------------------------------------------- -// BM_Crc32cDuplicate 192261696 ns 192256154 ns 4 -// BM_Crc32cConcat 95538105 ns 95538296 ns 7 +// Load Average: 1.88, 2.61, 6.87 +// ---------------------------------------------------------------------- +// Benchmark Time CPU Iterations +// ---------------------------------------------------------------------- +// BM_Crc32cDuplicateNonAbseil 25520759 ns 25520833 ns 28 +// BM_Crc32cDuplicate 24168074 ns 24168122 ns 28 +// BM_Crc32cConcat 12213494 ns 12213077 ns 57 auto constexpr kMessage = 2 * 1024 * std::size_t{1024}; auto constexpr kWriteSize = 16 * kMessage; auto constexpr kUploadSize = 8 * kWriteSize; +void BM_Crc32cDuplicateNonAbseil(benchmark::State& state) { + auto buffer = std::string(kWriteSize, '0'); + auto crc = std::uint32_t{0}; + for (auto _ : state) { + for (std::size_t offset = 0; offset < kUploadSize; offset += kWriteSize) { + for (std::size_t m = 0; m < kWriteSize; m += kMessage) { + auto w = absl::string_view{buffer}.substr(m, kMessage); + auto c = crc32c::Crc32c(w.data(), w.size()); + benchmark::DoNotOptimize(c); + } + crc = crc32c::Extend(crc, reinterpret_cast(buffer.data()), buffer.size()); + } + } + benchmark::DoNotOptimize(crc); +} +BENCHMARK(BM_Crc32cDuplicateNonAbseil); + void BM_Crc32cDuplicate(benchmark::State& state) { auto buffer = std::string(kWriteSize, '0'); auto crc = std::uint32_t{0}; From 3542d8596f28776fbd40df9141eb53d5ffcdcff5 Mon Sep 17 00:00:00 2001 From: Carlos O'Ryan Date: Fri, 17 Mar 2023 18:13:42 +0000 Subject: [PATCH 3/3] Fix formatting --- google/cloud/storage/internal/crc32c_benchmark.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/google/cloud/storage/internal/crc32c_benchmark.cc b/google/cloud/storage/internal/crc32c_benchmark.cc index 0aad445bd2917..7d04867259492 100644 --- a/google/cloud/storage/internal/crc32c_benchmark.cc +++ b/google/cloud/storage/internal/crc32c_benchmark.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "google/cloud/storage/internal/crc32c.h" -#include #include +#include namespace google { namespace cloud { @@ -50,7 +50,9 @@ void BM_Crc32cDuplicateNonAbseil(benchmark::State& state) { auto c = crc32c::Crc32c(w.data(), w.size()); benchmark::DoNotOptimize(c); } - crc = crc32c::Extend(crc, reinterpret_cast(buffer.data()), buffer.size()); + crc = crc32c::Extend(crc, + reinterpret_cast(buffer.data()), + buffer.size()); } } benchmark::DoNotOptimize(crc);