From 0a0729db9d9bf97ff807b4ad55aee8aa7a330222 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Thu, 25 Jan 2024 01:18:53 +0800 Subject: [PATCH 1/7] Reproduce --- cpp/src/arrow/compute/key_hash_test.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index 3e6d41525cf..a81b669585e 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -252,5 +252,30 @@ TEST(VectorHash, BasicString) { RunTestVectorHash(); } TEST(VectorHash, BasicLargeString) { RunTestVectorHash(); } +TEST(VectorHash, TailByteSafety) { + constexpr int mini_batch_size = 1024; + std::vector temp_buffer; + temp_buffer.resize(mini_batch_size * 4); + const auto hardware_flags_for_testing = HardwareFlagsForTesting(); + ASSERT_GT(hardware_flags_for_testing.size(), 0); + for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + const auto hardware_flags = hardware_flags_for_testing[i]; + constexpr auto fixed_length = 5; + FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); + for (int j = 0; j < 883; ++j) { + ASSERT_OK(builder.Append("12345")); + } + ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); + ASSERT_EQ(array->data()->buffers[1]->capacity(), 4416); + constexpr auto offset = 858; + constexpr auto num_rows = 25; + std::vector hashes32(num_rows); + Hashing32::HashFixed(hardware_flags, + /*combine_hashes=*/false, num_rows, fixed_length, + array->data()->GetValues(1) + offset * fixed_length, + hashes32.data(), temp_buffer.data()); + } +} + } // namespace compute } // namespace arrow From 27a4745395cd218191fa72d1a7a7cc7392a6cec9 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Thu, 25 Jan 2024 01:40:41 +0800 Subject: [PATCH 2/7] More repro --- cpp/src/arrow/compute/key_hash_test.cc | 41 ++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/key_hash_test.cc 
b/cpp/src/arrow/compute/key_hash_test.cc index a81b669585e..daa55a620d6 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -253,6 +253,12 @@ TEST(VectorHash, BasicString) { RunTestVectorHash(); } TEST(VectorHash, BasicLargeString) { RunTestVectorHash(); } TEST(VectorHash, TailByteSafety) { + constexpr auto num_rows_total = 883; + constexpr auto fixed_length = 5; + constexpr auto num_bytes_aligned = 4416; + constexpr auto offset = 858; // 860 + constexpr auto num_rows = 25; // 23 + constexpr int mini_batch_size = 1024; std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); @@ -260,15 +266,40 @@ TEST(VectorHash, TailByteSafety) { ASSERT_GT(hardware_flags_for_testing.size(), 0); for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; - constexpr auto fixed_length = 5; FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); - for (int j = 0; j < 883; ++j) { + for (int j = 0; j < num_rows_total; ++j) { ASSERT_OK(builder.Append("12345")); } ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); - ASSERT_EQ(array->data()->buffers[1]->capacity(), 4416); - constexpr auto offset = 858; - constexpr auto num_rows = 25; + ASSERT_EQ(array->data()->buffers[1]->capacity(), num_bytes_aligned); + std::vector hashes32(num_rows); + Hashing32::HashFixed(hardware_flags, + /*combine_hashes=*/false, num_rows, fixed_length, + array->data()->GetValues(1) + offset * fixed_length, + hashes32.data(), temp_buffer.data()); + } +} + +TEST(VectorHash, TailByteSafetySmall) { + constexpr auto num_rows_total = 1450; + constexpr auto fixed_length = 3; + constexpr auto num_bytes_aligned = 4352; + constexpr auto offset = 1447; + constexpr auto num_rows = 3; + + constexpr int mini_batch_size = 1024; + std::vector temp_buffer; + temp_buffer.resize(mini_batch_size * 4); + const auto hardware_flags_for_testing = HardwareFlagsForTesting(); + 
ASSERT_GT(hardware_flags_for_testing.size(), 0); + for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + const auto hardware_flags = hardware_flags_for_testing[i]; + FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); + for (int j = 0; j < num_rows_total; ++j) { + ASSERT_OK(builder.Append("123")); + } + ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); + ASSERT_EQ(array->data()->buffers[1]->capacity(), num_bytes_aligned); std::vector hashes32(num_rows); Hashing32::HashFixed(hardware_flags, /*combine_hashes=*/false, num_rows, fixed_length, From b9a8e7608e37e4228822326d5b82640677f1db38 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Thu, 25 Jan 2024 09:55:03 +0800 Subject: [PATCH 3/7] A likely fix --- cpp/src/arrow/compute/key_hash_avx2.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/key_hash_avx2.cc b/cpp/src/arrow/compute/key_hash_avx2.cc index 1b444b57678..aec2800c647 100644 --- a/cpp/src/arrow/compute/key_hash_avx2.cc +++ b/cpp/src/arrow/compute/key_hash_avx2.cc @@ -190,7 +190,7 @@ uint32_t Hashing32::HashFixedLenImp_avx2(uint32_t num_rows, uint64_t length, // Do not process rows that could read past the end of the buffer using 16 // byte loads. Round down number of rows to process to multiple of 2. // - uint64_t num_rows_to_skip = bit_util::CeilDiv(length, kStripeSize); + uint64_t num_rows_to_skip = bit_util::CeilDiv(kStripeSize, length); uint32_t num_rows_to_process = (num_rows_to_skip > num_rows) ? 
0 From 0a6cac0812e127b868a1be3f24edb6ef3252bef7 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Thu, 25 Jan 2024 11:32:32 +0800 Subject: [PATCH 4/7] More test --- cpp/src/arrow/compute/key_hash_test.cc | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index daa55a620d6..3744cf26884 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -308,5 +308,33 @@ TEST(VectorHash, TailByteSafetySmall) { } } +TEST(VectorHash, TailByteSafetyBig) { + constexpr auto num_rows_total = 64; + constexpr auto fixed_length = 19; + constexpr auto num_bytes_aligned = 1216; + constexpr auto offset = 63; + constexpr auto num_rows = num_rows_total - offset; + + constexpr int mini_batch_size = 1024; + std::vector temp_buffer; + temp_buffer.resize(mini_batch_size * 4); + const auto hardware_flags_for_testing = HardwareFlagsForTesting(); + ASSERT_GT(hardware_flags_for_testing.size(), 0); + for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + const auto hardware_flags = hardware_flags_for_testing[i]; + FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); + for (int j = 0; j < num_rows_total; ++j) { + ASSERT_OK(builder.Append(std::string(fixed_length, 0))); + } + ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); + ASSERT_EQ(array->data()->buffers[1]->capacity(), num_bytes_aligned); + std::vector hashes32(num_rows); + Hashing32::HashFixed(hardware_flags, + /*combine_hashes=*/false, num_rows, fixed_length, + array->data()->GetValues(1) + offset * fixed_length, + hashes32.data(), temp_buffer.data()); + } +} + } // namespace compute } // namespace arrow From 82cd673b947aebf8a50f8ce93d3739550f4e6323 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Fri, 26 Jan 2024 00:42:09 +0800 Subject: [PATCH 5/7] Finish tests --- cpp/src/arrow/compute/key_hash_test.cc | 107 ++++++++++--------------- 1 file changed, 42 
insertions(+), 65 deletions(-) diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index 3744cf26884..f1d4929b6b6 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -252,89 +252,66 @@ TEST(VectorHash, BasicString) { RunTestVectorHash(); } TEST(VectorHash, BasicLargeString) { RunTestVectorHash(); } -TEST(VectorHash, TailByteSafety) { - constexpr auto num_rows_total = 883; - constexpr auto fixed_length = 5; - constexpr auto num_bytes_aligned = 4416; - constexpr auto offset = 858; // 860 - constexpr auto num_rows = 25; // 23 +void HashFixedLengthFrom(int fixed_length, int num_rows_total, int start_row) { + int num_rows = num_rows_total - start_row; + auto num_bytes_aligned = + arrow::bit_util::RoundUpToMultipleOf64(fixed_length * num_rows_total); - constexpr int mini_batch_size = 1024; - std::vector temp_buffer; - temp_buffer.resize(mini_batch_size * 4); const auto hardware_flags_for_testing = HardwareFlagsForTesting(); ASSERT_GT(hardware_flags_for_testing.size(), 0); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { - const auto hardware_flags = hardware_flags_for_testing[i]; - FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); - for (int j = 0; j < num_rows_total; ++j) { - ASSERT_OK(builder.Append("12345")); - } - ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); - ASSERT_EQ(array->data()->buffers[1]->capacity(), num_bytes_aligned); - std::vector hashes32(num_rows); - Hashing32::HashFixed(hardware_flags, - /*combine_hashes=*/false, num_rows, fixed_length, - array->data()->GetValues(1) + offset * fixed_length, - hashes32.data(), temp_buffer.data()); + + std::vector> hashes32(hardware_flags_for_testing.size()); + std::vector> hashes64(hardware_flags_for_testing.size()); + for (auto& h : hashes32) { + h.resize(num_rows); + } + for (auto& h : hashes64) { + h.resize(num_rows); } -} -TEST(VectorHash, TailByteSafetySmall) { - constexpr auto 
num_rows_total = 1450; - constexpr auto fixed_length = 3; - constexpr auto num_bytes_aligned = 4352; - constexpr auto offset = 1447; - constexpr auto num_rows = 3; + FixedSizeBinaryBuilder keys_builder(fixed_size_binary(fixed_length)); + for (int j = 0; j < num_rows_total; ++j) { + ASSERT_OK(keys_builder.Append(std::string(fixed_length, 42))); + } + ASSERT_OK_AND_ASSIGN(auto keys, keys_builder.Finish()); + // Make sure the buffer is aligned as expected. + ASSERT_EQ(keys->data()->buffers[1]->capacity(), num_bytes_aligned); constexpr int mini_batch_size = 1024; std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - const auto hardware_flags_for_testing = HardwareFlagsForTesting(); - ASSERT_GT(hardware_flags_for_testing.size(), 0); + for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; - FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); - for (int j = 0; j < num_rows_total; ++j) { - ASSERT_OK(builder.Append("123")); - } - ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); - ASSERT_EQ(array->data()->buffers[1]->capacity(), num_bytes_aligned); - std::vector hashes32(num_rows); Hashing32::HashFixed(hardware_flags, /*combine_hashes=*/false, num_rows, fixed_length, - array->data()->GetValues(1) + offset * fixed_length, - hashes32.data(), temp_buffer.data()); + keys->data()->GetValues(1) + start_row * fixed_length, + hashes32[i].data(), temp_buffer.data()); + Hashing64::HashFixed( + /*combine_hashes=*/false, num_rows, fixed_length, + keys->data()->GetValues(1) + start_row * fixed_length, + hashes64[i].data()); } -} -TEST(VectorHash, TailByteSafetyBig) { - constexpr auto num_rows_total = 64; - constexpr auto fixed_length = 19; - constexpr auto num_bytes_aligned = 1216; - constexpr auto offset = 63; - constexpr auto num_rows = num_rows_total - offset; - - constexpr int mini_batch_size = 1024; - std::vector temp_buffer; - temp_buffer.resize(mini_batch_size * 4); - 
const auto hardware_flags_for_testing = HardwareFlagsForTesting(); - ASSERT_GT(hardware_flags_for_testing.size(), 0); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { - const auto hardware_flags = hardware_flags_for_testing[i]; - FixedSizeBinaryBuilder builder(fixed_size_binary(fixed_length)); - for (int j = 0; j < num_rows_total; ++j) { - ASSERT_OK(builder.Append(std::string(fixed_length, 0))); + // Verify that all implementations (scalar, SIMD) give the same hashes. + for (int i = 1; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (int j = 0; j < num_rows; ++j) { + ASSERT_EQ(hashes32[i][j], hashes32[0][j]) + << "scalar and simd approaches yielded different 32-bit hashes"; + ASSERT_EQ(hashes64[i][j], hashes64[0][j]) + << "scalar and simd approaches yielded different 64-bit hashes"; } - ASSERT_OK_AND_ASSIGN(auto array, builder.Finish()); - ASSERT_EQ(array->data()->buffers[1]->capacity(), num_bytes_aligned); - std::vector hashes32(num_rows); - Hashing32::HashFixed(hardware_flags, - /*combine_hashes=*/false, num_rows, fixed_length, - array->data()->GetValues(1) + offset * fixed_length, - hashes32.data(), temp_buffer.data()); } } +// Some carefully chosen cases that may cause troubles like GH-39778. +TEST(VectorHash, FixedSizeTailByteSafety) { + // Tow cases of fixed_length < stripe (16-byte). + HashFixedLengthFrom(/*fixed_length=*/3, /*num_rows_total=*/1450, /*start_row=*/1447); + HashFixedLengthFrom(/*fixed_length=*/5, /*num_rows_total=*/883, /*start_row=*/858); + // Case of fixed_length > stripe (16-byte). 
+ HashFixedLengthFrom(/*fixed_length=*/19, /*num_rows_total=*/64, /*start_row=*/63); +} + } // namespace compute } // namespace arrow From 840ffc51c7c83d423a6232f819645eac4255baad Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Fri, 26 Jan 2024 02:36:39 +0800 Subject: [PATCH 6/7] Change confusing names --- cpp/src/arrow/compute/key_hash.cc | 142 +++++++++++++------------ cpp/src/arrow/compute/key_hash.h | 22 ++-- cpp/src/arrow/compute/key_hash_test.cc | 39 ++++--- 3 files changed, 104 insertions(+), 99 deletions(-) diff --git a/cpp/src/arrow/compute/key_hash.cc b/cpp/src/arrow/compute/key_hash.cc index f5867b405ec..1902b9ce9a8 100644 --- a/cpp/src/arrow/compute/key_hash.cc +++ b/cpp/src/arrow/compute/key_hash.cc @@ -105,23 +105,23 @@ inline void Hashing32::StripeMask(int i, uint32_t* mask1, uint32_t* mask2, } template -void Hashing32::HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys, - uint32_t* hashes) { +void Hashing32::HashFixedLenImp(uint32_t num_rows, uint64_t key_length, + const uint8_t* keys, uint32_t* hashes) { // Calculate the number of rows that skip the last 16 bytes // uint32_t num_rows_safe = num_rows; - while (num_rows_safe > 0 && (num_rows - num_rows_safe) * length < kStripeSize) { + while (num_rows_safe > 0 && (num_rows - num_rows_safe) * key_length < kStripeSize) { --num_rows_safe; } // Compute masks for the last 16 byte stripe // - uint64_t num_stripes = bit_util::CeilDiv(length, kStripeSize); + uint64_t num_stripes = bit_util::CeilDiv(key_length, kStripeSize); uint32_t mask1, mask2, mask3, mask4; - StripeMask(((length - 1) & (kStripeSize - 1)) + 1, &mask1, &mask2, &mask3, &mask4); + StripeMask(((key_length - 1) & (kStripeSize - 1)) + 1, &mask1, &mask2, &mask3, &mask4); for (uint32_t i = 0; i < num_rows_safe; ++i) { - const uint8_t* key = keys + static_cast(i) * length; + const uint8_t* key = keys + static_cast(i) * key_length; uint32_t acc1, acc2, acc3, acc4; ProcessFullStripes(num_stripes, key, &acc1, &acc2, &acc3, &acc4); 
ProcessLastStripe(mask1, mask2, mask3, mask4, key + (num_stripes - 1) * kStripeSize, @@ -138,11 +138,11 @@ void Hashing32::HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_ uint32_t last_stripe_copy[4]; for (uint32_t i = num_rows_safe; i < num_rows; ++i) { - const uint8_t* key = keys + static_cast(i) * length; + const uint8_t* key = keys + static_cast(i) * key_length; uint32_t acc1, acc2, acc3, acc4; ProcessFullStripes(num_stripes, key, &acc1, &acc2, &acc3, &acc4); memcpy(last_stripe_copy, key + (num_stripes - 1) * kStripeSize, - length - (num_stripes - 1) * kStripeSize); + key_length - (num_stripes - 1) * kStripeSize); ProcessLastStripe(mask1, mask2, mask3, mask4, reinterpret_cast(last_stripe_copy), &acc1, &acc2, &acc3, &acc4); @@ -168,15 +168,16 @@ void Hashing32::HashVarLenImp(uint32_t num_rows, const T* offsets, } for (uint32_t i = 0; i < num_rows_safe; ++i) { - uint64_t length = offsets[i + 1] - offsets[i]; + uint64_t key_length = offsets[i + 1] - offsets[i]; // Compute masks for the last 16 byte stripe. // For an empty string set number of stripes to 1 but mask to all zeroes. // - int is_non_empty = length == 0 ? 0 : 1; - uint64_t num_stripes = bit_util::CeilDiv(length, kStripeSize) + (1 - is_non_empty); + int is_non_empty = key_length == 0 ? 0 : 1; + uint64_t num_stripes = + bit_util::CeilDiv(key_length, kStripeSize) + (1 - is_non_empty); uint32_t mask1, mask2, mask3, mask4; - StripeMask(((length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, + StripeMask(((key_length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, &mask2, &mask3, &mask4); const uint8_t* key = concatenated_keys + offsets[i]; @@ -198,23 +199,24 @@ void Hashing32::HashVarLenImp(uint32_t num_rows, const T* offsets, uint32_t last_stripe_copy[4]; for (uint32_t i = num_rows_safe; i < num_rows; ++i) { - uint64_t length = offsets[i + 1] - offsets[i]; + uint64_t key_length = offsets[i + 1] - offsets[i]; // Compute masks for the last 16 byte stripe. 
// For an empty string set number of stripes to 1 but mask to all zeroes. // - int is_non_empty = length == 0 ? 0 : 1; - uint64_t num_stripes = bit_util::CeilDiv(length, kStripeSize) + (1 - is_non_empty); + int is_non_empty = key_length == 0 ? 0 : 1; + uint64_t num_stripes = + bit_util::CeilDiv(key_length, kStripeSize) + (1 - is_non_empty); uint32_t mask1, mask2, mask3, mask4; - StripeMask(((length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, + StripeMask(((key_length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, &mask2, &mask3, &mask4); const uint8_t* key = concatenated_keys + offsets[i]; uint32_t acc1, acc2, acc3, acc4; ProcessFullStripes(num_stripes, key, &acc1, &acc2, &acc3, &acc4); - if (length > 0) { + if (key_length > 0) { memcpy(last_stripe_copy, key + (num_stripes - 1) * kStripeSize, - length - (num_stripes - 1) * kStripeSize); + key_length - (num_stripes - 1) * kStripeSize); } if (num_stripes > 0) { ProcessLastStripe(mask1, mask2, mask3, mask4, @@ -309,9 +311,9 @@ void Hashing32::HashIntImp(uint32_t num_keys, const T* keys, uint32_t* hashes) { } } -void Hashing32::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_key, +void Hashing32::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t key_length, const uint8_t* keys, uint32_t* hashes) { - switch (length_key) { + switch (key_length) { case sizeof(uint8_t): if (combine_hashes) { HashIntImp(num_keys, keys, hashes); @@ -352,27 +354,27 @@ void Hashing32::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_ } } -void Hashing32::HashFixed(int64_t hardware_flags, bool combine_hashes, uint32_t num_rows, - uint64_t length, const uint8_t* keys, uint32_t* hashes, - uint32_t* hashes_temp_for_combine) { - if (ARROW_POPCOUNT64(length) == 1 && length <= sizeof(uint64_t)) { - HashInt(combine_hashes, num_rows, length, keys, hashes); +void Hashing32::HashFixed(int64_t hardware_flags, bool combine_hashes, uint32_t num_keys, + uint64_t key_length, const uint8_t* 
keys, uint32_t* hashes, + uint32_t* temp_hashes_for_combine) { + if (ARROW_POPCOUNT64(key_length) == 1 && key_length <= sizeof(uint64_t)) { + HashInt(combine_hashes, num_keys, key_length, keys, hashes); return; } uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (hardware_flags & arrow::internal::CpuInfo::AVX2) { - num_processed = HashFixedLen_avx2(combine_hashes, num_rows, length, keys, hashes, - hashes_temp_for_combine); + num_processed = HashFixedLen_avx2(combine_hashes, num_keys, key_length, keys, hashes, + temp_hashes_for_combine); } #endif if (combine_hashes) { - HashFixedLenImp(num_rows - num_processed, length, keys + length * num_processed, - hashes + num_processed); + HashFixedLenImp(num_keys - num_processed, key_length, + keys + key_length * num_processed, hashes + num_processed); } else { - HashFixedLenImp(num_rows - num_processed, length, - keys + length * num_processed, hashes + num_processed); + HashFixedLenImp(num_keys - num_processed, key_length, + keys + key_length * num_processed, hashes + num_processed); } } @@ -423,13 +425,13 @@ void Hashing32::HashMultiColumn(const std::vector& cols, } if (cols[icol].metadata().is_fixed_length) { - uint32_t col_width = cols[icol].metadata().fixed_length; - if (col_width == 0) { + uint32_t key_length = cols[icol].metadata().fixed_length; + if (key_length == 0) { HashBit(icol > 0, cols[icol].bit_offset(1), batch_size_next, cols[icol].data(1) + first_row / 8, hashes + first_row); } else { - HashFixed(ctx->hardware_flags, icol > 0, batch_size_next, col_width, - cols[icol].data(1) + first_row * col_width, hashes + first_row, + HashFixed(ctx->hardware_flags, icol > 0, batch_size_next, key_length, + cols[icol].data(1) + first_row * key_length, hashes + first_row, hash_temp); } } else if (cols[icol].metadata().fixed_length == sizeof(uint32_t)) { @@ -463,8 +465,9 @@ void Hashing32::HashMultiColumn(const std::vector& cols, Status Hashing32::HashBatch(const ExecBatch& key_batch, uint32_t* hashes, 
std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, - int64_t offset, int64_t length) { - RETURN_NOT_OK(ColumnArraysFromExecBatch(key_batch, offset, length, &column_arrays)); + int64_t start_rows, int64_t num_rows) { + RETURN_NOT_OK( + ColumnArraysFromExecBatch(key_batch, start_rows, num_rows, &column_arrays)); LightContext ctx; ctx.hardware_flags = hardware_flags; @@ -574,23 +577,23 @@ inline void Hashing64::StripeMask(int i, uint64_t* mask1, uint64_t* mask2, } template -void Hashing64::HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys, - uint64_t* hashes) { +void Hashing64::HashFixedLenImp(uint32_t num_rows, uint64_t key_length, + const uint8_t* keys, uint64_t* hashes) { // Calculate the number of rows that skip the last 32 bytes // uint32_t num_rows_safe = num_rows; - while (num_rows_safe > 0 && (num_rows - num_rows_safe) * length < kStripeSize) { + while (num_rows_safe > 0 && (num_rows - num_rows_safe) * key_length < kStripeSize) { --num_rows_safe; } // Compute masks for the last 32 byte stripe // - uint64_t num_stripes = bit_util::CeilDiv(length, kStripeSize); + uint64_t num_stripes = bit_util::CeilDiv(key_length, kStripeSize); uint64_t mask1, mask2, mask3, mask4; - StripeMask(((length - 1) & (kStripeSize - 1)) + 1, &mask1, &mask2, &mask3, &mask4); + StripeMask(((key_length - 1) & (kStripeSize - 1)) + 1, &mask1, &mask2, &mask3, &mask4); for (uint32_t i = 0; i < num_rows_safe; ++i) { - const uint8_t* key = keys + static_cast(i) * length; + const uint8_t* key = keys + static_cast(i) * key_length; uint64_t acc1, acc2, acc3, acc4; ProcessFullStripes(num_stripes, key, &acc1, &acc2, &acc3, &acc4); ProcessLastStripe(mask1, mask2, mask3, mask4, key + (num_stripes - 1) * kStripeSize, @@ -607,11 +610,11 @@ void Hashing64::HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_ uint64_t last_stripe_copy[4]; for (uint32_t i = num_rows_safe; i < num_rows; ++i) { - const uint8_t* key = keys + static_cast(i) * 
length; + const uint8_t* key = keys + static_cast(i) * key_length; uint64_t acc1, acc2, acc3, acc4; ProcessFullStripes(num_stripes, key, &acc1, &acc2, &acc3, &acc4); memcpy(last_stripe_copy, key + (num_stripes - 1) * kStripeSize, - length - (num_stripes - 1) * kStripeSize); + key_length - (num_stripes - 1) * kStripeSize); ProcessLastStripe(mask1, mask2, mask3, mask4, reinterpret_cast(last_stripe_copy), &acc1, &acc2, &acc3, &acc4); @@ -637,15 +640,16 @@ void Hashing64::HashVarLenImp(uint32_t num_rows, const T* offsets, } for (uint32_t i = 0; i < num_rows_safe; ++i) { - uint64_t length = offsets[i + 1] - offsets[i]; + uint64_t key_length = offsets[i + 1] - offsets[i]; // Compute masks for the last 32 byte stripe. // For an empty string set number of stripes to 1 but mask to all zeroes. // - int is_non_empty = length == 0 ? 0 : 1; - uint64_t num_stripes = bit_util::CeilDiv(length, kStripeSize) + (1 - is_non_empty); + int is_non_empty = key_length == 0 ? 0 : 1; + uint64_t num_stripes = + bit_util::CeilDiv(key_length, kStripeSize) + (1 - is_non_empty); uint64_t mask1, mask2, mask3, mask4; - StripeMask(((length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, + StripeMask(((key_length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, &mask2, &mask3, &mask4); const uint8_t* key = concatenated_keys + offsets[i]; @@ -667,22 +671,23 @@ void Hashing64::HashVarLenImp(uint32_t num_rows, const T* offsets, uint64_t last_stripe_copy[4]; for (uint32_t i = num_rows_safe; i < num_rows; ++i) { - uint64_t length = offsets[i + 1] - offsets[i]; + uint64_t key_length = offsets[i + 1] - offsets[i]; // Compute masks for the last 32 byte stripe // - int is_non_empty = length == 0 ? 0 : 1; - uint64_t num_stripes = bit_util::CeilDiv(length, kStripeSize) + (1 - is_non_empty); + int is_non_empty = key_length == 0 ? 
0 : 1; + uint64_t num_stripes = + bit_util::CeilDiv(key_length, kStripeSize) + (1 - is_non_empty); uint64_t mask1, mask2, mask3, mask4; - StripeMask(((length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, + StripeMask(((key_length - is_non_empty) & (kStripeSize - 1)) + is_non_empty, &mask1, &mask2, &mask3, &mask4); const uint8_t* key = concatenated_keys + offsets[i]; uint64_t acc1, acc2, acc3, acc4; ProcessFullStripes(num_stripes, key, &acc1, &acc2, &acc3, &acc4); - if (length > 0) { + if (key_length > 0) { memcpy(last_stripe_copy, key + (num_stripes - 1) * kStripeSize, - length - (num_stripes - 1) * kStripeSize); + key_length - (num_stripes - 1) * kStripeSize); } if (num_stripes > 0) { ProcessLastStripe(mask1, mask2, mask3, mask4, @@ -759,9 +764,9 @@ void Hashing64::HashIntImp(uint32_t num_keys, const T* keys, uint64_t* hashes) { } } -void Hashing64::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_key, +void Hashing64::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t key_length, const uint8_t* keys, uint64_t* hashes) { - switch (length_key) { + switch (key_length) { case sizeof(uint8_t): if (combine_hashes) { HashIntImp(num_keys, keys, hashes); @@ -802,17 +807,17 @@ void Hashing64::HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_ } } -void Hashing64::HashFixed(bool combine_hashes, uint32_t num_rows, uint64_t length, +void Hashing64::HashFixed(bool combine_hashes, uint32_t num_keys, uint64_t key_length, const uint8_t* keys, uint64_t* hashes) { - if (ARROW_POPCOUNT64(length) == 1 && length <= sizeof(uint64_t)) { - HashInt(combine_hashes, num_rows, length, keys, hashes); + if (ARROW_POPCOUNT64(key_length) == 1 && key_length <= sizeof(uint64_t)) { + HashInt(combine_hashes, num_keys, key_length, keys, hashes); return; } if (combine_hashes) { - HashFixedLenImp(num_rows, length, keys, hashes); + HashFixedLenImp(num_keys, key_length, keys, hashes); } else { - HashFixedLenImp(num_rows, length, keys, hashes); + 
HashFixedLenImp(num_keys, key_length, keys, hashes); } } @@ -860,13 +865,13 @@ void Hashing64::HashMultiColumn(const std::vector& cols, } if (cols[icol].metadata().is_fixed_length) { - uint64_t col_width = cols[icol].metadata().fixed_length; - if (col_width == 0) { + uint64_t key_length = cols[icol].metadata().fixed_length; + if (key_length == 0) { HashBit(icol > 0, cols[icol].bit_offset(1), batch_size_next, cols[icol].data(1) + first_row / 8, hashes + first_row); } else { - HashFixed(icol > 0, batch_size_next, col_width, - cols[icol].data(1) + first_row * col_width, hashes + first_row); + HashFixed(icol > 0, batch_size_next, key_length, + cols[icol].data(1) + first_row * key_length, hashes + first_row); } } else if (cols[icol].metadata().fixed_length == sizeof(uint32_t)) { HashVarLen(icol > 0, batch_size_next, cols[icol].offsets() + first_row, @@ -897,8 +902,9 @@ void Hashing64::HashMultiColumn(const std::vector& cols, Status Hashing64::HashBatch(const ExecBatch& key_batch, uint64_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, - int64_t offset, int64_t length) { - RETURN_NOT_OK(ColumnArraysFromExecBatch(key_batch, offset, length, &column_arrays)); + int64_t start_row, int64_t num_rows) { + RETURN_NOT_OK( + ColumnArraysFromExecBatch(key_batch, start_row, num_rows, &column_arrays)); LightContext ctx; ctx.hardware_flags = hardware_flags; diff --git a/cpp/src/arrow/compute/key_hash.h b/cpp/src/arrow/compute/key_hash.h index b193716c9bd..1173df5ed10 100644 --- a/cpp/src/arrow/compute/key_hash.h +++ b/cpp/src/arrow/compute/key_hash.h @@ -51,10 +51,10 @@ class ARROW_EXPORT Hashing32 { static Status HashBatch(const ExecBatch& key_batch, uint32_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, - int64_t offset, int64_t length); + int64_t start_row, int64_t num_rows); static void HashFixed(int64_t hardware_flags, bool combine_hashes, uint32_t num_keys, - uint64_t length_key, 
const uint8_t* keys, uint32_t* hashes, + uint64_t key_length, const uint8_t* keys, uint32_t* hashes, uint32_t* temp_hashes_for_combine); private: @@ -100,7 +100,7 @@ class ARROW_EXPORT Hashing32 { static inline void StripeMask(int i, uint32_t* mask1, uint32_t* mask2, uint32_t* mask3, uint32_t* mask4); template - static void HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys, + static void HashFixedLenImp(uint32_t num_rows, uint64_t key_length, const uint8_t* keys, uint32_t* hashes); template static void HashVarLenImp(uint32_t num_rows, const T* offsets, @@ -112,7 +112,7 @@ class ARROW_EXPORT Hashing32 { const uint8_t* keys, uint32_t* hashes); template static void HashIntImp(uint32_t num_keys, const T* keys, uint32_t* hashes); - static void HashInt(bool combine_hashes, uint32_t num_keys, uint64_t length_key, + static void HashInt(bool combine_hashes, uint32_t num_keys, uint64_t key_length, const uint8_t* keys, uint32_t* hashes); #if defined(ARROW_HAVE_RUNTIME_AVX2) @@ -129,11 +129,11 @@ class ARROW_EXPORT Hashing32 { __m256i mask_last_stripe, const uint8_t* keys, int64_t offset_A, int64_t offset_B); template - static uint32_t HashFixedLenImp_avx2(uint32_t num_rows, uint64_t length, + static uint32_t HashFixedLenImp_avx2(uint32_t num_rows, uint64_t key_length, const uint8_t* keys, uint32_t* hashes, uint32_t* hashes_temp_for_combine); static uint32_t HashFixedLen_avx2(bool combine_hashes, uint32_t num_rows, - uint64_t length, const uint8_t* keys, + uint64_t key_length, const uint8_t* keys, uint32_t* hashes, uint32_t* hashes_temp_for_combine); template static uint32_t HashVarLenImp_avx2(uint32_t num_rows, const T* offsets, @@ -164,9 +164,9 @@ class ARROW_EXPORT Hashing64 { static Status HashBatch(const ExecBatch& key_batch, uint64_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, - int64_t offset, int64_t length); + int64_t start_row, int64_t num_rows); - static void HashFixed(bool combine_hashes, 
uint32_t num_keys, uint64_t length_key, + static void HashFixed(bool combine_hashes, uint32_t num_keys, uint64_t key_length, const uint8_t* keys, uint64_t* hashes); private: @@ -203,7 +203,7 @@ class ARROW_EXPORT Hashing64 { static inline void StripeMask(int i, uint64_t* mask1, uint64_t* mask2, uint64_t* mask3, uint64_t* mask4); template - static void HashFixedLenImp(uint32_t num_rows, uint64_t length, const uint8_t* keys, + static void HashFixedLenImp(uint32_t num_rows, uint64_t key_length, const uint8_t* keys, uint64_t* hashes); template static void HashVarLenImp(uint32_t num_rows, const T* offsets, @@ -211,11 +211,11 @@ class ARROW_EXPORT Hashing64 { template static void HashBitImp(int64_t bit_offset, uint32_t num_keys, const uint8_t* keys, uint64_t* hashes); - static void HashBit(bool T_COMBINE_HASHES, int64_t bit_offset, uint32_t num_keys, + static void HashBit(bool combine_hashes, int64_t bit_offset, uint32_t num_keys, const uint8_t* keys, uint64_t* hashes); template static void HashIntImp(uint32_t num_keys, const T* keys, uint64_t* hashes); - static void HashInt(bool T_COMBINE_HASHES, uint32_t num_keys, uint64_t length_key, + static void HashInt(bool combine_hashes, uint32_t num_keys, uint64_t key_length, const uint8_t* keys, uint64_t* hashes); }; diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index f1d4929b6b6..22720b97a15 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -252,10 +252,10 @@ TEST(VectorHash, BasicString) { RunTestVectorHash(); } TEST(VectorHash, BasicLargeString) { RunTestVectorHash(); } -void HashFixedLengthFrom(int fixed_length, int num_rows_total, int start_row) { - int num_rows = num_rows_total - start_row; +void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { + int num_rows_to_hash = num_rows - start_row; auto num_bytes_aligned = - arrow::bit_util::RoundUpToMultipleOf64(fixed_length * num_rows_total); + 
arrow::bit_util::RoundUpToMultipleOf64(key_length * num_rows); const auto hardware_flags_for_testing = HardwareFlagsForTesting(); ASSERT_GT(hardware_flags_for_testing.size(), 0); @@ -263,15 +263,15 @@ void HashFixedLengthFrom(int fixed_length, int num_rows_total, int start_row) { std::vector> hashes32(hardware_flags_for_testing.size()); std::vector> hashes64(hardware_flags_for_testing.size()); for (auto& h : hashes32) { - h.resize(num_rows); + h.resize(num_rows_to_hash); } for (auto& h : hashes64) { - h.resize(num_rows); + h.resize(num_rows_to_hash); } - FixedSizeBinaryBuilder keys_builder(fixed_size_binary(fixed_length)); - for (int j = 0; j < num_rows_total; ++j) { - ASSERT_OK(keys_builder.Append(std::string(fixed_length, 42))); + FixedSizeBinaryBuilder keys_builder(fixed_size_binary(key_length)); + for (int j = 0; j < num_rows; ++j) { + ASSERT_OK(keys_builder.Append(std::string(key_length, 42))); } ASSERT_OK_AND_ASSIGN(auto keys, keys_builder.Finish()); // Make sure the buffer is aligned as expected. @@ -284,18 +284,17 @@ void HashFixedLengthFrom(int fixed_length, int num_rows_total, int start_row) { for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; Hashing32::HashFixed(hardware_flags, - /*combine_hashes=*/false, num_rows, fixed_length, - keys->data()->GetValues(1) + start_row * fixed_length, + /*combine_hashes=*/false, num_rows_to_hash, key_length, + keys->data()->GetValues(1) + start_row * key_length, hashes32[i].data(), temp_buffer.data()); Hashing64::HashFixed( - /*combine_hashes=*/false, num_rows, fixed_length, - keys->data()->GetValues(1) + start_row * fixed_length, - hashes64[i].data()); + /*combine_hashes=*/false, num_rows_to_hash, key_length, + keys->data()->GetValues(1) + start_row * key_length, hashes64[i].data()); } // Verify that all implementations (scalar, SIMD) give the same hashes. 
for (int i = 1; i < static_cast(hardware_flags_for_testing.size()); ++i) { - for (int j = 0; j < num_rows; ++j) { + for (int j = 0; j < num_rows_to_hash; ++j) { ASSERT_EQ(hashes32[i][j], hashes32[0][j]) << "scalar and simd approaches yielded different 32-bit hashes"; ASSERT_EQ(hashes64[i][j], hashes64[0][j]) @@ -305,12 +304,12 @@ void HashFixedLengthFrom(int fixed_length, int num_rows_total, int start_row) { } // Some carefully chosen cases that may cause troubles like GH-39778. -TEST(VectorHash, FixedSizeTailByteSafety) { - // Tow cases of fixed_length < stripe (16-byte). - HashFixedLengthFrom(/*fixed_length=*/3, /*num_rows_total=*/1450, /*start_row=*/1447); - HashFixedLengthFrom(/*fixed_length=*/5, /*num_rows_total=*/883, /*start_row=*/858); - // Case of fixed_length > stripe (16-byte). - HashFixedLengthFrom(/*fixed_length=*/19, /*num_rows_total=*/64, /*start_row=*/63); +TEST(VectorHash, FixedLengthTailByteSafety) { + // Two cases of key_length < stripe (16-byte). + HashFixedLengthFrom(/*key_length=*/3, /*num_rows=*/1450, /*start_row=*/1447); + HashFixedLengthFrom(/*key_length=*/5, /*num_rows=*/883, /*start_row=*/858); + // Case of key_length > stripe (16-byte). 
+ HashFixedLengthFrom(/*key_length=*/19, /*num_rows=*/64, /*start_row=*/63); } } // namespace compute From 5fea9bbd934561f4359c9f4df2a3a7e049f0a1b1 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Fri, 26 Jan 2024 10:20:03 +0800 Subject: [PATCH 7/7] Fix lint --- cpp/src/arrow/compute/key_hash_test.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index 22720b97a15..c998df7169c 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -254,8 +254,7 @@ TEST(VectorHash, BasicLargeString) { RunTestVectorHash(); } void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { int num_rows_to_hash = num_rows - start_row; - auto num_bytes_aligned = - arrow::bit_util::RoundUpToMultipleOf64(key_length * num_rows); + auto num_bytes_aligned = arrow::bit_util::RoundUpToMultipleOf64(key_length * num_rows); const auto hardware_flags_for_testing = HardwareFlagsForTesting(); ASSERT_GT(hardware_flags_for_testing.size(), 0);