From 542394cf4d0ef9c2e60895415ce4cc989ea227ab Mon Sep 17 00:00:00 2001 From: niranda perera Date: Tue, 15 Jun 2021 15:26:35 -0400 Subject: [PATCH 01/19] working AAA --- .../arrow/compute/kernels/scalar_if_else.cc | 247 +++++++++++++++++- .../compute/kernels/scalar_if_else_test.cc | 52 ++++ 2 files changed, 298 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 54e0725fce7..fa25b7ad382 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -455,6 +455,238 @@ struct IfElseFunctor> { } }; +// const int32_t* offsets = reinterpret_cast(strings_data.buffer[1]->data()) + +// strings_data.offset; +// const uint8_t* bytes = strings_data.buffer[2]->data(); +// +// const int32_t& first_offset = offsets[0]; +// const uint8_t& first_byte = bytes[first_offset]; + +// const offset_type* offsets = array.GetValues(1); +// const uint8_t* data = array.GetValues(2, /*absolute_offset=*/0); +// const int64_t length = offsets[row + 1] - offsets[row]; +// value = util::string_view(reinterpret_cast(data + +// offsets[row]), length); + +// only number types needs to be handled for Fixed sized primitive data types because, +// internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding unsigned +// int type +template +struct IfElseFunctor> { + using OffsetType = typename TypeTraits::OffsetType::c_type; + using ArrayType = typename TypeTraits::ArrayType; + + // A - Array + // S - Scalar + + // AAA + static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, + const ArrayData& right, ArrayData* out) { + // first calculate the space needed for the output data buffer, by traversing the + // cond.data array + const uint8_t* cond_data = cond.buffers[1]->data(); + BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + + const auto* left_offsets = left.GetValues(1); + const uint8_t* left_data = left.buffers[2]->data(); + const auto* right_offsets = right.GetValues(1); + const uint8_t* right_data = right.buffers[2]->data(); + + // reserve an additional space + ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, + ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); + auto* out_offsets = reinterpret_cast(out_offset_buf->mutable_data()); + out_offsets[0] = 0; + + // allocate data buffer conservatively + auto data_buff_alloc = + static_cast((left_offsets[left.length] - left_offsets[0]) + + (right_offsets[right.length] - right_offsets[0])); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_data_buf, + ctx->Allocate(data_buff_alloc)); + uint8_t* out_data = out_data_buf->mutable_data(); + + int64_t offset = cond.offset; + OffsetType total_bytes_written = 0; + while (offset < cond.offset + cond.length) { + const BitBlockCount& block = bit_counter.NextWord(); + + OffsetType bytes_written = 0; + if (block.AllSet()) { + // from left + bytes_written = left_offsets[offset + block.length] - left_offsets[offset]; + std::memcpy(out_data, left_data + left_offsets[offset], bytes_written); + // normalize the out_offsets by reducing start offset + // offset - cond.offset + 1 --> [1, cond.length + 1) + std::transform(left_offsets + offset, left_offsets + offset + block.length + 1, + out_offsets + offset - cond.offset + 1, + [&](const OffsetType& src_offset) { + return out_offsets[offset - cond.offset] + src_offset - + left_offsets[offset]; + }); + } else if (block.NoneSet()) { + // from right + bytes_written = right_offsets[offset + block.length] 
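          // all-false word: the source bytes in `right` are contiguous, so the
          // byte count is the difference between the word's bounding right offsets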
- right_offsets[offset]; + std::memcpy(out_data, right_data + right_offsets[offset], bytes_written); + // normalize the out_offsets by reducing start offset + // (offset - cond.offset + 1) is [1, cond.length + 1) + std::transform(right_offsets + offset, right_offsets + offset + block.length + 1, + out_offsets + offset - cond.offset + 1, + [&](const OffsetType& src_offset) { + return out_offsets[offset - cond.offset] + src_offset - + right_offsets[offset]; + }); + } else if (block.popcount) { // selectively copy from left + for (auto i = 0; i < block.length; ++i) { + OffsetType current_length; + if (BitUtil::GetBit(cond_data, offset + i)) { + current_length = left_offsets[offset + i + 1] - left_offsets[offset + i]; + std::memcpy(out_data + bytes_written, left_data + left_offsets[offset + i], + current_length); + } else { + current_length = right_offsets[offset + i + 1] - right_offsets[offset + i]; + std::memcpy(out_data + bytes_written, right_data + right_offsets[offset + i], + current_length); + } + out_offsets[offset + i - cond.offset + 1] = + out_offsets[offset + i - cond.offset] + current_length; + bytes_written += current_length; + } + } + + offset += block.length; + out_data += bytes_written; + total_bytes_written += bytes_written; + } + + // resize the data buffer + ARROW_RETURN_NOT_OK(out_data_buf->Resize(total_bytes_written)); + + out->buffers[1] = std::move(out_offset_buf); + out->buffers[2] = std::move(out_data_buf); + return Status::OK(); + } + + // ASA + static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, + const ArrayData& right, ArrayData* out) { + // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, + // ctx->Allocate(cond.length * sizeof(T))); + // T* out_values = reinterpret_cast(out_buf->mutable_data()); + // + // // copy right data to out_buff + // const T* right_data = right.GetValues(1); + // std::memcpy(out_values, right_data, right.length * sizeof(T)); + // + // const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray + // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + // + // // selectively copy values from left data + // T left_data = internal::UnboxScalar::Unbox(left); + // int64_t offset = cond.offset; + // + // // todo this can be improved by intrinsics. 
ex: _mm*_mask_store_e* (vmovdqa*) + // while (offset < cond.offset + cond.length) { + // const BitBlockCount& block = bit_counter.NextWord(); + // if (block.AllSet()) { // all from left + // std::fill(out_values, out_values + block.length, left_data); + // } else if (block.popcount) { // selectively copy from left + // for (int64_t i = 0; i < block.length; ++i) { + // if (BitUtil::GetBit(cond_data, offset + i)) { + // out_values[i] = left_data; + // } + // } + // } + // + // offset += block.length; + // out_values += block.length; + // } + // + // out->buffers[1] = std::move(out_buf); + return Status::OK(); + } + + // AAS + static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, + const Scalar& right, ArrayData* out) { + // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, + // ctx->Allocate(cond.length * sizeof(T))); + // T* out_values = reinterpret_cast(out_buf->mutable_data()); + // + // // copy left data to out_buff + // const T* left_data = left.GetValues(1); + // std::memcpy(out_values, left_data, left.length * sizeof(T)); + // + // const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray + // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + // + // // selectively copy values from left data + // T right_data = internal::UnboxScalar::Unbox(right); + // int64_t offset = cond.offset; + // + // // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) + // // left data is already in the output buffer. Therefore, mask needs to be + // inverted while (offset < cond.offset + cond.length) { + // const BitBlockCount& block = bit_counter.NextWord(); + // if (block.NoneSet()) { // all from right + // std::fill(out_values, out_values + block.length, right_data); + // } else if (block.popcount) { // selectively copy from right + // for (int64_t i = 0; i < block.length; ++i) { + // if (!BitUtil::GetBit(cond_data, offset + i)) { + // out_values[i] = right_data; + // } + // } + // } + // + // offset += block.length; + // out_values += block.length; + // } + // + // out->buffers[1] = std::move(out_buf); + return Status::OK(); + } + + // ASS + static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, + const Scalar& right, ArrayData* out) { + // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, + // ctx->Allocate(cond.length * sizeof(T))); + // T* out_values = reinterpret_cast(out_buf->mutable_data()); + // + // // copy right data to out_buff + // T right_data = internal::UnboxScalar::Unbox(right); + // std::fill(out_values, out_values + cond.length, right_data); + // + // const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray + // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + // + // // selectively copy values from left data + // T left_data = internal::UnboxScalar::Unbox(left); + // int64_t offset = cond.offset; + // + // // todo this can be improved by intrinsics. 
ex: _mm*_mask_store_e* (vmovdqa*) + // while (offset < cond.offset + cond.length) { + // const BitBlockCount& block = bit_counter.NextWord(); + // if (block.AllSet()) { // all from left + // std::fill(out_values, out_values + block.length, left_data); + // } else if (block.popcount) { // selectively copy from left + // for (int64_t i = 0; i < block.length; ++i) { + // if (BitUtil::GetBit(cond_data, offset + i)) { + // out_values[i] = left_data; + // } + // } + // } + // + // offset += block.length; + // out_values += block.length; + // } + // + // out->buffers[1] = std::move(out_buf); + return Status::OK(); + } +}; + template struct IfElseFunctor> { // A - Array, S - Scalar, X = Array/Scalar @@ -676,6 +908,19 @@ void AddPrimitiveIfElseKernels(const std::shared_ptr& scalar_fun } } +void AddBinaryIfElseKernels(const std::shared_ptr& scalar_function, + const std::vector>& types) { + for (auto&& type : types) { + auto exec = internal::GenerateTypeAgnosticVarBinaryBase(*type); + // cond array needs to be boolean always + ScalarKernel kernel({boolean(), type, type}, type, exec); + kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; + kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; + + DCHECK_OK(scalar_function->AddKernel(std::move(kernel))); + } +} + } // namespace const FunctionDoc if_else_doc{"Choose values based on a condition", @@ -698,7 +943,7 @@ void RegisterScalarIfElse(FunctionRegistry* registry) { AddPrimitiveIfElseKernels(func, TemporalTypes()); AddPrimitiveIfElseKernels(func, {boolean()}); AddNullIfElseKernel(func); - // todo add binary kernels + AddBinaryIfElseKernels(func, BaseBinaryTypes()); DCHECK_OK(registry->AddFunction(std::move(func))); } diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 670a2d42a3a..62f033f38d7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -316,5 +316,57 @@ TEST_F(TestIfElseKernel, IfElseDispatchBest) { CheckDispatchBest(name, {null(), uint8(), int8()}, {boolean(), int16(), int16()}); } +template +class TestIfElseBaseBinary : public ::testing::Test {}; + +using BaseBinaryTypes = + ::testing::Types; + +TYPED_TEST_SUITE(TestIfElseBaseBinary, BaseBinaryTypes); + +TYPED_TEST(TestIfElseBaseBinary, IfElseFixedSize) { + auto type = TypeTraits::type_singleton(); + + CheckIfElseOutput(ArrayFromJSON(boolean(), "[true, true, true, false]"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "l"])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([true, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", null])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([true, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"(["a", "ab", null, null])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([true, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"(["a", "ab", null, "l"])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", 
"lmn", "lm", "l"])"), + ArrayFromJSON(type, R"([null, "ab", null, "l"])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"([null, "ab", null, null])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"([null, "ab", "abc", null])")); + + CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"([null, "ab", "abc", "l"])")); +} + } // namespace compute } // namespace arrow From ebc4712d54e6c7db1fdd339ba2e653eaa663927d Mon Sep 17 00:00:00 2001 From: niranda perera Date: Tue, 15 Jun 2021 16:21:04 -0400 Subject: [PATCH 02/19] complete binary AAA impl --- .../arrow/compute/kernels/scalar_if_else.cc | 53 +++++++------------ .../compute/kernels/scalar_if_else_test.cc | 50 +++++++++++++++-- 2 files changed, 63 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index fa25b7ad382..37f21726124 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -455,23 +455,6 @@ struct IfElseFunctor> { } }; -// const int32_t* offsets = reinterpret_cast(strings_data.buffer[1]->data()) + -// strings_data.offset; -// const uint8_t* bytes = strings_data.buffer[2]->data(); -// -// const int32_t& first_offset = offsets[0]; -// const uint8_t& first_byte = bytes[first_offset]; - -// const offset_type* offsets = array.GetValues(1); -// const uint8_t* data = array.GetValues(2, /*absolute_offset=*/0); -// const int64_t length = offsets[row + 1] - offsets[row]; -// value = util::string_view(reinterpret_cast(data + -// offsets[row]), length); - -// only number types needs to be handled for Fixed sized primitive data types because, -// internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding unsigned -// int type template struct IfElseFunctor> { using OffsetType = typename TypeTraits::OffsetType::c_type; @@ -483,8 +466,6 @@ struct IfElseFunctor> { // AAA static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const ArrayData& right, ArrayData* out) { - // first calculate the space needed for the output data buffer, by traversing the - // cond.data array const uint8_t* cond_data = cond.buffers[1]->data(); BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); @@ -517,26 +498,28 @@ struct IfElseFunctor> { // from left bytes_written = left_offsets[offset + block.length] - left_offsets[offset]; std::memcpy(out_data, left_data + left_offsets[offset], bytes_written); - // normalize the out_offsets by reducing start offset - // offset - cond.offset + 1 --> [1, cond.length + 1) - std::transform(left_offsets + offset, left_offsets + offset + block.length + 1, - out_offsets + offset - cond.offset + 1, - [&](const OffsetType& src_offset) { - return out_offsets[offset - cond.offset] + src_offset - - left_offsets[offset]; - }); + // normalize the out_offsets by reducing input start offset, and adding the + // offset upto the word + // offset - cond.offset --> [0, cond.length + 1) + std::transform( + left_offsets + offset + 1, left_offsets + offset 
+ block.length + 1, + out_offsets + offset - cond.offset + 1, [&](const OffsetType& src_offset) { + return src_offset + out_offsets[offset - cond.offset] - + left_offsets[offset]; + }); } else if (block.NoneSet()) { // from right bytes_written = right_offsets[offset + block.length] - right_offsets[offset]; std::memcpy(out_data, right_data + right_offsets[offset], bytes_written); - // normalize the out_offsets by reducing start offset - // (offset - cond.offset + 1) is [1, cond.length + 1) - std::transform(right_offsets + offset, right_offsets + offset + block.length + 1, - out_offsets + offset - cond.offset + 1, - [&](const OffsetType& src_offset) { - return out_offsets[offset - cond.offset] + src_offset - - right_offsets[offset]; - }); + // normalize the out_offsets by reducing input start offset, and adding the + // offset upto the word + // offset - cond.offset --> [0, cond.length + 1) + std::transform( + right_offsets + offset + 1, right_offsets + offset + block.length + 1, + out_offsets + offset - cond.offset + 1, [&](const OffsetType& src_offset) { + return src_offset + out_offsets[offset - cond.offset] - + right_offsets[offset]; + }); } else if (block.popcount) { // selectively copy from left for (auto i = 0; i < block.length; ++i) { OffsetType current_length; diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 62f033f38d7..6bf795696d7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -57,15 +57,12 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) { random::RandomArrayGenerator rand(/*seed=*/0); int64_t len = 1000; - - // adding 64 consecutive 1's and 0's in the cond array to test all-true/ all-false - // word code paths ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64)); ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64)); auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01); + ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3})); auto cond = std::static_pointer_cast(concat); - auto left = std::static_pointer_cast( rand.ArrayOf(type, len, /*null_probability=*/0.01)); auto right = std::static_pointer_cast( @@ -324,7 +321,7 @@ using BaseBinaryTypes = TYPED_TEST_SUITE(TestIfElseBaseBinary, BaseBinaryTypes); -TYPED_TEST(TestIfElseBaseBinary, IfElseFixedSize) { +TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinary) { auto type = TypeTraits::type_singleton(); CheckIfElseOutput(ArrayFromJSON(boolean(), "[true, true, true, false]"), @@ -368,5 +365,48 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseFixedSize) { ArrayFromJSON(type, R"([null, "ab", "abc", "l"])")); } +TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) { + using ArrayType = typename TypeTraits::ArrayType; + using OffsetType = typename TypeTraits::OffsetType::c_type; + auto type = TypeTraits::type_singleton(); + + random::RandomArrayGenerator rand(/*seed=*/0); + int64_t len = 130; + + ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64)); + ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64)); + auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01); + + ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3})); + auto cond = std::static_pointer_cast(concat); + + auto left = std::static_pointer_cast( + rand.ArrayOf(type, len, /*null_probability=*/0.01)); + auto right = std::static_pointer_cast( 
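  // left and right carry independent random nulls, so the reference loop below
  // covers every (cond, left, right) validity combination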
+ rand.ArrayOf(type, len, /*null_probability=*/0.01)); + + typename TypeTraits::BuilderType builder; + + for (int64_t i = 0; i < len; ++i) { + if (!cond->IsValid(i) || (cond->Value(i) && !left->IsValid(i)) || + (!cond->Value(i) && !right->IsValid(i))) { + ASSERT_OK(builder.AppendNull()); + continue; + } + + OffsetType offset; + const uint8_t* val; + if (cond->Value(i)) { + val = left->GetValue(i, &offset); + } else { + val = right->GetValue(i, &offset); + } + ASSERT_OK(builder.Append(val, offset)); + } + ASSERT_OK_AND_ASSIGN(auto expected_data, builder.Finish()); + + CheckIfElseOutput(cond, left, right, expected_data); +} + } // namespace compute } // namespace arrow From 129f408783f7443014bc555b210792ec74a8cc03 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Tue, 15 Jun 2021 19:16:52 -0400 Subject: [PATCH 03/19] simplifying logic --- .../arrow/compute/kernels/scalar_if_else.cc | 131 ++++++++++++------ cpp/src/arrow/util/bit_util.h | 4 +- 2 files changed, 91 insertions(+), 44 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 37f21726124..9262499e8ac 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -496,49 +496,45 @@ struct IfElseFunctor> { OffsetType bytes_written = 0; if (block.AllSet()) { // from left - bytes_written = left_offsets[offset + block.length] - left_offsets[offset]; - std::memcpy(out_data, left_data + left_offsets[offset], bytes_written); + bytes_written = left_offsets[block.length] - left_offsets[0]; + std::memcpy(out_data, left_data + left_offsets[0], bytes_written); // normalize the out_offsets by reducing input start offset, and adding the // offset upto the word - // offset - cond.offset --> [0, cond.length + 1) - std::transform( - left_offsets + offset + 1, left_offsets + offset + block.length + 1, - out_offsets + offset - cond.offset + 1, [&](const OffsetType& src_offset) { - return src_offset + out_offsets[offset - cond.offset] - - left_offsets[offset]; - }); + std::transform(left_offsets + 1, left_offsets + block.length + 1, out_offsets + 1, + [&](const OffsetType& src_offset) { + return src_offset - left_offsets[0] + out_offsets[0]; + }); } else if (block.NoneSet()) { // from right - bytes_written = right_offsets[offset + block.length] - right_offsets[offset]; - std::memcpy(out_data, right_data + right_offsets[offset], bytes_written); + bytes_written = right_offsets[block.length] - right_offsets[0]; + std::memcpy(out_data, right_data + right_offsets[0], bytes_written); // normalize the out_offsets by reducing input start offset, and adding the // offset upto the word - // offset - cond.offset --> [0, cond.length + 1) - std::transform( - right_offsets + offset + 1, right_offsets + offset + block.length + 1, - out_offsets + offset - cond.offset + 1, [&](const OffsetType& src_offset) { - return src_offset + out_offsets[offset - cond.offset] - - right_offsets[offset]; - }); + std::transform(right_offsets + 1, right_offsets + block.length + 1, + out_offsets + 1, [&](const OffsetType& src_offset) { + return src_offset - right_offsets[0] + out_offsets[0]; + }); } else if (block.popcount) { // selectively copy from left for (auto i = 0; i < block.length; ++i) { OffsetType current_length; if (BitUtil::GetBit(cond_data, offset + i)) { - current_length = left_offsets[offset + i + 1] - left_offsets[offset + i]; - std::memcpy(out_data + bytes_written, left_data + left_offsets[offset + i], + current_length = left_offsets[i + 1] - 
left_offsets[i]; + std::memcpy(out_data + bytes_written, left_data + left_offsets[i], current_length); } else { - current_length = right_offsets[offset + i + 1] - right_offsets[offset + i]; - std::memcpy(out_data + bytes_written, right_data + right_offsets[offset + i], + current_length = right_offsets[i + 1] - right_offsets[i]; + std::memcpy(out_data + bytes_written, right_data + right_offsets[i], current_length); } - out_offsets[offset + i - cond.offset + 1] = - out_offsets[offset + i - cond.offset] + current_length; + out_offsets[i + 1] = out_offsets[i] + current_length; bytes_written += current_length; } } offset += block.length; + left_offsets += block.length; + right_offsets += block.length; + out_offsets += block.length; out_data += bytes_written; total_bytes_written += bytes_written; } @@ -554,39 +550,90 @@ struct IfElseFunctor> { // ASA static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, const ArrayData& right, ArrayData* out) { - // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - // ctx->Allocate(cond.length * sizeof(T))); - // T* out_values = reinterpret_cast(out_buf->mutable_data()); + // const uint8_t* cond_data = cond.buffers[1]->data(); + // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); // - // // copy right data to out_buff - // const T* right_data = right.GetValues(1); - // std::memcpy(out_values, right_data, right.length * sizeof(T)); + // const auto casted_left = reinterpret_cast(left); + // const uint8_t* left_data = casted_left.value->data(); + // int64_t left_size = casted_left.value->size(); + // util::basic_string_view left_view(left_data, left_size); // - // const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + // const auto* right_offsets = right.GetValues(1); + // const uint8_t* right_data = right.buffers[2]->data(); // - // // selectively copy values from left data - // T left_data = internal::UnboxScalar::Unbox(left); - // int64_t offset = cond.offset; + // // reserve an additional space + // ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, + // ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); + // auto* out_offsets = + // reinterpret_cast(out_offset_buf->mutable_data()); out_offsets[0] = + // 0; // - // // todo this can be improved by intrinsics. 
ex: _mm*_mask_store_e* (vmovdqa*) + // // allocate data buffer conservatively + // auto data_buff_alloc = + // left_size * cond.length + (right_offsets[right.length] - right_offsets[0]); + // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_data_buf, + // ctx->Allocate(data_buff_alloc)); + // uint8_t* out_data = out_data_buf->mutable_data(); + // + // int64_t offset = cond.offset; + // OffsetType total_bytes_written = 0; // while (offset < cond.offset + cond.length) { // const BitBlockCount& block = bit_counter.NextWord(); - // if (block.AllSet()) { // all from left - // std::fill(out_values, out_values + block.length, left_data); + // + // OffsetType bytes_written = 0; + // if (block.AllSet()) { + // // from left + // bytes_written = left_size * block.length; + // for (auto i = 0; i < block.length; i++) { + // std::memcpy(out_data + i * left_size, left_data, left_size); + // out_data_buf[] + // } + // } else if (block.NoneSet()) { + // // from right + // bytes_written = right_offsets[offset + block.length] - + // right_offsets[offset]; std::memcpy(out_data, right_data + + // right_offsets[offset], bytes_written); + // // normalize the out_offsets by reducing input start offset, and adding the + // // offset upto the word + // // offset - cond.offset --> [0, cond.length + 1) + // std::transform( + // right_offsets + offset + 1, right_offsets + offset + block.length + 1, + // out_offsets + offset - cond.offset + 1, [&](const OffsetType& + // src_offset) { + // return src_offset - right_offsets[offset] + + // out_offsets[offset - cond.offset]; + // }); // } else if (block.popcount) { // selectively copy from left - // for (int64_t i = 0; i < block.length; ++i) { + // for (auto i = 0; i < block.length; ++i) { + // OffsetType current_length; // if (BitUtil::GetBit(cond_data, offset + i)) { - // out_values[i] = left_data; + // current_length = left_offsets[offset + i + 1] - left_offsets[offset + + // i]; std::memcpy(out_data + bytes_written, left_data + + // left_offsets[offset + i], + // current_length); + // } else { + // current_length = right_offsets[offset + i + 1] - right_offsets[offset + + // i]; std::memcpy(out_data + bytes_written, right_data + + // right_offsets[offset + i], + // current_length); // } + // out_offsets[offset + i - cond.offset + 1] = + // out_offsets[offset + i - cond.offset] + current_length; + // bytes_written += current_length; // } // } // // offset += block.length; - // out_values += block.length; + // out_data += bytes_written; + // total_bytes_written += bytes_written; // } // - // out->buffers[1] = std::move(out_buf); + // // resize the data buffer + // ARROW_RETURN_NOT_OK(out_data_buf->Resize(total_bytes_written)); + // + // out->buffers[1] = std::move(out_offset_buf); + // out->buffers[2] = std::move(out_data_buf); + // return Status::OK(); return Status::OK(); } diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 1e97e467610..a43fd76092d 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -290,12 +290,12 @@ static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63 // the bitwise complement version of kPrecedingBitmask static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; -static inline bool GetBit(const uint8_t* bits, uint64_t i) { +static constexpr bool GetBit(const uint8_t* bits, uint64_t i) { return (bits[i >> 3] >> (i & 0x07)) & 1; } // Gets the i-th bit from a byte. Should only be used with i <= 7. 
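// (made constexpr in this patch so the result can be used in constant
// expressions; e.g. GetBitFromByte(0x05, 2) is true, since 0x05 == 0b101)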
-static inline bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } +static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } static inline void ClearBit(uint8_t* bits, int64_t i) { bits[i / 8] &= kFlippedBitmask[i % 8]; From b31f791da0782890136df169be09b07135f782db Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 16 Jun 2021 01:09:16 -0400 Subject: [PATCH 04/19] adding AAS, ASA, ASS --- .../arrow/compute/kernels/scalar_if_else.cc | 364 ++++++++++-------- .../compute/kernels/scalar_if_else_test.cc | 78 ++-- cpp/src/arrow/util/bit_block_counter.h | 8 + 3 files changed, 260 insertions(+), 190 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 9262499e8ac..e29d46430f5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -550,169 +550,231 @@ struct IfElseFunctor> { // ASA static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, const ArrayData& right, ArrayData* out) { - // const uint8_t* cond_data = cond.buffers[1]->data(); - // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); - // - // const auto casted_left = reinterpret_cast(left); - // const uint8_t* left_data = casted_left.value->data(); - // int64_t left_size = casted_left.value->size(); - // util::basic_string_view left_view(left_data, left_size); - // - // const auto* right_offsets = right.GetValues(1); - // const uint8_t* right_data = right.buffers[2]->data(); - // - // // reserve an additional space - // ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, - // ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); - // auto* out_offsets = - // reinterpret_cast(out_offset_buf->mutable_data()); out_offsets[0] = - // 0; - // - // // allocate data buffer conservatively - // auto data_buff_alloc = - // left_size * cond.length + (right_offsets[right.length] - right_offsets[0]); - // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_data_buf, - // ctx->Allocate(data_buff_alloc)); - // uint8_t* out_data = out_data_buf->mutable_data(); - // - // int64_t offset = cond.offset; - // OffsetType total_bytes_written = 0; - // while (offset < cond.offset + cond.length) { - // const BitBlockCount& block = bit_counter.NextWord(); - // - // OffsetType bytes_written = 0; - // if (block.AllSet()) { - // // from left - // bytes_written = left_size * block.length; - // for (auto i = 0; i < block.length; i++) { - // std::memcpy(out_data + i * left_size, left_data, left_size); - // out_data_buf[] - // } - // } else if (block.NoneSet()) { - // // from right - // bytes_written = right_offsets[offset + block.length] - - // right_offsets[offset]; std::memcpy(out_data, right_data + - // right_offsets[offset], bytes_written); - // // normalize the out_offsets by reducing input start offset, and adding the - // // offset upto the word - // // offset - cond.offset --> [0, cond.length + 1) - // std::transform( - // right_offsets + offset + 1, right_offsets + offset + block.length + 1, - // out_offsets + offset - cond.offset + 1, [&](const OffsetType& - // src_offset) { - // return src_offset - right_offsets[offset] + - // out_offsets[offset - cond.offset]; - // }); - // } else if (block.popcount) { // selectively copy from left - // for (auto i = 0; i < block.length; ++i) { - // OffsetType current_length; - // if (BitUtil::GetBit(cond_data, offset + i)) { - // current_length = left_offsets[offset + i + 1] - left_offsets[offset + - // 
i]; std::memcpy(out_data + bytes_written, left_data + - // left_offsets[offset + i], - // current_length); - // } else { - // current_length = right_offsets[offset + i + 1] - right_offsets[offset + - // i]; std::memcpy(out_data + bytes_written, right_data + - // right_offsets[offset + i], - // current_length); - // } - // out_offsets[offset + i - cond.offset + 1] = - // out_offsets[offset + i - cond.offset] + current_length; - // bytes_written += current_length; - // } - // } - // - // offset += block.length; - // out_data += bytes_written; - // total_bytes_written += bytes_written; - // } - // - // // resize the data buffer - // ARROW_RETURN_NOT_OK(out_data_buf->Resize(total_bytes_written)); - // - // out->buffers[1] = std::move(out_offset_buf); - // out->buffers[2] = std::move(out_data_buf); - // return Status::OK(); + const uint8_t* cond_data = cond.buffers[1]->data(); + BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + + util::string_view left_data = internal::UnboxScalar::Unbox(left); + int64_t left_size = left_data.size(); + + const auto* right_offsets = right.GetValues(1); + const uint8_t* right_data = right.buffers[2]->data(); + + // reserve an additional space + ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, + ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); + auto* out_offsets = reinterpret_cast(out_offset_buf->mutable_data()); + out_offsets[0] = 0; + + // allocate data buffer conservatively + auto data_buff_alloc = + left_size * cond.length + (right_offsets[right.length] - right_offsets[0]); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_data_buf, + ctx->Allocate(data_buff_alloc)); + uint8_t* out_data = out_data_buf->mutable_data(); + + int64_t offset = cond.offset; + OffsetType total_bytes_written = 0; + while (offset < cond.offset + cond.length) { + const BitBlockCount& block = bit_counter.NextWord(); + + OffsetType bytes_written = 0; + if (block.AllSet()) { + // from left + bytes_written = block.length * left_size; + for (int i = 0; i < block.length; i++) { + std::memcpy(out_data + i * left_size, left_data.cbegin(), left_size); + out_offsets[i + 1] = out_offsets[0] + (i + 1) * left_size; + } + } else if (block.NoneSet()) { + // from right + bytes_written = right_offsets[block.length] - right_offsets[0]; + std::memcpy(out_data, right_data + right_offsets[0], bytes_written); + // normalize the out_offsets by reducing input start offset, and adding the + // offset upto the word + std::transform(right_offsets + 1, right_offsets + block.length + 1, + out_offsets + 1, [&](const OffsetType& src_offset) { + return src_offset - right_offsets[0] + out_offsets[0]; + }); + } else if (block.popcount) { // selectively copy from left + for (auto i = 0; i < block.length; ++i) { + OffsetType current_length; + if (BitUtil::GetBit(cond_data, offset + i)) { + current_length = left_size; + std::memcpy(out_data + bytes_written, left_data.cbegin(), current_length); + } else { + current_length = right_offsets[i + 1] - right_offsets[i]; + std::memcpy(out_data + bytes_written, right_data + right_offsets[i], + current_length); + } + out_offsets[i + 1] = out_offsets[i] + current_length; + bytes_written += current_length; + } + } + + offset += block.length; + right_offsets += block.length; + out_offsets += block.length; + out_data += bytes_written; + total_bytes_written += bytes_written; + } + + // resize the data buffer + ARROW_RETURN_NOT_OK(out_data_buf->Resize(total_bytes_written)); + + out->buffers[1] = std::move(out_offset_buf); + out->buffers[2] = std::move(out_data_buf); return 
Status::OK(); } // AAS static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const Scalar& right, ArrayData* out) { - // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - // ctx->Allocate(cond.length * sizeof(T))); - // T* out_values = reinterpret_cast(out_buf->mutable_data()); - // - // // copy left data to out_buff - // const T* left_data = left.GetValues(1); - // std::memcpy(out_values, left_data, left.length * sizeof(T)); - // - // const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); - // - // // selectively copy values from left data - // T right_data = internal::UnboxScalar::Unbox(right); - // int64_t offset = cond.offset; - // - // // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - // // left data is already in the output buffer. Therefore, mask needs to be - // inverted while (offset < cond.offset + cond.length) { - // const BitBlockCount& block = bit_counter.NextWord(); - // if (block.NoneSet()) { // all from right - // std::fill(out_values, out_values + block.length, right_data); - // } else if (block.popcount) { // selectively copy from right - // for (int64_t i = 0; i < block.length; ++i) { - // if (!BitUtil::GetBit(cond_data, offset + i)) { - // out_values[i] = right_data; - // } - // } - // } - // - // offset += block.length; - // out_values += block.length; - // } - // - // out->buffers[1] = std::move(out_buf); + const uint8_t* cond_data = cond.buffers[1]->data(); + BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + + const auto* left_offsets = left.GetValues(1); + const uint8_t* left_data = left.buffers[2]->data(); + + util::string_view right_data = internal::UnboxScalar::Unbox(right); + int64_t right_size = right_data.size(); + + // reserve an additional space + ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, + ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); + auto* out_offsets = reinterpret_cast(out_offset_buf->mutable_data()); + out_offsets[0] = 0; + + // allocate data buffer conservatively + auto data_buff_alloc = + right_size * cond.length + (left_offsets[left.length] - left_offsets[0]); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_data_buf, + ctx->Allocate(data_buff_alloc)); + uint8_t* out_data = out_data_buf->mutable_data(); + + int64_t offset = cond.offset; + OffsetType total_bytes_written = 0; + while (offset < cond.offset + cond.length) { + const BitBlockCount& block = bit_counter.NextWord(); + + OffsetType bytes_written = 0; + if (block.AllSet()) { + // from left + bytes_written = left_offsets[block.length] - left_offsets[0]; + std::memcpy(out_data, left_data + left_offsets[0], bytes_written); + // normalize the out_offsets by reducing input start offset, and adding the + // offset upto the word + std::transform(left_offsets + 1, left_offsets + block.length + 1, out_offsets + 1, + [&](const OffsetType& src_offset) { + return src_offset - left_offsets[0] + out_offsets[0]; + }); + } else if (block.NoneSet()) { + // from right + bytes_written = block.length * right_size; + for (int i = 0; i < block.length; i++) { + std::memcpy(out_data + i * right_size, right_data.cbegin(), right_size); + out_offsets[i + 1] = out_offsets[0] + (i + 1) * right_size; + } + } else if (block.popcount) { // selectively copy from left + for (auto i = 0; i < block.length; ++i) { + OffsetType current_length; + if (BitUtil::GetBit(cond_data, offset + i)) { + current_length = left_offsets[i + 1] - left_offsets[i]; + 
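            // append the i-th left slot at the running end of the output;
            // bytes_written keeps the copied slots packed back-to-back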
std::memcpy(out_data + bytes_written, left_data + left_offsets[i], + current_length); + } else { + current_length = right_size; + std::memcpy(out_data + bytes_written, right_data.cbegin(), current_length); + } + out_offsets[i + 1] = out_offsets[i] + current_length; + bytes_written += current_length; + } + } + + offset += block.length; + left_offsets += block.length; + out_offsets += block.length; + out_data += bytes_written; + total_bytes_written += bytes_written; + } + + // resize the data buffer + ARROW_RETURN_NOT_OK(out_data_buf->Resize(total_bytes_written)); + + out->buffers[1] = std::move(out_offset_buf); + out->buffers[2] = std::move(out_data_buf); return Status::OK(); } // ASS static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, const Scalar& right, ArrayData* out) { - // ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - // ctx->Allocate(cond.length * sizeof(T))); - // T* out_values = reinterpret_cast(out_buf->mutable_data()); - // - // // copy right data to out_buff - // T right_data = internal::UnboxScalar::Unbox(right); - // std::fill(out_values, out_values + cond.length, right_data); - // - // const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); - // - // // selectively copy values from left data - // T left_data = internal::UnboxScalar::Unbox(left); - // int64_t offset = cond.offset; - // - // // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - // while (offset < cond.offset + cond.length) { - // const BitBlockCount& block = bit_counter.NextWord(); - // if (block.AllSet()) { // all from left - // std::fill(out_values, out_values + block.length, left_data); - // } else if (block.popcount) { // selectively copy from left - // for (int64_t i = 0; i < block.length; ++i) { - // if (BitUtil::GetBit(cond_data, offset + i)) { - // out_values[i] = left_data; - // } - // } - // } - // - // offset += block.length; - // out_values += block.length; - // } - // - // out->buffers[1] = std::move(out_buf); + const uint8_t* cond_data = cond.buffers[1]->data(); + BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + + util::string_view left_data = internal::UnboxScalar::Unbox(left); + int64_t left_size = left_data.size(); + + util::string_view right_data = internal::UnboxScalar::Unbox(right); + int64_t right_size = right_data.size(); + + // reserve an additional space + ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, + ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); + auto* out_offsets = reinterpret_cast(out_offset_buf->mutable_data()); + out_offsets[0] = 0; + + // allocate data buffer conservatively + auto data_buff_alloc = right_size * cond.length + left_size * cond.length; + ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_data_buf, + ctx->Allocate(data_buff_alloc)); + uint8_t* out_data = out_data_buf->mutable_data(); + + int64_t offset = cond.offset; + OffsetType total_bytes_written = 0; + while (offset < cond.offset + cond.length) { + const BitBlockCount& block = bit_counter.NextWord(); + + OffsetType bytes_written = 0; + if (block.AllSet()) { + // from left + bytes_written = block.length * left_size; + for (int i = 0; i < block.length; i++) { + std::memcpy(out_data + i * left_size, left_data.cbegin(), left_size); + out_offsets[i + 1] = out_offsets[0] + (i + 1) * left_size; + } + } else if (block.NoneSet()) { + // from right + bytes_written = block.length * right_size; + for (int i = 0; i < block.length; i++) { + std::memcpy(out_data + i 
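                      // slot i begins i * right_size bytes in: every slot holds
                      // the same fixed-size right scalar value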
* right_size, right_data.cbegin(), right_size); + out_offsets[i + 1] = out_offsets[0] + (i + 1) * right_size; + } + } else if (block.popcount) { // selectively copy from left + for (auto i = 0; i < block.length; ++i) { + OffsetType current_length; + if (BitUtil::GetBit(cond_data, offset + i)) { + current_length = left_size; + std::memcpy(out_data + bytes_written, left_data.cbegin(), current_length); + } else { + current_length = right_size; + std::memcpy(out_data + bytes_written, right_data.cbegin(), current_length); + } + out_offsets[i + 1] = out_offsets[i] + current_length; + bytes_written += current_length; + } + } + + offset += block.length; + out_offsets += block.length; + out_data += bytes_written; + total_bytes_written += bytes_written; + } + + // resize the data buffer + ARROW_RETURN_NOT_OK(out_data_buf->Resize(total_bytes_written)); + + out->buffers[1] = std::move(out_offset_buf); + out->buffers[2] = std::move(out_data_buf); return Status::OK(); } }; diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 6bf795696d7..f57717aa7f2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -324,45 +324,45 @@ TYPED_TEST_SUITE(TestIfElseBaseBinary, BaseBinaryTypes); TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinary) { auto type = TypeTraits::type_singleton(); - CheckIfElseOutput(ArrayFromJSON(boolean(), "[true, true, true, false]"), - ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), - ArrayFromJSON(type, R"(["a", "ab", "abc", "l"])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([true, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), - ArrayFromJSON(type, R"(["a", "ab", "abc", null])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([true, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), - ArrayFromJSON(type, R"(["a", "ab", null, null])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([true, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), - ArrayFromJSON(type, R"(["a", "ab", null, "l"])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), - ArrayFromJSON(type, R"([null, "ab", null, "l"])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), - ArrayFromJSON(type, R"([null, "ab", null, null])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), - ArrayFromJSON(type, R"([null, "ab", "abc", null])")); - - CheckIfElseOutput(ArrayFromJSON(boolean(), R"([null, true, true, false])"), - ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), - ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), - ArrayFromJSON(type, R"([null, "ab", "abc", "l"])")); + CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[true, true, true, false]"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, 
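                           // (helper renamed from CheckIfElseOutput in this patch;
                           // the new name suggests each case is also re-checked
                           // across array/scalar input shapes)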
R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "l"])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", null])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"(["a", "ab", null, null])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([true, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"(["a", "ab", null, "l"])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"([null, "ab", null, "l"])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", null, "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"([null, "ab", null, null])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", null])"), + ArrayFromJSON(type, R"([null, "ab", "abc", null])")); + + CheckWithDifferentShapes(ArrayFromJSON(boolean(), R"([null, true, true, false])"), + ArrayFromJSON(type, R"(["a", "ab", "abc", "abcd"])"), + ArrayFromJSON(type, R"(["lmno", "lmn", "lm", "l"])"), + ArrayFromJSON(type, R"([null, "ab", "abc", "l"])")); } TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) { diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h index 803b825e1b2..d228dae981e 100644 --- a/cpp/src/arrow/util/bit_block_counter.h +++ b/cpp/src/arrow/util/bit_block_counter.h @@ -89,6 +89,14 @@ struct BitBlockCount { bool AllSet() const { return this->length == this->popcount; } }; +template +struct BitBlockCount1 { + int16_t popcount; + + bool NoneSet() const { return this->popcount == 0; } + bool AllSet() const { return length == this->popcount; } +}; + /// \brief A class that scans through a true/false bitmap to compute popcounts /// 64 or 256 bits at a time. This is used to accelerate processing of /// mostly-not-null array data. From 01a5af1f50b3700a7a8373f4b26d544c4cbd873a Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 16 Jun 2021 01:13:22 -0400 Subject: [PATCH 05/19] adding todos --- cpp/src/arrow/compute/kernels/scalar_if_else.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index e29d46430f5..2b908cf1b03 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -581,7 +581,7 @@ struct IfElseFunctor> { if (block.AllSet()) { // from left bytes_written = block.length * left_size; - for (int i = 0; i < block.length; i++) { + for (int i = 0; i < block.length; i++) { // todo use std::fill may be? 
std::memcpy(out_data + i * left_size, left_data.cbegin(), left_size); out_offsets[i + 1] = out_offsets[0] + (i + 1) * left_size; } @@ -639,6 +639,8 @@ struct IfElseFunctor> { int64_t right_size = right_data.size(); // reserve an additional space + // todo use cond.data to calculate the out_data_buf size precisely, by summing true + // bits ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, ctx->Allocate((cond.length + 1) * sizeof(OffsetType))); auto* out_offsets = reinterpret_cast(out_offset_buf->mutable_data()); From c5bc550b70648aeb42f9eb81f7c05a983ed0baab Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 16 Jun 2021 12:20:18 -0400 Subject: [PATCH 06/19] working on comments --- .../arrow/compute/kernels/scalar_if_else.cc | 24 +++++++++---------- .../compute/kernels/scalar_if_else_test.cc | 6 ++--- cpp/src/arrow/util/bit_block_counter.h | 8 ------- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 2b908cf1b03..9354dbd0378 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -514,7 +514,7 @@ struct IfElseFunctor> { out_offsets + 1, [&](const OffsetType& src_offset) { return src_offset - right_offsets[0] + out_offsets[0]; }); - } else if (block.popcount) { // selectively copy from left + } else { // selectively copy from left for (auto i = 0; i < block.length; ++i) { OffsetType current_length; if (BitUtil::GetBit(cond_data, offset + i)) { @@ -554,7 +554,7 @@ struct IfElseFunctor> { BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); util::string_view left_data = internal::UnboxScalar::Unbox(left); - int64_t left_size = left_data.size(); + size_t left_size = left_data.size(); const auto* right_offsets = right.GetValues(1); const uint8_t* right_data = right.buffers[2]->data(); @@ -583,7 +583,7 @@ struct IfElseFunctor> { bytes_written = block.length * left_size; for (int i = 0; i < block.length; i++) { // todo use std::fill may be? 
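          // within an all-set word out_offsets[i] == out_offsets[0] + i * left_size,
          // so the incremental form below is equivalent and simpler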
std::memcpy(out_data + i * left_size, left_data.cbegin(), left_size); - out_offsets[i + 1] = out_offsets[0] + (i + 1) * left_size; + out_offsets[i + 1] = out_offsets[i] + left_size; } } else if (block.NoneSet()) { // from right @@ -595,7 +595,7 @@ struct IfElseFunctor> { out_offsets + 1, [&](const OffsetType& src_offset) { return src_offset - right_offsets[0] + out_offsets[0]; }); - } else if (block.popcount) { // selectively copy from left + } else { // selectively copy from left for (auto i = 0; i < block.length; ++i) { OffsetType current_length; if (BitUtil::GetBit(cond_data, offset + i)) { @@ -636,7 +636,7 @@ struct IfElseFunctor> { const uint8_t* left_data = left.buffers[2]->data(); util::string_view right_data = internal::UnboxScalar::Unbox(right); - int64_t right_size = right_data.size(); + size_t right_size = right_data.size(); // reserve an additional space // todo use cond.data to calculate the out_data_buf size precisely, by summing true @@ -674,9 +674,9 @@ struct IfElseFunctor> { bytes_written = block.length * right_size; for (int i = 0; i < block.length; i++) { std::memcpy(out_data + i * right_size, right_data.cbegin(), right_size); - out_offsets[i + 1] = out_offsets[0] + (i + 1) * right_size; + out_offsets[i + 1] = out_offsets[i] + right_size; } - } else if (block.popcount) { // selectively copy from left + } else { // selectively copy from left for (auto i = 0; i < block.length; ++i) { OffsetType current_length; if (BitUtil::GetBit(cond_data, offset + i)) { @@ -714,10 +714,10 @@ struct IfElseFunctor> { BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); util::string_view left_data = internal::UnboxScalar::Unbox(left); - int64_t left_size = left_data.size(); + size_t left_size = left_data.size(); util::string_view right_data = internal::UnboxScalar::Unbox(right); - int64_t right_size = right_data.size(); + size_t right_size = right_data.size(); // reserve an additional space ARROW_ASSIGN_OR_RAISE(auto out_offset_buf, @@ -742,16 +742,16 @@ struct IfElseFunctor> { bytes_written = block.length * left_size; for (int i = 0; i < block.length; i++) { std::memcpy(out_data + i * left_size, left_data.cbegin(), left_size); - out_offsets[i + 1] = out_offsets[0] + (i + 1) * left_size; + out_offsets[i + 1] = out_offsets[i] + left_size; } } else if (block.NoneSet()) { // from right bytes_written = block.length * right_size; for (int i = 0; i < block.length; i++) { std::memcpy(out_data + i * right_size, right_data.cbegin(), right_size); - out_offsets[i + 1] = out_offsets[0] + (i + 1) * right_size; + out_offsets[i + 1] = out_offsets[i] + right_size; } - } else if (block.popcount) { // selectively copy from left + } else { // selectively copy from left for (auto i = 0; i < block.length; ++i) { OffsetType current_length; if (BitUtil::GetBit(cond_data, offset + i)) { diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index f57717aa7f2..002846a4db2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -316,10 +316,7 @@ TEST_F(TestIfElseKernel, IfElseDispatchBest) { template class TestIfElseBaseBinary : public ::testing::Test {}; -using BaseBinaryTypes = - ::testing::Types; - -TYPED_TEST_SUITE(TestIfElseBaseBinary, BaseBinaryTypes); +TYPED_TEST_SUITE(TestIfElseBaseBinary, BinaryTypes); TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinary) { auto type = TypeTraits::type_singleton(); @@ -373,6 +370,7 @@ TYPED_TEST(TestIfElseBaseBinary, 
IfElseBaseBinaryRand) { random::RandomArrayGenerator rand(/*seed=*/0); int64_t len = 130; + // this is to check the BitBlockCount::AllSet/ NoneSet code paths ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64)); ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64)); auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01); diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h index d228dae981e..803b825e1b2 100644 --- a/cpp/src/arrow/util/bit_block_counter.h +++ b/cpp/src/arrow/util/bit_block_counter.h @@ -89,14 +89,6 @@ struct BitBlockCount { bool AllSet() const { return this->length == this->popcount; } }; -template -struct BitBlockCount1 { - int16_t popcount; - - bool NoneSet() const { return this->popcount == 0; } - bool AllSet() const { return length == this->popcount; } -}; - /// \brief A class that scans through a true/false bitmap to compute popcounts /// 64 or 256 bits at a time. This is used to accelerate processing of /// mostly-not-null array data. From c27fbe200be25f454194c7b49d8d00c5d429e108 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 30 Jun 2021 12:38:03 -0400 Subject: [PATCH 07/19] fixing null issue --- cpp/src/arrow/compute/kernels/scalar_if_else.cc | 17 +++++++++++------ .../compute/kernels/scalar_if_else_test.cc | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 9354dbd0378..c7f8596dba2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -460,8 +460,13 @@ struct IfElseFunctor> { using OffsetType = typename TypeTraits::OffsetType::c_type; using ArrayType = typename TypeTraits::ArrayType; - // A - Array - // S - Scalar + // A - Array, S - Scalar, X = Array/Scalar + + // SXX + static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left, + const Datum& right, Datum* out) { + return Status::OK(); + } // AAA static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, @@ -936,12 +941,12 @@ struct ResolveIfElseExec { template <> struct ResolveIfElseExec { static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { - if (batch[0].is_scalar()) { + if (batch[0].is_scalar() && batch[1].is_scalar() && batch[2].is_scalar()) { *out = MakeNullScalar(null()); } else { - const std::shared_ptr& cond_array = batch[0].array(); - ARROW_ASSIGN_OR_RAISE( - *out, MakeArrayOfNull(null(), cond_array->length, ctx->memory_pool())); + int64_t len = + std::max(batch[0].length(), std::max(batch[1].length(), batch[2].length())); + ARROW_ASSIGN_OR_RAISE(*out, MakeArrayOfNull(null(), len, ctx->memory_pool())); } return Status::OK(); } diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 002846a4db2..185fe1646a7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -270,7 +270,7 @@ TEST_F(TestIfElseKernel, IfElseBooleanRand) { } TEST_F(TestIfElseKernel, IfElseNull) { - CheckIfElseOutput(ArrayFromJSON(boolean(), "[null, null, null, null]"), + CheckWithDifferentShapes(ArrayFromJSON(boolean(), "[null, null, null, null]"), ArrayFromJSON(null(), "[null, null, null, null]"), ArrayFromJSON(null(), "[null, null, null, null]"), ArrayFromJSON(null(), "[null, null, null, null]")); From 
fc92a9bca341e9e07c6de1014cbc185ab3ad348a Mon Sep 17 00:00:00 2001
From: niranda perera
Date: Wed, 30 Jun 2021 17:20:37 -0400
Subject: [PATCH 08/19] templating null-promotion logic

---
 .../arrow/compute/kernels/codegen_internal.h  | 16 ++--
 .../arrow/compute/kernels/scalar_if_else.cc   | 77 +++++++++++++------
 .../compute/kernels/scalar_if_else_test.cc    |  6 +-
 3 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 12e80423f7f..d5cd9a4d30e 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -311,7 +311,7 @@ struct UnboxScalar> {
 };
 
 template <typename Type>
-struct UnboxScalar> {
+struct UnboxScalar> {
   static util::string_view Unbox(const Scalar& val) {
     if (!val.is_valid) return util::string_view();
     return util::string_view(*checked_cast<const BaseBinaryScalar&>(val).value);
   }
 };
 
 template <typename Type>
@@ -1222,26 +1222,26 @@ ArrayKernelExec GenerateSignedInteger(detail::GetTypeId get_id) {
 // bits).
 //
 // See "Numeric" above for description of the generator functor
-template
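For reference, a minimal sketch of how the kernels added in this series are
exercised through the compute API. It assumes only the public
arrow::compute::CallFunction entry point and the ArrayFromJSON helper the test
file above already uses; it is not part of the patches themselves:

#include <iostream>

#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <arrow/testing/gtest_util.h>  // ArrayFromJSON, as in the tests above

// Sketch only: picks "a" where cond is true, "y" where it is false, and
// propagates the null cond slot to the output.
arrow::Status RunIfElseSketch() {
  auto cond = arrow::ArrayFromJSON(arrow::boolean(), "[true, false, null]");
  auto left = arrow::ArrayFromJSON(arrow::utf8(), R"(["a", "b", "c"])");
  auto right = arrow::ArrayFromJSON(arrow::utf8(), R"(["x", "y", "z"])");
  ARROW_ASSIGN_OR_RAISE(arrow::Datum out, arrow::compute::CallFunction(
                                              "if_else", {cond, left, right}));
  // expected: ["a", "y", null]
  std::cout << out.make_array()->ToString() << std::endl;
  return arrow::Status::OK();
}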