-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-46063: [C++][Compute] Fix the issue that MinMax kernel emits -inf/inf for all-NaN input #48459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7317ced
4894506
7559543
4730f95
461bd5c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1841,6 +1841,24 @@ class TestPrimitiveMinMaxKernel : public ::testing::Test { | |
| AssertMinMaxIsNull(array, options); | ||
| } | ||
|
|
||
| void AssertMinMaxIsNaN(const Datum& array, const ScalarAggregateOptions& options) { | ||
| ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array, options)); | ||
| for (const auto& val : out.scalar_as<StructScalar>().value) { | ||
| ASSERT_TRUE(std::isnan(checked_cast<const ScalarType&>(*val).value)); | ||
| } | ||
| } | ||
|
|
||
| void AssertMinMaxIsNaN(const std::string& json, const ScalarAggregateOptions& options) { | ||
| auto array = ArrayFromJSON(type_singleton(), json); | ||
| AssertMinMaxIsNaN(array, options); | ||
| } | ||
|
|
||
| void AssertMinMaxIsNaN(const std::vector<std::string>& json, | ||
| const ScalarAggregateOptions& options) { | ||
| auto array = ChunkedArrayFromJSON(type_singleton(), json); | ||
| AssertMinMaxIsNaN(array, options); | ||
| } | ||
|
|
||
| std::shared_ptr<DataType> type_singleton() { | ||
| return default_type_instance<ArrowType>(); | ||
| } | ||
|
|
@@ -1963,6 +1981,9 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) { | |
| this->AssertMinMaxIs("[5, Inf, 2, 3, 4]", 2.0, INFINITY, options); | ||
| this->AssertMinMaxIs("[5, NaN, 2, 3, 4]", 2, 5, options); | ||
| this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options); | ||
| this->AssertMinMaxIs("[NaN, null, 42]", 42, 42, options); | ||
| this->AssertMinMaxIsNaN("[NaN, NaN]", options); | ||
| this->AssertMinMaxIsNaN("[NaN, null]", options); | ||
| this->AssertMinMaxIs(chunked_input1, 1, 9, options); | ||
| this->AssertMinMaxIs(chunked_input2, 1, 9, options); | ||
| this->AssertMinMaxIs(chunked_input3, 1, 9, options); | ||
|
|
@@ -1980,6 +2001,7 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) { | |
| this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options); | ||
| this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options); | ||
| this->AssertMinMaxIsNull("[5, -Inf, null, 3, 4]", options); | ||
| this->AssertMinMaxIsNull("[NaN, null]", options); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these tests also executed with Float16 or is there a separate test for it?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as my other comment. |
||
| this->AssertMinMaxIsNull(chunked_input1, options); | ||
| this->AssertMinMaxIsNull(chunked_input2, options); | ||
| this->AssertMinMaxIsNull(chunked_input3, options); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| // under the License. | ||
|
|
||
| #include <cmath> | ||
| #include <concepts> | ||
| #include <functional> | ||
| #include <memory> | ||
| #include <string> | ||
|
|
@@ -270,52 +271,53 @@ struct GroupedCountImpl : public GroupedAggregator { | |
| // ---------------------------------------------------------------------- | ||
| // MinMax implementation | ||
|
|
||
| // XXX: Consider making these concepts complete and moving to public header. | ||
|
|
||
| template <typename T> | ||
| concept CBooleanConcept = std::same_as<T, bool>; | ||
|
|
||
| // XXX: Ideally we want to have std::floating_point<Float16> = true. | ||
| template <typename T> | ||
| concept CFloatingPointConcept = std::floating_point<T> || std::same_as<T, util::Float16>; | ||
|
|
||
| template <typename T> | ||
| concept CDecimalConcept = std::same_as<T, Decimal32> || std::same_as<T, Decimal64> || | ||
| std::same_as<T, Decimal128> || std::same_as<T, Decimal256>; | ||
|
|
||
| template <typename CType> | ||
| struct AntiExtrema { | ||
| static constexpr CType anti_min() { return std::numeric_limits<CType>::max(); } | ||
| static constexpr CType anti_max() { return std::numeric_limits<CType>::min(); } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<bool> { | ||
| static constexpr bool anti_min() { return true; } | ||
| static constexpr bool anti_max() { return false; } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<float> { | ||
| static constexpr float anti_min() { return std::numeric_limits<float>::infinity(); } | ||
| static constexpr float anti_max() { return -std::numeric_limits<float>::infinity(); } | ||
| template <CBooleanConcept CType> | ||
zanmato1984 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| struct AntiExtrema<CType> { | ||
| static constexpr CType anti_min() { return true; } | ||
| static constexpr CType anti_max() { return false; } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<double> { | ||
| static constexpr double anti_min() { return std::numeric_limits<double>::infinity(); } | ||
| static constexpr double anti_max() { return -std::numeric_limits<double>::infinity(); } | ||
| template <CFloatingPointConcept CType> | ||
| struct AntiExtrema<CType> { | ||
| static constexpr CType anti_min() { return std::numeric_limits<CType>::quiet_NaN(); } | ||
| static constexpr CType anti_max() { return std::numeric_limits<CType>::quiet_NaN(); } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<Decimal32> { | ||
| static constexpr Decimal32 anti_min() { return BasicDecimal32::GetMaxSentinel(); } | ||
| static constexpr Decimal32 anti_max() { return BasicDecimal32::GetMinSentinel(); } | ||
| template <CDecimalConcept CType> | ||
| struct AntiExtrema<CType> { | ||
| static constexpr CType anti_min() { return CType::GetMaxSentinel(); } | ||
| static constexpr CType anti_max() { return CType::GetMinSentinel(); } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<Decimal64> { | ||
| static constexpr Decimal64 anti_min() { return BasicDecimal64::GetMaxSentinel(); } | ||
| static constexpr Decimal64 anti_max() { return BasicDecimal64::GetMinSentinel(); } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<Decimal128> { | ||
| static constexpr Decimal128 anti_min() { return BasicDecimal128::GetMaxSentinel(); } | ||
| static constexpr Decimal128 anti_max() { return BasicDecimal128::GetMinSentinel(); } | ||
| template <typename CType> | ||
| struct MinMaxOp { | ||
| static constexpr CType min(CType a, CType b) { return std::min(a, b); } | ||
| static constexpr CType max(CType a, CType b) { return std::max(a, b); } | ||
| }; | ||
|
|
||
| template <> | ||
| struct AntiExtrema<Decimal256> { | ||
| static constexpr Decimal256 anti_min() { return BasicDecimal256::GetMaxSentinel(); } | ||
| static constexpr Decimal256 anti_max() { return BasicDecimal256::GetMinSentinel(); } | ||
| template <CFloatingPointConcept CType> | ||
| struct MinMaxOp<CType> { | ||
| static constexpr CType min(CType a, CType b) { return std::fmin(a, b); } | ||
| static constexpr CType max(CType a, CType b) { return std::fmax(a, b); } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, but we don't either specialize for |
||
| }; | ||
|
|
||
| template <typename Type, typename Enable = void> | ||
|
|
@@ -352,8 +354,8 @@ struct GroupedMinMaxImpl final : public GroupedAggregator { | |
| VisitGroupedValues<Type>( | ||
| batch, | ||
| [&](uint32_t g, CType val) { | ||
| GetSet::Set(raw_mins, g, std::min(GetSet::Get(raw_mins, g), val)); | ||
| GetSet::Set(raw_maxes, g, std::max(GetSet::Get(raw_maxes, g), val)); | ||
| GetSet::Set(raw_mins, g, MinMaxOp<CType>::min(GetSet::Get(raw_mins, g), val)); | ||
| GetSet::Set(raw_maxes, g, MinMaxOp<CType>::max(GetSet::Get(raw_maxes, g), val)); | ||
| bit_util::SetBit(has_values_.mutable_data(), g); | ||
| }, | ||
| [&](uint32_t g) { bit_util::SetBit(has_nulls_.mutable_data(), g); }); | ||
|
|
@@ -373,12 +375,12 @@ struct GroupedMinMaxImpl final : public GroupedAggregator { | |
| auto g = group_id_mapping.GetValues<uint32_t>(1); | ||
| for (uint32_t other_g = 0; static_cast<int64_t>(other_g) < group_id_mapping.length; | ||
| ++other_g, ++g) { | ||
| GetSet::Set( | ||
| raw_mins, *g, | ||
| std::min(GetSet::Get(raw_mins, *g), GetSet::Get(other_raw_mins, other_g))); | ||
| GetSet::Set( | ||
| raw_maxes, *g, | ||
| std::max(GetSet::Get(raw_maxes, *g), GetSet::Get(other_raw_maxes, other_g))); | ||
| GetSet::Set(raw_mins, *g, | ||
| MinMaxOp<CType>::min(GetSet::Get(raw_mins, *g), | ||
| GetSet::Get(other_raw_mins, other_g))); | ||
| GetSet::Set(raw_maxes, *g, | ||
| MinMaxOp<CType>::max(GetSet::Get(raw_maxes, *g), | ||
| GetSet::Get(other_raw_maxes, other_g))); | ||
|
|
||
| if (bit_util::GetBit(other->has_values_.data(), other_g)) { | ||
| bit_util::SetBit(has_values_.mutable_data(), *g); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we also want a float16 column? :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The min/max kernel for half-float is not implemented so we are not able to test it.
The current half-float is not intact in terms of: 1) whether the type is in floating point category is inconsistent: e.g. type trait
is_floating_type<HalfFloatType>istruebutg_floating_typesdoesn't include it (that's why floating point kernels don't register for half-float). 2) Somestdfunctions don't work with our half-float representation: e.g.std::is_nan/fmin/fmax, it won't compile if we try to add certain half-float kernels.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I see. Thanks for the clarification.