From 7ac9235dd94e9ea2bfb013d94d936fb8270f2e14 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Tue, 13 Feb 2024 11:32:21 -0600 Subject: [PATCH 01/21] Add casts for float16 <-> float32 --- .../compute/kernels/scalar_cast_internal.cc | 36 +++++++++++++++++++ .../compute/kernels/scalar_cast_numeric.cc | 7 +++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 8cf5a04addb..dc219164b4d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -20,11 +20,13 @@ #include "arrow/compute/kernels/common_internal.h" #include "arrow/extension_type.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/float16.h" namespace arrow { using internal::checked_cast; using internal::PrimitiveScalarBase; +using arrow::util::Float16; namespace compute { namespace internal { @@ -47,6 +49,36 @@ struct CastPrimitive { } }; +template<> struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + const float* in_values = arr.GetValues(1); + uint16_t* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + float val = *in_values; + uint16_t converted = Float16(val).bits(); + std::memcpy(out_values, static_cast(&converted), sizeof(uint16_t)); + out_values++; + in_values++; + } + } +}; + +template<> struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + const uint16_t* in_values = arr.GetValues(1); + float* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + Float16 val = Float16::FromBits(*in_values); + float converted = val.ToFloat(); + std::memcpy(out_values, static_cast(&converted), sizeof(float)); + out_values++; + in_values++; + } + } +}; + + + template struct CastPrimitive::value>> { // memcpy output @@ -79,6 +111,8 @@ void CastNumberImpl(Type::type out_type, const ArraySpan& input, ArraySpan* out) return CastPrimitive::Exec(input, out); case Type::DOUBLE: return CastPrimitive::Exec(input, out); + case Type::HALF_FLOAT: + return CastPrimitive::Exec(input, out); default: break; } @@ -109,6 +143,8 @@ void CastNumberToNumberUnsafe(Type::type in_type, Type::type out_type, return CastNumberImpl(out_type, input, out); case Type::DOUBLE: return CastNumberImpl(out_type, input, out); + case Type::HALF_FLOAT: + return CastNumberImpl(out_type, input, out); default: DCHECK(false); break; diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index b054e57f04d..9200a2ec4c9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -833,9 +833,14 @@ std::vector> GetNumericCasts() { auto cast_half_float = std::make_shared("cast_half_float", Type::HALF_FLOAT); AddCommonCasts(Type::HALF_FLOAT, float16(), cast_half_float.get()); + DCHECK_OK(cast_half_float.get()->AddKernel(Type::FLOAT, + {InputType(Type::FLOAT)}, float16(), CastFloatingToFloating)); functions.push_back(cast_half_float); - functions.push_back(GetCastToFloating("cast_float")); + auto cast_float = GetCastToFloating("cast_float"); + DCHECK_OK(cast_float.get()->AddKernel(Type::HALF_FLOAT, + {InputType(Type::HALF_FLOAT)}, float32(), CastFloatingToFloating)); + functions.push_back(cast_float); functions.push_back(GetCastToFloating("cast_double")); functions.push_back(GetCastToDecimal128()); From f9568f01ccd8420ddc2ee7d745f65e2f70421f33 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 16 Feb 2024 09:15:25 -0600 Subject: [PATCH 02/21] Better template definitions for cast to/from halffloat --- .../compute/kernels/scalar_cast_internal.cc | 39 +++++++++++-------- .../compute/kernels/scalar_cast_numeric.cc | 8 +++- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index dc219164b4d..f77b4b21fb5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -19,6 +19,7 @@ #include "arrow/compute/cast_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/extension_type.h" +#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/float16.h" @@ -49,36 +50,40 @@ struct CastPrimitive { } }; -template<> struct CastPrimitive> { +// Converting floating types to half float. +template +struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + using InT = typename InType::c_type; + const InT* in_values = arr.GetValues(1); + uint16_t* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = Float16(*in_values++).bits(); + } + } +}; + +template<> +struct CastPrimitive> { static void Exec(const ArraySpan& arr, ArraySpan* out) { - const float* in_values = arr.GetValues(1); - uint16_t* out_values = out->GetValues(1); + const uint16_t* in_values = arr.GetValues(1); + float* out_values = out->GetValues(1); for (int64_t i = 0; i < arr.length; ++i) { - float val = *in_values; - uint16_t converted = Float16(val).bits(); - std::memcpy(out_values, static_cast(&converted), sizeof(uint16_t)); - out_values++; - in_values++; + *out_values++ = Float16::FromBits(*in_values++).ToFloat(); } } }; -template<> struct CastPrimitive> { +template<> struct CastPrimitive> { static void Exec(const ArraySpan& arr, ArraySpan* out) { const uint16_t* in_values = arr.GetValues(1); - float* out_values = out->GetValues(1); + double* out_values = out->GetValues(1); for (int64_t i = 0; i < arr.length; ++i) { - Float16 val = Float16::FromBits(*in_values); - float converted = val.ToFloat(); - std::memcpy(out_values, static_cast(&converted), sizeof(float)); - out_values++; - in_values++; + *out_values++ = Float16::FromBits(*in_values++).ToDouble(); } } }; - - template struct CastPrimitive::value>> { // memcpy output diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 9200a2ec4c9..f36d64d539b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -835,13 +835,19 @@ std::vector> GetNumericCasts() { AddCommonCasts(Type::HALF_FLOAT, float16(), cast_half_float.get()); DCHECK_OK(cast_half_float.get()->AddKernel(Type::FLOAT, {InputType(Type::FLOAT)}, float16(), CastFloatingToFloating)); + DCHECK_OK(cast_half_float.get()->AddKernel(Type::DOUBLE, + {InputType(Type::DOUBLE)}, float16(), CastFloatingToFloating)); functions.push_back(cast_half_float); auto cast_float = GetCastToFloating("cast_float"); DCHECK_OK(cast_float.get()->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, float32(), CastFloatingToFloating)); functions.push_back(cast_float); - functions.push_back(GetCastToFloating("cast_double")); + + auto cast_double = GetCastToFloating("cast_double"); + DCHECK_OK(cast_double.get()->AddKernel(Type::HALF_FLOAT, + {InputType(Type::HALF_FLOAT)}, float64(), CastFloatingToFloating)); + functions.push_back(cast_double); functions.push_back(GetCastToDecimal128()); functions.push_back(GetCastToDecimal256()); From b626ee42e43eacb9a074eb8560e899db94a504f3 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Tue, 20 Feb 2024 14:25:40 -0600 Subject: [PATCH 03/21] Fold casting from half float to float32/64 into GetCastToFloating --- cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index f36d64d539b..d07f638837a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -715,6 +715,10 @@ std::shared_ptr GetCastToFloating(std::string name) { DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, out_ty, CastFloatingToFloating)); } + // From half-float to float/double + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, + {InputType(Type::HALF_FLOAT)}, out_ty, CastFloatingToFloating)); + // From other numbers to floating point AddCommonNumberCasts(out_ty, func.get()); @@ -723,6 +727,7 @@ std::shared_ptr GetCastToFloating(std::string name) { CastFunctor::Exec)); DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty, CastFunctor::Exec)); + return func; } @@ -840,13 +845,9 @@ std::vector> GetNumericCasts() { functions.push_back(cast_half_float); auto cast_float = GetCastToFloating("cast_float"); - DCHECK_OK(cast_float.get()->AddKernel(Type::HALF_FLOAT, - {InputType(Type::HALF_FLOAT)}, float32(), CastFloatingToFloating)); functions.push_back(cast_float); auto cast_double = GetCastToFloating("cast_double"); - DCHECK_OK(cast_double.get()->AddKernel(Type::HALF_FLOAT, - {InputType(Type::HALF_FLOAT)}, float64(), CastFloatingToFloating)); functions.push_back(cast_double); functions.push_back(GetCastToDecimal128()); From 8f1aad081229bff521a7f55b32d77c5be0e57210 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Tue, 20 Feb 2024 14:26:43 -0600 Subject: [PATCH 04/21] Comment on CastPrimitive specializations from half float to float32/64 --- cpp/src/arrow/compute/kernels/scalar_cast_internal.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index f77b4b21fb5..9ceb8dd9f48 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -63,8 +63,8 @@ struct CastPrimitive -struct CastPrimitive> { +// Converting from half float to other floating types. +template<> struct CastPrimitive> { static void Exec(const ArraySpan& arr, ArraySpan* out) { const uint16_t* in_values = arr.GetValues(1); float* out_values = out->GetValues(1); From b1c7edf9295818642a2d3b7dbcec551ed6772557 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Mon, 26 Feb 2024 13:47:37 -0600 Subject: [PATCH 05/21] string to float16 casts --- .../compute/kernels/scalar_cast_numeric.cc | 14 ++++++++++++++ .../arrow/compute/kernels/scalar_cast_string.cc | 5 +++++ cpp/src/arrow/type_traits.h | 1 + cpp/src/arrow/util/formatting.cc | 11 +++++++++++ cpp/src/arrow/util/formatting.h | 7 +++++++ cpp/src/arrow/util/value_parsing.cc | 14 ++++++++++++++ cpp/src/arrow/util/value_parsing.h | 17 +++++++++++++++++ 7 files changed, 69 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index d07f638837a..351eff2f1dd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -293,6 +293,13 @@ struct CastFunctor< } }; +template <> +struct CastFunctor> { + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + return applicator::ScalarUnaryNotNull>::Exec(ctx, batch, out); + } +}; + // ---------------------------------------------------------------------- // Decimal to integer @@ -838,6 +845,13 @@ std::vector> GetNumericCasts() { auto cast_half_float = std::make_shared("cast_half_float", Type::HALF_FLOAT); AddCommonCasts(Type::HALF_FLOAT, float16(), cast_half_float.get()); + + // Cast from other strings to half float. + for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { + auto exec = GenerateVarBinaryBase(*in_ty); + DCHECK_OK(cast_half_float->AddKernel(in_ty->id(), {in_ty}, TypeTraits::type_singleton(), exec)); + } + DCHECK_OK(cast_half_float.get()->AddKernel(Type::FLOAT, {InputType(Type::FLOAT)}, float16(), CastFloatingToFloating)); DCHECK_OK(cast_half_float.get()->AddKernel(Type::DOUBLE, diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index a6576e4e4c2..1ef02278a84 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -437,6 +437,11 @@ void AddNumberToStringCasts(CastFunction* func) { GenerateNumeric(*in_ty), NullHandling::COMPUTED_NO_PREALLOCATE)); } + + DCHECK_OK( + func->AddKernel(Type::HALF_FLOAT, {float16()}, out_ty, + NumericToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); } template diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index ed66c9367dc..8caf4400fe8 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -305,6 +305,7 @@ struct TypeTraits { using BuilderType = HalfFloatBuilder; using ScalarType = HalfFloatScalar; using TensorType = HalfFloatTensor; + using CType = uint16_t; static constexpr int64_t bytes_required(int64_t elements) { return elements * static_cast(sizeof(uint16_t)); diff --git a/cpp/src/arrow/util/formatting.cc b/cpp/src/arrow/util/formatting.cc index c16d42ce5cf..b021ca450d0 100644 --- a/cpp/src/arrow/util/formatting.cc +++ b/cpp/src/arrow/util/formatting.cc @@ -19,10 +19,12 @@ #include "arrow/util/config.h" #include "arrow/util/double_conversion.h" #include "arrow/util/logging.h" +#include "arrow/util/float16.h" namespace arrow { using util::double_conversion::DoubleToStringConverter; +using util::Float16; static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1; @@ -87,5 +89,14 @@ int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size return builder.position(); } +int FloatToStringFormatter::FormatFloat(uint16_t v, char* out_buffer, int out_size) { + DCHECK_GE(out_size, kMinBufferSize); + util::double_conversion::StringBuilder builder(out_buffer, out_size); + bool result = impl_->converter_.ToShortest(Float16::FromBits(v).ToFloat(), &builder); + DCHECK(result); + ARROW_UNUSED(result); + return builder.position(); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 71bae74629e..6125f792ff9 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -268,6 +268,7 @@ class ARROW_EXPORT FloatToStringFormatter { // Returns the number of characters written int FormatFloat(float v, char* out_buffer, int out_size); int FormatFloat(double v, char* out_buffer, int out_size); + int FormatFloat(uint16_t v, char* out_buffer, int out_size); protected: struct Impl; @@ -301,6 +302,12 @@ class FloatToStringFormatterMixin : public FloatToStringFormatter { } }; +template <> +class StringFormatter : public FloatToStringFormatterMixin { + public: + using FloatToStringFormatterMixin::FloatToStringFormatterMixin; +}; + template <> class StringFormatter : public FloatToStringFormatterMixin { public: diff --git a/cpp/src/arrow/util/value_parsing.cc b/cpp/src/arrow/util/value_parsing.cc index f6a24ac1467..ed08ddf08f9 100644 --- a/cpp/src/arrow/util/value_parsing.cc +++ b/cpp/src/arrow/util/value_parsing.cc @@ -23,6 +23,9 @@ #include #include "arrow/vendored/fast_float/fast_float.h" +#include "arrow/util/float16.h" + +using arrow::util::Float16; namespace arrow { namespace internal { @@ -43,6 +46,17 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, double* out return res.ec == std::errc() && res.ptr == s + length; } +// Half float +bool StringToFloat(const char* s, size_t length, char decimal_point, uint16_t* out) { + ::arrow_vendored::fast_float::parse_options options{ + ::arrow_vendored::fast_float::chars_format::general, decimal_point}; + float temp_out; + const auto res = + ::arrow_vendored::fast_float::from_chars_advanced(s, s + length, temp_out, options); + *out = Float16::FromFloat(temp_out).bits(); + return res.ec == std::errc() && res.ptr == s + length; +} + // ---------------------------------------------------------------------- // strptime-like parsing diff --git a/cpp/src/arrow/util/value_parsing.h b/cpp/src/arrow/util/value_parsing.h index b3c711840f3..609906052cd 100644 --- a/cpp/src/arrow/util/value_parsing.h +++ b/cpp/src/arrow/util/value_parsing.h @@ -135,6 +135,9 @@ bool StringToFloat(const char* s, size_t length, char decimal_point, float* out) ARROW_EXPORT bool StringToFloat(const char* s, size_t length, char decimal_point, double* out); +ARROW_EXPORT +bool StringToFloat(const char* s, size_t length, char decimal_point, uint16_t* out); + template <> struct StringConverter { using value_type = float; @@ -163,6 +166,20 @@ struct StringConverter { const char decimal_point; }; +template <> +struct StringConverter { + using value_type = uint16_t; + + explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {} + + bool Convert(const HalfFloatType&, const char* s, size_t length, value_type* out) { + return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out)); + } + + private: + const char decimal_point; +}; + // NOTE: HalfFloatType would require a half<->float conversion library inline uint8_t ParseDecimalDigit(char c) { return static_cast(c - '0'); } From d21d8bcbfb4058c60976d543b5eee01313fc3eeb Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Wed, 28 Feb 2024 15:46:17 -0600 Subject: [PATCH 06/21] Add casts from int to half float --- .../compute/kernels/scalar_cast_internal.cc | 14 ++++++ .../compute/kernels/scalar_cast_numeric.cc | 49 +++++++++++++------ 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 9ceb8dd9f48..2eaa7000151 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -93,6 +93,20 @@ struct CastPrimitive: } }; +// Cast int to half float +template +struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + using InT = typename InType::c_type; + const InT* in_values = arr.GetValues(1); + uint16_t* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + float temp = static_cast(*in_values++); + *out_values++ = Float16(temp).bits(); + } + } +}; + template void CastNumberImpl(Type::type out_type, const ArraySpan& input, ArraySpan* out) { switch (out_type) { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 351eff2f1dd..49858560dbc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -151,6 +151,10 @@ Status CheckFloatToIntTruncation(const ExecValue& input, const ExecResult& outpu return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); case Type::DOUBLE: return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); + // FIXME: Is this OK? It seems like the underlying check may not operator correctly + // on a type that is backed by a uint16_t. + case Type::HALF_FLOAT: + return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); default: break; } @@ -696,6 +700,10 @@ std::shared_ptr GetCastToInteger(std::string name) { DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, out_ty, CastFloatingToInteger)); } + // Cast from half-float + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, + out_ty, CastFloatingToInteger)); + // From other numbers to integer AddCommonNumberCasts(out_ty, func.get()); @@ -807,6 +815,32 @@ std::shared_ptr GetCastToDecimal256() { return func; } +std::shared_ptr GetCastToHalfFloat() { + // HalfFloat is a bit brain-damaged for now + auto func = + std::make_shared("func", Type::HALF_FLOAT); + AddCommonCasts(Type::HALF_FLOAT, float16(), func.get()); + + // Casts from integer to floating point + for (const std::shared_ptr& in_ty : IntTypes()) { + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, + TypeTraits::type_singleton(), CastIntegerToFloating)); + } + + // Cast from other strings to half float. + for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { + auto exec = GenerateVarBinaryBase(*in_ty); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, + TypeTraits::type_singleton(), exec)); + } + + DCHECK_OK(func.get()->AddKernel(Type::FLOAT, + {InputType(Type::FLOAT)}, float16(), CastFloatingToFloating)); + DCHECK_OK(func.get()->AddKernel(Type::DOUBLE, + {InputType(Type::DOUBLE)}, float16(), CastFloatingToFloating)); + return func; +} + } // namespace std::vector> GetNumericCasts() { @@ -842,20 +876,7 @@ std::vector> GetNumericCasts() { functions.push_back(GetCastToInteger("cast_uint64")); // HalfFloat is a bit brain-damaged for now - auto cast_half_float = - std::make_shared("cast_half_float", Type::HALF_FLOAT); - AddCommonCasts(Type::HALF_FLOAT, float16(), cast_half_float.get()); - - // Cast from other strings to half float. - for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { - auto exec = GenerateVarBinaryBase(*in_ty); - DCHECK_OK(cast_half_float->AddKernel(in_ty->id(), {in_ty}, TypeTraits::type_singleton(), exec)); - } - - DCHECK_OK(cast_half_float.get()->AddKernel(Type::FLOAT, - {InputType(Type::FLOAT)}, float16(), CastFloatingToFloating)); - DCHECK_OK(cast_half_float.get()->AddKernel(Type::DOUBLE, - {InputType(Type::DOUBLE)}, float16(), CastFloatingToFloating)); + auto cast_half_float = GetCastToHalfFloat(); functions.push_back(cast_half_float); auto cast_float = GetCastToFloating("cast_float"); From 8f56a6f60219a65ad801c4d19d220fccab74d37c Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Wed, 28 Feb 2024 15:48:37 -0600 Subject: [PATCH 07/21] Add HalfFloatType support to ConvertNumber --- cpp/src/arrow/ipc/json_simple.cc | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index ceeabe01677..1438e0365a3 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -38,6 +38,7 @@ #include "arrow/util/decimal.h" #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" +#include "arrow/util/float16.h" #include "arrow/json/rapidjson_defs.h" @@ -52,6 +53,7 @@ namespace rj = arrow::rapidjson; namespace arrow { using internal::ParseValue; +using util::Float16; namespace ipc { namespace internal { @@ -232,7 +234,7 @@ enable_if_physical_signed_integer ConvertNumber(const rj::Value& json // Convert single unsigned integer value template -enable_if_physical_unsigned_integer ConvertNumber(const rj::Value& json_obj, +enable_if_unsigned_integer ConvertNumber(const rj::Value& json_obj, const DataType& type, typename T::c_type* out) { if (json_obj.IsUint64()) { @@ -249,6 +251,26 @@ enable_if_physical_unsigned_integer ConvertNumber(const rj::Value& js } } +// Convert float16/HalfFloatType +template +enable_if_half_float ConvertNumber(const rj::Value& json_obj, + const DataType& type, + uint16_t* out) { + if (json_obj.IsDouble()) { + double f64 = json_obj.GetDouble(); + Float16 f16 = Float16(f64); + *out = f16.bits(); + if (f16.ToDouble() == f64) { + return Status::OK(); + } else { + return Status::Invalid("Value ", f64, " out of bounds for ", type); + } + } else { + *out = static_cast(0); + return JSONTypeError("unsigned int", json_obj.GetType()); + } +} + // Convert single floating point value template enable_if_physical_floating_point ConvertNumber(const rj::Value& json_obj, From 0ba90e823c16152ddc2d96508a84951dc6c00a88 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Tue, 5 Mar 2024 11:19:11 -0600 Subject: [PATCH 08/21] Fix casting half float to int --- .../compute/kernels/scalar_cast_internal.cc | 16 ++++++ .../compute/kernels/scalar_cast_numeric.cc | 49 ++++++++++++++----- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 2eaa7000151..7ed43b1ef7e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -107,6 +107,22 @@ struct CastPrimitive> { } }; +// Cast half float to int +template +struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + std::cout << "HalfFloat to Int CastPrimitive()!\n"; + using OutT = typename OutType::c_type; + const uint16_t* in_values = arr.GetValues(1); + OutT* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = static_cast(Float16::FromBits(*in_values++).ToFloat()); + } + } +}; + + + template void CastNumberImpl(Type::type out_type, const ArraySpan& input, ArraySpan* out) { switch (out_type) { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 49858560dbc..0890cc02bc5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -23,6 +23,7 @@ #include "arrow/compute/kernels/util_internal.h" #include "arrow/scalar.h" #include "arrow/util/bit_block_counter.h" +#include "arrow/util/float16.h" #include "arrow/util/int_util.h" #include "arrow/util/value_parsing.h" @@ -34,6 +35,7 @@ using internal::IntegersCanFit; using internal::OptionalBitBlockCounter; using internal::ParseValue; using internal::PrimitiveScalarBase; +using util::Float16; namespace compute { namespace internal { @@ -56,18 +58,39 @@ Status CastFloatingToFloating(KernelContext*, const ExecSpan& batch, ExecResult* // ---------------------------------------------------------------------- // Implement fast safe floating point to integer cast +// +template +struct WasTruncated { + static bool Check(OutT out_val, InT in_val) { + return static_cast(out_val) != in_val; + } + + static bool CheckMaybeNull(OutT out_val, InT in_val, bool is_valid) { + return is_valid && static_cast(out_val) != in_val; + } +}; + +// Half float to int +template +struct WasTruncated { + using OutT = typename OutType::c_type; + static bool Check(OutT out_val, uint16_t in_val) { + return static_cast(out_val) != Float16::FromBits(in_val).ToFloat(); + } + + static bool CheckMaybeNull(OutT out_val, uint16_t in_val, bool is_valid) { + return is_valid && static_cast(out_val) != Float16::FromBits(in_val).ToFloat(); + } + +}; // InType is a floating point type we are planning to cast to integer template ARROW_DISABLE_UBSAN("float-cast-overflow") Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) { - auto WasTruncated = [&](OutT out_val, InT in_val) -> bool { - return static_cast(out_val) != in_val; - }; - auto WasTruncatedMaybeNull = [&](OutT out_val, InT in_val, bool is_valid) -> bool { - return is_valid && static_cast(out_val) != in_val; - }; auto GetErrorMessage = [&](InT val) { return Status::Invalid("Float value ", val, " was truncated converting to ", *output.type); @@ -86,26 +109,26 @@ Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) { if (block.popcount == block.length) { // Fast path: branchless for (int64_t i = 0; i < block.length; ++i) { - block_out_of_bounds |= WasTruncated(out_data[i], in_data[i]); + block_out_of_bounds |= WasTruncated::Check(out_data[i], in_data[i]); } } else if (block.popcount > 0) { // Indices have nulls, must only boundscheck non-null values for (int64_t i = 0; i < block.length; ++i) { - block_out_of_bounds |= WasTruncatedMaybeNull( + block_out_of_bounds |= WasTruncated::CheckMaybeNull( out_data[i], in_data[i], bit_util::GetBit(bitmap, offset_position + i)); } } if (ARROW_PREDICT_FALSE(block_out_of_bounds)) { if (input.GetNullCount() > 0) { for (int64_t i = 0; i < block.length; ++i) { - if (WasTruncatedMaybeNull(out_data[i], in_data[i], + if (WasTruncated::CheckMaybeNull(out_data[i], in_data[i], bit_util::GetBit(bitmap, offset_position + i))) { return GetErrorMessage(in_data[i]); } } } else { for (int64_t i = 0; i < block.length; ++i) { - if (WasTruncated(out_data[i], in_data[i])) { + if (WasTruncated::Check(out_data[i], in_data[i])) { return GetErrorMessage(in_data[i]); } } @@ -701,7 +724,7 @@ std::shared_ptr GetCastToInteger(std::string name) { } // Cast from half-float - DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty, CastFloatingToInteger)); // From other numbers to integer @@ -823,14 +846,14 @@ std::shared_ptr GetCastToHalfFloat() { // Casts from integer to floating point for (const std::shared_ptr& in_ty : IntTypes()) { - DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, TypeTraits::type_singleton(), CastIntegerToFloating)); } // Cast from other strings to half float. for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { auto exec = GenerateVarBinaryBase(*in_ty); - DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, TypeTraits::type_singleton(), exec)); } From 465093be00abed9b14d268b4e20a13dc2d4440cb Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Wed, 6 Mar 2024 11:30:54 -0600 Subject: [PATCH 09/21] Fix to CastPrimitve for other floating point types to half float --- cpp/src/arrow/compare.cc | 31 +++++++++++++++++++ .../compute/kernels/scalar_cast_internal.cc | 2 +- .../arrow/compute/kernels/scalar_cast_test.cc | 17 +++++++++- cpp/src/arrow/ipc/json_simple.cc | 24 ++++++++------ 4 files changed, 63 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index bb632e2eb91..f821c61e7f0 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -44,6 +44,7 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/float16.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" @@ -59,6 +60,7 @@ using internal::BitmapReader; using internal::BitmapUInt64Reader; using internal::checked_cast; using internal::OptionalBitmapEquals; +using util::Float16; // ---------------------------------------------------------------------- // Public method implementations @@ -95,6 +97,31 @@ struct FloatingEquality { const T epsilon; }; +// For half-float equality. +template +struct FloatingEquality { + explicit FloatingEquality(const EqualOptions& options) + : epsilon(static_cast(options.atol())) {} + + bool operator()(uint16_t x, uint16_t y) const { + Float16 f_x = Float16::FromBits(x); + Float16 f_y = Float16::FromBits(y); + if (x == y) { + return Flags::signed_zeros_equal || (f_x.signbit() == f_y.signbit()); + } + if (Flags::nans_equal && f_x.is_nan() && f_y.is_nan()) { + return true; + } + if (Flags::approximate && (fabs(f_x.ToFloat() - f_y.ToFloat()) <= epsilon)) { + return true; + } + return false; + } + + const float epsilon; +}; + + template struct FloatingEqualityDispatcher { const EqualOptions& options; @@ -259,6 +286,8 @@ class RangeDataEqualsImpl { Status Visit(const DoubleType& type) { return CompareFloating(type); } + Status Visit(const HalfFloatType& type) { return CompareFloating(type); } + // Also matches StringType Status Visit(const BinaryType& type) { return CompareBinary(type); } @@ -863,6 +892,8 @@ class ScalarEqualsVisitor { Status Visit(const DoubleScalar& left) { return CompareFloating(left); } + Status Visit(const HalfFloatScalar& left) { return CompareFloating(left); } + template enable_if_t::value, Status> Visit(const T& left) { const auto& right = checked_cast(right_); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 7ed43b1ef7e..1889d4740ff 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -52,7 +52,7 @@ struct CastPrimitive { // Converting floating types to half float. template -struct CastPrimitive> { +struct CastPrimitive> { static void Exec(const ArraySpan& arr, ArraySpan* out) { using InT = typename InType::c_type; const InT* in_values = arr.GetValues(1); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a8acf68f66c..18520f743e9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -389,7 +389,7 @@ TEST(Cast, ToIntDowncastUnsafe) { } TEST(Cast, FloatingToInt) { - for (auto from : {float32(), float64()}) { + for (auto from : {float16(), float32(), float64()}) { for (auto to : {int32(), int64()}) { // float to int no truncation CheckCast(ArrayFromJSON(from, "[1.0, null, 0.0, -1.0, 5.0]"), @@ -407,6 +407,16 @@ TEST(Cast, FloatingToInt) { } } +TEST(Cast, FloatingToFloating) { + for (auto from : {float16(), float32(), float64()}) { + for (auto to : {float16(), float32(), float64()}) { + CheckCast(ArrayFromJSON(from, "[1.0, 0.0, -1.0, 5.0]"), + ArrayFromJSON(to, "[1.0, 0.0, -1.0, 5.0]")); + + } + } +} + TEST(Cast, IntToFloating) { for (auto from : {uint32(), int32()}) { std::string two_24 = "[16777216, 16777217]"; @@ -2228,6 +2238,11 @@ TEST(Cast, FloatingToString) { CheckCast( ArrayFromJSON(float64(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); + + CheckCast( + ArrayFromJSON(float16(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), + ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); + } } diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index 1438e0365a3..c64bf3665f6 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -252,20 +252,26 @@ enable_if_unsigned_integer ConvertNumber(const rj::Value& json_obj, } // Convert float16/HalfFloatType -template +template enable_if_half_float ConvertNumber(const rj::Value& json_obj, const DataType& type, uint16_t* out) { if (json_obj.IsDouble()) { double f64 = json_obj.GetDouble(); - Float16 f16 = Float16(f64); - *out = f16.bits(); - if (f16.ToDouble() == f64) { - return Status::OK(); - } else { - return Status::Invalid("Value ", f64, " out of bounds for ", type); - } - } else { + *out = Float16(f64).bits(); + return Status::OK(); + } else if (json_obj.IsUint()) { + uint32_t u32t = json_obj.GetUint(); + double f64 = static_cast(u32t); + *out = Float16(f64).bits(); + return Status::OK(); + } else if (json_obj.IsInt()) { + int32_t i32t = json_obj.GetInt(); + double f64 = static_cast(i32t); + *out = Float16(f64).bits(); + return Status::OK(); + } + else { *out = static_cast(0); return JSONTypeError("unsigned int", json_obj.GetType()); } From fb72199b0d4afa88c356e147d9d83a8a2cf2170b Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 8 Mar 2024 10:46:37 -0600 Subject: [PATCH 10/21] Remove fixme comment --- cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 0890cc02bc5..d272bb9bb1a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -174,8 +174,6 @@ Status CheckFloatToIntTruncation(const ExecValue& input, const ExecResult& outpu return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); case Type::DOUBLE: return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); - // FIXME: Is this OK? It seems like the underlying check may not operator correctly - // on a type that is backed by a uint16_t. case Type::HALF_FLOAT: return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); default: From cad49363325568b755bddd15150c6c1b76f8a44d Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 8 Mar 2024 10:57:33 -0600 Subject: [PATCH 11/21] Remove debug std::cout --- cpp/src/arrow/compute/kernels/scalar_cast_internal.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 1889d4740ff..9c6bbdc0b61 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -111,7 +111,6 @@ struct CastPrimitive> { template struct CastPrimitive> { static void Exec(const ArraySpan& arr, ArraySpan* out) { - std::cout << "HalfFloat to Int CastPrimitive()!\n"; using OutT = typename OutType::c_type; const uint16_t* in_values = arr.GetValues(1); OutT* out_values = out->GetValues(1); From 9b2e53f6b093f5cdf2ad015521bbcd41c2ce48d7 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 8 Mar 2024 11:14:03 -0600 Subject: [PATCH 12/21] clang format run --- cpp/src/arrow/compare.cc | 5 +- .../compute/kernels/scalar_cast_internal.cc | 72 ++++++++--------- .../compute/kernels/scalar_cast_numeric.cc | 77 ++++++++++--------- .../compute/kernels/scalar_cast_string.cc | 7 +- .../arrow/compute/kernels/scalar_cast_test.cc | 6 +- cpp/src/arrow/ipc/json_simple.cc | 46 ++++++----- cpp/src/arrow/util/formatting.cc | 4 +- cpp/src/arrow/util/value_parsing.cc | 2 +- 8 files changed, 108 insertions(+), 111 deletions(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index f821c61e7f0..e983b47e39d 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -100,8 +100,8 @@ struct FloatingEquality { // For half-float equality. template struct FloatingEquality { - explicit FloatingEquality(const EqualOptions& options) - : epsilon(static_cast(options.atol())) {} + explicit FloatingEquality(const EqualOptions& options) + : epsilon(static_cast(options.atol())) {} bool operator()(uint16_t x, uint16_t y) const { Float16 f_x = Float16::FromBits(x); @@ -121,7 +121,6 @@ struct FloatingEquality { const float epsilon; }; - template struct FloatingEqualityDispatcher { const EqualOptions& options; diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc index 9c6bbdc0b61..d8c40887596 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_internal.cc @@ -25,9 +25,9 @@ namespace arrow { +using arrow::util::Float16; using internal::checked_cast; using internal::PrimitiveScalarBase; -using arrow::util::Float16; namespace compute { namespace internal { @@ -64,24 +64,26 @@ struct CastPrimitive struct CastPrimitive> { - static void Exec(const ArraySpan& arr, ArraySpan* out) { - const uint16_t* in_values = arr.GetValues(1); - float* out_values = out->GetValues(1); - for (int64_t i = 0; i < arr.length; ++i) { - *out_values++ = Float16::FromBits(*in_values++).ToFloat(); - } +template <> +struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + const uint16_t* in_values = arr.GetValues(1); + float* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = Float16::FromBits(*in_values++).ToFloat(); } + } }; -template<> struct CastPrimitive> { - static void Exec(const ArraySpan& arr, ArraySpan* out) { - const uint16_t* in_values = arr.GetValues(1); - double* out_values = out->GetValues(1); - for (int64_t i = 0; i < arr.length; ++i) { - *out_values++ = Float16::FromBits(*in_values++).ToDouble(); - } +template <> +struct CastPrimitive> { + static void Exec(const ArraySpan& arr, ArraySpan* out) { + const uint16_t* in_values = arr.GetValues(1); + double* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = Float16::FromBits(*in_values++).ToDouble(); } + } }; template @@ -94,34 +96,32 @@ struct CastPrimitive: }; // Cast int to half float -template +template struct CastPrimitive> { - static void Exec(const ArraySpan& arr, ArraySpan* out) { - using InT = typename InType::c_type; - const InT* in_values = arr.GetValues(1); - uint16_t* out_values = out->GetValues(1); - for (int64_t i = 0; i < arr.length; ++i) { - float temp = static_cast(*in_values++); - *out_values++ = Float16(temp).bits(); - } - } + static void Exec(const ArraySpan& arr, ArraySpan* out) { + using InT = typename InType::c_type; + const InT* in_values = arr.GetValues(1); + uint16_t* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + float temp = static_cast(*in_values++); + *out_values++ = Float16(temp).bits(); + } + } }; // Cast half float to int -template +template struct CastPrimitive> { - static void Exec(const ArraySpan& arr, ArraySpan* out) { - using OutT = typename OutType::c_type; - const uint16_t* in_values = arr.GetValues(1); - OutT* out_values = out->GetValues(1); - for (int64_t i = 0; i < arr.length; ++i) { - *out_values++ = static_cast(Float16::FromBits(*in_values++).ToFloat()); - } - } + static void Exec(const ArraySpan& arr, ArraySpan* out) { + using OutT = typename OutType::c_type; + const uint16_t* in_values = arr.GetValues(1); + OutT* out_values = out->GetValues(1); + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = static_cast(Float16::FromBits(*in_values++).ToFloat()); + } + } }; - - template void CastNumberImpl(Type::type out_type, const ArraySpan& input, ArraySpan* out) { switch (out_type) { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index d272bb9bb1a..3df86e7d693 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -59,31 +59,29 @@ Status CastFloatingToFloating(KernelContext*, const ExecSpan& batch, ExecResult* // ---------------------------------------------------------------------- // Implement fast safe floating point to integer cast // -template +template struct WasTruncated { - static bool Check(OutT out_val, InT in_val) { - return static_cast(out_val) != in_val; - } + static bool Check(OutT out_val, InT in_val) { + return static_cast(out_val) != in_val; + } - static bool CheckMaybeNull(OutT out_val, InT in_val, bool is_valid) { - return is_valid && static_cast(out_val) != in_val; - } + static bool CheckMaybeNull(OutT out_val, InT in_val, bool is_valid) { + return is_valid && static_cast(out_val) != in_val; + } }; // Half float to int -template +template struct WasTruncated { - using OutT = typename OutType::c_type; - static bool Check(OutT out_val, uint16_t in_val) { - return static_cast(out_val) != Float16::FromBits(in_val).ToFloat(); - } - - static bool CheckMaybeNull(OutT out_val, uint16_t in_val, bool is_valid) { - return is_valid && static_cast(out_val) != Float16::FromBits(in_val).ToFloat(); - } + using OutT = typename OutType::c_type; + static bool Check(OutT out_val, uint16_t in_val) { + return static_cast(out_val) != Float16::FromBits(in_val).ToFloat(); + } + static bool CheckMaybeNull(OutT out_val, uint16_t in_val, bool is_valid) { + return is_valid && static_cast(out_val) != Float16::FromBits(in_val).ToFloat(); + } }; // InType is a floating point type we are planning to cast to integer @@ -109,7 +107,8 @@ Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) { if (block.popcount == block.length) { // Fast path: branchless for (int64_t i = 0; i < block.length; ++i) { - block_out_of_bounds |= WasTruncated::Check(out_data[i], in_data[i]); + block_out_of_bounds |= + WasTruncated::Check(out_data[i], in_data[i]); } } else if (block.popcount > 0) { // Indices have nulls, must only boundscheck non-null values @@ -121,8 +120,9 @@ Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) { if (ARROW_PREDICT_FALSE(block_out_of_bounds)) { if (input.GetNullCount() > 0) { for (int64_t i = 0; i < block.length; ++i) { - if (WasTruncated::CheckMaybeNull(out_data[i], in_data[i], - bit_util::GetBit(bitmap, offset_position + i))) { + if (WasTruncated::CheckMaybeNull( + out_data[i], in_data[i], + bit_util::GetBit(bitmap, offset_position + i))) { return GetErrorMessage(in_data[i]); } } @@ -175,7 +175,8 @@ Status CheckFloatToIntTruncation(const ExecValue& input, const ExecResult& outpu case Type::DOUBLE: return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); case Type::HALF_FLOAT: - return CheckFloatToIntTruncationImpl(input.array, *output.array_span()); + return CheckFloatToIntTruncationImpl(input.array, + *output.array_span()); default: break; } @@ -320,9 +321,11 @@ struct CastFunctor< template <> struct CastFunctor> { - static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - return applicator::ScalarUnaryNotNull>::Exec(ctx, batch, out); - } + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + return applicator::ScalarUnaryNotNull>::Exec(ctx, batch, + out); + } }; // ---------------------------------------------------------------------- @@ -722,8 +725,8 @@ std::shared_ptr GetCastToInteger(std::string name) { } // Cast from half-float - DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, - out_ty, CastFloatingToInteger)); + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty, + CastFloatingToInteger)); // From other numbers to integer AddCommonNumberCasts(out_ty, func.get()); @@ -752,8 +755,8 @@ std::shared_ptr GetCastToFloating(std::string name) { } // From half-float to float/double - DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, - {InputType(Type::HALF_FLOAT)}, out_ty, CastFloatingToFloating)); + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty, + CastFloatingToFloating)); // From other numbers to floating point AddCommonNumberCasts(out_ty, func.get()); @@ -838,27 +841,27 @@ std::shared_ptr GetCastToDecimal256() { std::shared_ptr GetCastToHalfFloat() { // HalfFloat is a bit brain-damaged for now - auto func = - std::make_shared("func", Type::HALF_FLOAT); + auto func = std::make_shared("func", Type::HALF_FLOAT); AddCommonCasts(Type::HALF_FLOAT, float16(), func.get()); // Casts from integer to floating point for (const std::shared_ptr& in_ty : IntTypes()) { DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, - TypeTraits::type_singleton(), CastIntegerToFloating)); + TypeTraits::type_singleton(), + CastIntegerToFloating)); } // Cast from other strings to half float. for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { auto exec = GenerateVarBinaryBase(*in_ty); DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, - TypeTraits::type_singleton(), exec)); + TypeTraits::type_singleton(), exec)); } - DCHECK_OK(func.get()->AddKernel(Type::FLOAT, - {InputType(Type::FLOAT)}, float16(), CastFloatingToFloating)); - DCHECK_OK(func.get()->AddKernel(Type::DOUBLE, - {InputType(Type::DOUBLE)}, float16(), CastFloatingToFloating)); + DCHECK_OK(func.get()->AddKernel(Type::FLOAT, {InputType(Type::FLOAT)}, float16(), + CastFloatingToFloating)); + DCHECK_OK(func.get()->AddKernel(Type::DOUBLE, {InputType(Type::DOUBLE)}, float16(), + CastFloatingToFloating)); return func; } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 1ef02278a84..3a8352a9b87 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -438,10 +438,9 @@ void AddNumberToStringCasts(CastFunction* func) { NullHandling::COMPUTED_NO_PREALLOCATE)); } - DCHECK_OK( - func->AddKernel(Type::HALF_FLOAT, {float16()}, out_ty, - NumericToStringCastFunctor::Exec, - NullHandling::COMPUTED_NO_PREALLOCATE)); + DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {float16()}, out_ty, + NumericToStringCastFunctor::Exec, + NullHandling::COMPUTED_NO_PREALLOCATE)); } template diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 18520f743e9..37c39f50619 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -411,9 +411,8 @@ TEST(Cast, FloatingToFloating) { for (auto from : {float16(), float32(), float64()}) { for (auto to : {float16(), float32(), float64()}) { CheckCast(ArrayFromJSON(from, "[1.0, 0.0, -1.0, 5.0]"), - ArrayFromJSON(to, "[1.0, 0.0, -1.0, 5.0]")); - - } + ArrayFromJSON(to, "[1.0, 0.0, -1.0, 5.0]")); + } } } @@ -2242,7 +2241,6 @@ TEST(Cast, FloatingToString) { CheckCast( ArrayFromJSON(float16(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); - } } diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index c64bf3665f6..9fd449831c9 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -36,9 +36,9 @@ #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/float16.h" #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" -#include "arrow/util/float16.h" #include "arrow/json/rapidjson_defs.h" @@ -235,8 +235,8 @@ enable_if_physical_signed_integer ConvertNumber(const rj::Value& json // Convert single unsigned integer value template enable_if_unsigned_integer ConvertNumber(const rj::Value& json_obj, - const DataType& type, - typename T::c_type* out) { + const DataType& type, + typename T::c_type* out) { if (json_obj.IsUint64()) { uint64_t v64 = json_obj.GetUint64(); *out = static_cast(v64); @@ -254,27 +254,25 @@ enable_if_unsigned_integer ConvertNumber(const rj::Value& json_obj, // Convert float16/HalfFloatType template enable_if_half_float ConvertNumber(const rj::Value& json_obj, - const DataType& type, - uint16_t* out) { - if (json_obj.IsDouble()) { - double f64 = json_obj.GetDouble(); - *out = Float16(f64).bits(); - return Status::OK(); - } else if (json_obj.IsUint()) { - uint32_t u32t = json_obj.GetUint(); - double f64 = static_cast(u32t); - *out = Float16(f64).bits(); - return Status::OK(); - } else if (json_obj.IsInt()) { - int32_t i32t = json_obj.GetInt(); - double f64 = static_cast(i32t); - *out = Float16(f64).bits(); - return Status::OK(); - } - else { - *out = static_cast(0); - return JSONTypeError("unsigned int", json_obj.GetType()); - } + const DataType& type, uint16_t* out) { + if (json_obj.IsDouble()) { + double f64 = json_obj.GetDouble(); + *out = Float16(f64).bits(); + return Status::OK(); + } else if (json_obj.IsUint()) { + uint32_t u32t = json_obj.GetUint(); + double f64 = static_cast(u32t); + *out = Float16(f64).bits(); + return Status::OK(); + } else if (json_obj.IsInt()) { + int32_t i32t = json_obj.GetInt(); + double f64 = static_cast(i32t); + *out = Float16(f64).bits(); + return Status::OK(); + } else { + *out = static_cast(0); + return JSONTypeError("unsigned int", json_obj.GetType()); + } } // Convert single floating point value diff --git a/cpp/src/arrow/util/formatting.cc b/cpp/src/arrow/util/formatting.cc index b021ca450d0..c5a7e03f857 100644 --- a/cpp/src/arrow/util/formatting.cc +++ b/cpp/src/arrow/util/formatting.cc @@ -18,13 +18,13 @@ #include "arrow/util/formatting.h" #include "arrow/util/config.h" #include "arrow/util/double_conversion.h" -#include "arrow/util/logging.h" #include "arrow/util/float16.h" +#include "arrow/util/logging.h" namespace arrow { -using util::double_conversion::DoubleToStringConverter; using util::Float16; +using util::double_conversion::DoubleToStringConverter; static constexpr int kMinBufferSize = DoubleToStringConverter::kBase10MaximalLength + 1; diff --git a/cpp/src/arrow/util/value_parsing.cc b/cpp/src/arrow/util/value_parsing.cc index ed08ddf08f9..e84aac995e3 100644 --- a/cpp/src/arrow/util/value_parsing.cc +++ b/cpp/src/arrow/util/value_parsing.cc @@ -22,8 +22,8 @@ #include #include -#include "arrow/vendored/fast_float/fast_float.h" #include "arrow/util/float16.h" +#include "arrow/vendored/fast_float/fast_float.h" using arrow::util::Float16; From 9d1a9794d7ad7ada2ecce582a2d31db21f738cdc Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 8 Mar 2024 11:18:59 -0600 Subject: [PATCH 13/21] Update notes about data types available in different arrow libraries --- docs/source/status.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/source/status.rst b/docs/source/status.rst index 9af2fd1921e..71d33eaa652 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -40,7 +40,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓ | | +| Float16 | ✓ | ✓ (1) | ✓ | ✓ | ✓ (2)| ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -104,7 +104,7 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift | | (special) | | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+=======+ -| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -113,10 +113,9 @@ Data Types Notes: -* \(1) Casting to/from Float16 in C++ is not supported. -* \(2) Casting to/from Float16 in Java is not supported. -* \(3) Float16 support in C# is only available when targeting .NET 6+. -* \(4) Nested dictionaries not supported +* \(1) Casting to/from Float16 in Java is not supported. +* \(2) Float16 support in C# is only available when targeting .NET 6+. +* \(3) Nested dictionaries not supported .. seealso:: The :ref:`format_columnar` specification. From 6f3e299ca47fab9cb621f7c6d4b573130d14b927 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Mon, 18 Mar 2024 11:48:27 -0500 Subject: [PATCH 14/21] Possible fix for TestBatchToTensorTest failure --- cpp/src/arrow/record_batch_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 81154452d72..12fe892ff3b 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -36,11 +36,14 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/type.h" +#include "arrow/util/float16.h" #include "arrow/util/iterator.h" #include "arrow/util/key_value_metadata.h" namespace arrow { +using util::Float16; + class TestRecordBatch : public ::testing::Test {}; TEST_F(TestRecordBatch, Equals) { From d9d3f76d59b685c17fe813476170d9d8838d5cf3 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Mon, 18 Mar 2024 15:00:51 -0500 Subject: [PATCH 15/21] 'fix' TestIntegers failure --- cpp/src/arrow/ipc/json_simple_test.cc | 31 ++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index ea3a9ae1a14..306a6378e6a 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -44,6 +44,7 @@ #include "arrow/util/bitmap_builders.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/float16.h" #if defined(_MSC_VER) // "warning C4307: '+': integral constant overflow" @@ -51,6 +52,9 @@ #endif namespace arrow { + +using util::Float16; + namespace ipc { namespace internal { namespace json { @@ -185,6 +189,18 @@ class TestIntegers : public ::testing::Test { TYPED_TEST_SUITE_P(TestIntegers); +template +std::vector TestIntegersMutateIfNeeded(std::vector data) { + return data; +} + +// TODO: This works, but is it the right way to do this? +template <> +std::vector TestIntegersMutateIfNeeded(std::vector data) { + std::for_each(data.begin(), data.end(), [](HalfFloatType::c_type &value) { value = Float16(value).bits(); }); + return data; +} + TYPED_TEST_P(TestIntegers, Basics) { using T = TypeParam; using c_type = typename T::c_type; @@ -193,16 +209,16 @@ TYPED_TEST_P(TestIntegers, Basics) { auto type = this->type(); AssertJSONArray(type, "[]", {}); - AssertJSONArray(type, "[4, 0, 5]", {4, 0, 5}); - AssertJSONArray(type, "[4, null, 5]", {true, false, true}, {4, 0, 5}); + AssertJSONArray(type, "[4, 0, 5]", TestIntegersMutateIfNeeded({4, 0, 5})); + AssertJSONArray(type, "[4, null, 5]", {true, false, true}, TestIntegersMutateIfNeeded({4, 0, 5})); // Test limits const auto min_val = std::numeric_limits::min(); const auto max_val = std::numeric_limits::max(); std::string json_string = JSONArray(0, 1, min_val); - AssertJSONArray(type, json_string, {0, 1, min_val}); + AssertJSONArray(type, json_string, TestIntegersMutateIfNeeded({0, 1, min_val})); json_string = JSONArray(0, 1, max_val); - AssertJSONArray(type, json_string, {0, 1, max_val}); + AssertJSONArray(type, json_string, TestIntegersMutateIfNeeded({0, 1, max_val})); } TYPED_TEST_P(TestIntegers, Errors) { @@ -269,7 +285,12 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers, UInt8Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type); -INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType); +// FIXME: I understand that HalfFloatType is backed by a uint16_t, but does it +// make sense to run this test over it? +// The way ConvertNumber for HalfFloatType is currently written, it allows the +// conversion of floating point notation to a half float, which causes failures +// in this test, one example is asserting 0.0 cannot be parsed as a half float. +// INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType); template class TestStrings : public ::testing::Test { From 66750d3045edaf321247fea94d9e5f8b42af152c Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Mon, 18 Mar 2024 15:07:43 -0500 Subject: [PATCH 16/21] clang-format --- cpp/src/arrow/ipc/json_simple_test.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index 306a6378e6a..b3f7fc5b345 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -190,14 +190,17 @@ class TestIntegers : public ::testing::Test { TYPED_TEST_SUITE_P(TestIntegers); template -std::vector TestIntegersMutateIfNeeded(std::vector data) { - return data; +std::vector TestIntegersMutateIfNeeded( + std::vector data) { + return data; } // TODO: This works, but is it the right way to do this? template <> -std::vector TestIntegersMutateIfNeeded(std::vector data) { - std::for_each(data.begin(), data.end(), [](HalfFloatType::c_type &value) { value = Float16(value).bits(); }); +std::vector TestIntegersMutateIfNeeded( + std::vector data) { + std::for_each(data.begin(), data.end(), + [](HalfFloatType::c_type& value) { value = Float16(value).bits(); }); return data; } @@ -210,7 +213,8 @@ TYPED_TEST_P(TestIntegers, Basics) { AssertJSONArray(type, "[]", {}); AssertJSONArray(type, "[4, 0, 5]", TestIntegersMutateIfNeeded({4, 0, 5})); - AssertJSONArray(type, "[4, null, 5]", {true, false, true}, TestIntegersMutateIfNeeded({4, 0, 5})); + AssertJSONArray(type, "[4, null, 5]", {true, false, true}, + TestIntegersMutateIfNeeded({4, 0, 5})); // Test limits const auto min_val = std::numeric_limits::min(); @@ -286,7 +290,7 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type); // FIXME: I understand that HalfFloatType is backed by a uint16_t, but does it -// make sense to run this test over it? +// make sense to run this test over it? // The way ConvertNumber for HalfFloatType is currently written, it allows the // conversion of floating point notation to a half float, which causes failures // in this test, one example is asserting 0.0 cannot be parsed as a half float. From a3eb4f8dcbf337f98a5b921924a698f01a497821 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Tue, 19 Mar 2024 08:48:50 -0500 Subject: [PATCH 17/21] use Float16 operator for comparing equality --- cpp/src/arrow/compare.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index e983b47e39d..5275d04a261 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -106,7 +106,7 @@ struct FloatingEquality { bool operator()(uint16_t x, uint16_t y) const { Float16 f_x = Float16::FromBits(x); Float16 f_y = Float16::FromBits(y); - if (x == y) { + if (f_x == f_y) { return Flags::signed_zeros_equal || (f_x.signbit() == f_y.signbit()); } if (Flags::nans_equal && f_x.is_nan() && f_y.is_nan()) { From 6a646a43617c6cad6d63878c294512cc291e00ce Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Wed, 20 Mar 2024 09:52:57 -0500 Subject: [PATCH 18/21] Revert "use Float16 operator for comparing equality" This reverts commit d422b191918815758ecc604be21aa1ba1f21d40a. Apparently bitwise equal uint16_ts are not being treated as equal when using Float16's equality operator which is *very concerning* to me. --- cpp/src/arrow/compare.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 5275d04a261..e983b47e39d 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -106,7 +106,7 @@ struct FloatingEquality { bool operator()(uint16_t x, uint16_t y) const { Float16 f_x = Float16::FromBits(x); Float16 f_y = Float16::FromBits(y); - if (f_x == f_y) { + if (x == y) { return Flags::signed_zeros_equal || (f_x.signbit() == f_y.signbit()); } if (Flags::nans_equal && f_x.is_nan() && f_y.is_nan()) { From 90ab8a07f908fbdf813449785ad95ccf8f5803fe Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Wed, 20 Mar 2024 17:37:07 -0500 Subject: [PATCH 19/21] Change c_glib test-half-float-scalar string test to match current behavior --- c_glib/test/test-half-float-scalar.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/test/test-half-float-scalar.rb b/c_glib/test/test-half-float-scalar.rb index ac41f91ece6..3073d84d796 100644 --- a/c_glib/test/test-half-float-scalar.rb +++ b/c_glib/test/test-half-float-scalar.rb @@ -41,7 +41,7 @@ def test_equal end def test_to_s - assert_equal("[\n #{@half_float}\n]", @scalar.to_s) + assert_equal("1.0009765625", @scalar.to_s) end def test_value From c9348e8dd276d9cd1baa1443a4541ac206efb144 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 22 Mar 2024 08:31:24 -0500 Subject: [PATCH 20/21] Refactor FloatingToString --- .../arrow/compute/kernels/scalar_cast_test.cc | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 37c39f50619..4155e428a62 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2230,17 +2230,11 @@ TEST(Cast, IntToString) { TEST(Cast, FloatingToString) { for (auto string_type : {utf8(), large_utf8()}) { - CheckCast( - ArrayFromJSON(float32(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), - ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); - - CheckCast( - ArrayFromJSON(float64(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), - ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); - - CheckCast( - ArrayFromJSON(float16(), "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), - ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); + for (auto float_type : {float16(), float32(), float64()}) { + CheckCast(ArrayFromJSON(float_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), + ArrayFromJSON(string_type, + R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])")); + } } } From 87b244c5d326570808a7860ab4328817db8854e2 Mon Sep 17 00:00:00 2001 From: Clif Houck Date: Fri, 22 Mar 2024 08:40:22 -0500 Subject: [PATCH 21/21] Make float type the outer loop in FloatingToString test --- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 4155e428a62..af62b4da2ca 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2229,8 +2229,8 @@ TEST(Cast, IntToString) { } TEST(Cast, FloatingToString) { - for (auto string_type : {utf8(), large_utf8()}) { - for (auto float_type : {float16(), float32(), float64()}) { + for (auto float_type : {float16(), float32(), float64()}) { + for (auto string_type : {utf8(), large_utf8()}) { CheckCast(ArrayFromJSON(float_type, "[0.0, -0.0, 1.5, -Inf, Inf, NaN, null]"), ArrayFromJSON(string_type, R"(["0", "-0", "1.5", "-inf", "inf", "nan", null])"));