diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index 125892afe59..6ffc36d2069 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -639,11 +639,11 @@ static Status ConvertTimes(PandasOptions options, const ChunkedArray& data, static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; - OwnedRef decimal_ref; - OwnedRef Decimal_ref; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref)); - RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal", &Decimal_ref)); - PyObject* Decimal = Decimal_ref.obj(); + OwnedRef decimal; + OwnedRef Decimal; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); + RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal)); + PyObject* decimal_constructor = Decimal.obj(); for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = static_cast(*data.chunk(c)); @@ -653,7 +653,8 @@ static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data, Py_INCREF(Py_None); *out_values++ = Py_None; } else { - *out_values++ = internal::DecimalFromString(Decimal, arr.FormatValue(i)); + *out_values++ = + internal::DecimalFromString(decimal_constructor, arr.FormatValue(i)); RETURN_IF_PYERROR(); } } diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index a286c6bd5e9..891793cc9dc 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -76,7 +76,18 @@ class ScalarVisitor { timestamp_count_(0), float_count_(0), binary_count_(0), - unicode_count_(0) {} + unicode_count_(0), + decimal_count_(0), + max_decimal_metadata_(std::numeric_limits::min(), + std::numeric_limits::min()), + decimal_type_() { + OwnedRefNoGIL decimal_module; + Status status = ::arrow::py::internal::ImportModule("decimal", &decimal_module); + DCHECK(status.ok()) << "Unable to import decimal module"; + status = ::arrow::py::internal::ImportFromModule(decimal_module, "Decimal", + &decimal_type_); + DCHECK(status.ok()) << "Unable to import decimal.Decimal"; + } Status Visit(PyObject* obj) { ++total_count_; @@ -111,10 +122,16 @@ class ScalarVisitor { ss << type->ToString(); return Status::Invalid(ss.str()); } + } else if (PyObject_IsInstance(obj, decimal_type_.obj())) { + // Don't infer anything if we encounter a Decimal('nan') + if (!internal::PyDecimal_ISNAN(obj)) { + RETURN_NOT_OK(max_decimal_metadata_.Update(obj)); + } + ++decimal_count_; } else { // TODO(wesm): accumulate error information somewhere static std::string supported_types = - "bool, float, integer, date, datetime, bytes, unicode"; + "bool, float, integer, date, datetime, bytes, unicode, decimal"; std::stringstream ss; ss << "Error inferring Arrow data type for collection of Python objects. "; RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss)); @@ -125,7 +142,9 @@ class ScalarVisitor { std::shared_ptr GetType() { // TODO(wesm): handling mixed-type cases - if (float_count_) { + if (decimal_count_) { + return decimal(max_decimal_metadata_.precision(), max_decimal_metadata_.scale()); + } else if (float_count_) { return float64(); } else if (int_count_) { // TODO(wesm): tighter type later @@ -157,8 +176,13 @@ class ScalarVisitor { int64_t float_count_; int64_t binary_count_; int64_t unicode_count_; + int64_t decimal_count_; + + internal::DecimalMetadata max_decimal_metadata_; + // Place to accumulate errors // std::vector errors_; + OwnedRefNoGIL decimal_type_; }; static constexpr int MAX_NESTING_LEVELS = 32; @@ -379,17 +403,14 @@ class TypedConverter : public SeqConverter { BuilderType* typed_builder_; }; -// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull() +// We use the CRTP trick here to devirtualize the AppendItem(), AppendNull(), and IsNull() // method calls. template class TypedConverterVisitor : public TypedConverter { public: Status AppendSingle(PyObject* obj) override { - if (obj == Py_None) { - return static_cast(this)->AppendNull(); - } else { - return static_cast(this)->AppendItem(obj); - } + auto self = static_cast(this); + return self->IsNull(obj) ? self->AppendNull() : self->AppendItem(obj); } Status AppendMultiple(PyObject* obj, int64_t size) override { @@ -409,6 +430,7 @@ class TypedConverterVisitor : public TypedConverter { // Append a missing item (default implementation) Status AppendNull() { return this->typed_builder_->AppendNull(); } + bool IsNull(PyObject* obj) const { return obj == Py_None; } }; class NullConverter : public TypedConverterVisitor { @@ -830,12 +852,16 @@ class DecimalConverter public: // Append a non-missing item Status AppendItem(PyObject* obj) { - /// TODO(phillipc): Check for nan? Decimal128 value; const auto& type = static_cast(*typed_builder_->type()); RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value)); return typed_builder_->Append(value); } + + bool IsNull(PyObject* obj) const { + return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) || + (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj)); + } }; // Dynamic constructor for sequence converters diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index df1db99911b..1c83205e877 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -61,6 +61,7 @@ namespace internal { Status ImportModule(const std::string& module_name, OwnedRef* ref) { PyObject* module = PyImport_ImportModule(module_name.c_str()); RETURN_IF_PYERROR(); + DCHECK_NE(module, nullptr) << "unable to import the " << module_name << " module"; ref->reset(module); return Status::OK(); } @@ -71,6 +72,7 @@ Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRe PyObject* attr = PyObject_GetAttrString(module.obj(), name.c_str()); RETURN_IF_PYERROR(); + DCHECK_NE(attr, nullptr) << "unable to import the " << name << " object"; ref->reset(attr); return Status::OK(); } @@ -93,8 +95,13 @@ Status PythonDecimalToString(PyObject* python_decimal, std::string* out) { return Status::OK(); } -Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision, - int32_t* scale) { +// \brief Infer the precision and scale of a Python decimal.Decimal instance +// \param python_decimal[in] An instance of decimal.Decimal +// \param precision[out] The value of the inferred precision +// \param scale[out] The value of the inferred scale +// \return The status of the operation +static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision, + int32_t* scale) { DCHECK_NE(python_decimal, NULLPTR); DCHECK_NE(precision, NULLPTR); DCHECK_NE(scale, NULLPTR); @@ -193,6 +200,53 @@ Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) { return Status::OK(); } +bool PyFloat_isnan(PyObject* obj) { + return PyFloat_Check(obj) && std::isnan(PyFloat_AS_DOUBLE(obj)); +} + +bool PyDecimal_Check(PyObject* obj) { + // TODO(phillipc): Is this expensive? + OwnedRef Decimal; + OwnedRef decimal; + Status status = ImportModule("decimal", &decimal); + DCHECK(status.ok()) << "Error during import of the decimal module"; + status = ImportFromModule(decimal, "Decimal", &Decimal); + DCHECK(status.ok()) + << "Error during import of the Decimal object from the decimal module"; + const int32_t result = PyObject_IsInstance(obj, Decimal.obj()); + DCHECK_NE(result, -1) << " error during PyObject_IsInstance check"; + return result == 1; +} + +bool PyDecimal_ISNAN(PyObject* obj) { + DCHECK(PyDecimal_Check(obj)) << "obj is not an instance of decimal.Decimal"; + OwnedRef is_nan(PyObject_CallMethod(obj, "is_nan", "")); + return PyObject_IsTrue(is_nan.obj()) == 1; +} + +DecimalMetadata::DecimalMetadata() + : precision_(std::numeric_limits::min()), + scale_(std::numeric_limits::min()) {} + +DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale) + : precision_(precision), scale_(scale) {} + +Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_scale) { + precision_ = std::max(precision_, suggested_precision); + scale_ = std::max(scale_, suggested_scale); + return Status::OK(); +} + +Status DecimalMetadata::Update(PyObject* object) { + DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal"; + DCHECK(!PyDecimal_ISNAN(object)) + << "Decimal object cannot be NAN when inferring precision and scale"; + int32_t precision; + int32_t scale; + RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale)); + return Update(precision, scale); +} + } // namespace internal } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h index c0171aa2f5a..d39c62824c2 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/cpp/src/arrow/python/helpers.h @@ -36,29 +36,89 @@ namespace py { class OwnedRef; -ARROW_EXPORT -std::shared_ptr GetPrimitiveType(Type::type type); +// \brief Get an arrow DataType instance from Arrow's Type::type enum +// \param[in] type One of the values of Arrow's Type::type enum +// \return A shared pointer to DataType +ARROW_EXPORT std::shared_ptr GetPrimitiveType(Type::type type); namespace internal { +// \brief Import a Python module +// \param[in] module_name The name of the module +// \param[out] ref The OwnedRef containing the module PyObject* Status ImportModule(const std::string& module_name, OwnedRef* ref); -Status ImportFromModule(const OwnedRef& module, const std::string& module_name, - OwnedRef* ref); +// \brief Import an object from a Python module +// \param[in] module A Python module +// \param[in] name The name of the object to import +// \param[out] ref The OwnedRef containing the \c name attribute of the Python module \c +// module +Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRef* ref); + +// \brief Convert a Python Decimal object to a C++ string +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[out] The string representation of the Python Decimal instance +// \return The status of the operation Status PythonDecimalToString(PyObject* python_decimal, std::string* out); -Status InferDecimalPrecisionAndScale(PyObject* python_decimal, - int32_t* precision = NULLPTR, - int32_t* scale = NULLPTR); - +// \brief Convert a C++ std::string to a Python Decimal instance +// \param[in] decimal_constructor The decimal type object +// \param[in] decimal_string A decimal string +// \return An instance of decimal.Decimal PyObject* DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string); + +// \brief Convert a Python decimal to an Arrow Decimal128 object +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal128 +// \return The status of the operation Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, Decimal128* out); + +// \brief Check whether obj is an integer, independent of Python versions. bool IsPyInteger(PyObject* obj); +// \brief Check whether obj is nan +bool PyFloat_isnan(PyObject* obj); + +// \brief Check whether obj is an instance of Decimal +bool PyDecimal_Check(PyObject* obj); + +// \brief Check whether obj is nan. This function will abort the program if the argument +// is not a Decimal instance +bool PyDecimal_ISNAN(PyObject* obj); + +// \brief Convert a Python integer into an unsigned 64-bit integer +// \param[in] obj A Python integer +// \param[out] out A pointer to a C uint64_t to hold the result of the conversion +// \return The status of the operation Status UInt64FromPythonInt(PyObject* obj, uint64_t* out); +// \brief Helper class to track and update the precision and scale of a decimal +class DecimalMetadata { + public: + DecimalMetadata(); + DecimalMetadata(int32_t precision, int32_t scale); + + // \brief Adjust the precision and scale of a decimal type given a new precision and a + // new scale \param[in] suggested_precision A candidate precision \param[in] + // suggested_scale A candidate scale \return The status of the operation + Status Update(int32_t suggested_precision, int32_t suggested_scale); + + // \brief A convenient interface for updating the precision and scale based on a Python + // Decimal object \param object A Python Decimal object \return The status of the + // operation + Status Update(PyObject* object); + + int32_t precision() const { return precision_; } + int32_t scale() const { return scale_; } + + private: + int32_t precision_; + int32_t scale_; +}; + } // namespace internal } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/numpy-internal.h b/cpp/src/arrow/python/numpy-internal.h index 6c9c871a100..8d4308065c2 100644 --- a/cpp/src/arrow/python/numpy-internal.h +++ b/cpp/src/arrow/python/numpy-internal.h @@ -54,6 +54,9 @@ class Ndarray1DIndexer { T* data() const { return data_; } + T* begin() const { return data(); } + T* end() const { return begin() + size() * stride_; } + bool is_strided() const { return stride_ == 1; } T& operator[](size_type index) { return data_[index * stride_]; } diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 23418ad920c..79a911ba457 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -67,17 +67,9 @@ constexpr int64_t kBinaryMemoryLimit = std::numeric_limits::max(); namespace { -inline bool PyFloat_isnan(PyObject* obj) { - if (PyFloat_Check(obj)) { - double val = PyFloat_AS_DOUBLE(obj); - return val != val; - } else { - return false; - } -} - inline bool PandasObjectIsNull(PyObject* obj) { - return obj == Py_None || obj == numpy_nan || PyFloat_isnan(obj); + return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) || + (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj)); } inline bool PyObject_is_string(PyObject* obj) { @@ -88,10 +80,8 @@ inline bool PyObject_is_string(PyObject* obj) { #endif } -inline bool PyObject_is_float(PyObject* obj) { return PyFloat_Check(obj); } - inline bool PyObject_is_integer(PyObject* obj) { - return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj); + return !PyBool_Check(obj) && PyArray_IsIntegerScalar(obj); } template @@ -743,59 +733,38 @@ Status NumPyConverter::ConvertDates() { Status NumPyConverter::ConvertDecimals() { PyAcquireGIL lock; - // Import the decimal module and Decimal class - OwnedRef decimal; - OwnedRef Decimal; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); - RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal)); - + internal::DecimalMetadata max_decimal_metadata; Ndarray1DIndexer objects(arr_); - PyObject* object = objects[0]; if (type_ == NULLPTR) { - int32_t precision; - int32_t desired_scale; - - int32_t tmp_precision; - int32_t tmp_scale; - - RETURN_NOT_OK( - internal::InferDecimalPrecisionAndScale(objects[0], &precision, &desired_scale)); - - for (int64_t i = 1; i < length_; ++i) { - RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(objects[i], &tmp_precision, - &tmp_scale)); - precision = std::max(precision, tmp_precision); - - if (std::abs(desired_scale) < std::abs(tmp_scale)) { - desired_scale = tmp_scale; - } + for (PyObject* object : objects) { + RETURN_NOT_OK(max_decimal_metadata.Update(object)); } - type_ = ::arrow::decimal(precision, desired_scale); + type_ = + ::arrow::decimal(max_decimal_metadata.precision(), max_decimal_metadata.scale()); } Decimal128Builder builder(type_, pool_); RETURN_NOT_OK(builder.Resize(length_)); const auto& decimal_type = static_cast(*type_); - PyObject* Decimal_type_object = Decimal.obj(); - - for (int64_t i = 0; i < length_; ++i) { - object = objects[i]; - if (PyObject_IsInstance(object, Decimal_type_object)) { - Decimal128 value; - RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value)); - RETURN_NOT_OK(builder.Append(value)); - } else if (PandasObjectIsNull(object)) { - RETURN_NOT_OK(builder.AppendNull()); - } else { + for (PyObject* object : objects) { + if (ARROW_PREDICT_FALSE(!internal::PyDecimal_Check(object))) { std::stringstream ss; ss << "Error converting from Python objects to Decimal: "; RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss)); return Status::Invalid(ss.str()); } + + if (PandasObjectIsNull(object)) { + RETURN_NOT_OK(builder.AppendNull()); + } else { + Decimal128 value; + RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value)); + RETURN_NOT_OK(builder.Append(value)); + } } return PushBuilderResult(&builder); } @@ -1045,18 +1014,13 @@ Status NumPyConverter::ConvertObjectsInfer() { objects.Init(arr_); PyDateTime_IMPORT; - OwnedRef decimal; - OwnedRef Decimal; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); - RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal)); - for (int64_t i = 0; i < length_; ++i) { PyObject* obj = objects[i]; if (PandasObjectIsNull(obj)) { continue; } else if (PyObject_is_string(obj)) { return ConvertObjectStrings(); - } else if (PyObject_is_float(obj)) { + } else if (PyFloat_Check(obj)) { return ConvertObjectFloats(); } else if (PyBool_Check(obj)) { return ConvertBooleans(); @@ -1069,7 +1033,7 @@ Status NumPyConverter::ConvertObjectsInfer() { return ConvertDateTimes(); } else if (PyTime_Check(obj)) { return ConvertTimes(); - } else if (PyObject_IsInstance(const_cast(obj), Decimal.obj())) { + } else if (internal::PyDecimal_Check(obj)) { return ConvertDecimals(); } else if (PyList_Check(obj)) { std::shared_ptr inferred_type; diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index b76caaecee6..f0bd015f037 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "gtest/gtest.h" - #include +#include + #include "arrow/python/platform.h" #include "arrow/array.h" @@ -33,15 +33,6 @@ namespace arrow { namespace py { -TEST(PyBuffer, InvalidInputObject) { - std::shared_ptr res; - PyObject* input = Py_None; - auto old_refcnt = Py_REFCNT(input); - ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); - PyErr_Clear(); - ASSERT_EQ(old_refcnt, Py_REFCNT(input)); -} - TEST(OwnedRef, TestMoves) { PyAcquireGIL lock; std::vector vec; @@ -78,12 +69,13 @@ TEST(OwnedRefNoGIL, TestMoves) { class DecimalTest : public ::testing::Test { public: - DecimalTest() : lock_(), decimal_module_(), decimal_constructor_() { - auto s = internal::ImportModule("decimal", &decimal_module_); + DecimalTest() : lock_(), decimal_constructor_() { + OwnedRef decimal_module; + auto s = internal::ImportModule("decimal", &decimal_module); DCHECK(s.ok()) << s.message(); - DCHECK_NE(decimal_module_.obj(), NULLPTR); + DCHECK_NE(decimal_module.obj(), NULLPTR); - s = internal::ImportFromModule(decimal_module_, "Decimal", &decimal_constructor_); + s = internal::ImportFromModule(decimal_module, "Decimal", &decimal_constructor_); DCHECK(s.ok()) << s.message(); DCHECK_NE(decimal_constructor_.obj(), NULLPTR); @@ -94,16 +86,26 @@ class DecimalTest : public ::testing::Test { return ref; } + PyObject* decimal_constructor() const { return decimal_constructor_.obj(); } + private: PyAcquireGIL lock_; - OwnedRef decimal_module_; OwnedRef decimal_constructor_; }; +TEST(PyBuffer, InvalidInputObject) { + std::shared_ptr res; + PyObject* input = Py_None; + auto old_refcnt = Py_REFCNT(input); + ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); + PyErr_Clear(); + ASSERT_EQ(old_refcnt, Py_REFCNT(input)); +} + TEST_F(DecimalTest, TestPythonDecimalToString) { std::string decimal_string("-39402950693754869342983"); - OwnedRef python_object = this->CreatePythonDecimal(decimal_string); + OwnedRef python_object(this->CreatePythonDecimal(decimal_string)); ASSERT_NE(python_object.obj(), nullptr); std::string string_result; @@ -114,35 +116,29 @@ TEST_F(DecimalTest, TestInferPrecisionAndScale) { std::string decimal_string("-394029506937548693.42983"); OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - int32_t precision; - int32_t scale; - - ASSERT_OK( - internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); const auto expected_precision = static_cast(decimal_string.size() - 2); // 1 for -, 1 for . const int32_t expected_scale = 5; - ASSERT_EQ(expected_precision, precision); - ASSERT_EQ(expected_scale, scale); + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); } TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) { std::string decimal_string("-3.94042983E+10"); OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - int32_t precision; - int32_t scale; - - ASSERT_OK( - internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); const auto expected_precision = 9; const int32_t expected_scale = -2; - ASSERT_EQ(expected_precision, precision); - ASSERT_EQ(expected_scale, scale); + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); } TEST(PandasConversionTest, TestObjectBlockWriteFails) { @@ -226,14 +222,12 @@ TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) { TEST_F(DecimalTest, TestOverflowFails) { Decimal128 value; - int32_t precision; - int32_t scale; OwnedRef python_decimal( this->CreatePythonDecimal("9999999999999999999999999999999999999.9")); - ASSERT_OK( - internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); - ASSERT_EQ(38, precision); - ASSERT_EQ(1, scale); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); + ASSERT_EQ(38, metadata.precision()); + ASSERT_EQ(1, metadata.scale()); auto type = ::arrow::decimal(38, 38); const auto& decimal_type = static_cast(*type); @@ -241,5 +235,63 @@ TEST_F(DecimalTest, TestOverflowFails) { decimal_type, &value)); } +TEST_F(DecimalTest, TestNoneAndNaN) { + OwnedRef list_ref(PyList_New(4)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + PyObject* constructor = this->decimal_constructor(); + PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234"); + ASSERT_NE(decimal_value, nullptr); + + Py_INCREF(Py_None); + PyObject* missing_value1 = Py_None; + ASSERT_NE(missing_value1, nullptr); + + PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN); + ASSERT_NE(missing_value2, nullptr); + + PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan"); + ASSERT_NE(missing_value3, nullptr); + + // This steals a reference to each object, so we don't need to decref them later, + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, decimal_value), 0); + ASSERT_EQ(PyList_SetItem(list, 1, missing_value1), 0); + ASSERT_EQ(PyList_SetItem(list, 2, missing_value2), 0); + ASSERT_EQ(PyList_SetItem(list, 3, missing_value3), 0); + + MemoryPool* pool = default_memory_pool(); + std::shared_ptr arr; + ASSERT_OK(ConvertPySequence(list, pool, &arr)); + ASSERT_TRUE(arr->IsValid(0)); + ASSERT_TRUE(arr->IsNull(1)); + ASSERT_TRUE(arr->IsNull(2)); + ASSERT_TRUE(arr->IsNull(3)); +} + +TEST_F(DecimalTest, TestMixedPrecisionAndScale) { + PyObject* value2 = internal::DecimalFromString(this->decimal_constructor(), "0.001"); + PyObject* value1 = internal::DecimalFromString(this->decimal_constructor(), "1.01E5"); + + OwnedRef list_ref(PyList_New(2)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + ASSERT_EQ(PyList_SetItem(list, 0, value1), 0); + ASSERT_EQ(PyList_SetItem(list, 1, value2), 0); + + MemoryPool* pool = default_memory_pool(); + std::shared_ptr arr; + ASSERT_OK(ConvertPySequence(list, pool, &arr)); + const auto& type = static_cast(*arr->type()); + + int32_t expected_precision = 9; + int32_t expected_scale = 3; + ASSERT_EQ(expected_precision, type.precision()); + ASSERT_EQ(expected_scale, type.scale()); +} + } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index e4406747d55..f3f348cf692 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -14,7 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// #include #include diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 8423ff00b67..516431a74a8 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -639,3 +639,10 @@ def test_structarray_from_arrays_coerce(): pa.StructArray.from_arrays(arrays) assert result.equals(expected) + + +def test_decimal_array_with_none_and_nan(): + values = [decimal.Decimal('1.234'), None, np.nan, decimal.Decimal('nan')] + array = pa.array(values) + assert array.type == pa.decimal128(4, 3) + assert array.to_pylist() == values[:2] + [None, None]