From 8e816ec25c2b75966f2adeaba9e2951f4dddf9f6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 14 Feb 2018 17:43:21 -0500 Subject: [PATCH 01/31] ARROW-2145: [Python] Decimal conversion not working for NaN values --- cpp/CMakeLists.txt | 6 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 22 +- cpp/src/arrow/python/arrow_to_pandas.cc | 13 +- cpp/src/arrow/python/builtin_convert.cc | 46 +++- cpp/src/arrow/python/helpers.cc | 90 +++++- cpp/src/arrow/python/helpers.h | 76 ++++- cpp/src/arrow/python/numpy-internal.h | 3 + cpp/src/arrow/python/numpy_to_arrow.cc | 76 ++--- cpp/src/arrow/python/python-test.cc | 192 ++++++++++--- cpp/src/arrow/util/decimal-test.cc | 73 ++--- cpp/src/arrow/util/decimal.cc | 276 ++++++++----------- cpp/src/arrow/util/decimal.h | 2 +- python/pyarrow/tests/test_convert_builtin.py | 7 + python/pyarrow/tests/test_convert_pandas.py | 10 + 14 files changed, 574 insertions(+), 318 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8c0e95634fc..47692a831fa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -574,11 +574,13 @@ set(ARROW_LINK_LIBS set(ARROW_SHARED_PRIVATE_LINK_LIBS ${BOOST_SYSTEM_LIBRARY} - ${BOOST_FILESYSTEM_LIBRARY}) + ${BOOST_FILESYSTEM_LIBRARY} + ${BOOST_REGEX_LIBRARY}) set(ARROW_STATIC_PRIVATE_LINK_LIBS ${BOOST_SYSTEM_LIBRARY} - ${BOOST_FILESYSTEM_LIBRARY}) + ${BOOST_FILESYSTEM_LIBRARY} + ${BOOST_REGEX_LIBRARY}) if (NOT MSVC) set(ARROW_LINK_LIBS diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 944ca1d3bb1..4103af41b53 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -157,8 +157,11 @@ if (ARROW_BOOST_VENDORED) "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_system${CMAKE_STATIC_LIBRARY_SUFFIX}") set(BOOST_STATIC_FILESYSTEM_LIBRARY "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_filesystem${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BOOST_STATIC_REGEX_LIBRARY + "${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_regex${CMAKE_STATIC_LIBRARY_SUFFIX}") set(BOOST_SYSTEM_LIBRARY "${BOOST_STATIC_SYSTEM_LIBRARY}") set(BOOST_FILESYSTEM_LIBRARY "${BOOST_STATIC_FILESYSTEM_LIBRARY}") + set(BOOST_REGEX_LIBRARY "${BOOST_STATIC_REGEX_LIBRARY}") if (ARROW_BOOST_HEADER_ONLY) set(BOOST_BUILD_PRODUCTS) set(BOOST_CONFIGURE_COMMAND "") @@ -166,7 +169,8 @@ if (ARROW_BOOST_VENDORED) else() set(BOOST_BUILD_PRODUCTS ${BOOST_SYSTEM_LIBRARY} - ${BOOST_FILESYSTEM_LIBRARY}) + ${BOOST_FILESYSTEM_LIBRARY} + ${BOOST_REGEX_LIBRARY}) set(BOOST_CONFIGURE_COMMAND "./bootstrap.sh" "--prefix=${BOOST_PREFIX}" @@ -210,16 +214,19 @@ else() if (ARROW_BOOST_HEADER_ONLY) find_package(Boost REQUIRED) else() - find_package(Boost COMPONENTS system filesystem REQUIRED) + find_package(Boost COMPONENTS system filesystem regex REQUIRED) if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG}) set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG}) + set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG}) else() set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE}) set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE}) + set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE}) endif() set(BOOST_SYSTEM_LIBRARY boost_system_shared) set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared) + set(BOOST_REGEX_LIBRARY boost_regex_shared) endif() else() # Find static boost headers and libs @@ -228,16 +235,19 @@ else() if (ARROW_BOOST_HEADER_ONLY) find_package(Boost REQUIRED) else() - find_package(Boost COMPONENTS system filesystem REQUIRED) + find_package(Boost COMPONENTS system filesystem regex REQUIRED) if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG}) set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG}) + set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG}) else() set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE}) set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE}) + set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE}) endif() set(BOOST_SYSTEM_LIBRARY boost_system_static) set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) + set(BOOST_REGEX_LIBRARY boost_regex_static) endif() endif() endif() @@ -254,7 +264,11 @@ if (NOT ARROW_BOOST_HEADER_ONLY) STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}" SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}") - SET(ARROW_BOOST_LIBS boost_system boost_filesystem) + ADD_THIRDPARTY_LIB(boost_regex + STATIC_LIB "${BOOST_STATIC_REGEX_LIBRARY}" + SHARED_LIB "${BOOST_SHARED_REGEX_LIBRARY}") + + SET(ARROW_BOOST_LIBS boost_system boost_filesystem boost_regex) endif() include_directories(SYSTEM ${Boost_INCLUDE_DIR}) diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index aefd4d76dfa..17b87bf4eca 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -640,11 +640,11 @@ static Status ConvertTimes(PandasOptions options, const ChunkedArray& data, static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data, PyObject** out_values) { PyAcquireGIL lock; - OwnedRef decimal_ref; - OwnedRef Decimal_ref; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref)); - RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal", &Decimal_ref)); - PyObject* Decimal = Decimal_ref.obj(); + OwnedRef decimal; + OwnedRef Decimal; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); + RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal)); + PyObject* decimal_constructor = Decimal.obj(); for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = static_cast(*data.chunk(c)); @@ -654,7 +654,8 @@ static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data, Py_INCREF(Py_None); *out_values++ = Py_None; } else { - *out_values++ = internal::DecimalFromString(Decimal, arr.FormatValue(i)); + *out_values++ = + internal::DecimalFromString(decimal_constructor, arr.FormatValue(i)); RETURN_IF_PYERROR(); } } diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index a286c6bd5e9..891793cc9dc 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -76,7 +76,18 @@ class ScalarVisitor { timestamp_count_(0), float_count_(0), binary_count_(0), - unicode_count_(0) {} + unicode_count_(0), + decimal_count_(0), + max_decimal_metadata_(std::numeric_limits::min(), + std::numeric_limits::min()), + decimal_type_() { + OwnedRefNoGIL decimal_module; + Status status = ::arrow::py::internal::ImportModule("decimal", &decimal_module); + DCHECK(status.ok()) << "Unable to import decimal module"; + status = ::arrow::py::internal::ImportFromModule(decimal_module, "Decimal", + &decimal_type_); + DCHECK(status.ok()) << "Unable to import decimal.Decimal"; + } Status Visit(PyObject* obj) { ++total_count_; @@ -111,10 +122,16 @@ class ScalarVisitor { ss << type->ToString(); return Status::Invalid(ss.str()); } + } else if (PyObject_IsInstance(obj, decimal_type_.obj())) { + // Don't infer anything if we encounter a Decimal('nan') + if (!internal::PyDecimal_ISNAN(obj)) { + RETURN_NOT_OK(max_decimal_metadata_.Update(obj)); + } + ++decimal_count_; } else { // TODO(wesm): accumulate error information somewhere static std::string supported_types = - "bool, float, integer, date, datetime, bytes, unicode"; + "bool, float, integer, date, datetime, bytes, unicode, decimal"; std::stringstream ss; ss << "Error inferring Arrow data type for collection of Python objects. "; RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss)); @@ -125,7 +142,9 @@ class ScalarVisitor { std::shared_ptr GetType() { // TODO(wesm): handling mixed-type cases - if (float_count_) { + if (decimal_count_) { + return decimal(max_decimal_metadata_.precision(), max_decimal_metadata_.scale()); + } else if (float_count_) { return float64(); } else if (int_count_) { // TODO(wesm): tighter type later @@ -157,8 +176,13 @@ class ScalarVisitor { int64_t float_count_; int64_t binary_count_; int64_t unicode_count_; + int64_t decimal_count_; + + internal::DecimalMetadata max_decimal_metadata_; + // Place to accumulate errors // std::vector errors_; + OwnedRefNoGIL decimal_type_; }; static constexpr int MAX_NESTING_LEVELS = 32; @@ -379,17 +403,14 @@ class TypedConverter : public SeqConverter { BuilderType* typed_builder_; }; -// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull() +// We use the CRTP trick here to devirtualize the AppendItem(), AppendNull(), and IsNull() // method calls. template class TypedConverterVisitor : public TypedConverter { public: Status AppendSingle(PyObject* obj) override { - if (obj == Py_None) { - return static_cast(this)->AppendNull(); - } else { - return static_cast(this)->AppendItem(obj); - } + auto self = static_cast(this); + return self->IsNull(obj) ? self->AppendNull() : self->AppendItem(obj); } Status AppendMultiple(PyObject* obj, int64_t size) override { @@ -409,6 +430,7 @@ class TypedConverterVisitor : public TypedConverter { // Append a missing item (default implementation) Status AppendNull() { return this->typed_builder_->AppendNull(); } + bool IsNull(PyObject* obj) const { return obj == Py_None; } }; class NullConverter : public TypedConverterVisitor { @@ -830,12 +852,16 @@ class DecimalConverter public: // Append a non-missing item Status AppendItem(PyObject* obj) { - /// TODO(phillipc): Check for nan? Decimal128 value; const auto& type = static_cast(*typed_builder_->type()); RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value)); return typed_builder_->Append(value); } + + bool IsNull(PyObject* obj) const { + return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) || + (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj)); + } }; // Dynamic constructor for sequence converters diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index df1db99911b..efbcdf9b4db 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -61,6 +61,7 @@ namespace internal { Status ImportModule(const std::string& module_name, OwnedRef* ref) { PyObject* module = PyImport_ImportModule(module_name.c_str()); RETURN_IF_PYERROR(); + DCHECK_NE(module, nullptr) << "unable to import the " << module_name << " module"; ref->reset(module); return Status::OK(); } @@ -71,6 +72,7 @@ Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRe PyObject* attr = PyObject_GetAttrString(module.obj(), name.c_str()); RETURN_IF_PYERROR(); + DCHECK_NE(attr, nullptr) << "unable to import the " << name << " object"; ref->reset(attr); return Status::OK(); } @@ -93,13 +95,19 @@ Status PythonDecimalToString(PyObject* python_decimal, std::string* out) { return Status::OK(); } -Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision, - int32_t* scale) { +// \brief Infer the precision and scale of a Python decimal.Decimal instance +// \param python_decimal[in] An instance of decimal.Decimal +// \param precision[out] The value of the inferred precision +// \param scale[out] The value of the inferred scale +// \return The status of the operation +static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision, + int32_t* scale) { DCHECK_NE(python_decimal, NULLPTR); DCHECK_NE(precision, NULLPTR); DCHECK_NE(scale, NULLPTR); - OwnedRef as_tuple(PyObject_CallMethod(python_decimal, "as_tuple", "()")); + // TODO(phillipc): Make sure we perform PyDecimal_Check(python_decimal) as a DCHECK + OwnedRef as_tuple(PyObject_CallMethod(python_decimal, "as_tuple", "")); RETURN_IF_PYERROR(); DCHECK(PyTuple_Check(as_tuple.obj())); @@ -117,8 +125,23 @@ Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precisio const auto exponent = static_cast(PyLong_AsLong(py_exponent.obj())); RETURN_IF_PYERROR(); - *precision = num_digits; - *scale = -exponent; + const int32_t abs_exponent = std::abs(exponent); + + int32_t num_additional_zeros; + + if (num_digits <= abs_exponent) { + DCHECK_NE(exponent, 0) << "exponent should never be zero here"; + + // we have leading/trailing zeros, leading if exponent is negative + num_additional_zeros = exponent < 0 ? abs_exponent - num_digits : exponent; + *scale = static_cast(exponent < 0) * -exponent; + } else { + // we can use the number of digits as the precision + num_additional_zeros = 0; + *scale = -exponent; + } + + *precision = num_digits + num_additional_zeros; return Status::OK(); } @@ -193,6 +216,63 @@ Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) { return Status::OK(); } +bool PyFloat_isnan(PyObject* obj) { + return PyFloat_Check(obj) && std::isnan(PyFloat_AS_DOUBLE(obj)); +} + +bool PyDecimal_Check(PyObject* obj) { + // TODO(phillipc): Is this expensive? + OwnedRef Decimal; + OwnedRef decimal; + Status status = ImportModule("decimal", &decimal); + DCHECK(status.ok()) << "Error during import of the decimal module"; + status = ImportFromModule(decimal, "Decimal", &Decimal); + DCHECK(status.ok()) + << "Error during import of the Decimal object from the decimal module"; + const int32_t result = PyObject_IsInstance(obj, Decimal.obj()); + DCHECK_NE(result, -1) << " error during PyObject_IsInstance check"; + return result == 1; +} + +bool PyDecimal_ISNAN(PyObject* obj) { + DCHECK(PyDecimal_Check(obj)) << "obj is not an instance of decimal.Decimal"; + OwnedRef is_nan(PyObject_CallMethod(obj, "is_nan", "")); + return PyObject_IsTrue(is_nan.obj()) == 1; +} + +DecimalMetadata::DecimalMetadata() + : DecimalMetadata(std::numeric_limits::min(), + std::numeric_limits::min()) {} + +DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale) + : precision_(precision), scale_(scale) {} + +Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_scale) { + const int32_t current_precision = precision_; + precision_ = std::max(current_precision, suggested_precision); + + const int32_t current_scale = scale_; + scale_ = std::max(current_scale, suggested_scale); + + // if our suggested scale is zero and we don't yet have enough precision then we need to + // add whatever the current scale is to the precision + if (suggested_scale == 0 && suggested_precision > current_precision) { + precision_ += scale_; + } + + return Status::OK(); +} + +Status DecimalMetadata::Update(PyObject* object) { + DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal"; + DCHECK(!PyDecimal_ISNAN(object)) + << "Decimal object cannot be NAN when inferring precision and scale"; + int32_t precision; + int32_t scale; + RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale)); + return Update(precision, scale); +} + } // namespace internal } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h index c0171aa2f5a..d39c62824c2 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/cpp/src/arrow/python/helpers.h @@ -36,29 +36,89 @@ namespace py { class OwnedRef; -ARROW_EXPORT -std::shared_ptr GetPrimitiveType(Type::type type); +// \brief Get an arrow DataType instance from Arrow's Type::type enum +// \param[in] type One of the values of Arrow's Type::type enum +// \return A shared pointer to DataType +ARROW_EXPORT std::shared_ptr GetPrimitiveType(Type::type type); namespace internal { +// \brief Import a Python module +// \param[in] module_name The name of the module +// \param[out] ref The OwnedRef containing the module PyObject* Status ImportModule(const std::string& module_name, OwnedRef* ref); -Status ImportFromModule(const OwnedRef& module, const std::string& module_name, - OwnedRef* ref); +// \brief Import an object from a Python module +// \param[in] module A Python module +// \param[in] name The name of the object to import +// \param[out] ref The OwnedRef containing the \c name attribute of the Python module \c +// module +Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRef* ref); + +// \brief Convert a Python Decimal object to a C++ string +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[out] The string representation of the Python Decimal instance +// \return The status of the operation Status PythonDecimalToString(PyObject* python_decimal, std::string* out); -Status InferDecimalPrecisionAndScale(PyObject* python_decimal, - int32_t* precision = NULLPTR, - int32_t* scale = NULLPTR); - +// \brief Convert a C++ std::string to a Python Decimal instance +// \param[in] decimal_constructor The decimal type object +// \param[in] decimal_string A decimal string +// \return An instance of decimal.Decimal PyObject* DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string); + +// \brief Convert a Python decimal to an Arrow Decimal128 object +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal128 +// \return The status of the operation Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, Decimal128* out); + +// \brief Check whether obj is an integer, independent of Python versions. bool IsPyInteger(PyObject* obj); +// \brief Check whether obj is nan +bool PyFloat_isnan(PyObject* obj); + +// \brief Check whether obj is an instance of Decimal +bool PyDecimal_Check(PyObject* obj); + +// \brief Check whether obj is nan. This function will abort the program if the argument +// is not a Decimal instance +bool PyDecimal_ISNAN(PyObject* obj); + +// \brief Convert a Python integer into an unsigned 64-bit integer +// \param[in] obj A Python integer +// \param[out] out A pointer to a C uint64_t to hold the result of the conversion +// \return The status of the operation Status UInt64FromPythonInt(PyObject* obj, uint64_t* out); +// \brief Helper class to track and update the precision and scale of a decimal +class DecimalMetadata { + public: + DecimalMetadata(); + DecimalMetadata(int32_t precision, int32_t scale); + + // \brief Adjust the precision and scale of a decimal type given a new precision and a + // new scale \param[in] suggested_precision A candidate precision \param[in] + // suggested_scale A candidate scale \return The status of the operation + Status Update(int32_t suggested_precision, int32_t suggested_scale); + + // \brief A convenient interface for updating the precision and scale based on a Python + // Decimal object \param object A Python Decimal object \return The status of the + // operation + Status Update(PyObject* object); + + int32_t precision() const { return precision_; } + int32_t scale() const { return scale_; } + + private: + int32_t precision_; + int32_t scale_; +}; + } // namespace internal } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/numpy-internal.h b/cpp/src/arrow/python/numpy-internal.h index 6c9c871a100..8d4308065c2 100644 --- a/cpp/src/arrow/python/numpy-internal.h +++ b/cpp/src/arrow/python/numpy-internal.h @@ -54,6 +54,9 @@ class Ndarray1DIndexer { T* data() const { return data_; } + T* begin() const { return data(); } + T* end() const { return begin() + size() * stride_; } + bool is_strided() const { return stride_ == 1; } T& operator[](size_type index) { return data_[index * stride_]; } diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 23418ad920c..79a911ba457 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -67,17 +67,9 @@ constexpr int64_t kBinaryMemoryLimit = std::numeric_limits::max(); namespace { -inline bool PyFloat_isnan(PyObject* obj) { - if (PyFloat_Check(obj)) { - double val = PyFloat_AS_DOUBLE(obj); - return val != val; - } else { - return false; - } -} - inline bool PandasObjectIsNull(PyObject* obj) { - return obj == Py_None || obj == numpy_nan || PyFloat_isnan(obj); + return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) || + (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj)); } inline bool PyObject_is_string(PyObject* obj) { @@ -88,10 +80,8 @@ inline bool PyObject_is_string(PyObject* obj) { #endif } -inline bool PyObject_is_float(PyObject* obj) { return PyFloat_Check(obj); } - inline bool PyObject_is_integer(PyObject* obj) { - return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj); + return !PyBool_Check(obj) && PyArray_IsIntegerScalar(obj); } template @@ -743,59 +733,38 @@ Status NumPyConverter::ConvertDates() { Status NumPyConverter::ConvertDecimals() { PyAcquireGIL lock; - // Import the decimal module and Decimal class - OwnedRef decimal; - OwnedRef Decimal; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); - RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal)); - + internal::DecimalMetadata max_decimal_metadata; Ndarray1DIndexer objects(arr_); - PyObject* object = objects[0]; if (type_ == NULLPTR) { - int32_t precision; - int32_t desired_scale; - - int32_t tmp_precision; - int32_t tmp_scale; - - RETURN_NOT_OK( - internal::InferDecimalPrecisionAndScale(objects[0], &precision, &desired_scale)); - - for (int64_t i = 1; i < length_; ++i) { - RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(objects[i], &tmp_precision, - &tmp_scale)); - precision = std::max(precision, tmp_precision); - - if (std::abs(desired_scale) < std::abs(tmp_scale)) { - desired_scale = tmp_scale; - } + for (PyObject* object : objects) { + RETURN_NOT_OK(max_decimal_metadata.Update(object)); } - type_ = ::arrow::decimal(precision, desired_scale); + type_ = + ::arrow::decimal(max_decimal_metadata.precision(), max_decimal_metadata.scale()); } Decimal128Builder builder(type_, pool_); RETURN_NOT_OK(builder.Resize(length_)); const auto& decimal_type = static_cast(*type_); - PyObject* Decimal_type_object = Decimal.obj(); - - for (int64_t i = 0; i < length_; ++i) { - object = objects[i]; - if (PyObject_IsInstance(object, Decimal_type_object)) { - Decimal128 value; - RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value)); - RETURN_NOT_OK(builder.Append(value)); - } else if (PandasObjectIsNull(object)) { - RETURN_NOT_OK(builder.AppendNull()); - } else { + for (PyObject* object : objects) { + if (ARROW_PREDICT_FALSE(!internal::PyDecimal_Check(object))) { std::stringstream ss; ss << "Error converting from Python objects to Decimal: "; RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss)); return Status::Invalid(ss.str()); } + + if (PandasObjectIsNull(object)) { + RETURN_NOT_OK(builder.AppendNull()); + } else { + Decimal128 value; + RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value)); + RETURN_NOT_OK(builder.Append(value)); + } } return PushBuilderResult(&builder); } @@ -1045,18 +1014,13 @@ Status NumPyConverter::ConvertObjectsInfer() { objects.Init(arr_); PyDateTime_IMPORT; - OwnedRef decimal; - OwnedRef Decimal; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); - RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal)); - for (int64_t i = 0; i < length_; ++i) { PyObject* obj = objects[i]; if (PandasObjectIsNull(obj)) { continue; } else if (PyObject_is_string(obj)) { return ConvertObjectStrings(); - } else if (PyObject_is_float(obj)) { + } else if (PyFloat_Check(obj)) { return ConvertObjectFloats(); } else if (PyBool_Check(obj)) { return ConvertBooleans(); @@ -1069,7 +1033,7 @@ Status NumPyConverter::ConvertObjectsInfer() { return ConvertDateTimes(); } else if (PyTime_Check(obj)) { return ConvertTimes(); - } else if (PyObject_IsInstance(const_cast(obj), Decimal.obj())) { + } else if (internal::PyDecimal_Check(obj)) { return ConvertDecimals(); } else if (PyList_Check(obj)) { std::shared_ptr inferred_type; diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index b76caaecee6..2c44427866a 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "gtest/gtest.h" - #include +#include + #include "arrow/python/platform.h" #include "arrow/array.h" @@ -33,15 +33,6 @@ namespace arrow { namespace py { -TEST(PyBuffer, InvalidInputObject) { - std::shared_ptr res; - PyObject* input = Py_None; - auto old_refcnt = Py_REFCNT(input); - ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); - PyErr_Clear(); - ASSERT_EQ(old_refcnt, Py_REFCNT(input)); -} - TEST(OwnedRef, TestMoves) { PyAcquireGIL lock; std::vector vec; @@ -78,12 +69,13 @@ TEST(OwnedRefNoGIL, TestMoves) { class DecimalTest : public ::testing::Test { public: - DecimalTest() : lock_(), decimal_module_(), decimal_constructor_() { - auto s = internal::ImportModule("decimal", &decimal_module_); + DecimalTest() : lock_(), decimal_constructor_() { + OwnedRef decimal_module; + auto s = internal::ImportModule("decimal", &decimal_module); DCHECK(s.ok()) << s.message(); - DCHECK_NE(decimal_module_.obj(), NULLPTR); + DCHECK_NE(decimal_module.obj(), NULLPTR); - s = internal::ImportFromModule(decimal_module_, "Decimal", &decimal_constructor_); + s = internal::ImportFromModule(decimal_module, "Decimal", &decimal_constructor_); DCHECK(s.ok()) << s.message(); DCHECK_NE(decimal_constructor_.obj(), NULLPTR); @@ -94,16 +86,26 @@ class DecimalTest : public ::testing::Test { return ref; } + PyObject* decimal_constructor() const { return decimal_constructor_.obj(); } + private: PyAcquireGIL lock_; - OwnedRef decimal_module_; OwnedRef decimal_constructor_; }; +TEST(PyBuffer, InvalidInputObject) { + std::shared_ptr res; + PyObject* input = Py_None; + auto old_refcnt = Py_REFCNT(input); + ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); + PyErr_Clear(); + ASSERT_EQ(old_refcnt, Py_REFCNT(input)); +} + TEST_F(DecimalTest, TestPythonDecimalToString) { std::string decimal_string("-39402950693754869342983"); - OwnedRef python_object = this->CreatePythonDecimal(decimal_string); + OwnedRef python_object(this->CreatePythonDecimal(decimal_string)); ASSERT_NE(python_object.obj(), nullptr); std::string string_result; @@ -114,35 +116,57 @@ TEST_F(DecimalTest, TestInferPrecisionAndScale) { std::string decimal_string("-394029506937548693.42983"); OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - int32_t precision; - int32_t scale; - - ASSERT_OK( - internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); const auto expected_precision = static_cast(decimal_string.size() - 2); // 1 for -, 1 for . const int32_t expected_scale = 5; - ASSERT_EQ(expected_precision, precision); - ASSERT_EQ(expected_scale, scale); + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); } TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) { std::string decimal_string("-3.94042983E+10"); OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); - int32_t precision; - int32_t scale; - - ASSERT_OK( - internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); const auto expected_precision = 9; const int32_t expected_scale = -2; - ASSERT_EQ(expected_precision, precision); - ASSERT_EQ(expected_scale, scale); + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); +} + +TEST_F(DecimalTest, TestInferAllLeadingZeros) { + std::string decimal_string("0.001"); + OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); + ASSERT_EQ(3, metadata.precision()); + ASSERT_EQ(3, metadata.scale()); +} + +TEST_F(DecimalTest, TestInferAllLeadingZerosExponentialNotationPositive) { + std::string decimal_string("0.01E5"); + OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); + ASSERT_EQ(4, metadata.precision()); + ASSERT_EQ(0, metadata.scale()); +} + +TEST_F(DecimalTest, TestInferAllLeadingZerosExponentialNotationNegative) { + std::string decimal_string("0.01E3"); + OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string)); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); + ASSERT_EQ(2, metadata.precision()); + ASSERT_EQ(0, metadata.scale()); } TEST(PandasConversionTest, TestObjectBlockWriteFails) { @@ -226,14 +250,12 @@ TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) { TEST_F(DecimalTest, TestOverflowFails) { Decimal128 value; - int32_t precision; - int32_t scale; OwnedRef python_decimal( this->CreatePythonDecimal("9999999999999999999999999999999999999.9")); - ASSERT_OK( - internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); - ASSERT_EQ(38, precision); - ASSERT_EQ(1, scale); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal.obj())); + ASSERT_EQ(38, metadata.precision()); + ASSERT_EQ(1, metadata.scale()); auto type = ::arrow::decimal(38, 38); const auto& decimal_type = static_cast(*type); @@ -241,5 +263,101 @@ TEST_F(DecimalTest, TestOverflowFails) { decimal_type, &value)); } +TEST_F(DecimalTest, TestNoneAndNaN) { + OwnedRef list_ref(PyList_New(4)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + PyObject* constructor = this->decimal_constructor(); + PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234"); + ASSERT_NE(decimal_value, nullptr); + + Py_INCREF(Py_None); + PyObject* missing_value1 = Py_None; + ASSERT_NE(missing_value1, nullptr); + + PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN); + ASSERT_NE(missing_value2, nullptr); + + PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan"); + ASSERT_NE(missing_value3, nullptr); + + // This steals a reference to each object, so we don't need to decref them later, + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, decimal_value), 0); + ASSERT_EQ(PyList_SetItem(list, 1, missing_value1), 0); + ASSERT_EQ(PyList_SetItem(list, 2, missing_value2), 0); + ASSERT_EQ(PyList_SetItem(list, 3, missing_value3), 0); + + MemoryPool* pool = default_memory_pool(); + std::shared_ptr arr; + ASSERT_OK(ConvertPySequence(list, pool, &arr)); + ASSERT_TRUE(arr->IsValid(0)); + ASSERT_TRUE(arr->IsNull(1)); + ASSERT_TRUE(arr->IsNull(2)); + ASSERT_TRUE(arr->IsNull(3)); +} + +TEST_F(DecimalTest, TestMixedPrecisionAndScale) { + std::vector strings{{"0.001", "1.01E5", "1.01E5"}}; + + OwnedRef list_ref(PyList_New(static_cast(strings.size()))); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + // PyList_SetItem steals a reference to the item so we don't decref it later + for (size_t i = 0; i < strings.size(); ++i) { + PyList_SetItem( + list, i, internal::DecimalFromString(this->decimal_constructor(), strings.at(i))); + } + + MemoryPool* pool = default_memory_pool(); + std::shared_ptr arr; + ASSERT_OK(ConvertPySequence(list, pool, &arr)); + const auto& type = static_cast(*arr->type()); + + int32_t expected_precision = 9; + int32_t expected_scale = 3; + ASSERT_EQ(expected_precision, type.precision()); + ASSERT_EQ(expected_scale, type.scale()); +} + +TEST_F(DecimalTest, TestMixedPrecisionAndScaleSequenceConvert) { + PyAcquireGIL lock; + MemoryPool* pool = default_memory_pool(); + std::shared_ptr arr; + + PyObject* value1 = this->CreatePythonDecimal("0.01").detach(); + ASSERT_NE(value1, nullptr); + + PyObject* value2 = this->CreatePythonDecimal("0.001").detach(); + ASSERT_NE(value2, nullptr); + + OwnedRef list_ref(PyList_New(2)); + PyObject* list = list_ref.obj(); + + // This steals a reference to each object, so we don't need to decref them later + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, value1), 0); + ASSERT_EQ(PyList_SetItem(list, 1, value2), 0); + + ASSERT_OK(ConvertPySequence(list, pool, &arr)); + + const auto& type = static_cast(*arr->type()); + ASSERT_EQ(3, type.precision()); + ASSERT_EQ(3, type.scale()); +} + +TEST_F(DecimalTest, SimpleInference) { + OwnedRef value(this->CreatePythonDecimal("0.01")); + ASSERT_NE(value.obj(), nullptr); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(value.obj())); + ASSERT_EQ(2, metadata.precision()); + ASSERT_EQ(2, metadata.scale()); +} + } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index e4406747d55..6db46d48512 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -14,7 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -// #include #include @@ -37,7 +36,7 @@ class DecimalTestFixture : public ::testing::Test { TEST_F(DecimalTestFixture, TestToString) { Decimal128 decimal(this->integer_value_); - int scale = 5; + int32_t scale = 5; std::string result = decimal.ToString(scale); ASSERT_EQ(result, this->string_value_); } @@ -45,7 +44,7 @@ TEST_F(DecimalTestFixture, TestToString) { TEST_F(DecimalTestFixture, TestFromString) { Decimal128 expected(this->integer_value_); Decimal128 result; - int precision, scale; + int32_t precision, scale; ASSERT_OK(Decimal128::FromString(this->string_value_, &result, &precision, &scale)); ASSERT_EQ(result, expected); ASSERT_EQ(precision, 8); @@ -55,8 +54,8 @@ TEST_F(DecimalTestFixture, TestFromString) { TEST_F(DecimalTestFixture, TestStringStartingWithPlus) { std::string plus_value("+234.234"); Decimal128 out; - int scale; - int precision; + int32_t scale; + int32_t precision; ASSERT_OK(Decimal128::FromString(plus_value, &out, &precision, &scale)); ASSERT_EQ(234234, out); ASSERT_EQ(6, precision); @@ -67,8 +66,8 @@ TEST_F(DecimalTestFixture, TestStringStartingWithPlus128) { std::string plus_value("+2342394230592.232349023094"); Decimal128 expected_value("2342394230592232349023094"); Decimal128 out; - int scale; - int precision; + int32_t scale; + int32_t precision; ASSERT_OK(Decimal128::FromString(plus_value, &out, &precision, &scale)); ASSERT_EQ(expected_value, out); ASSERT_EQ(25, precision); @@ -90,9 +89,7 @@ TEST(DecimalTest, TestFromDecimalString128) { Decimal128 result; ASSERT_OK(Decimal128::FromString(string_value, &result)); Decimal128 expected(static_cast(-230492239423435324)); - expected *= 100; - expected -= 12; - ASSERT_EQ(result, expected); + ASSERT_EQ(result, expected * 100 - 12); // Sanity check that our number is actually using more than 64 bits ASSERT_NE(result.high_bits(), 0); @@ -194,36 +191,36 @@ TEST(DecimalTest, TestInvalidInputWithLeadingZeros) { TEST(DecimalZerosTest, LeadingZerosNoDecimalPoint) { std::string string_value("0000000"); Decimal128 d; - int precision; - int scale; + int32_t precision; + int32_t scale; ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale)); - ASSERT_EQ(precision, 7); - ASSERT_EQ(scale, 0); - ASSERT_EQ(d, 0); + ASSERT_EQ(0, precision); + ASSERT_EQ(0, scale); + ASSERT_EQ(0, d); } TEST(DecimalZerosTest, LeadingZerosDecimalPoint) { std::string string_value("000.0000"); Decimal128 d; - int precision; - int scale; + int32_t precision; + int32_t scale; ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale)); // We explicitly do not support this for now, otherwise this would be ASSERT_EQ - ASSERT_NE(precision, 7); + ASSERT_EQ(4, precision); - ASSERT_EQ(scale, 4); - ASSERT_EQ(d, 0); + ASSERT_EQ(4, scale); + ASSERT_EQ(0, d); } TEST(DecimalZerosTest, NoLeadingZerosDecimalPoint) { std::string string_value(".00000"); Decimal128 d; - int precision; - int scale; + int32_t precision; + int32_t scale; ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale)); - ASSERT_EQ(precision, 5); - ASSERT_EQ(scale, 5); - ASSERT_EQ(d, 0); + ASSERT_EQ(5, precision); + ASSERT_EQ(5, scale); + ASSERT_EQ(0, d); } template @@ -335,16 +332,16 @@ INSTANTIATE_TEST_CASE_P(Decimal128ParsingTest, Decimal128ParsingTest, std::make_tuple("0.00123", 123ULL, 5), std::make_tuple("1.23E-8", 123ULL, 10), std::make_tuple("-1.23E-8", -123LL, 10), - std::make_tuple("1.23E+3", 123ULL, -1), - std::make_tuple("-1.23E+3", -123LL, -1), - std::make_tuple("1.23E+5", 123ULL, -3), - std::make_tuple("1.2345E+7", 12345ULL, -3), + std::make_tuple("1.23E+3", 1230ULL, 0), + std::make_tuple("-1.23E+3", -1230LL, 0), + std::make_tuple("1.23E+5", 123000ULL, 0), + std::make_tuple("1.2345E+7", 12345000ULL, 0), std::make_tuple("1.23e-8", 123ULL, 10), std::make_tuple("-1.23e-8", -123LL, 10), - std::make_tuple("1.23e+3", 123ULL, -1), - std::make_tuple("-1.23e+3", -123LL, -1), - std::make_tuple("1.23e+5", 123ULL, -3), - std::make_tuple("1.2345e+7", 12345ULL, -3))); + std::make_tuple("1.23e+3", 1230ULL, 0), + std::make_tuple("-1.23e+3", -1230LL, 0), + std::make_tuple("1.23e+5", 123000ULL, 0), + std::make_tuple("1.2345e+7", 12345000ULL, 0))); class Decimal128ParsingTestInvalid : public ::testing::TestWithParam {}; @@ -375,4 +372,14 @@ TEST(Decimal128Test, TestSmallNumberFormat) { ASSERT_EQ(expected, result); } +TEST(Decimal128Test, TestNoDecimalPointExponential) { + Decimal128 value; + int32_t precision; + int32_t scale; + ASSERT_OK(Decimal128::FromString("1E1", &value, &precision, &scale)); + ASSERT_EQ(10, value.low_bits()); + ASSERT_EQ(2, precision); + ASSERT_EQ(0, scale); +} + } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index a3c8cda7654..816bb4ab075 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -23,12 +23,55 @@ #include #include +#include + #include "arrow/util/bit-util.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" namespace arrow { +static const Decimal128 ScaleMultipliers[] = { + Decimal128(0LL), + Decimal128(10LL), + Decimal128(100LL), + Decimal128(1000LL), + Decimal128(10000LL), + Decimal128(100000LL), + Decimal128(1000000LL), + Decimal128(10000000LL), + Decimal128(100000000LL), + Decimal128(1000000000LL), + Decimal128(10000000000LL), + Decimal128(100000000000LL), + Decimal128(1000000000000LL), + Decimal128(10000000000000LL), + Decimal128(100000000000000LL), + Decimal128(1000000000000000LL), + Decimal128(10000000000000000LL), + Decimal128(100000000000000000LL), + Decimal128(1000000000000000000LL), + Decimal128(0LL, 10000000000000000000ULL), + Decimal128(5LL, 7766279631452241920ULL), + Decimal128(54LL, 3875820019684212736ULL), + Decimal128(542LL, 1864712049423024128ULL), + Decimal128(5421LL, 200376420520689664ULL), + Decimal128(54210LL, 2003764205206896640ULL), + Decimal128(542101LL, 1590897978359414784ULL), + Decimal128(5421010LL, 15908979783594147840ULL), + Decimal128(54210108LL, 11515845246265065472ULL), + Decimal128(542101086LL, 4477988020393345024ULL), + Decimal128(5421010862LL, 7886392056514347008ULL), + Decimal128(54210108624LL, 5076944270305263616ULL), + Decimal128(542101086242LL, 13875954555633532928ULL), + Decimal128(5421010862427LL, 9632337040368467968ULL), + Decimal128(54210108624275LL, 4089650035136921600ULL), + Decimal128(542101086242752LL, 4003012203950112768ULL), + Decimal128(5421010862427522LL, 3136633892082024448ULL), + Decimal128(54210108624275221LL, 12919594847110692864ULL), + Decimal128(542101086242752217LL, 68739955140067328ULL), + Decimal128(5421010862427522170LL, 687399551400673280ULL)}; + static constexpr uint64_t kIntMask = 0xFFFFFFFF; static constexpr auto kCarryBit = static_cast(1) << static_cast(32); @@ -187,8 +230,6 @@ static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + 1] = {1LL, 100000000000000000LL, 1000000000000000000LL}; -static inline bool isdigit(char value) { return std::isdigit(value) != 0; } - static void StringToInteger(const std::string& str, Decimal128* out) { using std::size_t; @@ -212,160 +253,124 @@ static void StringToInteger(const std::string& str, Decimal128* out) { } } -Status Decimal128::FromString(const std::string& s, Decimal128* out, int* precision, - int* scale) { - // Implements this regex: "(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?((E|e)(\\+|-)?\\d+)?"; - if (s.empty()) { - return Status::Invalid("Empty string cannot be converted to decimal"); - } +static const boost::regex DECIMAL_REGEX( + // sign of the number + "(?[-+]?)" - std::string::const_iterator charp = s.cbegin(); - std::string::const_iterator end = s.cend(); + // digits around the decimal point + "(((?\\d+)\\.(?\\d*)|\\.(?\\d+)" + ")" - char first_char = *charp; - bool is_negative = false; - if (first_char == '+' || first_char == '-') { - is_negative = first_char == '-'; - ++charp; - } + // optional exponent + "([eE](?[-+]?\\d+))?" - if (charp == end) { - std::stringstream ss; - ss << "Single character: '" << first_char << "' is not a valid decimal value"; - return Status::Invalid(ss.str()); - } + // otherwise + "|" - std::string::const_iterator numeric_string_start = charp; + // we're just an integer + "(?\\d+)" - DCHECK_LT(charp, end); + // or an integer with an exponent + "(?:[eE](?[-+]?\\d+))?)"); - // skip leading zeros - charp = std::find_if_not(charp, end, [](char value) { return value == '0'; }); +static inline bool is_zero_character(char c) { return c == '0'; } - // all zeros and no decimal point - if (charp == end) { - if (out != NULLPTR) { - *out = 0; - } +Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision, + int32_t* scale) { + if (s.empty()) { + return Status::Invalid("Empty string cannot be converted to decimal"); + } - // Not sure what other libraries assign precision to for this case (this case of - // a string consisting only of one or more zeros) + // case of all zeros + if (std::all_of(s.cbegin(), s.cend(), is_zero_character)) { if (precision != NULLPTR) { - *precision = static_cast(charp - numeric_string_start); + *precision = 0; } if (scale != NULLPTR) { *scale = 0; } + *out = 0; return Status::OK(); } - std::string::const_iterator whole_part_start = charp; + boost::smatch results; + const bool matches = boost::regex_match(s, results, DECIMAL_REGEX); + + if (!matches) { + std::stringstream ss; + ss << s << " does not match"; + return Status::Invalid(ss.str()); + } + + const std::string sign = results["SIGN"]; + const std::string integer = results["INTEGER"]; - charp = std::find_if_not(charp, end, isdigit); + const std::string left_digits = results["LEFT_DIGITS"]; + const std::string first_right_digits = results["FIRST_RIGHT_DIGITS"]; - std::string::const_iterator whole_part_end = charp; - std::string whole_part(whole_part_start, whole_part_end); + const std::string second_right_digits = results["SECOND_RIGHT_DIGITS"]; - if (charp != end && *charp == '.') { - ++charp; + const std::string first_exp_value = results["FIRST_EXP_VALUE"]; + const std::string second_exp_value = results["SECOND_EXP_VALUE"]; - if (charp == end) { - return Status::Invalid( - "Decimal point must be followed by at least one base ten digit. Reached the " - "end of the string."); - } + std::string whole_part; + std::string fractional_part; + std::string exponent_value; - if (std::isdigit(*charp) == 0) { - std::stringstream ss; - ss << "Decimal point must be followed by a base ten digit. Found '" << *charp - << "'"; - return Status::Invalid(ss.str()); - } + if (!integer.empty()) { + whole_part = integer; + } else if (!left_digits.empty()) { + DCHECK(second_right_digits.empty()) << s << " " << second_right_digits; + whole_part = left_digits; + fractional_part = first_right_digits; } else { - if (charp != end) { - std::stringstream ss; - ss << "Expected base ten digit or decimal point but found '" << *charp - << "' instead."; - return Status::Invalid(ss.str()); - } + DCHECK(first_right_digits.empty()) << s << " " << first_right_digits; + fractional_part = second_right_digits; } - std::string::const_iterator fractional_part_start = charp; - - // The rest must be digits or an exponent - if (charp != end) { - charp = std::find_if_not(charp, end, isdigit); + // skip leading zeros before the decimal point + std::string::const_iterator without_leading_zeros = + std::find_if_not(whole_part.cbegin(), whole_part.cend(), is_zero_character); + whole_part = std::string(without_leading_zeros, whole_part.cend()); - // The while loop has ended before the end of the string which means we've hit a - // character that isn't a base ten digit or "E" for exponent - if (charp != end && *charp != 'E' && *charp != 'e') { - std::stringstream ss; - ss << "Found non base ten digit character '" << *charp - << "' before the end of the string"; - return Status::Invalid(ss.str()); - } + if (!first_exp_value.empty()) { + exponent_value = first_exp_value; + } else { + exponent_value = second_exp_value; } - std::string::const_iterator fractional_part_end = charp; - std::string fractional_part(fractional_part_start, fractional_part_end); - if (precision != NULLPTR) { - *precision = static_cast(whole_part.size() + fractional_part.size()); + *precision = static_cast(whole_part.size() + fractional_part.size()); } - if (charp != end) { - // we must have an exponent, if this aborts then we have somehow not caught this and - // raised a proper error - DCHECK(*charp == 'E' || *charp == 'e'); - - ++charp; - - const char value = *charp; - const bool starts_with_plus_or_minus = value == '+' || value == '-'; - - // we use this to construct the adjusted exponent integer later - std::string::const_iterator digit_start = charp; - - // skip plus or minus - charp += starts_with_plus_or_minus; - - // confirm that the rest of the characters are digits - charp = std::find_if_not(charp, end, isdigit); - - if (charp != end) { - // we have something other than digits here - std::stringstream ss; - ss << "Found non decimal digit exponent value '" << *charp << "'"; - return Status::Invalid(ss.str()); - } - - if (scale != NULLPTR) { - // compute the scale from the adjusted exponent - std::string adjusted_exponent_string(digit_start, end); - DCHECK(std::all_of(adjusted_exponent_string.cbegin() + starts_with_plus_or_minus, - adjusted_exponent_string.cend(), isdigit)) - << "Non decimal digit character found in " << adjusted_exponent_string; - const auto adjusted_exponent = - static_cast(std::stol(adjusted_exponent_string)); - const auto len = static_cast(whole_part.size() + fractional_part.size()); - + if (scale != NULLPTR) { + if (!exponent_value.empty()) { + auto adjusted_exponent = static_cast(std::stol(exponent_value)); + auto len = static_cast(whole_part.size() + fractional_part.size()); *scale = -adjusted_exponent + len - 1; - } - } else { - if (scale != NULLPTR) { - *scale = static_cast(fractional_part.size()); + } else { + *scale = static_cast(fractional_part.size()); } } if (out != NULLPTR) { - // zero out in case we've passed in a previously used value *out = 0; StringToInteger(whole_part + fractional_part, out); - if (is_negative) { + if (sign == "-") { out->Negate(); } + + if (scale != NULLPTR && *scale < 0) { + const int32_t abs_scale = std::abs(*scale); + *out *= ScaleMultipliers[abs_scale]; + + if (precision != NULLPTR) { + *precision += abs_scale; + } + *scale = 0; + } } return Status::OK(); @@ -813,47 +818,6 @@ Decimal128 operator%(const Decimal128& left, const Decimal128& right) { return remainder; } -static const Decimal128 ScaleMultipliers[] = { - Decimal128(1), - Decimal128(10), - Decimal128(100), - Decimal128(1000), - Decimal128(10000), - Decimal128(100000), - Decimal128(1000000), - Decimal128(10000000), - Decimal128(100000000), - Decimal128(1000000000), - Decimal128(10000000000), - Decimal128(100000000000), - Decimal128(1000000000000), - Decimal128(10000000000000), - Decimal128(100000000000000), - Decimal128(1000000000000000), - Decimal128(10000000000000000), - Decimal128(100000000000000000), - Decimal128(1000000000000000000), - Decimal128("10000000000000000000"), - Decimal128("100000000000000000000"), - Decimal128("1000000000000000000000"), - Decimal128("10000000000000000000000"), - Decimal128("100000000000000000000000"), - Decimal128("1000000000000000000000000"), - Decimal128("10000000000000000000000000"), - Decimal128("100000000000000000000000000"), - Decimal128("1000000000000000000000000000"), - Decimal128("10000000000000000000000000000"), - Decimal128("100000000000000000000000000000"), - Decimal128("1000000000000000000000000000000"), - Decimal128("10000000000000000000000000000000"), - Decimal128("100000000000000000000000000000000"), - Decimal128("1000000000000000000000000000000000"), - Decimal128("10000000000000000000000000000000000"), - Decimal128("100000000000000000000000000000000000"), - Decimal128("1000000000000000000000000000000000000"), - Decimal128("10000000000000000000000000000000000000"), - Decimal128("100000000000000000000000000000000000000")}; - static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t delta_scale, int32_t abs_delta_scale, Decimal128* result) { Decimal128 multiplier(ScaleMultipliers[abs_delta_scale]); diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h index 1594090a0d3..79a99ba6aba 100644 --- a/cpp/src/arrow/util/decimal.h +++ b/cpp/src/arrow/util/decimal.h @@ -124,7 +124,7 @@ class ARROW_EXPORT Decimal128 { /// \brief Convert a decimal string to an Decimal128 value, optionally including /// precision and scale if they're passed in and not null. static Status FromString(const std::string& s, Decimal128* out, - int* precision = NULLPTR, int* scale = NULLPTR); + int32_t* precision = NULLPTR, int32_t* scale = NULLPTR); /// \brief Convert Decimal128 from one scale to another Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const; diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 8423ff00b67..516431a74a8 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -639,3 +639,10 @@ def test_structarray_from_arrays_coerce(): pa.StructArray.from_arrays(arrays) assert result.equals(expected) + + +def test_decimal_array_with_none_and_nan(): + values = [decimal.Decimal('1.234'), None, np.nan, decimal.Decimal('nan')] + array = pa.array(values) + assert array.type == pa.decimal128(4, 3) + assert array.to_pylist() == values[:2] + [None, None] diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 986aeffcacc..c5294bf91e3 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -1161,6 +1161,16 @@ def test_decimal_fails_with_truncation(self): with pytest.raises(pa.ArrowException): pa.array(data2, type=type2) + def test_decimal_with_different_precisions(self): + data = [ + decimal.Decimal('0.01'), + decimal.Decimal('0.001'), + ] + series = pd.Series(data) + array = pa.array(series) + assert array.to_pylist() == data + assert array.type == pa.decimal128(3, 3) + class TestListTypes(object): """ From f562378b2422a38dd5c7c378f0d2acd18348acd3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 23 Feb 2018 12:53:48 -0500 Subject: [PATCH 02/31] IWYU --- cpp/src/arrow/python/helpers.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index efbcdf9b4db..908498dd116 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include +#include #include #include "arrow/python/common.h" From 8893a45c2a2adcfb3b1c1a4767bf9021d02b9b56 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 23 Feb 2018 12:58:24 -0500 Subject: [PATCH 03/31] Revert header change --- cpp/src/arrow/python/python-test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index 2c44427866a..7ff8d5817e6 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include +#include "gtest/gtest.h" -#include +#include #include "arrow/python/platform.h" From 0665f6e9c4bf66603c76dd243cdc4e8c93bc5c6d Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 23 Feb 2018 13:00:24 -0500 Subject: [PATCH 04/31] Revert test change --- cpp/src/arrow/python/python-test.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index 7ff8d5817e6..b23510d2d05 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -33,6 +33,15 @@ namespace arrow { namespace py { +TEST(PyBuffer, InvalidInputObject) { + std::shared_ptr res; + PyObject* input = Py_None; + auto old_refcnt = Py_REFCNT(input); + ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); + PyErr_Clear(); + ASSERT_EQ(old_refcnt, Py_REFCNT(input)); +} + TEST(OwnedRef, TestMoves) { PyAcquireGIL lock; std::vector vec; @@ -93,15 +102,6 @@ class DecimalTest : public ::testing::Test { OwnedRef decimal_constructor_; }; -TEST(PyBuffer, InvalidInputObject) { - std::shared_ptr res; - PyObject* input = Py_None; - auto old_refcnt = Py_REFCNT(input); - ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); - PyErr_Clear(); - ASSERT_EQ(old_refcnt, Py_REFCNT(input)); -} - TEST_F(DecimalTest, TestPythonDecimalToString) { std::string decimal_string("-39402950693754869342983"); From e6ac864648239b28e1a476fdb760eda42c2436b0 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 23 Feb 2018 16:28:35 -0500 Subject: [PATCH 05/31] Install libboost-regex-dev on travis --- ci/travis_install_linux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis_install_linux.sh b/ci/travis_install_linux.sh index acee9ebcb2a..74fde277413 100755 --- a/ci/travis_install_linux.sh +++ b/ci/travis_install_linux.sh @@ -19,7 +19,7 @@ sudo apt-get install -y -q \ gdb ccache libboost-dev libboost-filesystem-dev \ - libboost-system-dev libjemalloc-dev + libboost-system-dev libboost-regex-dev libjemalloc-dev if [ "$ARROW_TRAVIS_VALGRIND" == "1" ]; then sudo apt-get install -y -q valgrind From 50e35d6c9a325c38ab323d34df5c9b4b057c3633 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 09:43:04 -0600 Subject: [PATCH 06/31] Use shared boost on parquet CI build --- ci/travis_build_parquet_cpp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis_build_parquet_cpp.sh b/ci/travis_build_parquet_cpp.sh index 7d2e3ab7364..f64a85d621d 100755 --- a/ci/travis_build_parquet_cpp.sh +++ b/ci/travis_build_parquet_cpp.sh @@ -38,7 +38,7 @@ cmake \ -GNinja \ -DCMAKE_BUILD_TYPE=debug \ -DCMAKE_INSTALL_PREFIX=$ARROW_PYTHON_PARQUET_HOME \ - -DPARQUET_BOOST_USE_SHARED=off \ + -DPARQUET_BOOST_USE_SHARED=on \ -DPARQUET_BUILD_BENCHMARKS=off \ -DPARQUET_BUILD_EXECUTABLES=off \ -DPARQUET_BUILD_TESTS=off \ From 8be22a69b003411235e373617a6ba736a4bbc881 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 13:54:33 -0600 Subject: [PATCH 07/31] Install boost with c++11 option --- c_glib/Brewfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/Brewfile b/c_glib/Brewfile index 9fe5c3b6163..6ebd1dec4d1 100644 --- a/c_glib/Brewfile +++ b/c_glib/Brewfile @@ -16,7 +16,7 @@ # under the License. brew "autoconf-archive" -brew "boost" +brew "boost", args: ["--c++11"] brew "ccache" brew "cmake" brew "git" From 7c7270a067960e0d5727f5058c43e4e6c581c384 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 14:31:01 -0600 Subject: [PATCH 08/31] Show boost install --- c_glib/Brewfile | 1 - ci/travis_before_script_c_glib.sh | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/Brewfile b/c_glib/Brewfile index 6ebd1dec4d1..0a190865215 100644 --- a/c_glib/Brewfile +++ b/c_glib/Brewfile @@ -16,7 +16,6 @@ # under the License. brew "autoconf-archive" -brew "boost", args: ["--c++11"] brew "ccache" brew "cmake" brew "git" diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index 27d1e86fd95..30048e60c4f 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -23,6 +23,7 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh if [ $TRAVIS_OS_NAME = "osx" ]; then brew update && brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile + brew install boost --c++11 else # Linux sudo apt-get install -y -q gtk-doc-tools autoconf-archive libgirepository1.0-dev fi From 77a41ee1ac0a0229886e6d285ea676fa2f6c8ef7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 15:20:11 -0600 Subject: [PATCH 09/31] Install boost first --- ci/travis_before_script_c_glib.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index 30048e60c4f..259bec2188e 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -22,8 +22,7 @@ set -ex source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh if [ $TRAVIS_OS_NAME = "osx" ]; then - brew update && brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile - brew install boost --c++11 + brew update && brew install boost --c++11 && brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile else # Linux sudo apt-get install -y -q gtk-doc-tools autoconf-archive libgirepository1.0-dev fi From 4c74c63cd622c62df2a8cc3bee7939cddcef8d90 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:20:05 -0600 Subject: [PATCH 10/31] NULLPTR to nullptr --- cpp/src/arrow/util/decimal.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 816bb4ab075..48380a9c980 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -92,7 +92,7 @@ std::array Decimal128::ToBytes() const { } void Decimal128::ToBytes(uint8_t* out) const { - DCHECK_NE(out, NULLPTR); + DCHECK_NE(out, nullptr); reinterpret_cast(out)[0] = BitUtil::ToLittleEndian(low_bits_); reinterpret_cast(out)[1] = BitUtil::ToLittleEndian(high_bits_); } @@ -233,7 +233,7 @@ static constexpr int64_t kPowersOfTen[kInt64DecimalDigits + 1] = {1LL, static void StringToInteger(const std::string& str, Decimal128* out) { using std::size_t; - DCHECK_NE(out, NULLPTR) << "Decimal128 output variable cannot be NULLPTR"; + DCHECK_NE(out, nullptr) << "Decimal128 output variable cannot be nullptr"; DCHECK_EQ(*out, 0) << "When converting a string to Decimal128 the initial output must be 0"; @@ -283,11 +283,11 @@ Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* pr // case of all zeros if (std::all_of(s.cbegin(), s.cend(), is_zero_character)) { - if (precision != NULLPTR) { + if (precision != nullptr) { *precision = 0; } - if (scale != NULLPTR) { + if (scale != nullptr) { *scale = 0; } @@ -300,7 +300,7 @@ Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* pr if (!matches) { std::stringstream ss; - ss << s << " does not match"; + ss << "The string " << s << " is not a valid decimal number"; return Status::Invalid(ss.str()); } @@ -341,11 +341,11 @@ Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* pr exponent_value = second_exp_value; } - if (precision != NULLPTR) { + if (precision != nullptr) { *precision = static_cast(whole_part.size() + fractional_part.size()); } - if (scale != NULLPTR) { + if (scale != nullptr) { if (!exponent_value.empty()) { auto adjusted_exponent = static_cast(std::stol(exponent_value)); auto len = static_cast(whole_part.size() + fractional_part.size()); @@ -355,18 +355,18 @@ Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* pr } } - if (out != NULLPTR) { + if (out != nullptr) { *out = 0; StringToInteger(whole_part + fractional_part, out); if (sign == "-") { out->Negate(); } - if (scale != NULLPTR && *scale < 0) { + if (scale != nullptr && *scale < 0) { const int32_t abs_scale = std::abs(*scale); *out *= ScaleMultipliers[abs_scale]; - if (precision != NULLPTR) { + if (precision != nullptr) { *precision += abs_scale; } *scale = 0; @@ -836,7 +836,7 @@ static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t delta_sca Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const { - DCHECK_NE(out, NULLPTR) << "out is nullptr"; + DCHECK_NE(out, nullptr) << "out is nullptr"; DCHECK_NE(original_scale, new_scale) << "original_scale != new_scale"; const int32_t delta_scale = new_scale - original_scale; From d9052029ac5d3de94d4e76733ccd4b27dfb61ae9 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:20:15 -0600 Subject: [PATCH 11/31] DCHECK_OK --- cpp/src/arrow/util/logging.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h index 4ca4d22527f..23880b8b924 100644 --- a/cpp/src/arrow/util/logging.h +++ b/cpp/src/arrow/util/logging.h @@ -30,7 +30,7 @@ namespace arrow { // // Add more as needed. -// Log levels. LOG ignores them, so their values are abitrary. +// Log levels. LOG ignores them, so their values are arbitrary. #define ARROW_DEBUG (-1) #define ARROW_INFO 0 @@ -76,6 +76,7 @@ namespace arrow { #define ARROW_DFATAL ARROW_FATAL #define DCHECK(condition) ARROW_CHECK(condition) +#define DCHECK_OK(status) (ARROW_CHECK((status).ok()) << (status).message()) #define DCHECK_EQ(val1, val2) ARROW_CHECK((val1) == (val2)) #define DCHECK_NE(val1, val2) ARROW_CHECK((val1) != (val2)) #define DCHECK_LE(val1, val2) ARROW_CHECK((val1) <= (val2)) From 281f79841ee54719a69f3614673bfd9a5c2cdb38 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:20:39 -0600 Subject: [PATCH 12/31] DCHECK_OK --- cpp/src/arrow/python/builtin_convert.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index 891793cc9dc..d84959ee23e 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -83,10 +83,10 @@ class ScalarVisitor { decimal_type_() { OwnedRefNoGIL decimal_module; Status status = ::arrow::py::internal::ImportModule("decimal", &decimal_module); - DCHECK(status.ok()) << "Unable to import decimal module"; + DCHECK_OK(status); status = ::arrow::py::internal::ImportFromModule(decimal_module, "Decimal", &decimal_type_); - DCHECK(status.ok()) << "Unable to import decimal.Decimal"; + DCHECK_OK(status); } Status Visit(PyObject* obj) { From 1df6923d1c1fabcb8073aa2e758c1962d18fd391 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:21:05 -0600 Subject: [PATCH 13/31] DCHECK_OK --- cpp/src/arrow/python/helpers.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index 908498dd116..ab04ef18833 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -227,10 +227,9 @@ bool PyDecimal_Check(PyObject* obj) { OwnedRef Decimal; OwnedRef decimal; Status status = ImportModule("decimal", &decimal); - DCHECK(status.ok()) << "Error during import of the decimal module"; + DCHECK_OK(status); status = ImportFromModule(decimal, "Decimal", &Decimal); - DCHECK(status.ok()) - << "Error during import of the Decimal object from the decimal module"; + DCHECK_OK(status); const int32_t result = PyObject_IsInstance(obj, Decimal.obj()); DCHECK_NE(result, -1) << " error during PyObject_IsInstance check"; return result == 1; From db664f229986f4bf4a34343b8fd04c6780801fef Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:21:39 -0600 Subject: [PATCH 14/31] DCHECK_Ok --- cpp/src/arrow/python/python-test.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index b23510d2d05..7ad3d8fa778 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -80,14 +80,12 @@ class DecimalTest : public ::testing::Test { public: DecimalTest() : lock_(), decimal_constructor_() { OwnedRef decimal_module; - auto s = internal::ImportModule("decimal", &decimal_module); - DCHECK(s.ok()) << s.message(); - DCHECK_NE(decimal_module.obj(), NULLPTR); - s = internal::ImportFromModule(decimal_module, "Decimal", &decimal_constructor_); - DCHECK(s.ok()) << s.message(); + Status status = internal::ImportModule("decimal", &decimal_module); + DCHECK_OK(status); - DCHECK_NE(decimal_constructor_.obj(), NULLPTR); + status = internal::ImportFromModule(decimal_module, "Decimal", &decimal_constructor_); + DCHECK_OK(status); } OwnedRef CreatePythonDecimal(const std::string& string_value) { From 092a96247d41503b961b3c4e1dfcefb7d391b03f Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:22:37 -0600 Subject: [PATCH 15/31] Fix order of operands --- cpp/src/arrow/python/python-test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index 7ad3d8fa778..51698aac7cb 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -283,10 +283,10 @@ TEST_F(DecimalTest, TestNoneAndNaN) { // This steals a reference to each object, so we don't need to decref them later, // just the list - ASSERT_EQ(PyList_SetItem(list, 0, decimal_value), 0); - ASSERT_EQ(PyList_SetItem(list, 1, missing_value1), 0); - ASSERT_EQ(PyList_SetItem(list, 2, missing_value2), 0); - ASSERT_EQ(PyList_SetItem(list, 3, missing_value3), 0); + ASSERT_EQ(0, PyList_SetItem(list, 0, decimal_value)); + ASSERT_EQ(0, PyList_SetItem(list, 1, missing_value1)); + ASSERT_EQ(0, PyList_SetItem(list, 2, missing_value2)); + ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3)); MemoryPool* pool = default_memory_pool(); std::shared_ptr arr; From 418754ff200fba3b9049404c4f93b4370b2200aa Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:22:55 -0600 Subject: [PATCH 16/31] Check return value of PyList_SetItem --- cpp/src/arrow/python/python-test.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index 51698aac7cb..a8e642bbf9a 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -306,9 +306,11 @@ TEST_F(DecimalTest, TestMixedPrecisionAndScale) { ASSERT_NE(list, nullptr); // PyList_SetItem steals a reference to the item so we don't decref it later - for (size_t i = 0; i < strings.size(); ++i) { - PyList_SetItem( - list, i, internal::DecimalFromString(this->decimal_constructor(), strings.at(i))); + PyObject* decimal_constructor = this->decimal_constructor(); + for (Py_ssize_t i = 0; i < static_cast(strings.size()); ++i) { + const int result = PyList_SetItem( + list, i, internal::DecimalFromString(decimal_constructor, strings.at(i))); + ASSERT_EQ(0, result); } MemoryPool* pool = default_memory_pool(); From b24ff259875b02f922cc9a353cfff03e02fbfa80 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:23:31 -0600 Subject: [PATCH 17/31] Add DecimalMetadata::Update test for ignoring NaN values --- cpp/src/arrow/python/python-test.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index a8e642bbf9a..16ac1e332b1 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -359,5 +359,13 @@ TEST_F(DecimalTest, SimpleInference) { ASSERT_EQ(2, metadata.scale()); } +TEST_F(DecimalTest, UpdateWithNaN) { + internal::DecimalMetadata metadata; + OwnedRef nan_value(this->CreatePythonDecimal("nan")); + ASSERT_OK(metadata.Update(nan_value.obj())); + ASSERT_EQ(std::numeric_limits::min(), metadata.precision()); + ASSERT_EQ(std::numeric_limits::min(), metadata.scale()); +} + } // namespace py } // namespace arrow From 3190b1a379de7f66fd5a54a0c0541bcc07f1bf50 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 18:23:50 -0600 Subject: [PATCH 18/31] Ignore nans in decimal metadata update --- cpp/src/arrow/python/builtin_convert.cc | 5 +---- cpp/src/arrow/python/helpers.cc | 7 +++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index d84959ee23e..7b345692c4d 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -123,10 +123,7 @@ class ScalarVisitor { return Status::Invalid(ss.str()); } } else if (PyObject_IsInstance(obj, decimal_type_.obj())) { - // Don't infer anything if we encounter a Decimal('nan') - if (!internal::PyDecimal_ISNAN(obj)) { - RETURN_NOT_OK(max_decimal_metadata_.Update(obj)); - } + RETURN_NOT_OK(max_decimal_metadata_.Update(obj)); ++decimal_count_; } else { // TODO(wesm): accumulate error information somewhere diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index ab04ef18833..5746b69362a 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -266,8 +266,11 @@ Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_sc Status DecimalMetadata::Update(PyObject* object) { DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal"; - DCHECK(!PyDecimal_ISNAN(object)) - << "Decimal object cannot be NAN when inferring precision and scale"; + + if (ARROW_PREDICT_FALSE(PyDecimal_ISNAN(object))) { + return Status::OK(); + } + int32_t precision; int32_t scale; RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale)); From a05b31617529ee248c7940865f34880a4c772eee Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 19:31:12 -0600 Subject: [PATCH 19/31] Refactor import decimal and acquire the gil before importing --- cpp/src/arrow/python/builtin_convert.cc | 8 +++----- cpp/src/arrow/python/helpers.cc | 12 ++++++++---- cpp/src/arrow/python/helpers.h | 3 +++ cpp/src/arrow/python/numpy_to_arrow.cc | 18 +++++++++++++++--- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index 7b345692c4d..bb88eb48cd1 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -81,11 +81,9 @@ class ScalarVisitor { max_decimal_metadata_(std::numeric_limits::min(), std::numeric_limits::min()), decimal_type_() { - OwnedRefNoGIL decimal_module; - Status status = ::arrow::py::internal::ImportModule("decimal", &decimal_module); - DCHECK_OK(status); - status = ::arrow::py::internal::ImportFromModule(decimal_module, "Decimal", - &decimal_type_); + + PyAcquireGIL lock; + Status status = internal::ImportDecimalType(&decimal_type_); DCHECK_OK(status); } diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc index 5746b69362a..429068dd1ac 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -79,6 +79,13 @@ Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRe return Status::OK(); } +Status ImportDecimalType(OwnedRef* decimal_type) { + OwnedRef decimal_module; + RETURN_NOT_OK(ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK(ImportFromModule(decimal_module, "Decimal", decimal_type)); + return Status::OK(); +} + Status PythonDecimalToString(PyObject* python_decimal, std::string* out) { // Call Python's str(decimal_object) OwnedRef str_obj(PyObject_Str(python_decimal)); @@ -225,10 +232,7 @@ bool PyFloat_isnan(PyObject* obj) { bool PyDecimal_Check(PyObject* obj) { // TODO(phillipc): Is this expensive? OwnedRef Decimal; - OwnedRef decimal; - Status status = ImportModule("decimal", &decimal); - DCHECK_OK(status); - status = ImportFromModule(decimal, "Decimal", &Decimal); + Status status = ImportDecimalType(&Decimal); DCHECK_OK(status); const int32_t result = PyObject_IsInstance(obj, Decimal.obj()); DCHECK_NE(result, -1) << " error during PyObject_IsInstance check"; diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h index d39c62824c2..6be0e49b18a 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/cpp/src/arrow/python/helpers.h @@ -55,6 +55,9 @@ Status ImportModule(const std::string& module_name, OwnedRef* ref); // module Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRef* ref); +// \brief Import +Status ImportDecimalType(OwnedRef* decimal_type); + // \brief Convert a Python Decimal object to a C++ string // \param[in] python_decimal A Python decimal.Decimal instance // \param[out] The string representation of the Python Decimal instance diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 79a911ba457..04a71c1f640 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -300,13 +300,18 @@ class NumPyConverter { arr_(reinterpret_cast(ao)), dtype_(PyArray_DESCR(arr_)), mask_(nullptr), - use_pandas_null_sentinels_(use_pandas_null_sentinels) { + use_pandas_null_sentinels_(use_pandas_null_sentinels), + decimal_type_() { if (mo != nullptr && mo != Py_None) { mask_ = reinterpret_cast(mo); } length_ = static_cast(PyArray_SIZE(arr_)); itemsize_ = static_cast(PyArray_DESCR(arr_)->elsize); stride_ = static_cast(PyArray_STRIDES(arr_)[0]); + + PyAcquireGIL lock; + Status status = internal::ImportDecimalType(&decimal_type_); + DCHECK_OK(status); } bool is_strided() const { return itemsize_ != stride_; } @@ -481,6 +486,8 @@ class NumPyConverter { bool use_pandas_null_sentinels_; + OwnedRefNoGIL decimal_type_; + // Used in visitor pattern std::vector> out_arrays_; @@ -751,11 +758,16 @@ Status NumPyConverter::ConvertDecimals() { const auto& decimal_type = static_cast(*type_); for (PyObject* object : objects) { - if (ARROW_PREDICT_FALSE(!internal::PyDecimal_Check(object))) { + const int is_decimal = PyObject_IsInstance(object, decimal_type_.obj()); + + if (ARROW_PREDICT_FALSE(is_decimal == 0)) { std::stringstream ss; ss << "Error converting from Python objects to Decimal: "; RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss)); return Status::Invalid(ss.str()); + } else if (ARROW_PREDICT_FALSE(is_decimal == -1)) { + DCHECK_NE(PyErr_Occurred(), nullptr); + RETURN_IF_PYERROR(); } if (PandasObjectIsNull(object)) { @@ -1033,7 +1045,7 @@ Status NumPyConverter::ConvertObjectsInfer() { return ConvertDateTimes(); } else if (PyTime_Check(obj)) { return ConvertTimes(); - } else if (internal::PyDecimal_Check(obj)) { + } else if (PyObject_IsInstance(obj, decimal_type_.obj()) == 1) { return ConvertDecimals(); } else if (PyList_Check(obj)) { std::shared_ptr inferred_type; From 4e6db3c7482341523b3f78f5d0ab40c9e6f5d11d Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 19:31:30 -0600 Subject: [PATCH 20/31] Formatting --- cpp/src/arrow/python/builtin_convert.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index bb88eb48cd1..d2f900f6ae8 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -81,7 +81,6 @@ class ScalarVisitor { max_decimal_metadata_(std::numeric_limits::min(), std::numeric_limits::min()), decimal_type_() { - PyAcquireGIL lock; Status status = internal::ImportDecimalType(&decimal_type_); DCHECK_OK(status); From 29e1ebc48bd481dfb8d87b0aaa5708dbb0bb69d4 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 26 Feb 2018 19:38:45 -0600 Subject: [PATCH 21/31] boost osx debugging --- ci/travis_before_script_c_glib.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index 259bec2188e..bfd421fa7c1 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -22,7 +22,10 @@ set -ex source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh if [ $TRAVIS_OS_NAME = "osx" ]; then - brew update && brew install boost --c++11 && brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile + brew update && \ + brew remove boost --force && \ + brew install boost --c++11 && \ + brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile else # Linux sudo apt-get install -y -q gtk-doc-tools autoconf-archive libgirepository1.0-dev fi From b4bcfd97a36df8fcb6b117e1330e095fc652d742 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 10:28:39 -0500 Subject: [PATCH 22/31] DCHECK_OK for release builds --- cpp/src/arrow/util/logging.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h index 23880b8b924..c823f06bdf6 100644 --- a/cpp/src/arrow/util/logging.h +++ b/cpp/src/arrow/util/logging.h @@ -53,6 +53,9 @@ namespace arrow { #define DCHECK(condition) \ ARROW_IGNORE_EXPR(condition) \ while (false) ::arrow::internal::NullLog() +#define DCHECK_OK(status) \ + ARROW_IGNORE_EXPR(status) \ + while (false) ::arrow::internal::NullLog() #define DCHECK_EQ(val1, val2) \ ARROW_IGNORE_EXPR(val1) \ while (false) ::arrow::internal::NullLog() From 78cbf51cb3aa53e6df214333d0c942926ced5269 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 10:31:46 -0500 Subject: [PATCH 23/31] More script debugging --- ci/travis_before_script_c_glib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index bfd421fa7c1..724b746ff7b 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -23,7 +23,7 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh if [ $TRAVIS_OS_NAME = "osx" ]; then brew update && \ - brew remove boost --force && \ + brew remove boost --force --ignore-dependencies && \ brew install boost --c++11 && \ brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile else # Linux From 03ee9995021b4bebacaf3b3151349d409b083898 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 17:25:40 -0500 Subject: [PATCH 24/31] Fix boost root --- .travis.yml | 5 +++-- ci/travis_before_script_c_glib.sh | 7 +------ ci/travis_before_script_cpp.sh | 24 ++++++++++++++++++++---- ci/travis_install_osx.sh | 21 +++++++++++++++++++++ 4 files changed, 45 insertions(+), 12 deletions(-) create mode 100644 ci/travis_install_osx.sh diff --git a/.travis.yml b/.travis.yml index a4c74657e52..b1241e793bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -174,7 +174,7 @@ matrix: - $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh script: - $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh - # [OS X] C++ & glib w/ XCode 8.3 & autotools + # [OS X] C++ & glib w/ XCode 8.3 & autotools & homebrew - compiler: clang osx_image: xcode8.3 os: osx @@ -185,7 +185,8 @@ matrix: - BUILD_SYSTEM=autotools before_script: - if [ $ARROW_CI_C_GLIB_AFFECTED != "1" ]; then exit; fi - - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library + - $TRAVIS_BUILD_DIR/ci/travis_install_osx.sh + - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library --homebrew - $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh script: - $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index 724b746ff7b..033fbd7c6ce 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -21,12 +21,7 @@ set -ex source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh -if [ $TRAVIS_OS_NAME = "osx" ]; then - brew update && \ - brew remove boost --force --ignore-dependencies && \ - brew install boost --c++11 && \ - brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile -else # Linux +if [ $TRAVIS_OS_NAME = "linux" ]; then sudo apt-get install -y -q gtk-doc-tools autoconf-archive libgirepository1.0-dev fi diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh index 17b5deb36b5..b9afbee7863 100755 --- a/ci/travis_before_script_cpp.sh +++ b/ci/travis_before_script_cpp.sh @@ -22,10 +22,22 @@ set -ex source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh -if [ "$1" == "--only-library" ]; then - only_library_mode=yes -else - only_library_mode=no +only_library_mode=no +using_homebrew=no + +while true; do + case "$1" in + --only-library) + only_library_mode=yes + shift ;; + --homebrew) + using_homebrew=yes + shift ;; + *) break ;; + esac +done + +if [ "$only_library_mode" == "no" ]; then source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh fi @@ -78,6 +90,10 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then -DBUILD_WARNING_LEVEL=$ARROW_BUILD_WARNING_LEVEL \ $ARROW_CPP_DIR else + if [ "$using_homebrew" = "yes" ]; then + # build against homebrew's boost if we're using it + export BOOST_ROOT=/usr/local/opt/boost + fi cmake $CMAKE_COMMON_FLAGS \ $CMAKE_OSX_FLAGS \ -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ diff --git a/ci/travis_install_osx.sh b/ci/travis_install_osx.sh new file mode 100644 index 00000000000..3fa0b299e0a --- /dev/null +++ b/ci/travis_install_osx.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +brew update +brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile From ae5db5fd20547cf02bb3a366903603b8535303ed Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 18:04:18 -0500 Subject: [PATCH 25/31] Perms --- ci/travis_install_osx.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/travis_install_osx.sh diff --git a/ci/travis_install_osx.sh b/ci/travis_install_osx.sh old mode 100644 new mode 100755 From 99505a9f1fd01814855f270502c20e4d27cada0d Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 18:39:47 -0500 Subject: [PATCH 26/31] Silence cmake complaints about boost version --- c_glib/Brewfile | 1 + 1 file changed, 1 insertion(+) diff --git a/c_glib/Brewfile b/c_glib/Brewfile index 0a190865215..75c070d1a67 100644 --- a/c_glib/Brewfile +++ b/c_glib/Brewfile @@ -16,6 +16,7 @@ # under the License. brew "autoconf-archive" +brew "boost@1.65" brew "ccache" brew "cmake" brew "git" From 00be57814db2a0a856be809b537078a842a63158 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 18:46:33 -0500 Subject: [PATCH 27/31] Add tests to accommodate decimal values --- python/pyarrow/tests/test_convert_builtin.py | 3 +++ python/pyarrow/tests/test_convert_pandas.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 516431a74a8..19b59a49b6d 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -646,3 +646,6 @@ def test_decimal_array_with_none_and_nan(): array = pa.array(values) assert array.type == pa.decimal128(4, 3) assert array.to_pylist() == values[:2] + [None, None] + + array = pa.array(values, type=pa.decimal128(10, 4)) + assert array.to_pylist() == [decimal.Decimal('1.2340'), None, None, None] diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index c5294bf91e3..813fbdf40bb 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -1171,6 +1171,10 @@ def test_decimal_with_different_precisions(self): assert array.to_pylist() == data assert array.type == pa.decimal128(3, 3) + array = pa.array(data, type=pa.decimal128(12, 5)) + expected = [decimal.Decimal('0.01000'), decimal.Decimal('0.00100')] + assert array.to_pylist() == expected + class TestListTypes(object): """ From ab3e4a544a370d3168dcec4718a0b8e943e22607 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 28 Feb 2018 18:54:01 -0500 Subject: [PATCH 28/31] Brewfile --- c_glib/Brewfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/Brewfile b/c_glib/Brewfile index 75c070d1a67..2ebf10f335b 100644 --- a/c_glib/Brewfile +++ b/c_glib/Brewfile @@ -16,7 +16,7 @@ # under the License. brew "autoconf-archive" -brew "boost@1.65" +brew "boost@1.65.0" brew "ccache" brew "cmake" brew "git" From 0d45688aeb3525398843e3cb80bb22586f41ca53 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 1 Mar 2018 10:38:32 -0500 Subject: [PATCH 29/31] Pass version as argument --- c_glib/Brewfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/Brewfile b/c_glib/Brewfile index 2ebf10f335b..531824942fd 100644 --- a/c_glib/Brewfile +++ b/c_glib/Brewfile @@ -16,7 +16,7 @@ # under the License. brew "autoconf-archive" -brew "boost@1.65.0" +brew "boost", ["1.65.0"] brew "ccache" brew "cmake" brew "git" From 1fc2a96366e1018cf94b1d71f09ebe1d84dce36b Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 1 Mar 2018 12:20:23 -0500 Subject: [PATCH 30/31] Args must be a ruby Hash --- c_glib/Brewfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/Brewfile b/c_glib/Brewfile index 531824942fd..955072e1ea7 100644 --- a/c_glib/Brewfile +++ b/c_glib/Brewfile @@ -16,7 +16,7 @@ # under the License. brew "autoconf-archive" -brew "boost", ["1.65.0"] +brew "boost", args: ["1.65.0"] brew "ccache" brew "cmake" brew "git" From 97fcb961fd3581432ffd121fb4c54d61640e070a Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 1 Mar 2018 15:11:07 -0500 Subject: [PATCH 31/31] Make sure we only install if glibc is affected --- ci/travis_install_osx.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/travis_install_osx.sh b/ci/travis_install_osx.sh index 3fa0b299e0a..b03a5f16a31 100755 --- a/ci/travis_install_osx.sh +++ b/ci/travis_install_osx.sh @@ -17,5 +17,7 @@ # specific language governing permissions and limitations # under the License. -brew update -brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile +if [ "$ARROW_CI_C_GLIB_AFFECTED" = "1" ]; then + brew update + brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile +fi