Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions cpp/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -639,11 +639,11 @@ static Status ConvertTimes(PandasOptions options, const ChunkedArray& data,
static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
PyObject** out_values) {
PyAcquireGIL lock;
OwnedRef decimal_ref;
OwnedRef Decimal_ref;
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref));
RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal", &Decimal_ref));
PyObject* Decimal = Decimal_ref.obj();
OwnedRef decimal;
OwnedRef Decimal;
RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
PyObject* decimal_constructor = Decimal.obj();

for (int c = 0; c < data.num_chunks(); c++) {
const auto& arr = static_cast<const arrow::Decimal128Array&>(*data.chunk(c));
Expand All @@ -653,7 +653,8 @@ static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
Py_INCREF(Py_None);
*out_values++ = Py_None;
} else {
*out_values++ = internal::DecimalFromString(Decimal, arr.FormatValue(i));
*out_values++ =
internal::DecimalFromString(decimal_constructor, arr.FormatValue(i));
RETURN_IF_PYERROR();
}
}
Expand Down
46 changes: 36 additions & 10 deletions cpp/src/arrow/python/builtin_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,18 @@ class ScalarVisitor {
timestamp_count_(0),
float_count_(0),
binary_count_(0),
unicode_count_(0) {}
unicode_count_(0),
decimal_count_(0),
max_decimal_metadata_(std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::min()),
decimal_type_() {
OwnedRefNoGIL decimal_module;
Status status = ::arrow::py::internal::ImportModule("decimal", &decimal_module);
DCHECK(status.ok()) << "Unable to import decimal module";
status = ::arrow::py::internal::ImportFromModule(decimal_module, "Decimal",
&decimal_type_);
DCHECK(status.ok()) << "Unable to import decimal.Decimal";
}

Status Visit(PyObject* obj) {
++total_count_;
Expand Down Expand Up @@ -111,10 +122,16 @@ class ScalarVisitor {
ss << type->ToString();
return Status::Invalid(ss.str());
}
} else if (PyObject_IsInstance(obj, decimal_type_.obj())) {
// Don't infer anything if we encounter a Decimal('nan')
if (!internal::PyDecimal_ISNAN(obj)) {
RETURN_NOT_OK(max_decimal_metadata_.Update(obj));
}
++decimal_count_;
} else {
// TODO(wesm): accumulate error information somewhere
static std::string supported_types =
"bool, float, integer, date, datetime, bytes, unicode";
"bool, float, integer, date, datetime, bytes, unicode, decimal";
std::stringstream ss;
ss << "Error inferring Arrow data type for collection of Python objects. ";
RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss));
Expand All @@ -125,7 +142,9 @@ class ScalarVisitor {

std::shared_ptr<DataType> GetType() {
// TODO(wesm): handling mixed-type cases
if (float_count_) {
if (decimal_count_) {
return decimal(max_decimal_metadata_.precision(), max_decimal_metadata_.scale());
} else if (float_count_) {
return float64();
} else if (int_count_) {
// TODO(wesm): tighter type later
Expand Down Expand Up @@ -157,8 +176,13 @@ class ScalarVisitor {
int64_t float_count_;
int64_t binary_count_;
int64_t unicode_count_;
int64_t decimal_count_;

internal::DecimalMetadata max_decimal_metadata_;

// Place to accumulate errors
// std::vector<Status> errors_;
OwnedRefNoGIL decimal_type_;
};

static constexpr int MAX_NESTING_LEVELS = 32;
Expand Down Expand Up @@ -379,17 +403,14 @@ class TypedConverter : public SeqConverter {
BuilderType* typed_builder_;
};

// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull()
// We use the CRTP trick here to devirtualize the AppendItem(), AppendNull(), and IsNull()
// method calls.
template <typename BuilderType, class Derived>
class TypedConverterVisitor : public TypedConverter<BuilderType> {
public:
Status AppendSingle(PyObject* obj) override {
if (obj == Py_None) {
return static_cast<Derived*>(this)->AppendNull();
} else {
return static_cast<Derived*>(this)->AppendItem(obj);
}
auto self = static_cast<Derived*>(this);
return self->IsNull(obj) ? self->AppendNull() : self->AppendItem(obj);
}

Status AppendMultiple(PyObject* obj, int64_t size) override {
Expand All @@ -409,6 +430,7 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {

// Append a missing item (default implementation)
Status AppendNull() { return this->typed_builder_->AppendNull(); }
bool IsNull(PyObject* obj) const { return obj == Py_None; }
};

class NullConverter : public TypedConverterVisitor<NullBuilder, NullConverter> {
Expand Down Expand Up @@ -830,12 +852,16 @@ class DecimalConverter
public:
// Append a non-missing item: convert the Python object to a Decimal128 using
// the builder's decimal type, then append the converted value.
Status AppendItem(PyObject* obj) {
// NaN values are classified as null by this class's IsNull(), so they are not
// expected to arrive here. NOTE(review): confirm every caller consults
// IsNull() before calling AppendItem().
Decimal128 value;
const auto& type = static_cast<const DecimalType&>(*typed_builder_->type());
RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value));
return typed_builder_->Append(value);
}

// Report whether obj should be stored as a null. An object counts as missing
// when it is Py_None, is the numpy_nan object (identity comparison), is a
// Python float holding NaN, or is a decimal.Decimal for which PyDecimal_ISNAN
// returns true.
bool IsNull(PyObject* obj) const {
// PyDecimal_Check is evaluated before PyDecimal_ISNAN so that the latter is
// only reached for objects that passed the Decimal instance check.
return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) ||
(internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj));
}
};

// Dynamic constructor for sequence converters
Expand Down
58 changes: 56 additions & 2 deletions cpp/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ namespace internal {
// Import the Python module named module_name and hand the resulting object
// over to ref. RETURN_IF_PYERROR runs right after the import call, before the
// result is stored.
Status ImportModule(const std::string& module_name, OwnedRef* ref) {
  PyObject* imported = PyImport_ImportModule(module_name.c_str());
  RETURN_IF_PYERROR();

  // Sanity check on the import result once the error check has passed.
  DCHECK_NE(imported, nullptr) << "unable to import the " << module_name << " module";

  ref->reset(imported);
  return Status::OK();
}
Expand All @@ -71,6 +72,7 @@ Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRe

PyObject* attr = PyObject_GetAttrString(module.obj(), name.c_str());
RETURN_IF_PYERROR();
DCHECK_NE(attr, nullptr) << "unable to import the " << name << " object";
ref->reset(attr);
return Status::OK();
}
Expand All @@ -93,8 +95,13 @@ Status PythonDecimalToString(PyObject* python_decimal, std::string* out) {
return Status::OK();
}

Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision,
int32_t* scale) {
// \brief Infer the precision and scale of a Python decimal.Decimal instance
// \param python_decimal[in] An instance of decimal.Decimal
// \param precision[out] The value of the inferred precision
// \param scale[out] The value of the inferred scale
// \return The status of the operation
static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision,
int32_t* scale) {
DCHECK_NE(python_decimal, NULLPTR);
DCHECK_NE(precision, NULLPTR);
DCHECK_NE(scale, NULLPTR);
Expand Down Expand Up @@ -193,6 +200,53 @@ Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
return Status::OK();
}

// Return true only when obj is a Python float whose value is NaN; any object
// that fails PyFloat_Check yields false.
bool PyFloat_isnan(PyObject* obj) {
  if (!PyFloat_Check(obj)) {
    return false;
  }
  const double value = PyFloat_AS_DOUBLE(obj);
  return std::isnan(value);
}

// \brief Check whether obj is an instance of Python's decimal.Decimal
//
// The decimal module and its Decimal attribute are looked up on every call.
// If either lookup fails, the function returns false instead of calling
// PyObject_IsInstance with a null class pointer; the DCHECKs still flag the
// failure where they are active.
//
// \param[in] obj The Python object to test
// \return true if PyObject_IsInstance reports that obj is a Decimal, false
// otherwise (including when the lookup or the instance check fails)
bool PyDecimal_Check(PyObject* obj) {
  // TODO(phillipc): Is this expensive?
  OwnedRef Decimal;
  OwnedRef decimal;

  Status status = ImportModule("decimal", &decimal);
  DCHECK(status.ok()) << "Error during import of the decimal module";
  if (!status.ok()) {
    return false;
  }

  status = ImportFromModule(decimal, "Decimal", &Decimal);
  DCHECK(status.ok())
      << "Error during import of the Decimal object from the decimal module";
  if (!status.ok() || Decimal.obj() == nullptr) {
    return false;
  }

  // PyObject_IsInstance yields 1 (instance), 0 (not an instance) or -1 (error).
  const int32_t result = PyObject_IsInstance(obj, Decimal.obj());
  DCHECK_NE(result, -1) << " error during PyObject_IsInstance check";
  return result == 1;
}

// \brief Check whether a decimal.Decimal instance is NaN
//
// Calls obj.is_nan() and interprets the result with PyObject_IsTrue.
// PyObject_CallMethod returns NULL when the call fails; in that case the
// function returns false rather than handing a null pointer to
// PyObject_IsTrue. The Python error raised by the failed call is left
// pending so that callers can still detect and report it.
//
// \param[in] obj A Python object expected to be a decimal.Decimal instance
// \return true if obj.is_nan() evaluates to true, false otherwise
bool PyDecimal_ISNAN(PyObject* obj) {
  DCHECK(PyDecimal_Check(obj)) << "obj is not an instance of decimal.Decimal";
  OwnedRef is_nan(PyObject_CallMethod(obj, "is_nan", ""));
  if (is_nan.obj() == nullptr) {
    // The method call failed; there is no result object to inspect.
    return false;
  }
  return PyObject_IsTrue(is_nan.obj()) == 1;
}

// Default construction starts both precision and scale at the smallest
// int32_t value by delegating to the two-argument constructor.
DecimalMetadata::DecimalMetadata()
    : DecimalMetadata(std::numeric_limits<int32_t>::min(),
                      std::numeric_limits<int32_t>::min()) {}

DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale)
: precision_(precision), scale_(scale) {}

// Widen the tracked precision and scale: each field is replaced only when the
// suggested value is strictly larger than the one currently stored. Always
// returns Status::OK().
Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_scale) {
  if (suggested_precision > precision_) {
    precision_ = suggested_precision;
  }
  if (suggested_scale > scale_) {
    scale_ = suggested_scale;
  }
  return Status::OK();
}

// Infer the precision and scale of a Python Decimal and merge them into the
// tracked values through the (precision, scale) overload of Update. A failure
// status from InferDecimalPrecisionAndScale is returned to the caller.
Status DecimalMetadata::Update(PyObject* object) {
  // Preconditions: object is a decimal.Decimal and is not NaN.
  DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal";
  DCHECK(!PyDecimal_ISNAN(object))
      << "Decimal object cannot be NAN when inferring precision and scale";

  int32_t inferred_precision;
  int32_t inferred_scale;
  RETURN_NOT_OK(
      InferDecimalPrecisionAndScale(object, &inferred_precision, &inferred_scale));
  return Update(inferred_precision, inferred_scale);
}

} // namespace internal
} // namespace py
} // namespace arrow
76 changes: 68 additions & 8 deletions cpp/src/arrow/python/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,89 @@ namespace py {

class OwnedRef;

ARROW_EXPORT
std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
// \brief Get an arrow DataType instance from Arrow's Type::type enum
// \param[in] type One of the values of Arrow's Type::type enum
// \return A shared pointer to DataType
ARROW_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);

namespace internal {

// \brief Import a Python module
// \param[in] module_name The name of the module
// \param[out] ref The OwnedRef containing the module PyObject*
Status ImportModule(const std::string& module_name, OwnedRef* ref);
Status ImportFromModule(const OwnedRef& module, const std::string& module_name,
OwnedRef* ref);

// \brief Import an object from a Python module
// \param[in] module A Python module
// \param[in] name The name of the object to import
// \param[out] ref The OwnedRef containing the \c name attribute of the Python module \c
// module
Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRef* ref);

// \brief Convert a Python Decimal object to a C++ string
// \param[in] python_decimal A Python decimal.Decimal instance
// \param[out] out The string representation of the Python Decimal instance
// \return The status of the operation
Status PythonDecimalToString(PyObject* python_decimal, std::string* out);

Status InferDecimalPrecisionAndScale(PyObject* python_decimal,
int32_t* precision = NULLPTR,
int32_t* scale = NULLPTR);

// \brief Convert a C++ std::string to a Python Decimal instance
// \param[in] decimal_constructor The decimal type object
// \param[in] decimal_string A decimal string
// \return An instance of decimal.Decimal
PyObject* DecimalFromString(PyObject* decimal_constructor,
const std::string& decimal_string);

// \brief Convert a Python decimal to an Arrow Decimal128 object
// \param[in] python_decimal A Python decimal.Decimal instance
// \param[in] arrow_type An instance of arrow::DecimalType
// \param[out] out A pointer to a Decimal128
// \return The status of the operation
Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type,
Decimal128* out);

// \brief Check whether obj is an integer, independent of Python versions.
bool IsPyInteger(PyObject* obj);

// \brief Check whether obj is nan
bool PyFloat_isnan(PyObject* obj);

// \brief Check whether obj is an instance of Decimal
bool PyDecimal_Check(PyObject* obj);

// \brief Check whether obj is nan. This function will abort the program if the argument
// is not a Decimal instance
bool PyDecimal_ISNAN(PyObject* obj);

// \brief Convert a Python integer into an unsigned 64-bit integer
// \param[in] obj A Python integer
// \param[out] out A pointer to a C uint64_t to hold the result of the conversion
// \return The status of the operation
Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);

// \brief Helper class to track and update the precision and scale of a decimal
//
// Holds a single (precision, scale) pair that can be revised through the
// Update() overloads and read back through precision() and scale().
class DecimalMetadata {
public:
// \brief Construct metadata without supplying a precision or scale
DecimalMetadata();
// \brief Construct metadata from a given precision and scale
// \param[in] precision The initial precision
// \param[in] scale The initial scale
DecimalMetadata(int32_t precision, int32_t scale);

// \brief Adjust the precision and scale of a decimal type given a new precision and a
// new scale
// \param[in] suggested_precision A candidate precision
// \param[in] suggested_scale A candidate scale
// \return The status of the operation
Status Update(int32_t suggested_precision, int32_t suggested_scale);

// \brief A convenient interface for updating the precision and scale based on a Python
// Decimal object
// \param[in] object A Python Decimal object
// \return The status of the operation
Status Update(PyObject* object);

// \brief The currently tracked precision
int32_t precision() const { return precision_; }
// \brief The currently tracked scale
int32_t scale() const { return scale_; }

private:
int32_t precision_;
int32_t scale_;
};

} // namespace internal
} // namespace py
} // namespace arrow
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/python/numpy-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class Ndarray1DIndexer {

T* data() const { return data_; }

T* begin() const { return data(); }
// Pointer obtained by advancing begin() by size() * stride_ elements.
// NOTE(review): begin()/end() are raw T* values, so incrementing from begin()
// moves one element at a time, whereas operator[] steps by stride_ elements;
// confirm that callers only iterate this range when stride_ is 1.
T* end() const { return begin() + size() * stride_; }

// Returns true when stride_ equals 1.
// NOTE(review): with stride_ == 1, operator[] addresses consecutive elements,
// so the name suggests this predicate should be stride_ != 1. Check the
// callers before changing it, since they may rely on the current result.
bool is_strided() const { return stride_ == 1; }

T& operator[](size_type index) { return data_[index * stride_]; }
Expand Down
Loading