From a34542afa0f6bc7c518514e4d9147da98a08ab3d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 24 May 2020 11:43:03 -0500 Subject: [PATCH] Implement cast metafunction to provide a single point for executing eager casts, use in Python Typos Rebase and fix bugs Review comments. Move cast to compute.py Consolidate _compute.pyx, compute.pxi and CastOptions wrapping decruft Move compute-related code to _compute.pyx/_compute.pxd --- cpp/src/arrow/compute/cast.cc | 49 +++++++-- cpp/src/arrow/compute/cast.h | 22 +++- cpp/src/arrow/compute/function.h | 7 +- cpp/src/arrow/compute/registry.cc | 1 + cpp/src/arrow/compute/registry_internal.h | 1 + python/CMakeLists.txt | 4 +- python/pyarrow/_compute.pxd | 37 +++++++ python/pyarrow/_compute.pyx | 123 +++++++++++++++++----- python/pyarrow/_dataset.pyx | 1 + python/pyarrow/array.pxi | 56 +--------- python/pyarrow/compute.pxi | 101 ------------------ python/pyarrow/compute.py | 59 +++++++++++ python/pyarrow/includes/libarrow.pxd | 10 +- python/pyarrow/lib.pxd | 12 +-- python/pyarrow/lib.pyx | 3 - python/pyarrow/table.pxi | 27 +---- python/pyarrow/tests/test_array.py | 2 +- python/pyarrow/types.pxi | 2 +- python/setup.py | 2 +- 19 files changed, 274 insertions(+), 245 deletions(-) create mode 100644 python/pyarrow/_compute.pxd delete mode 100644 python/pyarrow/compute.pxi diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc index 63b8e509edc..32861795d55 100644 --- a/cpp/src/arrow/compute/cast.cc +++ b/cpp/src/arrow/compute/cast.cc @@ -26,13 +26,14 @@ #include "arrow/compute/cast_internal.h" #include "arrow/compute/kernel.h" +#include "arrow/compute/registry.h" namespace arrow { namespace compute { namespace internal { -std::unordered_map> g_cast_table; +std::unordered_map> g_cast_table; static std::once_flag cast_table_initialized; void AddCastFunctions(const std::vector>& funcs) { @@ -51,6 +52,38 @@ void InitCastTable() { void EnsureInitCastTable() { std::call_once(cast_table_initialized, InitCastTable); } +// A function that overrides Function::Execute to dispatch to the appropriate +// target-type-specific CastFunction +// +// This corresponds to the standard SQL CAST(expr AS target_type) +// +// As a "metafunction" this function has no kernels and is intended to be used +// through its Execute function +class CastMetaFunction : public ScalarFunction { + public: + CastMetaFunction() : ScalarFunction("cast", Arity::Unary()) {} + + Result Execute(const std::vector& args, const FunctionOptions* options, + ExecContext* ctx) const override { + auto cast_options = static_cast(options); + if (cast_options == nullptr || cast_options->to_type == nullptr) { + return Status::Invalid( + "Cast requires that options be passed with " + "the to_type populated"); + } + if (args[0].type()->Equals(*cast_options->to_type)) { + return args[0]; + } + ARROW_ASSIGN_OR_RAISE(std::shared_ptr cast_func, + GetCastFunction(cast_options->to_type)); + return cast_func->Execute(args, options, ctx); + } +}; + +void RegisterScalarCast(FunctionRegistry* registry) { + DCHECK_OK(registry->AddFunction(std::make_shared())); +} + } // namespace internal struct CastFunction::CastFunctionImpl { @@ -138,16 +171,15 @@ Result CastFunction::DispatchExact( } } +Result Cast(const Datum& value, const CastOptions& options, ExecContext* ctx) { + return CallFunction("cast", {value}, &options, ctx); +} + Result Cast(const Datum& value, std::shared_ptr to_type, const CastOptions& options, ExecContext* ctx) { - if (value.type()->Equals(*to_type)) { - return value; - } CastOptions 
options_with_to_type = options; options_with_to_type.to_type = to_type; - ARROW_ASSIGN_OR_RAISE(std::shared_ptr cast_func, - GetCastFunction(to_type)); - return cast_func->Execute({Datum(value)}, &options_with_to_type, ctx); + return Cast(value, options_with_to_type, ctx); } Result> Cast(const Array& value, std::shared_ptr to_type, @@ -156,7 +188,7 @@ Result> Cast(const Array& value, std::shared_ptr> GetCastFunction( +Result> GetCastFunction( const std::shared_ptr& to_type) { internal::EnsureInitCastTable(); auto it = internal::g_cast_table.find(static_cast(to_type->id())); @@ -169,6 +201,7 @@ Result> GetCastFunction( bool CanCast(const DataType& from_type, const DataType& to_type) { // TODO + internal::EnsureInitCastTable(); auto it = internal::g_cast_table.find(static_cast(from_type.id())); if (it == internal::g_cast_table.end()) { return false; diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index 93961a0fd3b..be6d7b20dd9 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -103,7 +103,7 @@ class CastFunction : public ScalarFunction { }; ARROW_EXPORT -Result> GetCastFunction( +Result> GetCastFunction( const std::shared_ptr& to_type); /// \brief Return true if a cast function is defined @@ -117,7 +117,7 @@ bool CanCast(const DataType& from_type, const DataType& to_type); /// \param[in] value array to cast /// \param[in] to_type type to cast to /// \param[in] options casting options -/// \param[in] context the function execution context, optional +/// \param[in] ctx the function execution context, optional /// \return the resulting array /// /// \since 1.0.0 @@ -125,13 +125,25 @@ bool CanCast(const DataType& from_type, const DataType& to_type); ARROW_EXPORT Result> Cast(const Array& value, std::shared_ptr to_type, const CastOptions& options = CastOptions::Safe(), - ExecContext* context = NULLPTR); + ExecContext* ctx = NULLPTR); + +/// \brief Cast from one array type to another +/// \param[in] value array to cast +/// \param[in] options casting options. 
The "to_type" field must be populated +/// \param[in] ctx the function execution context, optional +/// \return the resulting array +/// +/// \since 1.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Cast(const Datum& value, const CastOptions& options, + ExecContext* ctx = NULLPTR); /// \brief Cast from one value to another /// \param[in] value datum to cast /// \param[in] to_type type to cast to /// \param[in] options casting options -/// \param[in] context the function execution context, optional +/// \param[in] ctx the function execution context, optional /// \return the resulting datum /// /// \since 1.0.0 @@ -139,7 +151,7 @@ Result> Cast(const Array& value, std::shared_ptr Cast(const Datum& value, std::shared_ptr to_type, const CastOptions& options = CastOptions::Safe(), - ExecContext* context = NULLPTR); + ExecContext* ctx = NULLPTR); } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index 4280235d678..28dc975dcc9 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -99,8 +99,11 @@ class ARROW_EXPORT Function { /// \brief Convenience for invoking a function with kernel dispatch and /// memory allocation details taken care of - Result Execute(const std::vector& args, const FunctionOptions* options, - ExecContext* ctx = NULLPTR) const; + /// + /// This function can be overridden in subclasses + virtual Result Execute(const std::vector& args, + const FunctionOptions* options, + ExecContext* ctx = NULLPTR) const; protected: Function(std::string name, Function::Kind kind, const Arity& arity) diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index c7b6099908a..c2f1ffd4ba7 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -100,6 +100,7 @@ static std::unique_ptr CreateBuiltInRegistry() { // Scalar functions RegisterScalarArithmetic(registry.get()); RegisterScalarBoolean(registry.get()); + RegisterScalarCast(registry.get()); RegisterScalarComparison(registry.get()); RegisterScalarSetLookup(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index 75e53c793fe..596998189b0 100644 --- a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -27,6 +27,7 @@ namespace internal { // Built-in scalar / elementwise functions void RegisterScalarArithmetic(FunctionRegistry* registry); void RegisterScalarBoolean(FunctionRegistry* registry); +void RegisterScalarCast(FunctionRegistry* registry); void RegisterScalarComparison(FunctionRegistry* registry); void RegisterScalarSetLookup(FunctionRegistry* registry); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a8536387205..99a08df91f4 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -377,9 +377,9 @@ endif() set(CYTHON_EXTENSIONS lib _fs + _compute _csv - _json - _compute) + _json) set(LINK_LIBS arrow_shared arrow_python_shared) diff --git a/python/pyarrow/_compute.pxd b/python/pyarrow/_compute.pxd new file mode 100644 index 00000000000..6a0a6bcf1c4 --- /dev/null +++ b/python/pyarrow/_compute.pxd @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: language_level = 3 + +from pyarrow.lib cimport * +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * + + +cdef class FunctionOptions: + + cdef const CFunctionOptions* get_options(self) except NULL + + +cdef class CastOptions(FunctionOptions): + cdef: + CCastOptions options + + @staticmethod + cdef wrap(CCastOptions options) + + cdef inline CCastOptions unwrap(self) nogil diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index caf4f3b44c7..481a1887a24 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -17,18 +17,9 @@ # cython: language_level = 3 -from pyarrow.lib cimport ( - Array, - wrap_datum, - check_status, - ChunkedArray, - ScalarValue -) +from pyarrow.compat import frombytes, tobytes, ordered_dict +from pyarrow.lib cimport * from pyarrow.includes.libarrow cimport * -from pyarrow.includes.common cimport * - -from pyarrow.compat import frombytes, tobytes - cdef wrap_scalar_function(const shared_ptr[CFunction]& sp_func): cdef ScalarFunction func = ScalarFunction.__new__(ScalarFunction) @@ -169,7 +160,7 @@ num_kernels: {} def num_kernels(self): return self.base_func.num_kernels() - def call(self, args, options=None): + def call(self, args, FunctionOptions options=None): cdef: const CFunctionOptions* c_options = NULL vector[CDatum] c_args @@ -177,8 +168,8 @@ num_kernels: {} _pack_compute_args(args, &c_args) - if isinstance(options, FunctionOptions): - c_options = ( options).options() + if options is not None: + c_options = options.get_options() with nogil: result = GetResultValue(self.base_func.Execute(c_args, c_options)) @@ -273,26 +264,106 @@ def call_function(name, args, options=None): cdef class FunctionOptions: - cdef const CFunctionOptions* options(self) except NULL: + cdef const CFunctionOptions* get_options(self) except NULL: raise NotImplementedError("Unimplemented base options") cdef class CastOptions(FunctionOptions): - cdef: - CCastOptions cast_options + + __slots__ = () # avoid mistakingly creating attributes + + def __init__(self, DataType target_type=None, allow_int_overflow=None, + allow_time_truncate=None, allow_time_overflow=None, + allow_float_truncate=None, allow_invalid_utf8=None): + if allow_int_overflow is not None: + self.allow_int_overflow = allow_int_overflow + if allow_time_truncate is not None: + self.allow_time_truncate = allow_time_truncate + if allow_time_overflow is not None: + self.allow_time_overflow = allow_time_overflow + if allow_float_truncate is not None: + self.allow_float_truncate = allow_float_truncate + if allow_invalid_utf8 is not None: + self.allow_invalid_utf8 = allow_invalid_utf8 + + cdef const CFunctionOptions* get_options(self) except NULL: + return &self.options + + @staticmethod + cdef wrap(CCastOptions options): + cdef CastOptions self = CastOptions.__new__(CastOptions) + self.options = options + return self + + cdef inline CCastOptions unwrap(self) nogil: + return self.options 
@staticmethod - def safe(): - cdef CastOptions options = CastOptions() - options.cast_options = CCastOptions.Safe() + def safe(target_type=None): + options = CastOptions.wrap(CCastOptions.Safe()) + options._set_type(target_type) + return options @staticmethod - def unsafe(): - cdef CastOptions options = CastOptions() - options.cast_options = CCastOptions.Unsafe() + def unsafe(target_type=None): + options = CastOptions.wrap(CCastOptions.Unsafe()) + options._set_type(target_type) + return options + + def _set_type(self, target_type=None): + if target_type is not None: + self.options.to_type = ( + ( ensure_type(target_type)).sp_type + ) + + def is_safe(self): + return not ( + self.options.allow_int_overflow or + self.options.allow_time_truncate or + self.options.allow_time_overflow or + self.options.allow_float_truncate or + self.options.allow_invalid_utf8 + ) + + @property + def allow_int_overflow(self): + return self.options.allow_int_overflow + + @allow_int_overflow.setter + def allow_int_overflow(self, bint flag): + self.options.allow_int_overflow = flag + + @property + def allow_time_truncate(self): + return self.options.allow_time_truncate + + @allow_time_truncate.setter + def allow_time_truncate(self, bint flag): + self.options.allow_time_truncate = flag + + @property + def allow_time_overflow(self): + return self.options.allow_time_overflow + + @allow_time_overflow.setter + def allow_time_overflow(self, bint flag): + self.options.allow_time_overflow = flag + + @property + def allow_float_truncate(self): + return self.options.allow_float_truncate + + @allow_float_truncate.setter + def allow_float_truncate(self, bint flag): + self.options.allow_float_truncate = flag + + @property + def allow_invalid_utf8(self): + return self.options.allow_invalid_utf8 - cdef const CFunctionOptions* options(self) except NULL: - return &self.cast_options + @allow_invalid_utf8.setter + def allow_invalid_utf8(self, bint flag): + self.options.allow_invalid_utf8 = flag cdef class FilterOptions(FunctionOptions): @@ -314,5 +385,5 @@ cdef class FilterOptions(FunctionOptions): null_selection_behavior) ) - cdef const CFunctionOptions* options(self) except NULL: + cdef const CFunctionOptions* get_options(self) except NULL: return &self.filter_options diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 1eba6abb5fc..a7efea1e359 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -28,6 +28,7 @@ from pyarrow.includes.libarrow_dataset cimport * from pyarrow.compat import frombytes, tobytes from pyarrow._fs cimport FileSystem, FileInfo, FileSelector from pyarrow._csv cimport ParseOptions +from pyarrow._compute cimport CastOptions def _forbid_instantiation(klass, subclasses_instead=True): diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 230ef7c0651..c25bbbe2c20 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -622,61 +622,13 @@ cdef class Array(_PandasConvertible): result = self.ap.Diff(deref(other.ap)) return frombytes(result) - def cast(self, object target_type, bint safe=True): + def cast(self, object target_type, safe=True): """ - Cast array values to another data type. 
+ Cast array values to another data type - Example - ------- - - >>> from datetime import datetime - >>> import pyarrow as pa - >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) - >>> arr.type - TimestampType(timestamp[us]) - - You can use ``pyarrow.DataType`` objects to specify the target type: - - >>> arr.cast(pa.timestamp('ms')) - - [ - 1262304000000, - 1420070400000 - ] - >>> arr.cast(pa.timestamp('ms')).type - TimestampType(timestamp[ms]) - - Alternatively, it is also supported to use the string aliases for these - types: - - >>> arr.cast('timestamp[ms]') - - [ - 1262304000000, - 1420070400000 - ] - >>> arr.cast('timestamp[ms]').type - TimestampType(timestamp[ms]) - - Parameters - ---------- - target_type : DataType - Type to cast to - safe : bool, default True - Check for overflows or other unsafe conversions - - Returns - ------- - casted : Array + See pyarrow.compute.cast for usage """ - cdef: - CCastOptions options = CCastOptions(safe) - DataType type = ensure_type(target_type) - shared_ptr[CArray] result - - with nogil: - result = GetResultValue(Cast(self.ap[0], type.sp_type, options)) - return pyarrow_wrap_array(result) + return _pc().cast(self, target_type, safe=safe) def view(self, object target_type): """ diff --git a/python/pyarrow/compute.pxi b/python/pyarrow/compute.pxi deleted file mode 100644 index d0c0d4d5826..00000000000 --- a/python/pyarrow/compute.pxi +++ /dev/null @@ -1,101 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -cdef class CastOptions: - - __slots__ = () # avoid mistakingly creating attributes - - def __init__(self, allow_int_overflow=None, - allow_time_truncate=None, allow_time_overflow=None, - allow_float_truncate=None, allow_invalid_utf8=None): - if allow_int_overflow is not None: - self.allow_int_overflow = allow_int_overflow - if allow_time_truncate is not None: - self.allow_time_truncate = allow_time_truncate - if allow_time_overflow is not None: - self.allow_time_overflow = allow_time_overflow - if allow_float_truncate is not None: - self.allow_float_truncate = allow_float_truncate - if allow_invalid_utf8 is not None: - self.allow_invalid_utf8 = allow_invalid_utf8 - - @staticmethod - cdef wrap(CCastOptions options): - cdef CastOptions self = CastOptions.__new__(CastOptions) - self.options = options - return self - - @staticmethod - def safe(): - return CastOptions.wrap(CCastOptions.Safe()) - - @staticmethod - def unsafe(): - return CastOptions.wrap(CCastOptions.Unsafe()) - - def is_safe(self): - return not ( - self.options.allow_int_overflow or - self.options.allow_time_truncate or - self.options.allow_time_overflow or - self.options.allow_float_truncate or - self.options.allow_invalid_utf8 - ) - - cdef inline CCastOptions unwrap(self) nogil: - return self.options - - @property - def allow_int_overflow(self): - return self.options.allow_int_overflow - - @allow_int_overflow.setter - def allow_int_overflow(self, bint flag): - self.options.allow_int_overflow = flag - - @property - def allow_time_truncate(self): - return self.options.allow_time_truncate - - @allow_time_truncate.setter - def allow_time_truncate(self, bint flag): - self.options.allow_time_truncate = flag - - @property - def allow_time_overflow(self): - return self.options.allow_time_overflow - - @allow_time_overflow.setter - def allow_time_overflow(self, bint flag): - self.options.allow_time_overflow = flag - - @property - def allow_float_truncate(self): - return self.options.allow_float_truncate - - @allow_float_truncate.setter - def allow_float_truncate(self, bint flag): - self.options.allow_float_truncate = flag - - @property - def allow_invalid_utf8(self): - return self.options.allow_invalid_utf8 - - @allow_invalid_utf8.setter - def allow_invalid_utf8(self, bint flag): - self.options.allow_invalid_utf8 = flag diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index eee193875cc..6e3628fa29e 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -23,6 +23,65 @@ function_registry, call_function ) +import pyarrow._compute as _pc + + +def cast(arr, target_type, safe=True): + """ + Cast array values to another data type. Can also be invoked as an array + instance method. 
+ + Parameters + ---------- + arr : Array or ChunkedArray + target_type : DataType or type string alias + Type to cast to + safe : bool, default True + Check for overflows or other unsafe conversions + + Examples + -------- + >>> from datetime import datetime + >>> import pyarrow as pa + >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) + >>> arr.type + TimestampType(timestamp[us]) + + You can use ``pyarrow.DataType`` objects to specify the target type: + + >>> cast(arr, pa.timestamp('ms')) + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + + >>> cast(arr, pa.timestamp('ms')).type + TimestampType(timestamp[ms]) + + Alternatively, it is also supported to use the string aliases for these + types: + + >>> arr.cast('timestamp[ms]') + + [ + 1262304000000, + 1420070400000 + ] + >>> arr.cast('timestamp[ms]').type + TimestampType(timestamp[ms]) + + Returns + ------- + casted : Array + """ + if target_type is None: + raise ValueError("Cast target type must not be None") + if safe: + options = _pc.CastOptions.safe(target_type) + else: + options = _pc.CastOptions.unsafe(target_type) + return call_function("cast", [arr], options) def sum(array): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index dc09f04a61e..9b2e296f300 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1525,6 +1525,7 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: @staticmethod CCastOptions Unsafe() + shared_ptr[CDataType] to_type c_bool allow_int_overflow c_bool allow_time_truncate c_bool allow_time_overflow @@ -1570,15 +1571,6 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: shared_ptr[CTable] table() shared_ptr[CScalar] scalar() - CResult[shared_ptr[CArray]] Cast" arrow::compute::Cast"( - const CArray& array, - const shared_ptr[CDataType]& to_type, - const CCastOptions& options) - - CResult[CDatum] Cast(const CDatum& value, - const shared_ptr[CDataType]& to_type, - const CCastOptions& options) - CResult[CDatum] Take(const CDatum& values, const CDatum& indices, const CTakeOptions& options) diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 3487941cefd..79d0400ff97 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -575,16 +575,6 @@ cdef class Codec: cdef inline CCodec* unwrap(self) nogil -cdef class CastOptions: - cdef: - CCastOptions options - - @staticmethod - cdef wrap(CCastOptions options) - - cdef inline CCastOptions unwrap(self) nogil - - cdef get_input_stream(object source, c_bool use_memory_map, shared_ptr[CInputStream]* reader) cdef get_reader(object source, c_bool use_memory_map, @@ -592,7 +582,7 @@ cdef get_reader(object source, c_bool use_memory_map, cdef get_writer(object source, shared_ptr[COutputStream]* writer) # Default is allow_none=False -cdef DataType ensure_type(object type, c_bool allow_none=*) +cpdef DataType ensure_type(object type, bint allow_none=*) # Exceptions may be raised when converting dict values, so need to # check exception state on return diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 41903dec9ca..e33f5d01565 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -132,9 +132,6 @@ include "table.pxi" # Tensors include "tensor.pxi" -# Compute -include "compute.pxi" - # File IO include "io.pxi" include "io-hdfs.pxi" diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 322023dde8a..043da82015f 100644 --- a/python/pyarrow/table.pxi +++ 
b/python/pyarrow/table.pxi @@ -238,32 +238,13 @@ cdef class ChunkedArray(_PandasConvertible): return values return values.astype(dtype) - def cast(self, object target_type, bint safe=True): + def cast(self, object target_type, safe=True): """ - Cast values to another data type + Cast array values to another data type - Parameters - ---------- - target_type : DataType - Type to cast to - safe : bool, default True - Check for overflows or other unsafe conversions - - Returns - ------- - casted : ChunkedArray + See pyarrow.compute.cast for usage """ - cdef: - CCastOptions options = CCastOptions(safe) - DataType type = ensure_type(target_type) - shared_ptr[CArray] result - CDatum out - - with nogil: - out = GetResultValue(Cast(CDatum(self.sp_chunked_array), - type.sp_type, options)) - - return pyarrow_wrap_chunked_array(out.chunked_array()) + return _pc().cast(self, target_type, safe=safe) def dictionary_encode(self): """ diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 9cc80ed99c1..9f63a355bbd 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -950,7 +950,7 @@ def test_cast_none(): # ARROW-3735: Ensure that calling cast(None) doesn't segfault. arr = pa.array([1, 2, 3]) - with pytest.raises(TypeError): + with pytest.raises(ValueError): arr.cast(None) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index c2ba86a12fd..a828228a6cd 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -2509,7 +2509,7 @@ def type_for_alias(name): return alias() -cdef DataType ensure_type(object ty, c_bool allow_none=False): +cpdef DataType ensure_type(object ty, bint allow_none=False): if allow_none and ty is None: return None elif isinstance(ty, DataType): diff --git a/python/setup.py b/python/setup.py index f6f6b45bd39..453cc38e65e 100755 --- a/python/setup.py +++ b/python/setup.py @@ -178,8 +178,8 @@ def initialize_options(self): '_fs', '_csv', '_json', - '_cuda', '_compute', + '_cuda', '_flight', '_dataset', '_parquet',
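
Usage sketch (illustrative, not part of the patch): the snippet below shows how the
new "cast" metafunction is reached from Python once this change is built. It assumes
a pyarrow build containing this patch; CastOptions is imported from pyarrow._compute
directly because the patch does not show it being re-exported from pyarrow.compute.

    import pyarrow as pa
    import pyarrow.compute as pc
    from pyarrow._compute import CastOptions

    arr = pa.array([1, 2, 3], type=pa.int64())

    # High-level wrapper added in compute.py: builds CastOptions.safe(target_type)
    # and dispatches through call_function("cast", ...).
    out = pc.cast(arr, pa.int32())
    assert out.type == pa.int32()

    # Array.cast and ChunkedArray.cast now delegate to the same code path,
    # so string type aliases keep working.
    assert arr.cast('int32').equals(out)

    # Casting to the input's own type is a no-op: CastMetaFunction::Execute
    # returns the argument unchanged when the types already match.
    same = pc.cast(arr, pa.int64())

    # Calling the registered metafunction directly with explicit options.
    opts = CastOptions.unsafe(pa.int8())
    out2 = pc.call_function("cast", [arr], opts)

    # Omitting the options (or leaving to_type unset) surfaces the C++ error
    # "Cast requires that options be passed with the to_type populated".

Design note: routing every eager cast through the single registered "cast" function
(the SQL-style CAST(expr AS target_type)) gives Python one entry point, which is what
allows this patch to drop the duplicate Cast declarations from libarrow.pxd and the
old compute.pxi wrapper.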