Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,8 @@ set(PYARROW_CPP_SRCS
${PYARROW_CPP_SOURCE_DIR}/python_test.cc
${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
${PYARROW_CPP_SOURCE_DIR}/udf.cc)
${PYARROW_CPP_SOURCE_DIR}/udf.cc
${PYARROW_CPP_SOURCE_DIR}/util.cc)
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON)

Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def print_entry(label, value):
SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
SparseCSFTensor,
infer_type, from_numpy_dtype,
arange,
NullArray,
NumericArray, IntegerArray, FloatingPointArray,
BooleanArray,
Expand Down
41 changes: 36 additions & 5 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -572,22 +572,53 @@ def infer_type(values, mask=None, from_pandas=False):
return pyarrow_wrap_data_type(out)


def arange(int64_t start, int64_t stop, int64_t step=1, *, memory_pool=None):
    """
    Build an int64 array of evenly spaced integers.

    The semantics follow Python's built-in `range`: the first element is
    `start`, consecutive elements differ by `step`, and `stop` itself is
    never part of the result.

    Parameters
    ----------
    start : int
        First value of the sequence (inclusive).
    stop : int
        End of the sequence (exclusive).
    step : int, default 1
        Difference between two consecutive values.
    memory_pool : MemoryPool, optional
        A memory pool to use for memory allocations.

    Returns
    -------
    arange : Array

    Raises
    ------
    ArrowInvalid
        If `step` is zero.
    """
    cdef:
        CMemoryPool* c_pool = maybe_unbox_memory_pool(memory_pool)
        shared_ptr[CArray] c_result

    # The sequence is generated in C++, so the GIL is not needed meanwhile.
    with nogil:
        c_result = GetResultValue(Arange(start, stop, step, c_pool))

    return pyarrow_wrap_array(c_result)


def _normalize_slice(object arrow_obj, slice key):
"""
Apply a Python slice object to an Arrow object.

Slices with step not equal to 1 (or None) will produce a copy
rather than a zero-copy view
"""
cdef:
Py_ssize_t start, stop, step
int64_t start, stop, step
Py_ssize_t n = len(arrow_obj)

# Resolve the slice's start/stop/step against the object's length.
start, stop, step = key.indices(n)

if step != 1:
# Non-unit step: gather the selected positions with take().
indices = list(range(start, stop, step))
if len(indices) == 0:
return arrow_obj.slice(0, 0)
return arrow_obj.take(indices)
return arrow_obj.take(arange(start, stop, step))
else:
# Unit step: slice(offset, length); the length is clamped at 0 so that
# stop < start yields an empty result.
length = max(stop - start, 0)
return arrow_obj.slice(start, length)
Expand Down
3 changes: 3 additions & 0 deletions python/pyarrow/includes/libarrow_python.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
object obj, object mask, const PyConversionOptions& options,
CMemoryPool* pool)

CResult[shared_ptr[CArray]] Arange(int64_t start, int64_t stop,
int64_t step, CMemoryPool* pool)

CResult[shared_ptr[CDataType]] NumPyDtypeToArrow(object dtype)

CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/src/arrow/python/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@
#include "arrow/python/numpy_convert.h"
#include "arrow/python/numpy_to_arrow.h"
#include "arrow/python/python_to_arrow.h"
#include "arrow/python/util.h"
50 changes: 50 additions & 0 deletions python/pyarrow/src/arrow/python/util.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/python/util.h"

#include "arrow/array.h"
#include "arrow/python/common.h"

namespace arrow ::py {

// Build an int64 array containing start, start + step, start + 2 * step, ...
// up to but excluding stop (same semantics as Python's range()).
//
// Returns Status::Invalid if `step` is zero or if the sequence would hold more
// elements than can be addressed by a single int64-sized buffer. Returns an
// empty int64 array when no value lies between `start` and `stop` in the
// direction of `step`.
Result<std::shared_ptr<Array>> Arange(int64_t start, int64_t stop, int64_t step,
                                      MemoryPool* pool) {
  if (step == 0) {
    return Status::Invalid("Step must not be zero");
  }
  const bool ascending = step > 0;
  if (ascending ? stop <= start : stop >= start) {
    return MakeEmptyArray(int64());
  }

  // `stop - start` and `-step` can overflow int64_t (e.g. start == INT64_MIN or
  // step == INT64_MIN), which is undefined behaviour for signed integers.
  // The same subtractions on uint64_t are well defined modulo 2^64 and give the
  // exact magnitude, since the true results lie in [1, 2^64 - 1].
  const uint64_t distance =
      ascending ? static_cast<uint64_t>(stop) - static_cast<uint64_t>(start)
                : static_cast<uint64_t>(start) - static_cast<uint64_t>(stop);
  const uint64_t stride = ascending ? static_cast<uint64_t>(step)
                                    : uint64_t{0} - static_cast<uint64_t>(step);

  // Ceiling division. distance >= 1 here, so `distance - 1` cannot wrap, and
  // this form avoids the possible overflow of `distance + stride - 1`.
  const uint64_t count = (distance - 1) / stride + 1;

  // Reject lengths whose byte size does not fit in int64_t; otherwise the
  // byte count below would wrap and the fill loop would write past the buffer.
  constexpr uint64_t kMaxLength = static_cast<uint64_t>(INT64_MAX) / sizeof(int64_t);
  if (count > kMaxLength) {
    return Status::Invalid("Range of ", count,
                           " elements is too large, the maximum is ", kMaxLength);
  }
  const int64_t size = static_cast<int64_t>(count);

  std::shared_ptr<Buffer> data_buffer;
  ARROW_ASSIGN_OR_RAISE(
      data_buffer,
      AllocateBuffer(size * static_cast<int64_t>(sizeof(int64_t)), pool));
  auto values = reinterpret_cast<int64_t*>(data_buffer->mutable_data());

  // Accumulate rather than computing `start + i * step`: the product can
  // overflow int64_t even when the resulting element is representable, whereas
  // every partial sum here is itself an element of the sequence.
  values[0] = start;
  for (int64_t i = 1; i < size; ++i) {
    values[i] = values[i - 1] + step;
  }

  // No validity bitmap: the array has no nulls.
  auto data = ArrayData::Make(int64(), size, {nullptr, data_buffer}, 0);
  return MakeArray(data);
}

} // namespace arrow::py
40 changes: 40 additions & 0 deletions python/pyarrow/src/arrow/python/util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/python/common.h"
#include "arrow/python/visibility.h"

namespace arrow::py {

/// \brief Create an array of evenly spaced values within a given interval.
///
/// This function is similar to Python's `range` function.
/// The resulting array will contain values starting from `start` up to but not
/// including `stop`, with a step size of `step`. A negative `step` counts down
/// from `start`. If no value lies between `start` and `stop` in the direction
/// of `step`, the result is an empty array. If `step` is zero, the function
/// will return an error.
/// The resulting array will have a data type of `int64`.
/// \param[in] start initial value of the sequence (inclusive).
/// \param[in] stop final value of the sequence (exclusive).
/// \param[in] step step size between consecutive values; must not be zero.
/// \param[in] pool Memory pool used to allocate the array's values buffer.
/// \return The generated int64 array, or an Invalid status if `step` is zero.
ARROW_PYTHON_EXPORT
Result<std::shared_ptr<Array>> Arange(int64_t start, int64_t stop, int64_t step,
MemoryPool* pool);

} // namespace arrow::py
26 changes: 26 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,32 @@ def test_array_slice_negative_step():
assert result.equals(expected)


def test_arange():
    # Every tuple is forwarded positionally to both pa.arange and the
    # built-in range, which serves as the reference implementation.
    range_args = [
        (5, 103),  # Default step
        (-2, 128, 3),
        (4, 103, 5),
        (10, -7, -1),
        (100, -20, -3),
        (0, 0),  # Empty array
        (2, 10, -1),  # Empty array
        (10, 3, 1),  # Empty array
    ]
    for args in range_args:
        actual = pa.arange(*args)
        actual.validate(full=True)
        expected = pa.array(list(range(*args)), type=pa.int64())
        assert actual.equals(expected)

    # The memory_pool keyword argument is accepted
    pool = pa.default_memory_pool()
    pooled = pa.arange(-1, 101, memory_pool=pool)
    pooled.validate(full=True)
    expected = pa.array(list(range(-1, 101)), type=pa.int64())
    assert pooled.equals(expected)

    # Unlike a valid range, a step of 0 must be rejected
    with pytest.raises(pa.ArrowInvalid):
        pa.arange(0, 10, 0)


def test_array_diff():
# ARROW-6252
arr1 = pa.array(['foo'], type=pa.utf8())
Expand Down
Loading