Skip to content
18 changes: 18 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,23 @@ def take(self, indexer, allow_fill=True, fill_value=None):
Fill value to replace -1 values with. If applicable, this should
use the sentinel missing value for this type.

Returns
-------
ExtensionArray

Raises
------
IndexError
When the indexer is out of bounds for the array.

Notes
-----
This should follow pandas' semantics where -1 indicates missing values.
Positions where indexer is ``-1`` should be filled with the missing
value for this type.
This gives rise to the special case of a take on an empty
ExtensionArray that does not raises an IndexError straight away
when the `indexer` is all ``-1``.

This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
indexer is a sequence of values.
Expand All @@ -477,6 +489,12 @@ def take(self, indexer, allow_fill=True, fill_value=None):
def take(self, indexer, allow_fill=True, fill_value=None):
indexer = np.asarray(indexer)
mask = indexer == -1

# take on empty array not handled as desired by numpy
# in case of -1 (all missing take)
if not len(self) and mask.all():
return type(self)([np.nan] * len(indexer))

result = self.data.take(indexer)
result[mask] = np.nan # NA for this type
return type(self)(result)
Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest
import numpy as np

import pandas as pd
import pandas.util.testing as tm

from .base import BaseExtensionTests

Expand Down Expand Up @@ -120,3 +122,48 @@ def test_take_sequence(self, data):
assert result.iloc[0] == data[0]
assert result.iloc[1] == data[1]
assert result.iloc[2] == data[3]

def test_take(self, data, na_value, na_cmp):
result = data.take([0, -1])
assert result.dtype == data.dtype
assert result[0] == data[0]
na_cmp(result[1], na_value)

with tm.assert_raises_regex(IndexError, "out of bounds"):
data.take([len(data) + 1])

def test_take_empty(self, data, na_value, na_cmp):
empty = data[:0]
result = empty.take([-1])
na_cmp(result[0], na_value)

with tm.assert_raises_regex(IndexError, "cannot do a non-empty take"):
empty.take([0, 1])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger I didn't find any existing tests for take, so not sure the indexing tests is the best place (maybe rather the BaseMethodsTests).
And, I could also add tests for the actual use cases where you get this (eg reindex on an empty series)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like the right place for testing a behavior specific to take.


@pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
def test_take_series(self, data):
s = pd.Series(data)
result = s.take([0, -1])
expected = pd.Series(
data._constructor_from_sequence([data[0], data[len(data) - 1]]),
index=[0, len(data) - 1])
self.assert_series_equal(result, expected)

def test_reindex(self, data, na_value):
s = pd.Series(data)
result = s.reindex([0, 1, 3])
expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
self.assert_series_equal(result, expected)

n = len(data)
result = s.reindex([-1, 0, n])
expected = pd.Series(
data._constructor_from_sequence([na_value, data[0], na_value]),
index=[-1, 0, n])
self.assert_series_equal(result, expected)

result = s.reindex([n, n + 1])
expected = pd.Series(
data._constructor_from_sequence([na_value, na_value]),
index=[n, n + 1])
self.assert_series_equal(result, expected)
13 changes: 13 additions & 0 deletions pandas/tests/extension/category/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ def test_getitem_scalar(self):
# to break things by changing.
pass

@pytest.mark.xfail(reason="Categorical.take buggy")
def test_take(self):
# TODO remove this once Categorical.take is fixed
pass

@pytest.mark.xfail(reason="Categorical.take buggy")
def test_take_empty(self):
pass

@pytest.mark.xfail(reason="test not written correctly for categorical")
def test_reindex(self):
pass


class TestSetitem(base.BaseSetitemTests):
pass
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ def take(self, indexer, allow_fill=True, fill_value=None):
indexer = np.asarray(indexer)
mask = indexer == -1

# take on empty array not handled as desired by numpy in case of -1
if not len(self) and mask.all():
return type(self)([self._na_value] * len(indexer))

indexer = _ensure_platform_int(indexer)
out = self.values.take(indexer)
out[mask] = self._na_value
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,12 @@ def isna(self):
return np.array([x == self._na_value for x in self.data])

def take(self, indexer, allow_fill=True, fill_value=None):
output = [self.data[loc] if loc != -1 else self._na_value
for loc in indexer]
try:
output = [self.data[loc] if loc != -1 else self._na_value
for loc in indexer]
except IndexError:
raise IndexError("Index is out of bounds or cannot do a "
"non-empty take from an empty array.")
return self._constructor_from_sequence(output)

def copy(self, deep=False):
Expand Down