Skip to content
Merged
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,7 @@ Other API changes
- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`)
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
- Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations.
-

Expand Down
22 changes: 0 additions & 22 deletions pandas/core/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,25 +324,3 @@ def func(arg, window, min_periods=None):
return cfunc(arg, window, min_periods)

return func


def validate_baseindexer_support(func_name: Optional[str]) -> None:
# GH 32865: These functions work correctly with a BaseIndexer subclass
BASEINDEXER_WHITELIST = {
"count",
"min",
"max",
"mean",
"sum",
"median",
"std",
"var",
"skew",
"kurt",
"quantile",
}
if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST:
raise NotImplementedError(
f"{func_name} is not supported with using a BaseIndexer "
f"subclasses. You can use .apply() with {func_name}."
)
35 changes: 20 additions & 15 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
calculate_center_offset,
calculate_min_periods,
get_weighted_roll_func,
validate_baseindexer_support,
zsqrt,
)
from pandas.core.window.indexers import (
Expand Down Expand Up @@ -393,12 +392,11 @@ def _get_cython_func_type(self, func: str) -> Callable:
return self._get_roll_func(f"{func}_variable")
return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window())

def _get_window_indexer(self, window: int, func_name: Optional[str]) -> BaseIndexer:
def _get_window_indexer(self, window: int) -> BaseIndexer:
"""
Return an indexer class that will compute the window start and end bounds
"""
if isinstance(self.window, BaseIndexer):
validate_baseindexer_support(func_name)
return self.window
if self.is_freq_type:
return VariableWindowIndexer(index_array=self._on.asi8, window_size=window)
Expand Down Expand Up @@ -444,7 +442,7 @@ def _apply(

blocks, obj = self._create_blocks()
block_list = list(blocks)
window_indexer = self._get_window_indexer(window, name)
window_indexer = self._get_window_indexer(window)

results = []
exclude: List[Scalar] = []
Expand Down Expand Up @@ -1632,20 +1630,23 @@ def quantile(self, quantile, interpolation="linear", **kwargs):
"""

def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
if isinstance(self.window, BaseIndexer):
validate_baseindexer_support("cov")

if other is None:
other = self._selected_obj
# only default unset
pairwise = True if pairwise is None else pairwise
other = self._shallow_copy(other)

# GH 16058: offset window
if self.is_freq_type:
window = self.win_freq
# GH 32865. We leverage rolling.mean, so we pass
# to the rolling constructors the data used when constructing self:
# window width, frequency data, or a BaseIndexer subclass
if isinstance(self.window, BaseIndexer):
window = self.window
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix for cov.

else:
window = self._get_window(other)
# GH 16058: offset window
if self.is_freq_type:
window = self.win_freq
else:
window = self._get_window(other)

def _get_cov(X, Y):
# GH #12373 : rolling functions error on float32 data
Expand Down Expand Up @@ -1778,15 +1779,19 @@ def _get_cov(X, Y):
)

def corr(self, other=None, pairwise=None, **kwargs):
if isinstance(self.window, BaseIndexer):
validate_baseindexer_support("corr")

if other is None:
other = self._selected_obj
# only default unset
pairwise = True if pairwise is None else pairwise
other = self._shallow_copy(other)
window = self._get_window(other) if not self.is_freq_type else self.win_freq

# GH 32865. We leverage rolling.cov and rolling.std here, so we pass
# to the rolling constructors the data used when constructing self:
# window width, frequency data, or a BaseIndexer subclass
if isinstance(self.window, BaseIndexer):
window = self.window
else:
window = self._get_window(other) if not self.is_freq_type else self.win_freq
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix for corr.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here and on cov explaining what e are doing (generally not specific to BaseIndexer)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback Done. Please take a look if it's what you had in mind.


def _get_corr(a, b):
a = a.rolling(
Expand Down
50 changes: 37 additions & 13 deletions pandas/tests/window/test_base_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
df.rolling(indexer, win_type="boxcar")


@pytest.mark.parametrize("func", ["cov", "corr"])
def test_notimplemented_functions(func):
# GH 32865
class CustomIndexer(BaseIndexer):
def get_window_bounds(self, num_values, min_periods, center, closed):
return np.array([0, 1]), np.array([1, 2])

df = DataFrame({"values": range(2)})
indexer = CustomIndexer()
with pytest.raises(NotImplementedError, match=f"{func} is not supported"):
getattr(df.rolling(indexer), func)()


@pytest.mark.parametrize("constructor", [Series, DataFrame])
@pytest.mark.parametrize(
"func,np_func,expected,np_kwargs",
Expand Down Expand Up @@ -210,3 +197,40 @@ def test_rolling_forward_skewness(constructor):
]
)
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
"func,expected",
[
("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan],),
(
"corr",
[
1.0,
1.0,
1.0,
0.8704775290207161,
0.018229084250926637,
-0.861357304646493,
1.0,
1.0,
np.nan,
np.nan,
],
),
],
)
def test_rolling_forward_cov_corr(func, expected):
values1 = np.arange(10).reshape(-1, 1)
values2 = values1 * 2
values1[5, 0] = 100
values = np.concatenate([values1, values2], axis=1)

indexer = FixedForwardWindowIndexer(window_size=3)
rolling = DataFrame(values).rolling(window=indexer, min_periods=3)
# We are interested in checking only pairwise covariance / correlation
result = getattr(rolling, func)().loc[(slice(None), 1), 0]
result = result.reset_index(drop=True)
expected = Series(expected)
expected.name = result.name
tm.assert_equal(result, expected)