From 64054441e0f68e31a87bfba705f83726bf4abe3a Mon Sep 17 00:00:00 2001 From: Felix Claessen Date: Thu, 10 Sep 2020 09:32:42 +0200 Subject: [PATCH 1/3] BUG GH35219 Fix bug when resampling with DST transition --- pandas/core/resample.py | 2 +- pandas/tests/resample/test_datetime_index.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index e82a1d4d2cda8..2a81c596c72d6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1088,7 +1088,7 @@ def _upsample(self, method, limit=None, fill_value=None): res_index = self._adjust_binner_for_upsample(binner) # if we have the same frequency as our axis, then we are equal sampling - if limit is None and to_offset(ax.inferred_freq) == self.freq: + if limit is None and to_offset(ax.inferred_freq) == self.freq and len(obj) == len(res_index): result = obj.copy() result.index = res_index else: diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index e7637a598403f..28b0d528f5e71 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1742,3 +1742,22 @@ def test_resample_apply_product(): columns=["A", "B"], ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "first,last,freq_in,freq_out,exp_last", + [ + ("2020-03-28", "2020-03-31", "D", "24H", "2020-03-30 01:00"), # includes transition into DST + ("2020-03-28", "2020-10-27", "D", "24H", "2020-10-27 00:00"), # includes transition into and out of DST + ("2020-10-25", "2020-10-27", "D", "24H", "2020-10-26 23:00"), # includes transition out of DST + ("2020-03-28", "2020-03-31", "24H", "D", "2020-03-30 00:00"), # same as above, but from 24H to D + ("2020-03-28", "2020-10-27", "24H", "D", "2020-10-27 00:00"), + ("2020-10-25", "2020-10-27", "24H", "D", "2020-10-26 00:00"), + ], +) +def test_resample_calendar_day_with_dst(first: str, last: str, freq_in: str, 
freq_out: str, exp_last: str): + # GH 35219 + ts = pd.Series(1., pd.date_range(first, last, freq=freq_in, tz="Europe/Amsterdam")) + result = ts.resample(freq_out).pad() + expected = pd.Series(1., pd.date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam")) + tm.assert_series_equal(result, expected) From e98edacd08e9e21ad9137f8cd3517a1641afbb82 Mon Sep 17 00:00:00 2001 From: Felix Claessen Date: Thu, 10 Sep 2020 09:41:51 +0200 Subject: [PATCH 2/3] CLN GH35219 Run black. --- pandas/core/resample.py | 6 ++- pandas/tests/resample/test_datetime_index.py | 42 ++++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2a81c596c72d6..d3f9b75ca4a91 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1088,7 +1088,11 @@ def _upsample(self, method, limit=None, fill_value=None): res_index = self._adjust_binner_for_upsample(binner) # if we have the same frequency as our axis, then we are equal sampling - if limit is None and to_offset(ax.inferred_freq) == self.freq and len(obj) == len(res_index): + if ( + limit is None + and to_offset(ax.inferred_freq) == self.freq + and len(obj) == len(res_index) + ): result = obj.copy() result.index = res_index else: diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 28b0d528f5e71..bdb1b7afb8b1d 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1747,17 +1747,45 @@ def test_resample_apply_product(): @pytest.mark.parametrize( "first,last,freq_in,freq_out,exp_last", [ - ("2020-03-28", "2020-03-31", "D", "24H", "2020-03-30 01:00"), # includes transition into DST - ("2020-03-28", "2020-10-27", "D", "24H", "2020-10-27 00:00"), # includes transition into and out of DST - ("2020-10-25", "2020-10-27", "D", "24H", "2020-10-26 23:00"), # includes transition out of DST - ("2020-03-28", "2020-03-31", "24H", "D", "2020-03-30 
00:00"), # same as above, but from 24H to D + ( + "2020-03-28", + "2020-03-31", + "D", + "24H", + "2020-03-30 01:00", + ), # includes transition into DST + ( + "2020-03-28", + "2020-10-27", + "D", + "24H", + "2020-10-27 00:00", + ), # includes transition into and out of DST + ( + "2020-10-25", + "2020-10-27", + "D", + "24H", + "2020-10-26 23:00", + ), # includes transition out of DST + ( + "2020-03-28", + "2020-03-31", + "24H", + "D", + "2020-03-30 00:00", + ), # same as above, but from 24H to D ("2020-03-28", "2020-10-27", "24H", "D", "2020-10-27 00:00"), ("2020-10-25", "2020-10-27", "24H", "D", "2020-10-26 00:00"), ], ) -def test_resample_calendar_day_with_dst(first: str, last: str, freq_in: str, freq_out: str, exp_last: str): +def test_resample_calendar_day_with_dst( + first: str, last: str, freq_in: str, freq_out: str, exp_last: str +): # GH 35219 - ts = pd.Series(1., pd.date_range(first, last, freq=freq_in, tz="Europe/Amsterdam")) + ts = pd.Series(1.0, pd.date_range(first, last, freq=freq_in, tz="Europe/Amsterdam")) result = ts.resample(freq_out).pad() - expected = pd.Series(1., pd.date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam")) + expected = pd.Series( + 1.0, pd.date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam") + ) tm.assert_series_equal(result, expected) From 384c01061ded85312388895db2d3808f6b6283be Mon Sep 17 00:00:00 2001 From: Felix Claessen Date: Thu, 10 Sep 2020 10:08:15 +0200 Subject: [PATCH 3/3] DOC GH35219 Add whatsnew entry. --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6612f741d925d..52b6f27db5d04 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -248,6 +248,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.count` and :meth:`SeriesGroupBy.sum` returning ``NaN`` for missing categories when grouped on multiple ``Categoricals``. 
Now returning ``0`` (:issue:`35028`) - Bug in :meth:`DataFrameGroupBy.apply` that would some times throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`) +- Bug in :meth:`DataFrame.resample` that would throw a ``ValueError`` when resampling from "D" to "24H" over a transition into daylight saving time (DST) (:issue:`35219`) - - - Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)