From 1211711a1e29add886f87cca36128a6f34315f23 Mon Sep 17 00:00:00 2001 From: prossahl Date: Tue, 6 Aug 2013 17:37:31 +0100 Subject: [PATCH 1/3] Stricter testing of 'monotonicity' when reindexing with ffill or bfill. --- pandas/core/index.py | 8 ++++---- pandas/tests/test_frame.py | 23 +++++++++++++++++++++++ pandas/tseries/tests/test_timeseries.py | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 5175e01d116c0..3ef1e6d3eabef 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -811,12 +811,12 @@ def get_indexer(self, target, method=None, limit=None): 'objects') if method == 'pad': - if not self.is_monotonic: - raise AssertionError('Must be monotonic for forward fill') + if not self.is_monotonic or not target.is_monotonic: + raise ValueError('Must be monotonic for forward fill') indexer = self._engine.get_pad_indexer(target.values, limit) elif method == 'backfill': - if not self.is_monotonic: - raise AssertionError('Must be monotonic for backward fill') + if not self.is_monotonic or not target.is_monotonic: + raise ValueError('Must be monotonic for backward fill') indexer = self._engine.get_backfill_indexer(target.values, limit) elif method is None: indexer = self._engine.get_indexer(target.values) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1b405eae08797..8754fc3260893 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1629,6 +1629,29 @@ def test_nested_exception(self): repr(df) except Exception as e: self.assertNotEqual(type(e), UnboundLocalError) + + def test_reverse_reindex_ffill_raises(self): + dr = pd.date_range('2013-08-01', periods=6, freq='B') + data = np.random.randn(6,1) + df = pd.DataFrame(data, index=dr, columns=list('A')) + df['A'][3] = np.nan + df_rev = pd.DataFrame(data, index=dr[::-1], columns=list('A')) + # Reverse index is not 'monotonic' + self.assertRaises(ValueError, df_rev.reindex, df.index, method='pad') + 
self.assertRaises(ValueError, df_rev.reindex, df.index, method='ffill') + self.assertRaises(ValueError, df_rev.reindex, df.index, method='bfill') + + def test_reversed_reindex_ffill_raises(self): + dr = pd.date_range('2013-08-01', periods=6, freq='B') + data = np.random.randn(6,1) + df = pd.DataFrame(data, index=dr, columns=list('A')) + df['A'][3] = np.nan + df = pd.DataFrame(data, index=dr, columns=list('A')) + # Reversed reindex is not 'monotonic' + self.assertRaises(ValueError, df.reindex, dr[::-1], method='pad') + self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill') + self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill') + _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e0413531d05b4..5719ffea31e34 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -546,7 +546,7 @@ def test_pad_require_monotonicity(self): rng2 = rng[::2][::-1] - self.assertRaises(AssertionError, rng2.get_indexer, rng, + self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') def test_frame_ctor_datetime64_column(self): From 5c45e430a7f617387d925af4e790fa29b46093d2 Mon Sep 17 00:00:00 2001 From: prossahl Date: Thu, 8 Aug 2013 17:12:03 +0100 Subject: [PATCH 2/3] Updated documentation for issue 4484. --- doc/source/basics.rst | 6 +++--- doc/source/missing_data.rst | 3 +++ doc/source/release.rst | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c37776b3a3cd8..c3657100a1eaa 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -724,6 +724,8 @@ We illustrate these fill methods on a simple TimeSeries: ts2.reindex(ts.index, method='ffill') ts2.reindex(ts.index, method='bfill') +Note these methods require that the indexes are **monotonically increasing**. 
+ Note the same result could have been achieved using :ref:`fillna <missing_data.fillna>`: @@ -731,9 +733,7 @@ Note the same result could have been achieved using :ref:`fillna ts2.reindex(ts.index).fillna(method='ffill') -Note these methods generally assume that the indexes are **sorted**. They may -be modified in the future to be a bit more flexible but as time series data is -ordered most of the time anyway, this has not been a major priority. +Note that this method does not check the order of the index. .. _basics.drop: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 0c8efb4e905ec..c6da92f4d6cc2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -205,6 +205,9 @@ To remind you, these are the available filling methods: With time series data, using pad/ffill is extremely common so that the "last known value" is available at every time point. +The ``ffill()`` function is equivalent to ``fillna(method='ffill')`` +and ``bfill()`` is equivalent to ``fillna(method='bfill')``. + .. _missing_data.dropna: Dropping axis labels with missing data: dropna diff --git a/doc/source/release.rst b/doc/source/release.rst index 35f422ccad9dc..784f17596fa09 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -50,6 +50,8 @@ pandas 0.13 **API Changes** + - ``DataFrame.reindex()`` and forward/backward filling now raise ``ValueError`` + if either index is not monotonic (:issue:`4484`). - ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to @jtratner. As a result, pandas now uses iterators more extensively. This also led to the introduction of substantive parts of the Benjamin From b401edd9abe493d2bac33d3771548cb4f85cdab8 Mon Sep 17 00:00:00 2001 From: prossahl Date: Fri, 9 Aug 2013 09:16:10 +0100 Subject: [PATCH 3/3] Fix tslib.get_period_field segfault and add tests. 
--- pandas/tests/test_tslib.py | 18 ++++++++++++++++++ pandas/tslib.pyx | 7 +++++-- 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/test_tslib.py diff --git a/pandas/tests/test_tslib.py b/pandas/tests/test_tslib.py new file mode 100644 index 0000000000000..5b65be7f7299f --- /dev/null +++ b/pandas/tests/test_tslib.py @@ -0,0 +1,18 @@ +import unittest + +import numpy as np +import pandas as pd + +class TestPeriodField(unittest.TestCase): + _multiprocess_can_split_ = True + + def test_get_period_field_raises_on_out_of_range(self): + self.assertRaises(ValueError, pd.tslib.get_period_field, -1, 0, 0) + + def test_get_period_field_array_raises_on_out_of_range(self): + self.assertRaises(ValueError, pd.tslib.get_period_field_arr, -1, np.empty(1), 0) + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 1c12b627f0690..c22917d4b720d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -2417,6 +2417,8 @@ ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN def get_period_field(int code, int64_t value, int freq): cdef accessor f = _get_accessor_func(code) + if f == NULL: + raise ValueError('Unrecognized code: %s' % code) return f(value, freq) def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): @@ -2426,6 +2428,8 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): accessor f f = _get_accessor_func(code) + if f == NULL: + raise ValueError('Unrecognized code: %s' % code) sz = len(arr) out = np.empty(sz, dtype=np.int64) @@ -2460,8 +2464,7 @@ cdef accessor _get_accessor_func(int code): return &pday_of_year elif code == 10: return &pweekday - else: - raise ValueError('Unrecognized code: %s' % code) + return NULL def extract_ordinals(ndarray[object] values, freq):