From f0b34b0f8eab22ddde9eb377f83ebf5545acb6b7 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Fri, 22 Apr 2022 16:46:55 -0700 Subject: [PATCH 01/21] add tests for existing behavior Currently all timedelta64 sums involve int -> float conversion. --- pandas/tests/series/test_reductions.py | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index a552d9d84329f..c71e4a91fe34c 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -1,3 +1,5 @@ +from hypothesis import given +import hypothesis.strategies as st import numpy as np import pytest @@ -51,6 +53,34 @@ def test_td64_sum_empty(skipna): assert result == pd.Timedelta(0) +@given( + st.integers( + min_value=0, + max_value=10 ** (np.finfo(np.float64).precision), + ).map(pd.Timedelta) +) +def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timedelta): + result = pd.Series(value).sum() + + assert result == value + + +@given( + st.integers( + min_value=10 ** (np.finfo(np.float64).precision), + max_value=pd.Timedelta.max.value, + ) + .filter(lambda i: int(np.float64(i)) != i) + .map(pd.Timedelta) +) +def test_td64_summation_loses_ns_precision_if_float_conversion_rounds( + value: pd.Timedelta, +): + result = pd.Series(value).sum() + + assert result != value + + def test_td64_summation_overflow(): # GH#9442 ser = Series(pd.date_range("20130101", periods=100000, freq="H")) From 287ca8870b3a9fad9c123d73dc5c9baf36a48e15 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sat, 23 Apr 2022 12:51:10 -0700 Subject: [PATCH 02/21] note spurious overflow for single elem td64 series --- pandas/tests/series/test_reductions.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index c71e4a91fe34c..164ff9bcd3807 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -68,7 +68,7 @@ def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timed @given( st.integers( min_value=10 ** (np.finfo(np.float64).precision), - max_value=pd.Timedelta.max.value, + max_value=pd.Timedelta.max.value - 2**9, ) .filter(lambda i: int(np.float64(i)) != i) .map(pd.Timedelta) @@ -81,6 +81,20 @@ def test_td64_summation_loses_ns_precision_if_float_conversion_rounds( assert result != value +@given( + st.integers( + min_value=pd.Timedelta.max.value - 2**9 + 1, + max_value=pd.Timedelta.max.value, + ).map(pd.Timedelta) +) +def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series_with_near_max_value( + value: pd.Timedelta, +): + msg = "Python int too large to convert to C long" + with pytest.raises(OverflowError, match=msg): + pd.Series(value).sum() + + def test_td64_summation_overflow(): # GH#9442 ser = Series(pd.date_range("20130101", periods=100000, freq="H")) From 992ca95381a4dfefcc00a2a5fe108406d2438fd8 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sat, 23 Apr 2022 14:39:03 -0700 Subject: [PATCH 03/21] finer-grained testing for td64 sum overflow errors --- pandas/tests/series/test_reductions.py | 36 ++++++++++++-------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 164ff9bcd3807..53b74cff98877 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -1,4 +1,4 @@ -from hypothesis import given +from hypothesis import given, note import hypothesis.strategies as st import numpy as np import pytest @@ -95,29 +95,27 @@ def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series_wi pd.Series(value).sum() -def test_td64_summation_overflow(): - # GH#9442 - ser = Series(pd.date_range("20130101", periods=100000, freq="H")) - ser[0] += pd.Timedelta("1s 1ms") +@given(st.integers(min_value=1, max_value=2**10).map(pd.Timedelta)) +def test_td64_summation_raises_overflow_error_for_small_overflows(value: pd.Timedelta): + s = pd.Series([pd.Timedelta.max, value]) - # mean - result = (ser - ser.min()).mean() - expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum()) + msg = "Python int too large to convert to C long" + with pytest.raises(OverflowError, match=msg): + s.sum() - # the computation is converted to float so - # might be some loss of precision - assert np.allclose(result.value / 1000, expected.value / 1000) - # sum - msg = "overflow in timedelta operation" - with pytest.raises(ValueError, match=msg): - (ser - ser.min()).sum() +@given( + st.integers( + min_value=2**10 + 1, + max_value=pd.Timedelta.max.value, + ).map(pd.Timedelta) +) +def test_td64_summation_raises_value_error_for_most_overflows(value: pd.Timedelta): + s = pd.Series([pd.Timedelta.max, value]) - s1 = ser[0:10000] + msg = "overflow in timedelta operation" with pytest.raises(ValueError, match=msg): - (s1 - s1.min()).sum() - s2 = ser[0:1000] - (s2 - s2.min()).sum() + s.sum() def test_prod_numpy16_bug(): From 1ea03e66d0e2f3fbe6d447d7f116e70379a63b84 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sun, 24 Apr 2022 11:08:05 -0700 Subject: [PATCH 04/21] styling, ex msg fixes --- pandas/tests/series/test_reductions.py | 39 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 53b74cff98877..778bddcb3f36a 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -1,4 +1,6 @@ -from hypothesis import given, note +import platform + +from hypothesis import given import hypothesis.strategies as st import numpy as np import pytest @@ -11,6 +13,13 @@ import pandas._testing as tm +@pytest.fixture(name="overflow_msg", scope="module") +def fixture_overflow_msg() -> str: + if platform.system() == "Windows": + return "int too big to convert" + return "Python int too large to convert to C long" + + @pytest.mark.parametrize("operation, expected", [("min", "a"), ("max", "b")]) def test_reductions_series_strings(operation, expected): # GH#31746 @@ -60,7 +69,7 @@ def test_td64_sum_empty(skipna): ).map(pd.Timedelta) ) def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timedelta): - result = pd.Series(value).sum() + result = Series(value).sum() assert result == value @@ -76,31 +85,33 @@ def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timed def test_td64_summation_loses_ns_precision_if_float_conversion_rounds( value: pd.Timedelta, ): - result = pd.Series(value).sum() + result = Series(value).sum() assert result != value @given( - st.integers( + value=st.integers( min_value=pd.Timedelta.max.value - 2**9 + 1, max_value=pd.Timedelta.max.value, ).map(pd.Timedelta) ) -def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series_with_near_max_value( +def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series( value: pd.Timedelta, + overflow_msg: str, ): - msg = "Python int too large to convert to C long" - with pytest.raises(OverflowError, match=msg): - pd.Series(value).sum() + with pytest.raises(OverflowError, match=overflow_msg): + Series(value).sum() -@given(st.integers(min_value=1, max_value=2**10).map(pd.Timedelta)) -def test_td64_summation_raises_overflow_error_for_small_overflows(value: pd.Timedelta): - s = pd.Series([pd.Timedelta.max, value]) +@given(value=st.integers(min_value=1, max_value=2**10).map(pd.Timedelta)) +def test_td64_summation_raises_overflow_error_for_small_overflows( + value: pd.Timedelta, + overflow_msg: str, +): + s = Series([pd.Timedelta.max, value]) - msg = "Python int too large to convert to C long" - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OverflowError, match=overflow_msg): s.sum() @@ -111,7 +122,7 @@ def test_td64_summation_raises_overflow_error_for_small_overflows(value: pd.Time ).map(pd.Timedelta) ) def test_td64_summation_raises_value_error_for_most_overflows(value: pd.Timedelta): - s = pd.Series([pd.Timedelta.max, value]) + s = Series([pd.Timedelta.max, value]) msg = "overflow in timedelta operation" with pytest.raises(ValueError, match=msg): From ac91c584d2b8a12b97d25a4e9566b49fa5b8f520 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sun, 24 Apr 2022 17:30:57 -0700 Subject: [PATCH 05/21] consolidate, parameterize td64 addition overflow tests --- pandas/tests/arithmetic/test_timedelta64.py | 113 +++++++++++++++----- 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index bb7949c9f08e2..62bad9a58ed9e 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -4,7 +4,11 @@ datetime, timedelta, ) +from operator import add, attrgetter +from typing import Callable, Iterable, Union +from hypothesis import given +import hypothesis.strategies as st import numpy as np import pytest @@ -15,6 +19,7 @@ import pandas as pd from pandas import ( + array, DataFrame, DatetimeIndex, NaT, @@ -25,6 +30,7 @@ offsets, timedelta_range, ) +from pandas.core.arrays import TimedeltaArray import pandas._testing as tm from pandas.core.api import ( Float64Index, @@ -38,6 +44,14 @@ ) +ScalarOrBoxType = Union[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame] + + +positive_tds = st.integers(min_value=1, max_value=pd.Timedelta.max.value).map( + pd.Timedelta +) + + def assert_dtype(obj, expected_dtype): """ Helper to check the dtype for a Series, Index, or single-column DataFrame. @@ -275,6 +289,81 @@ def test_comparisons_coverage(self): # Timedelta64[ns] dtype Arithmetic Operations +@pytest.fixture( + name="scalar_or_box_factory", + params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame], + ids=attrgetter("__name__"), + scope="module", +) +def fixture_scalar_or_box_factory( + request: pytest.FixtureRequest, +) -> Callable[[Timedelta], ScalarOrBoxType]: + type_ = request.param + + def factory(value): + if type_ is Timedelta: + return value + elif type_ is DataFrame: + return Series(value).to_frame() + else: + return type_(pd.array([value])) + + return factory + + +@pytest.fixture( + name="commute", + params=[True, False], + scope="module", + ids=lambda v: "commuted" if v else "", +) +def fixture_commute(request: pytest.FixtureRequest) -> Callable[[Iterable], tuple]: + def f(*args): + return tuple(reversed(args)) if request.param else args + + return f + + +@given(increment_value=positive_tds) +def test_addition_raises_expected_error_if_result_would_overflow( + increment_value: Timedelta, + scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], + commute: Callable[[Iterable], tuple], +): + initial_value = scalar_or_box_factory(pd.Timedelta.max) + to_add = scalar_or_box_factory(increment_value) + values = commute(initial_value, to_add) + + if isinstance(initial_value, pd.Timedelta): + msg = "Python int too large to convert to C long" + else: + msg = "Overflow in int64 addition" + + with pytest.raises(OverflowError, match=msg): + add(*values) + + +@given(increment_value=positive_tds) +def test_timedelta_additon_raises_expected_error_if_result_would_overflow( + increment_value: Timedelta, + scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], + commute: Callable[[Iterable], tuple], +): + initial_value = pd.Timestamp.max + to_add = scalar_or_box_factory(increment_value) + values = commute(initial_value, to_add) + + if isinstance(to_add, pd.Timedelta): + ex = OutOfBoundsDatetime + msg = "Out of bounds nanosecond timestamp" + else: + ex = OverflowError + msg = "Overflow in int64 addition" + + with pytest.raises(ex, match=msg): + add(*values) + + class TestTimedelta64ArithmeticUnsorted: # Tests moved from type-specific test files but not # yet sorted/parametrized/de-duplicated @@ -687,30 +776,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): assert res[1] is NaT def test_tdi_add_overflow(self): - # See GH#14068 - # preliminary test scalar analogue of vectorized tests below - # TODO: Make raised error message more informative and test - with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - pd.to_timedelta(106580, "D") + Timestamp("2000") - with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - Timestamp("2000") + pd.to_timedelta(106580, "D") - - _NaT = NaT.value + 1 - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - pd.to_timedelta([106580], "D") + Timestamp("2000") - with pytest.raises(OverflowError, match=msg): - Timestamp("2000") + pd.to_timedelta([106580], "D") - with pytest.raises(OverflowError, match=msg): - pd.to_timedelta([_NaT]) - Timedelta("1 days") - with pytest.raises(OverflowError, match=msg): - pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") - with pytest.raises(OverflowError, match=msg): - ( - pd.to_timedelta([_NaT, "5 days", "1 hours"]) - - pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) - ) - # These should not overflow! exp = TimedeltaIndex([NaT]) result = pd.to_timedelta([NaT]) - Timedelta("1 days") From b8e4a5158cc6f55d6d1e147145169c75e2d5441c Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sun, 24 Apr 2022 20:07:32 -0700 Subject: [PATCH 06/21] add scalar multiplication tests --- pandas/tests/arithmetic/test_timedelta64.py | 76 +++++++++++++++++---- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 62bad9a58ed9e..9fd928c3c359c 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -4,7 +4,7 @@ datetime, timedelta, ) -from operator import add, attrgetter +from operator import add, attrgetter, mul from typing import Callable, Iterable, Union from hypothesis import given @@ -50,6 +50,7 @@ positive_tds = st.integers(min_value=1, max_value=pd.Timedelta.max.value).map( pd.Timedelta ) +xfail_no_overflow_check = pytest.mark.xfail(reason="No overflow check") def assert_dtype(obj, expected_dtype): @@ -324,17 +325,17 @@ def f(*args): return f -@given(increment_value=positive_tds) +@given(to_add=positive_tds) def test_addition_raises_expected_error_if_result_would_overflow( - increment_value: Timedelta, + to_add: Timedelta, scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], commute: Callable[[Iterable], tuple], ): - initial_value = scalar_or_box_factory(pd.Timedelta.max) - to_add = scalar_or_box_factory(increment_value) - values = commute(initial_value, to_add) + left = scalar_or_box_factory(pd.Timedelta.max) + right = scalar_or_box_factory(to_add) + values = commute(left, right) - if isinstance(initial_value, pd.Timedelta): + if pd.api.types.is_scalar(left): msg = "Python int too large to convert to C long" else: msg = "Overflow in int64 addition" @@ -343,17 +344,34 @@ def test_addition_raises_expected_error_if_result_would_overflow( add(*values) -@given(increment_value=positive_tds) -def test_timedelta_additon_raises_expected_error_if_result_would_overflow( - increment_value: Timedelta, +@xfail_no_overflow_check +@given(to_sub=positive_tds) +def test_subtraction_raises_expected_error_if_result_would_overflow( + to_sub: Timedelta, scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], commute: Callable[[Iterable], tuple], ): - initial_value = pd.Timestamp.max - to_add = scalar_or_box_factory(increment_value) - values = commute(initial_value, to_add) + left = scalar_or_box_factory(pd.Timedelta.min) + right = scalar_or_box_factory(to_sub) + values = commute(left, right) - if isinstance(to_add, pd.Timedelta): + with pytest.raises(OverflowError): + left = -1 * abs(values[0]) + right = abs(values[1]) + left - right + + +@given(to_add=positive_tds) +def test_timestamp_additon_raises_expected_error_if_result_would_overflow( + to_add: Timedelta, + scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], + commute: Callable[[Iterable], tuple], +): + left = pd.Timestamp.max + right = scalar_or_box_factory(to_add) + values = commute(left, right) + + if pd.api.types.is_scalar(right): ex = OutOfBoundsDatetime msg = "Out of bounds nanosecond timestamp" else: @@ -364,6 +382,36 @@ def test_timedelta_additon_raises_expected_error_if_result_would_overflow( add(*values) +@given(value=st.floats().filter(lambda f: abs(f) > 1)) +@pytest.mark.parametrize( + argnames="initial_value", + argvalues=[ + pd.Timedelta.max, + pytest.param(pd.array([pd.Timedelta.max]), marks=xfail_no_overflow_check), + pytest.param( + pd.TimedeltaIndex([pd.Timedelta.max]), + marks=xfail_no_overflow_check, + ), + pytest.param(pd.Series(pd.Timedelta.max), marks=xfail_no_overflow_check), + pytest.param( + pd.Series(pd.Timedelta.max).to_frame(), + marks=xfail_no_overflow_check, + ), + ], + ids=attrgetter("__class__.__name__"), +) +def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( + value: float, + initial_value: ScalarOrBoxType, + commute: Callable[[Iterable], tuple], +): + values = commute(initial_value, value) + + msg = "Python int too large to convert to C long|cannot convert float infinity to integer" + with pytest.raises(OverflowError, match=msg): + mul(*values) + + class TestTimedelta64ArithmeticUnsorted: # Tests moved from type-specific test files but not # yet sorted/parametrized/de-duplicated From 4c72f1ebe70fbdfcf30940daac2cbeed3ba0d8bd Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Mon, 25 Apr 2022 13:06:41 -0700 Subject: [PATCH 07/21] add tests for scalar multiplication --- pandas/tests/arithmetic/test_timedelta64.py | 265 +++++++++++++------- 1 file changed, 170 insertions(+), 95 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 9fd928c3c359c..835aa1c7fb031 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -4,8 +4,17 @@ datetime, timedelta, ) -from operator import add, attrgetter, mul -from typing import Callable, Iterable, Union +from itertools import ( + chain, + combinations_with_replacement, + product, +) +from operator import attrgetter +from typing import ( + NamedTuple, + Type, + Union, +) from hypothesis import given import hypothesis.strategies as st @@ -19,7 +28,6 @@ import pandas as pd from pandas import ( - array, DataFrame, DatetimeIndex, NaT, @@ -30,29 +38,94 @@ offsets, timedelta_range, ) -from pandas.core.arrays import TimedeltaArray import pandas._testing as tm from pandas.core.api import ( Float64Index, Int64Index, UInt64Index, ) +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, get_upcast_box, ) +timedelta_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) +timestamp_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) +containers = slice(1, None) +get_item_names = lambda t: "-".join(map(attrgetter("__name__"), t)) -ScalarOrBoxType = Union[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame] +class BinaryOpTypes(NamedTuple): + """ + The expected operand and result types for a binary operation. + """ + + left: Type + right: Type + result: Type + + def __str__(self) -> str: + return get_item_names(self) + + def __repr__(self) -> str: + return f"BinaryOpTypes({self})" + + +positive_tds = st.integers(min_value=1, max_value=Timedelta.max.value).map(Timedelta) -positive_tds = st.integers(min_value=1, max_value=pd.Timedelta.max.value).map( - pd.Timedelta -) xfail_no_overflow_check = pytest.mark.xfail(reason="No overflow check") +@pytest.fixture( + name="add_sub_types", + scope="module", + params=tuple(combinations_with_replacement(timedelta_types, 2)), + ids=get_item_names, +) +def fixture_add_sub_types(request: pytest.FixtureRequest) -> BinaryOpTypes: + """ + Expected types when adding, subtracting Timedeltas. + """ + return_type = max(request.param, key=lambda t: timedelta_types.index(t)) + return BinaryOpTypes(*request.param, return_type) + + +@pytest.fixture( + name="ts_add_sub_types", + scope="module", + params=tuple(product(timedelta_types, timestamp_types)), + ids=get_item_names, +) +def fixture_ts_add_sub_types(request: pytest.FixtureRequest) -> BinaryOpTypes: + """ + Expected types when adding, subtracting Timedeltas and Timestamps. + """ + type_hierarchy = { + name: i + for i, name in chain(enumerate(timedelta_types), enumerate(timestamp_types)) + } + return_type = timestamp_types[max(type_hierarchy[t] for t in request.param)] + + return BinaryOpTypes(*request.param, return_type) + + +def wrap_value(value: Union[Timestamp, Timedelta], type_): + """ + Return value wrapped in a container of given type_, or as-is if type_ is a scalar. + """ + if type_ in (Timedelta, Timestamp): + return value + elif type_ is DataFrame: + return Series(value).to_frame() + else: + return type_(pd.array([value])) + + def assert_dtype(obj, expected_dtype): """ Helper to check the dtype for a Series, Index, or single-column DataFrame. @@ -290,88 +363,80 @@ def test_comparisons_coverage(self): # Timedelta64[ns] dtype Arithmetic Operations -@pytest.fixture( - name="scalar_or_box_factory", - params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame], - ids=attrgetter("__name__"), - scope="module", -) -def fixture_scalar_or_box_factory( - request: pytest.FixtureRequest, -) -> Callable[[Timedelta], ScalarOrBoxType]: - type_ = request.param - - def factory(value): - if type_ is Timedelta: - return value - elif type_ is DataFrame: - return Series(value).to_frame() - else: - return type_(pd.array([value])) - - return factory +@given(positive_td=positive_tds) +def test_add_raises_expected_error_if_result_would_overflow( + add_sub_types: BinaryOpTypes, + positive_td: Timedelta, +): + left = wrap_value(Timedelta.max, add_sub_types.left) + right = wrap_value(positive_td, add_sub_types.right) + if add_sub_types.result is Timedelta: + msg = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + ] + ) + else: + msg = "Overflow in int64 addition" -@pytest.fixture( - name="commute", - params=[True, False], - scope="module", - ids=lambda v: "commuted" if v else "", -) -def fixture_commute(request: pytest.FixtureRequest) -> Callable[[Iterable], tuple]: - def f(*args): - return tuple(reversed(args)) if request.param else args + with pytest.raises(OverflowError, match=msg): + left + right - return f + with pytest.raises(OverflowError, match=msg): + right + left -@given(to_add=positive_tds) -def test_addition_raises_expected_error_if_result_would_overflow( - to_add: Timedelta, - scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], - commute: Callable[[Iterable], tuple], +@xfail_no_overflow_check +@given(positive_td=positive_tds) +def test_sub_raises_expected_error_if_result_would_overflow( + add_sub_types: BinaryOpTypes, + positive_td: Timedelta, ): - left = scalar_or_box_factory(pd.Timedelta.max) - right = scalar_or_box_factory(to_add) - values = commute(left, right) + left = wrap_value(Timedelta.min, add_sub_types.left) + right = wrap_value(positive_td, add_sub_types.right) - if pd.api.types.is_scalar(left): - msg = "Python int too large to convert to C long" - else: - msg = "Overflow in int64 addition" + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + left - right with pytest.raises(OverflowError, match=msg): - add(*values) + (-1 * right) - abs(left) -@xfail_no_overflow_check -@given(to_sub=positive_tds) -def test_subtraction_raises_expected_error_if_result_would_overflow( - to_sub: Timedelta, - scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], - commute: Callable[[Iterable], tuple], +@given(td_value=positive_tds) +def test_add_timestamp_raises_expected_error_if_result_would_overflow( + ts_add_sub_types: BinaryOpTypes, + td_value: Timedelta, ): - left = scalar_or_box_factory(pd.Timedelta.min) - right = scalar_or_box_factory(to_sub) - values = commute(left, right) + left = wrap_value(td_value, ts_add_sub_types.left) + right = wrap_value(Timestamp.max, ts_add_sub_types.right) - with pytest.raises(OverflowError): - left = -1 * abs(values[0]) - right = abs(values[1]) - left - right + if ts_add_sub_types.result is Timestamp: + ex = OutOfBoundsDatetime + msg = "Out of bounds nanosecond timestamp" + else: + ex = OverflowError + msg = "Overflow in int64 addition" + + with pytest.raises(ex, match=msg): + left + right + + with pytest.raises(ex, match=msg): + right + left -@given(to_add=positive_tds) -def test_timestamp_additon_raises_expected_error_if_result_would_overflow( - to_add: Timedelta, - scalar_or_box_factory: Callable[[Timedelta], ScalarOrBoxType], - commute: Callable[[Iterable], tuple], +@xfail_no_overflow_check +@given(td_value=positive_tds) +def test_sub_timestamp_raises_expected_error_if_result_would_overflow( + ts_add_sub_types: BinaryOpTypes, + td_value: Timedelta, ): - left = pd.Timestamp.max - right = scalar_or_box_factory(to_add) - values = commute(left, right) + right = wrap_value(td_value, ts_add_sub_types[0]) + left = wrap_value(Timestamp.min, ts_add_sub_types[1]) - if pd.api.types.is_scalar(right): + if ts_add_sub_types.result is Timestamp: ex = OutOfBoundsDatetime msg = "Out of bounds nanosecond timestamp" else: @@ -379,37 +444,47 @@ def test_timestamp_additon_raises_expected_error_if_result_would_overflow( msg = "Overflow in int64 addition" with pytest.raises(ex, match=msg): - add(*values) + left - right +@given(value=st.floats().filter(lambda f: abs(f) > 1)) +def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( + value: float, +): + td = Timedelta.max + + msg = "|".join( + [ + "cannot convert float infinity to integer", + "Python int too large to convert to C long", + ] + ) + with pytest.raises(OverflowError, match=msg): + td * value + + with pytest.raises(OverflowError, match=msg): + value * td + + +@xfail_no_overflow_check @given(value=st.floats().filter(lambda f: abs(f) > 1)) @pytest.mark.parametrize( - argnames="initial_value", - argvalues=[ - pd.Timedelta.max, - pytest.param(pd.array([pd.Timedelta.max]), marks=xfail_no_overflow_check), - pytest.param( - pd.TimedeltaIndex([pd.Timedelta.max]), - marks=xfail_no_overflow_check, - ), - pytest.param(pd.Series(pd.Timedelta.max), marks=xfail_no_overflow_check), - pytest.param( - pd.Series(pd.Timedelta.max).to_frame(), - marks=xfail_no_overflow_check, - ), - ], - ids=attrgetter("__class__.__name__"), + argnames="td_type", + argvalues=timedelta_types[containers], + ids=attrgetter("__name__"), ) -def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( +def test_container_scalar_multiplication_raises_expected_error_if_result_would_overflow( value: float, - initial_value: ScalarOrBoxType, - commute: Callable[[Iterable], tuple], + td_type: Type, ): - values = commute(initial_value, value) + td = wrap_value(Timedelta.max, td_type) + + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + td * value - msg = "Python int too large to convert to C long|cannot convert float infinity to integer" with pytest.raises(OverflowError, match=msg): - mul(*values) + value * td class TestTimedelta64ArithmeticUnsorted: From aeef81cb16d378e555932ac98d72a74836d7ee14 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Mon, 25 Apr 2022 14:09:54 -0700 Subject: [PATCH 08/21] mypy, win38 fixes --- pandas/tests/arithmetic/test_timedelta64.py | 27 ++++++++------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 835aa1c7fb031..218b4a1112237 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -87,12 +87,12 @@ def __repr__(self) -> str: params=tuple(combinations_with_replacement(timedelta_types, 2)), ids=get_item_names, ) -def fixture_add_sub_types(request: pytest.FixtureRequest) -> BinaryOpTypes: +def fixture_add_sub_types(request) -> BinaryOpTypes: """ Expected types when adding, subtracting Timedeltas. """ return_type = max(request.param, key=lambda t: timedelta_types.index(t)) - return BinaryOpTypes(*request.param, return_type) + return BinaryOpTypes(request.param[0], request.param[1], return_type) @pytest.fixture( @@ -101,7 +101,7 @@ def fixture_add_sub_types(request: pytest.FixtureRequest) -> BinaryOpTypes: params=tuple(product(timedelta_types, timestamp_types)), ids=get_item_names, ) -def fixture_ts_add_sub_types(request: pytest.FixtureRequest) -> BinaryOpTypes: +def fixture_ts_add_sub_types(request) -> BinaryOpTypes: """ Expected types when adding, subtracting Timedeltas and Timestamps. """ @@ -111,7 +111,7 @@ def fixture_ts_add_sub_types(request: pytest.FixtureRequest) -> BinaryOpTypes: } return_type = timestamp_types[max(type_hierarchy[t] for t in request.param)] - return BinaryOpTypes(*request.param, return_type) + return BinaryOpTypes(request.param[0], request.param[1], return_type) def wrap_value(value: Union[Timestamp, Timedelta], type_): @@ -413,12 +413,8 @@ def test_add_timestamp_raises_expected_error_if_result_would_overflow( left = wrap_value(td_value, ts_add_sub_types.left) right = wrap_value(Timestamp.max, ts_add_sub_types.right) - if ts_add_sub_types.result is Timestamp: - ex = OutOfBoundsDatetime - msg = "Out of bounds nanosecond timestamp" - else: - ex = OverflowError - msg = "Overflow in int64 addition" + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) with pytest.raises(ex, match=msg): left + right @@ -436,12 +432,8 @@ def test_sub_timestamp_raises_expected_error_if_result_would_overflow( right = wrap_value(td_value, ts_add_sub_types[0]) left = wrap_value(Timestamp.min, ts_add_sub_types[1]) - if ts_add_sub_types.result is Timestamp: - ex = OutOfBoundsDatetime - msg = "Out of bounds nanosecond timestamp" - else: - ex = OverflowError - msg = "Overflow in int64 addition" + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) with pytest.raises(ex, match=msg): left - right @@ -457,6 +449,7 @@ def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( [ "cannot convert float infinity to integer", "Python int too large to convert to C long", + "int too big to convert", ] ) with pytest.raises(OverflowError, match=msg): @@ -470,7 +463,7 @@ def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( @given(value=st.floats().filter(lambda f: abs(f) > 1)) @pytest.mark.parametrize( argnames="td_type", - argvalues=timedelta_types[containers], + argvalues=timedelta_types[containers], # type: ignore[arg-type] ids=attrgetter("__name__"), ) def test_container_scalar_multiplication_raises_expected_error_if_result_would_overflow( From 438339d204e3cb222c93687c2804313db44b0c03 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Mon, 25 Apr 2022 15:39:31 -0700 Subject: [PATCH 09/21] use box_expected where possible --- pandas/tests/arithmetic/test_timedelta64.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 218b4a1112237..d264a1c756bcc 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -118,12 +118,17 @@ def wrap_value(value: Union[Timestamp, Timedelta], type_): """ Return value wrapped in a container of given type_, or as-is if type_ is a scalar. """ - if type_ in (Timedelta, Timestamp): - return value - elif type_ is DataFrame: - return Series(value).to_frame() + if issubclass(type_, (Timedelta, Timestamp)): + return type_(value) + + if issubclass(type_, pd.core.arrays.ExtensionArray): + box_cls = pd.array + elif issubclass(type_, pd.Index): + box_cls = pd.Index else: - return type_(pd.array([value])) + box_cls = type_ + + return type_(tm.box_expected([value], box_cls)) def assert_dtype(obj, expected_dtype): From bd62e0844af1604089c7bd7e7a4bb06f0ddaf5af Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 26 Apr 2022 11:23:43 -0700 Subject: [PATCH 10/21] consolidate all new, some old td64 overflow tests in new module Some td64 overflow tests remain in other modules: - tests/tslibs/test_conversion.py::test_ensure_timedelta64ns_overflows() - tests/tslibs/test_timedeltas.py::test_huge_nanoseconds_overflow() - tests/scalar/timedelta/test_timedelta.py::test_mul_preserves_reso() - tests/scalar/timedelta/test_constructors.py::test_construct_from_td64_with_unit(),test_overflow_on_construction() Still TBD whether these should remain there or also be migrated. See: github.com/pandas-dev/pandas/pull/46854#discussion_r858131625 --- pandas/tests/arithmetic/test_timedelta64.py | 256 +--------------- pandas/tests/series/test_reductions.py | 39 --- pandas/tests/test_timedelta64_overflow.py | 322 ++++++++++++++++++++ 3 files changed, 323 insertions(+), 294 deletions(-) create mode 100644 pandas/tests/test_timedelta64_overflow.py diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index d264a1c756bcc..b1748e7adee77 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -4,27 +4,11 @@ datetime, timedelta, ) -from itertools import ( - chain, - combinations_with_replacement, - product, -) -from operator import attrgetter -from typing import ( - NamedTuple, - Type, - Union, -) -from hypothesis import given -import hypothesis.strategies as st import numpy as np import pytest -from pandas.errors import ( - OutOfBoundsDatetime, - PerformanceWarning, -) +from pandas.errors import PerformanceWarning import pandas as pd from pandas import ( @@ -44,92 +28,12 @@ Int64Index, UInt64Index, ) -from pandas.core.arrays import ( - DatetimeArray, - TimedeltaArray, -) from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, get_upcast_box, ) -timedelta_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) -timestamp_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) -containers = slice(1, None) -get_item_names = lambda t: "-".join(map(attrgetter("__name__"), t)) - - -class BinaryOpTypes(NamedTuple): - """ - The expected operand and result types for a binary operation. - """ - - left: Type - right: Type - result: Type - - def __str__(self) -> str: - return get_item_names(self) - - def __repr__(self) -> str: - return f"BinaryOpTypes({self})" - - -positive_tds = st.integers(min_value=1, max_value=Timedelta.max.value).map(Timedelta) - -xfail_no_overflow_check = pytest.mark.xfail(reason="No overflow check") - - -@pytest.fixture( - name="add_sub_types", - scope="module", - params=tuple(combinations_with_replacement(timedelta_types, 2)), - ids=get_item_names, -) -def fixture_add_sub_types(request) -> BinaryOpTypes: - """ - Expected types when adding, subtracting Timedeltas. - """ - return_type = max(request.param, key=lambda t: timedelta_types.index(t)) - return BinaryOpTypes(request.param[0], request.param[1], return_type) - - -@pytest.fixture( - name="ts_add_sub_types", - scope="module", - params=tuple(product(timedelta_types, timestamp_types)), - ids=get_item_names, -) -def fixture_ts_add_sub_types(request) -> BinaryOpTypes: - """ - Expected types when adding, subtracting Timedeltas and Timestamps. - """ - type_hierarchy = { - name: i - for i, name in chain(enumerate(timedelta_types), enumerate(timestamp_types)) - } - return_type = timestamp_types[max(type_hierarchy[t] for t in request.param)] - - return BinaryOpTypes(request.param[0], request.param[1], return_type) - - -def wrap_value(value: Union[Timestamp, Timedelta], type_): - """ - Return value wrapped in a container of given type_, or as-is if type_ is a scalar. - """ - if issubclass(type_, (Timedelta, Timestamp)): - return type_(value) - - if issubclass(type_, pd.core.arrays.ExtensionArray): - box_cls = pd.array - elif issubclass(type_, pd.Index): - box_cls = pd.Index - else: - box_cls = type_ - - return type_(tm.box_expected([value], box_cls)) - def assert_dtype(obj, expected_dtype): """ @@ -368,123 +272,6 @@ def test_comparisons_coverage(self): # Timedelta64[ns] dtype Arithmetic Operations -@given(positive_td=positive_tds) -def test_add_raises_expected_error_if_result_would_overflow( - add_sub_types: BinaryOpTypes, - positive_td: Timedelta, -): - left = wrap_value(Timedelta.max, add_sub_types.left) - right = wrap_value(positive_td, add_sub_types.right) - - if add_sub_types.result is Timedelta: - msg = "|".join( - [ - "int too big to convert", - "Python int too large to convert to C long", - ] - ) - else: - msg = "Overflow in int64 addition" - - with pytest.raises(OverflowError, match=msg): - left + right - - with pytest.raises(OverflowError, match=msg): - right + left - - -@xfail_no_overflow_check -@given(positive_td=positive_tds) -def test_sub_raises_expected_error_if_result_would_overflow( - add_sub_types: BinaryOpTypes, - positive_td: Timedelta, -): - left = wrap_value(Timedelta.min, add_sub_types.left) - right = wrap_value(positive_td, add_sub_types.right) - - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - left - right - - with pytest.raises(OverflowError, match=msg): - (-1 * right) - abs(left) - - -@given(td_value=positive_tds) -def test_add_timestamp_raises_expected_error_if_result_would_overflow( - ts_add_sub_types: BinaryOpTypes, - td_value: Timedelta, -): - left = wrap_value(td_value, ts_add_sub_types.left) - right = wrap_value(Timestamp.max, ts_add_sub_types.right) - - ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) - - with pytest.raises(ex, match=msg): - left + right - - with pytest.raises(ex, match=msg): - right + left - - -@xfail_no_overflow_check -@given(td_value=positive_tds) -def test_sub_timestamp_raises_expected_error_if_result_would_overflow( - ts_add_sub_types: BinaryOpTypes, - td_value: Timedelta, -): - right = wrap_value(td_value, ts_add_sub_types[0]) - left = wrap_value(Timestamp.min, ts_add_sub_types[1]) - - ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) - - with pytest.raises(ex, match=msg): - left - right - - -@given(value=st.floats().filter(lambda f: abs(f) > 1)) -def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( - value: float, -): - td = Timedelta.max - - msg = "|".join( - [ - "cannot convert float infinity to integer", - "Python int too large to convert to C long", - "int too big to convert", - ] - ) - with pytest.raises(OverflowError, match=msg): - td * value - - with pytest.raises(OverflowError, match=msg): - value * td - - -@xfail_no_overflow_check -@given(value=st.floats().filter(lambda f: abs(f) > 1)) -@pytest.mark.parametrize( - argnames="td_type", - argvalues=timedelta_types[containers], # type: ignore[arg-type] - ids=attrgetter("__name__"), -) -def test_container_scalar_multiplication_raises_expected_error_if_result_would_overflow( - value: float, - td_type: Type, -): - td = wrap_value(Timedelta.max, td_type) - - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - td * value - - with pytest.raises(OverflowError, match=msg): - value * td - - class TestTimedelta64ArithmeticUnsorted: # Tests moved from type-specific test files but not # yet sorted/parametrized/de-duplicated @@ -872,47 +659,6 @@ def test_tdi_ops_attributes(self): assert result.freq is None -class TestAddSubNaTMasking: - # TODO: parametrize over boxes - - @pytest.mark.parametrize("str_ts", ["1950-01-01", "1980-01-01"]) - def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): - # GH#17991 checking for overflow-masking with NaT - tdinat = pd.to_timedelta(["24658 days 11:15:00", "NaT"]) - tdobj = tm.box_expected(tdinat, box_with_array) - - ts = Timestamp(str_ts) - ts_variants = [ - ts, - ts.to_pydatetime(), - ts.to_datetime64().astype("datetime64[ns]"), - ts.to_datetime64().astype("datetime64[D]"), - ] - - for variant in ts_variants: - res = tdobj + variant - if box_with_array is DataFrame: - assert res.iloc[1, 1] is NaT - else: - assert res[1] is NaT - - def test_tdi_add_overflow(self): - # These should not overflow! - exp = TimedeltaIndex([NaT]) - result = pd.to_timedelta([NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(["4 days", NaT]) - result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([NaT, NaT, "5 hours"]) - result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta( - ["7 seconds", NaT, "4 hours"] - ) - tm.assert_index_equal(result, exp) - - class TestTimedeltaArraylikeAddSubOps: # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index f78057ccd1c83..1a24441d6a9df 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -81,45 +81,6 @@ def test_td64_summation_loses_ns_precision_if_float_conversion_rounds( assert result != value -@given( - st.integers( - min_value=pd.Timedelta.max.value - 2**9 + 1, - max_value=pd.Timedelta.max.value, - ).map(pd.Timedelta) -) -def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series( - value: pd.Timedelta, -): - s = Series(value) - - msg = "int too big to convert|Python int too large to convert to C long" - with pytest.raises(OverflowError, match=msg): - s.sum() - - -@given(st.integers(min_value=1, max_value=2**10).map(pd.Timedelta)) -def test_td64_summation_raises_overflow_error_for_small_overflows(value: pd.Timedelta): - s = Series([pd.Timedelta.max, value]) - - msg = "int too big to convert|Python int too large to convert to C long" - with pytest.raises(OverflowError, match=msg): - s.sum() - - -@given( - st.integers( - min_value=2**10 + 1, - max_value=pd.Timedelta.max.value, - ).map(pd.Timedelta) -) -def test_td64_summation_raises_value_error_for_most_overflows(value: pd.Timedelta): - s = Series([pd.Timedelta.max, value]) - - msg = "overflow in timedelta operation" - with pytest.raises(ValueError, match=msg): - s.sum() - - def test_prod_numpy16_bug(): ser = Series([1.0, 1.0, 1.0], index=range(3)) result = ser.prod() diff --git a/pandas/tests/test_timedelta64_overflow.py b/pandas/tests/test_timedelta64_overflow.py new file mode 100644 index 0000000000000..7bcc389628f71 --- /dev/null +++ b/pandas/tests/test_timedelta64_overflow.py @@ -0,0 +1,322 @@ +""" +Check overflow behavior of operations on timedelta-valued +DataFrames/Series/Indexes/ExtensionArrays. +""" + +from itertools import ( + chain, + combinations_with_replacement, + product, +) +from operator import attrgetter +from typing import ( + NamedTuple, + Type, + Union, +) + +from hypothesis import given +import hypothesis.strategies as st +import pytest + +from pandas.errors import OutOfBoundsDatetime + +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + NaT, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + array, + to_timedelta, +) +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) + +timedelta_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) +timestamp_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) +containers = slice(1, None) +get_item_names = lambda t: "-".join(map(attrgetter("__name__"), t)) + + +# TODO: consolidate remaining td64 overflow tests here? +# - tests/tslibs/test_conversion.py::test_ensure_timedelta64ns_overflows() +# - tests/tslibs/test_timedeltas.py::test_huge_nanoseconds_overflow() +# - tests/scalar/timedelta/test_timedelta.py::test_mul_preserves_reso() +# - tests/scalar/timedelta/test_constructors.py::test_construct_from_td64_with_unit(), +# test_overflow_on_construction() + + +class BinaryOpTypes(NamedTuple): + """ + The expected operand and result types for a binary operation. + """ + + left: Type + right: Type + result: Type + + def __str__(self) -> str: + return get_item_names(self) + + def __repr__(self) -> str: + return f"BinaryOpTypes({self})" + + +positive_tds = st.integers(min_value=1, max_value=Timedelta.max.value).map(Timedelta) + +xfail_no_overflow_check = pytest.mark.xfail(reason="No overflow check") + + +@given( + st.integers( + min_value=Timedelta.max.value - 2**9 + 1, + max_value=Timedelta.max.value, + ).map(Timedelta) +) +def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series( + value: Timedelta, +): + s = Series(value) + + msg = "int too big to convert|Python int too large to convert to C long" + with pytest.raises(OverflowError, match=msg): + s.sum() + + +@given(st.integers(min_value=1, max_value=2**10).map(Timedelta)) +def test_td64_summation_raises_overflow_error_for_small_overflows(value: Timedelta): + s = Series([Timedelta.max, value]) + + msg = "int too big to convert|Python int too large to convert to C long" + with pytest.raises(OverflowError, match=msg): + s.sum() + + +@given( + st.integers( + min_value=2**10 + 1, + max_value=Timedelta.max.value, + ).map(Timedelta) +) +def test_td64_summation_raises_value_error_for_most_overflows(value: Timedelta): + s = Series([Timedelta.max, value]) + + msg = "overflow in timedelta operation" + with pytest.raises(ValueError, match=msg): + s.sum() + + +@pytest.fixture( + name="add_sub_types", + scope="module", + params=tuple(combinations_with_replacement(timedelta_types, 2)), + ids=get_item_names, +) +def fixture_add_sub_types(request) -> BinaryOpTypes: + """ + Expected types when adding, subtracting Timedeltas. + """ + return_type = max(request.param, key=lambda t: timedelta_types.index(t)) + return BinaryOpTypes(request.param[0], request.param[1], return_type) + + +@pytest.fixture( + name="ts_add_sub_types", + scope="module", + params=tuple(product(timedelta_types, timestamp_types)), + ids=get_item_names, +) +def fixture_ts_add_sub_types(request) -> BinaryOpTypes: + """ + Expected types when adding, subtracting Timedeltas and Timestamps. + """ + type_hierarchy = { + name: i + for i, name in chain(enumerate(timedelta_types), enumerate(timestamp_types)) + } + return_type = timestamp_types[max(type_hierarchy[t] for t in request.param)] + + return BinaryOpTypes(request.param[0], request.param[1], return_type) + + +def wrap_value(value: Union[Timestamp, Timedelta], type_): + """ + Return value wrapped in a container of given type_, or as-is if type_ is a scalar. + """ + if issubclass(type_, (Timedelta, Timestamp)): + return type_(value) + + if issubclass(type_, ExtensionArray): + box_cls = array + elif issubclass(type_, Index): + box_cls = Index + else: + box_cls = type_ + + return type_(tm.box_expected([value], box_cls)) + + +@given(positive_td=positive_tds) +def test_add_raises_expected_error_if_result_would_overflow( + add_sub_types: BinaryOpTypes, + positive_td: Timedelta, +): + left = wrap_value(Timedelta.max, add_sub_types.left) + right = wrap_value(positive_td, add_sub_types.right) + + if add_sub_types.result is Timedelta: + msg = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + ] + ) + else: + msg = "Overflow in int64 addition" + + with pytest.raises(OverflowError, match=msg): + left + right + + with pytest.raises(OverflowError, match=msg): + right + left + + +@xfail_no_overflow_check +@given(positive_td=positive_tds) +def test_sub_raises_expected_error_if_result_would_overflow( + add_sub_types: BinaryOpTypes, + positive_td: Timedelta, +): + left = wrap_value(Timedelta.min, add_sub_types.left) + right = wrap_value(positive_td, add_sub_types.right) + + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + left - right + + with pytest.raises(OverflowError, match=msg): + (-1 * right) - abs(left) + + +@given(td_value=positive_tds) +def test_add_timestamp_raises_expected_error_if_result_would_overflow( + ts_add_sub_types: BinaryOpTypes, + td_value: Timedelta, +): + left = wrap_value(td_value, ts_add_sub_types.left) + right = wrap_value(Timestamp.max, ts_add_sub_types.right) + + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) + + with pytest.raises(ex, match=msg): + left + right + + with pytest.raises(ex, match=msg): + right + left + + +@xfail_no_overflow_check +@given(td_value=positive_tds) +def test_sub_timestamp_raises_expected_error_if_result_would_overflow( + ts_add_sub_types: BinaryOpTypes, + td_value: Timedelta, +): + right = wrap_value(td_value, ts_add_sub_types[0]) + left = wrap_value(Timestamp.min, ts_add_sub_types[1]) + + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) + + with pytest.raises(ex, match=msg): + left - right + + +@given(value=st.floats().filter(lambda f: abs(f) > 1)) +def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( + value: float, +): + td = Timedelta.max + + msg = "|".join( + [ + "cannot convert float infinity to integer", + "Python int too large to convert to C long", + "int too big to convert", + ] + ) + with pytest.raises(OverflowError, match=msg): + td * value + + with pytest.raises(OverflowError, match=msg): + value * td + + +@xfail_no_overflow_check +@given(value=st.floats().filter(lambda f: abs(f) > 1)) +@pytest.mark.parametrize( + argnames="td_type", + argvalues=timedelta_types[containers], # type: ignore[arg-type] + ids=attrgetter("__name__"), +) +def test_container_scalar_multiplication_raises_expected_error_if_result_would_overflow( + value: float, + td_type: Type, +): + td = wrap_value(Timedelta.max, td_type) + + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + td * value + + with pytest.raises(OverflowError, match=msg): + value * td + + +class TestAddSubNaTMasking: + # TODO: parametrize over boxes + + @pytest.mark.parametrize("str_ts", ["1950-01-01", "1980-01-01"]) + def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): + # GH#17991 checking for overflow-masking with NaT + tdinat = to_timedelta(["24658 days 11:15:00", "NaT"]) + tdobj = tm.box_expected(tdinat, box_with_array) + + ts = Timestamp(str_ts) + ts_variants = [ + ts, + ts.to_pydatetime(), + ts.to_datetime64().astype("datetime64[ns]"), + ts.to_datetime64().astype("datetime64[D]"), + ] + + for variant in ts_variants: + res = tdobj + variant + if box_with_array is DataFrame: + assert res.iloc[1, 1] is NaT + else: + assert res[1] is NaT + + def test_tdi_add_overflow(self): + # These should not overflow! + exp = TimedeltaIndex([NaT]) + result = to_timedelta([NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(["4 days", NaT]) + result = to_timedelta(["5 days", NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([NaT, NaT, "5 hours"]) + result = to_timedelta([NaT, "5 days", "1 hours"]) + to_timedelta( + ["7 seconds", NaT, "4 hours"] + ) + tm.assert_index_equal(result, exp) From 1424dee1feeeadde72a35366d33b48890db1515f Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 26 Apr 2022 15:24:45 -0700 Subject: [PATCH 11/21] remove hypothesis overuse --- pandas/tests/arithmetic/test_timedelta64.py | 41 ++ pandas/tests/test_timedelta64_overflow.py | 441 +++++++++----------- 2 files changed, 235 insertions(+), 247 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b1748e7adee77..1e095ff22b642 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -659,6 +659,47 @@ def test_tdi_ops_attributes(self): assert result.freq is None +class TestAddSubNaTMasking: + # TODO: parametrize over boxes + + @pytest.mark.parametrize("str_ts", ["1950-01-01", "1980-01-01"]) + def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): + # GH#17991 checking for overflow-masking with NaT + tdinat = pd.to_timedelta(["24658 days 11:15:00", "NaT"]) + tdobj = tm.box_expected(tdinat, box_with_array) + + ts = Timestamp(str_ts) + ts_variants = [ + ts, + ts.to_pydatetime(), + ts.to_datetime64().astype("datetime64[ns]"), + ts.to_datetime64().astype("datetime64[D]"), + ] + + for variant in ts_variants: + res = tdobj + variant + if box_with_array is DataFrame: + assert res.iloc[1, 1] is NaT + else: + assert res[1] is NaT + + def test_tdi_add_overflow(self): + # These should not overflow! + exp = TimedeltaIndex([NaT]) + result = pd.to_timedelta([NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(["4 days", NaT]) + result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([NaT, NaT, "5 hours"]) + result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta( + ["7 seconds", NaT, "4 hours"] + ) + tm.assert_index_equal(result, exp) + + class TestTimedeltaArraylikeAddSubOps: # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ diff --git a/pandas/tests/test_timedelta64_overflow.py b/pandas/tests/test_timedelta64_overflow.py index 7bcc389628f71..7e5bf0f6acba9 100644 --- a/pandas/tests/test_timedelta64_overflow.py +++ b/pandas/tests/test_timedelta64_overflow.py @@ -1,24 +1,22 @@ """ Check overflow behavior of operations on timedelta-valued -DataFrames/Series/Indexes/ExtensionArrays. +ExtensionArrays/Indexes/Series/DataFrames. """ -from itertools import ( - chain, - combinations_with_replacement, - product, +from contextlib import ( + AbstractContextManager, + nullcontext, ) -from operator import attrgetter +from functools import partial from typing import ( - NamedTuple, + List, Type, Union, ) -from hypothesis import given -import hypothesis.strategies as st import pytest +from pandas._libs.lib import is_list_like from pandas.errors import OutOfBoundsDatetime from pandas import ( @@ -31,7 +29,6 @@ TimedeltaIndex, Timestamp, array, - to_timedelta, ) import pandas._testing as tm from pandas.core.arrays import ( @@ -40,114 +37,32 @@ TimedeltaArray, ) -timedelta_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) -timestamp_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) -containers = slice(1, None) -get_item_names = lambda t: "-".join(map(attrgetter("__name__"), t)) +td64_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) +td64_box_types = td64_types[slice(1, None)] +td64_arraylike_types = td64_types[slice(1, 4)] +dt64_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) +TD64_TYPE = Union[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame] +TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame] +TD64_ARRAYLIKE_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series] +DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame] -# TODO: consolidate remaining td64 overflow tests here? -# - tests/tslibs/test_conversion.py::test_ensure_timedelta64ns_overflows() -# - tests/tslibs/test_timedeltas.py::test_huge_nanoseconds_overflow() -# - tests/scalar/timedelta/test_timedelta.py::test_mul_preserves_reso() -# - tests/scalar/timedelta/test_constructors.py::test_construct_from_td64_with_unit(), -# test_overflow_on_construction() - -class BinaryOpTypes(NamedTuple): - """ - The expected operand and result types for a binary operation. - """ - - left: Type - right: Type - result: Type - - def __str__(self) -> str: - return get_item_names(self) - - def __repr__(self) -> str: - return f"BinaryOpTypes({self})" - - -positive_tds = st.integers(min_value=1, max_value=Timedelta.max.value).map(Timedelta) - -xfail_no_overflow_check = pytest.mark.xfail(reason="No overflow check") - - -@given( - st.integers( - min_value=Timedelta.max.value - 2**9 + 1, - max_value=Timedelta.max.value, - ).map(Timedelta) -) -def test_td64_summation_raises_spurious_overflow_error_for_single_elem_series( - value: Timedelta, -): - s = Series(value) - - msg = "int too big to convert|Python int too large to convert to C long" - with pytest.raises(OverflowError, match=msg): - s.sum() - - -@given(st.integers(min_value=1, max_value=2**10).map(Timedelta)) -def test_td64_summation_raises_overflow_error_for_small_overflows(value: Timedelta): - s = Series([Timedelta.max, value]) - - msg = "int too big to convert|Python int too large to convert to C long" - with pytest.raises(OverflowError, match=msg): - s.sum() - - -@given( - st.integers( - min_value=2**10 + 1, - max_value=Timedelta.max.value, - ).map(Timedelta) -) -def test_td64_summation_raises_value_error_for_most_overflows(value: Timedelta): - s = Series([Timedelta.max, value]) - - msg = "overflow in timedelta operation" - with pytest.raises(ValueError, match=msg): - s.sum() - - -@pytest.fixture( - name="add_sub_types", - scope="module", - params=tuple(combinations_with_replacement(timedelta_types, 2)), - ids=get_item_names, -) -def fixture_add_sub_types(request) -> BinaryOpTypes: - """ - Expected types when adding, subtracting Timedeltas. - """ - return_type = max(request.param, key=lambda t: timedelta_types.index(t)) - return BinaryOpTypes(request.param[0], request.param[1], return_type) - - -@pytest.fixture( - name="ts_add_sub_types", - scope="module", - params=tuple(product(timedelta_types, timestamp_types)), - ids=get_item_names, +TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" +TD64_OVERFLOW_MSG = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + "Overflow in int64 addition", + ] ) -def fixture_ts_add_sub_types(request) -> BinaryOpTypes: - """ - Expected types when adding, subtracting Timedeltas and Timestamps. - """ - type_hierarchy = { - name: i - for i, name in chain(enumerate(timedelta_types), enumerate(timestamp_types)) - } - return_type = timestamp_types[max(type_hierarchy[t] for t in request.param)] - return BinaryOpTypes(request.param[0], request.param[1], return_type) +does_not_raise = nullcontext +raises_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) +raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) -def wrap_value(value: Union[Timestamp, Timedelta], type_): +def wrap_value(value, type_): """ Return value wrapped in a container of given type_, or as-is if type_ is a scalar. """ @@ -161,162 +76,194 @@ def wrap_value(value: Union[Timestamp, Timedelta], type_): else: box_cls = type_ - return type_(tm.box_expected([value], box_cls)) + if not is_list_like(value): + value = [value] + return tm.box_expected(value, box_cls, transpose=False) -@given(positive_td=positive_tds) -def test_add_raises_expected_error_if_result_would_overflow( - add_sub_types: BinaryOpTypes, - positive_td: Timedelta, -): - left = wrap_value(Timedelta.max, add_sub_types.left) - right = wrap_value(positive_td, add_sub_types.right) +@pytest.fixture(name="td64_type", params=td64_types, scope="module") +def fixture_td64_type(request) -> Type[TD64_TYPE]: + return request.param - if add_sub_types.result is Timedelta: - msg = "|".join( - [ - "int too big to convert", - "Python int too large to convert to C long", - ] - ) - else: - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - left + right +@pytest.fixture(name="td64_arraylike_type", params=td64_arraylike_types, scope="module") +def fixture_td64_arraylike_type(request) -> Type[TD64_ARRAYLIKE_TYPE]: + return request.param - with pytest.raises(OverflowError, match=msg): - right + left +@pytest.fixture(name="td64_box_type", params=td64_box_types, scope="module") +def fixture_td64_box_type(request) -> Type[TD64_BOX_TYPE]: + return request.param -@xfail_no_overflow_check -@given(positive_td=positive_tds) -def test_sub_raises_expected_error_if_result_would_overflow( - add_sub_types: BinaryOpTypes, - positive_td: Timedelta, -): - left = wrap_value(Timedelta.min, add_sub_types.left) - right = wrap_value(positive_td, add_sub_types.right) - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - left - right +@pytest.fixture(name="dt64_type", params=dt64_types, scope="module") +def fixture_dt64_type(request) -> Type[DT64_TYPE]: + return request.param - with pytest.raises(OverflowError, match=msg): - (-1 * right) - abs(left) +@pytest.fixture(name="max_td64") +def fixture_max_td64(td64_box_type: Type[TD64_BOX_TYPE]) -> TD64_BOX_TYPE: + """ + A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to + Timestamp.max. + """ + return wrap_value(Timedelta.max, td64_box_type) -@given(td_value=positive_tds) -def test_add_timestamp_raises_expected_error_if_result_would_overflow( - ts_add_sub_types: BinaryOpTypes, - td_value: Timedelta, -): - left = wrap_value(td_value, ts_add_sub_types.left) - right = wrap_value(Timestamp.max, ts_add_sub_types.right) - ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) +@pytest.fixture( + name="positive_td64", + params=[Timedelta(1), Timedelta(1024), Timedelta.max], + ids=["1ns", "1024ns", "td_max"], +) +def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE: + """ + A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. + """ + value = request.param + return wrap_value(value, td64_type) - with pytest.raises(ex, match=msg): - left + right - with pytest.raises(ex, match=msg): - right + left +class TestBoxReductionMethods: + """ + For timedelta64-valued ExtensionArrays/Indexes/Series/DataFrames. + """ + @pytest.mark.parametrize( + ["value", "expected_exs"], + [ + (Timedelta.min, does_not_raise()), + (Timedelta.min + Timedelta(511), does_not_raise()), + (Timedelta.max - Timedelta(511), raises_overflow_error()), + (Timedelta.max, raises_overflow_error()), + ], + ) + def test_arraylike_sum_fails_with_large_single_elem( + self, + value: Timedelta, + expected_exs: AbstractContextManager, + td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + ): + td64_arraylike = wrap_value(value, td64_arraylike_type) + with expected_exs: + result = td64_arraylike.sum() + # for large negative values, sum() doesn't raise but does return NaT + assert result is NaT + + @pytest.mark.parametrize( + ("values", "expected_exs"), + ( + ([Timedelta.min] * 2, raises_value_error()), + ([Timedelta.min, Timedelta(-1025)], raises_value_error()), + ([Timedelta.min, Timedelta(-1024)], does_not_raise()), + ([Timedelta.min, Timedelta(-1)], does_not_raise()), + ([Timedelta.max, Timedelta(1)], raises_overflow_error()), + ([Timedelta.max, Timedelta(1024)], raises_overflow_error()), + ([Timedelta.max, Timedelta(1025)], raises_value_error()), + ([Timedelta.max] * 2, raises_value_error()), + ), + ) + def test_arraylike_sum_usually_raises_for_overflow( + self, + values: List[Timedelta], + expected_exs: AbstractContextManager, + td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + ): + td64_arraylike = wrap_value(values, td64_arraylike_type) + with expected_exs: + result = td64_arraylike.sum() + # for small negative overflows, sum() doesn't raise but does return NaT + assert result is NaT + + @pytest.mark.parametrize( + "values", + ( + [Timedelta.min] * 2, + [Timedelta.min, Timedelta(-1)], + [Timedelta.max, Timedelta(1)], + [Timedelta.max] * 2, + ), + ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], + ) + def test_df_sum_returns_nat_for_all_overflows(self, values: List[Timedelta]): + td64_df = wrap_value(values, DataFrame) + result = td64_df.sum() + expected = Series(NaT, index=[0], dtype="timedelta64[ns]") -@xfail_no_overflow_check -@given(td_value=positive_tds) -def test_sub_timestamp_raises_expected_error_if_result_would_overflow( - ts_add_sub_types: BinaryOpTypes, - td_value: Timedelta, -): - right = wrap_value(td_value, ts_add_sub_types[0]) - left = wrap_value(Timestamp.min, ts_add_sub_types[1]) + tm.assert_series_equal(result, expected) - ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) - with pytest.raises(ex, match=msg): - left - right +class TestBinaryOps: + """ + Operations between timedelta64-valued ExtensionArrays/Indexes/Series/DataFrames, and + a numeric/timelike scalar or timelike-valued ExtensionArray/Index/Series/DataFrame. + """ + def test_add_raises_if_result_would_overflow( + self, + max_td64: TD64_TYPE, + positive_td64: TD64_BOX_TYPE, + ): + with raises_overflow_error(): + max_td64 + positive_td64 -@given(value=st.floats().filter(lambda f: abs(f) > 1)) -def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( - value: float, -): - td = Timedelta.max + with raises_overflow_error(): + positive_td64 + max_td64 - msg = "|".join( + @pytest.mark.parametrize( + ["rval", "expected_exs"], [ - "cannot convert float infinity to integer", - "Python int too large to convert to C long", - "int too big to convert", - ] + (Timedelta(1), does_not_raise()), + (Timedelta(2), raises_overflow_error()), + (Timedelta.max, raises_overflow_error()), + ], ) - with pytest.raises(OverflowError, match=msg): - td * value - - with pytest.raises(OverflowError, match=msg): - value * td - - -@xfail_no_overflow_check -@given(value=st.floats().filter(lambda f: abs(f) > 1)) -@pytest.mark.parametrize( - argnames="td_type", - argvalues=timedelta_types[containers], # type: ignore[arg-type] - ids=attrgetter("__name__"), -) -def test_container_scalar_multiplication_raises_expected_error_if_result_would_overflow( - value: float, - td_type: Type, -): - td = wrap_value(Timedelta.max, td_type) - - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - td * value - - with pytest.raises(OverflowError, match=msg): - value * td - - -class TestAddSubNaTMasking: - # TODO: parametrize over boxes - - @pytest.mark.parametrize("str_ts", ["1950-01-01", "1980-01-01"]) - def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): - # GH#17991 checking for overflow-masking with NaT - tdinat = to_timedelta(["24658 days 11:15:00", "NaT"]) - tdobj = tm.box_expected(tdinat, box_with_array) - - ts = Timestamp(str_ts) - ts_variants = [ - ts, - ts.to_pydatetime(), - ts.to_datetime64().astype("datetime64[ns]"), - ts.to_datetime64().astype("datetime64[D]"), - ] - - for variant in ts_variants: - res = tdobj + variant - if box_with_array is DataFrame: - assert res.iloc[1, 1] is NaT - else: - assert res[1] is NaT - - def test_tdi_add_overflow(self): - # These should not overflow! - exp = TimedeltaIndex([NaT]) - result = to_timedelta([NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(["4 days", NaT]) - result = to_timedelta(["5 days", NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([NaT, NaT, "5 hours"]) - result = to_timedelta([NaT, "5 days", "1 hours"]) + to_timedelta( - ["7 seconds", NaT, "4 hours"] - ) - tm.assert_index_equal(result, exp) + def test_sub_raises_if_result_would_overflow( + self, + max_td64: TD64_BOX_TYPE, + rval: Timedelta, + expected_exs: AbstractContextManager, + td64_type: Type[TD64_TYPE], + ): + rvalue = wrap_value(rval, td64_type) + min_td64 = -1 * max_td64 + + with expected_exs: + min_td64 - rvalue + + with expected_exs: + -1 * rvalue - max_td64 + + def test_add_dt64_raises_if_result_would_overflow( + self, + max_td64: TD64_BOX_TYPE, + dt64_type: Type[DT64_TYPE], + ): + max_dt64 = wrap_value(Timestamp.max, dt64_type) + ex = (OutOfBoundsDatetime, OverflowError) + msg = TD64_OVERFLOW_MSG + "|Out of bounds nanosecond timestamp" + + with pytest.raises(ex, match=msg): + max_td64 + max_dt64 + + with pytest.raises(ex, match=msg): + max_dt64 + max_td64 + + def test_sub_td64_raises_if_result_would_overflow( + self, + max_td64: TD64_BOX_TYPE, + dt64_type: Type[DT64_TYPE], + ): + min_dt64 = wrap_value(Timestamp.min, dt64_type) + ex = (OutOfBoundsDatetime, OverflowError) + msg = TD64_OVERFLOW_MSG + "|Out of bounds nanosecond timestamp" + + with pytest.raises(ex, match=msg): + min_dt64 - max_td64 + + @pytest.mark.xfail(reason="Not implemented") + def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE): + with raises_overflow_error(): + max_td64 * 1.01 + + with raises_overflow_error(): + 1.01 * max_td64 From 6c3b48274f88fa228ae9a9c229bdfe4add0e9d09 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 27 Apr 2022 12:26:29 -0700 Subject: [PATCH 12/21] address PR code style feedback --- pandas/tests/series/test_reductions.py | 12 +++++------- pandas/tests/test_timedelta64_overflow.py | 18 +++++++++--------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 1a24441d6a9df..3277afe1c4511 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -53,12 +53,7 @@ def test_td64_sum_empty(skipna): assert result == pd.Timedelta(0) -@given( - st.integers( - min_value=0, - max_value=10 ** (np.finfo(np.float64).precision), - ).map(pd.Timedelta) -) +@given(st.integers(min_value=0, max_value=10**15).map(pd.Timedelta)) def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timedelta): result = Series(value).sum() @@ -67,7 +62,7 @@ def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timed @given( st.integers( - min_value=10 ** (np.finfo(np.float64).precision), + min_value=10**15, max_value=pd.Timedelta.max.value - 2**9, ) .filter(lambda i: int(np.float64(i)) != i) @@ -76,6 +71,9 @@ def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timed def test_td64_summation_loses_ns_precision_if_float_conversion_rounds( value: pd.Timedelta, ): + """ + The computation involves int->float conversion, so there can be loss of precision. + """ result = Series(value).sum() assert result != value diff --git a/pandas/tests/test_timedelta64_overflow.py b/pandas/tests/test_timedelta64_overflow.py index 7e5bf0f6acba9..c4e90c52cea6a 100644 --- a/pandas/tests/test_timedelta64_overflow.py +++ b/pandas/tests/test_timedelta64_overflow.py @@ -62,19 +62,19 @@ raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) -def wrap_value(value, type_): +def wrap_value(value, cls): """ - Return value wrapped in a container of given type_, or as-is if type_ is a scalar. + Return value wrapped in a container of given cls, or as-is if cls is a scalar. """ - if issubclass(type_, (Timedelta, Timestamp)): - return type_(value) + if issubclass(cls, (Timedelta, Timestamp)): + return cls(value) - if issubclass(type_, ExtensionArray): + if issubclass(cls, ExtensionArray): box_cls = array - elif issubclass(type_, Index): + elif issubclass(cls, Index): box_cls = Index else: - box_cls = type_ + box_cls = cls if not is_list_like(value): value = [value] @@ -240,7 +240,7 @@ def test_add_dt64_raises_if_result_would_overflow( ): max_dt64 = wrap_value(Timestamp.max, dt64_type) ex = (OutOfBoundsDatetime, OverflowError) - msg = TD64_OVERFLOW_MSG + "|Out of bounds nanosecond timestamp" + msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) with pytest.raises(ex, match=msg): max_td64 + max_dt64 @@ -255,7 +255,7 @@ def test_sub_td64_raises_if_result_would_overflow( ): min_dt64 = wrap_value(Timestamp.min, dt64_type) ex = (OutOfBoundsDatetime, OverflowError) - msg = TD64_OVERFLOW_MSG + "|Out of bounds nanosecond timestamp" + msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) with pytest.raises(ex, match=msg): min_dt64 - max_td64 From c14d0c8c3d3339200af2a77c2f8315b6ca797174 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 27 Apr 2022 15:13:00 -0700 Subject: [PATCH 13/21] put new tests back in pre-existing modules Still need to DRY up a few things. --- pandas/tests/arithmetic/test_timedelta64.py | 231 ++++++++++++++++- pandas/tests/reductions/test_reductions.py | 169 ++++++++++++ pandas/tests/series/test_reductions.py | 28 -- pandas/tests/test_timedelta64_overflow.py | 269 -------------------- 4 files changed, 386 insertions(+), 311 deletions(-) delete mode 100644 pandas/tests/test_timedelta64_overflow.py diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 1e095ff22b642..ef85e5684b2d3 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1,14 +1,27 @@ # Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. +from contextlib import ( + AbstractContextManager, + nullcontext, +) from datetime import ( datetime, timedelta, ) +from functools import partial +from typing import ( + Type, + Union, +) import numpy as np import pytest -from pandas.errors import PerformanceWarning +from pandas._libs.lib import is_list_like +from pandas.errors import ( + OutOfBoundsDatetime, + PerformanceWarning, +) import pandas as pd from pandas import ( @@ -28,12 +41,35 @@ Int64Index, UInt64Index, ) +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, get_upcast_box, ) +TD64_TYPE = Union[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame] +TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame] +DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame] + + +TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" +TD64_OVERFLOW_MSG = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + "Overflow in int64 addition", + ] +) + +does_not_raise = nullcontext +raises_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) +raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) + def assert_dtype(obj, expected_dtype): """ @@ -56,6 +92,87 @@ def get_expected_name(box, names): return exname +def wrap_value(value, cls): + """ + Return value wrapped in a container of given cls, or as-is if cls is a scalar. + """ + if not issubclass(cls, pd.core.arraylike.OpsMixin): + return cls(value) + + if issubclass(cls, ExtensionArray): + box_cls = pd.array + elif issubclass(cls, pd.Index): + box_cls = pd.Index + else: + box_cls = cls + + if not is_list_like(value): + value = [value] + return tm.box_expected(value, box_cls, transpose=False) + + +def get_result_type( + td64_type: Type[TD64_TYPE], + dt64_type: Type[DT64_TYPE], +) -> Type[DT64_TYPE]: + """ + Expected result when adding, subtracting timedelta64-valued box and + datetime64-valued box or scalar. + """ + dt64 = wrap_value(Timestamp.now(), dt64_type) + td64 = wrap_value(Timedelta(0), td64_type) + return type(dt64 + td64) + + +@pytest.fixture( + name="td64_type", + params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame], + scope="module", +) +def fixture_td64_type(request) -> Type[TD64_TYPE]: + return request.param + + +@pytest.fixture( + name="td64_box_type", + params=[TimedeltaArray, TimedeltaIndex, Series, DataFrame], + scope="module", +) +def fixture_td64_box_type(request) -> Type[TD64_BOX_TYPE]: + return request.param + + +@pytest.fixture( + name="dt64_type", + params=[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame], + scope="module", +) +def fixture_dt64_type(request) -> Type[DT64_TYPE]: + return request.param + + +@pytest.fixture(name="max_td64") +def fixture_max_td64(td64_box_type: Type[TD64_BOX_TYPE]) -> TD64_BOX_TYPE: + """ + A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to + Timestamp.max. + """ + return wrap_value(Timedelta.max, td64_box_type) + + +@pytest.fixture( + name="positive_td64", + params=[Timedelta(1), Timedelta(1024), Timedelta.max], + ids=["1ns", "1024ns", "td_max"], +) +def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE: + """ + A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. + """ + value = request.param + return wrap_value(value, td64_type) + + # ------------------------------------------------------------------ # Timedelta64[ns] dtype Comparisons @@ -2062,18 +2179,104 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array): td1**scalar_td -def test_add_timestamp_to_timedelta(): - # GH: 35897 - timestamp = Timestamp("2021-01-01") - result = timestamp + timedelta_range("0s", "1s", periods=31) - expected = DatetimeIndex( +class TestAddSub: + """ + Addition/subtraction between a timedelta64-valued + ExtensionArrays/Indexes/Series/DataFrames, and a timedelta64 scalar or + timedelta64-valued ExtensionArray/Index/Series/DataFrame. + """ + + def test_add_raises_if_result_would_overflow( + self, + max_td64: TD64_TYPE, + positive_td64: TD64_BOX_TYPE, + ): + with raises_overflow_error(): + max_td64 + positive_td64 + + with raises_overflow_error(): + positive_td64 + max_td64 + + @pytest.mark.parametrize( + ["rval", "expected_exs"], [ - timestamp - + ( - pd.to_timedelta("0.033333333s") * i - + pd.to_timedelta("0.000000001s") * divmod(i, 3)[0] - ) - for i in range(31) - ] + (Timedelta(1), does_not_raise()), + (Timedelta(2), raises_overflow_error()), + (Timedelta.max, raises_overflow_error()), + ], ) - tm.assert_index_equal(result, expected) + def test_sub_raises_if_result_would_overflow( + self, + max_td64: TD64_BOX_TYPE, + rval: Timedelta, + expected_exs: AbstractContextManager, + td64_type: Type[TD64_TYPE], + ): + rvalue = wrap_value(rval, td64_type) + min_td64 = -1 * max_td64 + + with expected_exs: + min_td64 - rvalue + + with expected_exs: + -1 * rvalue - max_td64 + + +class TestNumericScalarMulDiv: + """ + Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame and a + numeric scalar. + """ + + @pytest.mark.xfail(reason="Not implemented") + def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE): + with raises_overflow_error(): + max_td64 * 1.01 + + with raises_overflow_error(): + 1.01 * max_td64 + + +class TestAddSubDatetime64: + """ + Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame, and a + datetime64 scalar or datetime64-valued ExtensionArray/Index/Series/DataFrame. + """ + + def test_add(self, td64_box_type: Type[TD64_BOX_TYPE], dt64_type: Type[DT64_TYPE]): + # GH: 35897 + dt64 = wrap_value(Timestamp(2020, 1, 2), dt64_type) + td64_box = wrap_value(Timedelta(hours=3), td64_box_type) + + expected_type = get_result_type(td64_box_type, dt64_type) + expected = wrap_value(Timestamp(2020, 1, 2, 3), expected_type) + result = dt64 + td64_box + + tm.assert_equal(result, expected) + + def test_add_dt64_raises_if_result_would_overflow( + self, + max_td64: TD64_BOX_TYPE, + dt64_type: Type[DT64_TYPE], + ): + max_dt64 = wrap_value(Timestamp.max, dt64_type) + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) + + with pytest.raises(ex, match=msg): + max_td64 + max_dt64 + + with pytest.raises(ex, match=msg): + max_dt64 + max_td64 + + def test_sub_td64_raises_if_result_would_overflow( + self, + max_td64: TD64_BOX_TYPE, + dt64_type: Type[DT64_TYPE], + ): + min_dt64 = wrap_value(Timestamp.min, dt64_type) + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) + + with pytest.raises(ex, match=msg): + min_dt64 - max_td64 diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 7677b8950c7a3..1bf153763ed09 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1,11 +1,23 @@ +from contextlib import ( + AbstractContextManager, + nullcontext, +) from datetime import ( datetime, timedelta, ) +from functools import partial +from typing import ( + List, + Type, + Union, +) import numpy as np import pytest +from pandas._libs.lib import is_list_like + import pandas as pd from pandas import ( Categorical, @@ -20,6 +32,7 @@ Timedelta, TimedeltaIndex, Timestamp, + array, date_range, isna, timedelta_range, @@ -27,6 +40,36 @@ ) import pandas._testing as tm from pandas.core import nanops +from pandas.core.arrays import ( + ExtensionArray, + TimedeltaArray, +) + +TD64_ARRAYLIKE_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series] + + +TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" +TD64_OVERFLOW_MSG = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + "Overflow in int64 addition", + ] +) + + +does_not_raise = nullcontext +raises_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) +raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) + + +@pytest.fixture( + name="td64_arraylike_type", + params=(TimedeltaArray, TimedeltaIndex, Series), + scope="module", +) +def fixture_td64_arraylike_type(request) -> Type[TD64_ARRAYLIKE_TYPE]: + return request.param def get_objs(): @@ -48,6 +91,25 @@ def get_objs(): return objs +def wrap_value(value, cls): + """ + Return value wrapped in a box of given cls, or as-is if cls is a scalar. + """ + if not issubclass(cls, pd.core.arraylike.OpsMixin): + return cls(value) + + if issubclass(cls, ExtensionArray): + box_cls = array + elif issubclass(cls, Index): + box_cls = Index + else: + box_cls = cls + + if not is_list_like(value): + value = [value] + return tm.box_expected(value, box_cls, transpose=False) + + objs = get_objs() @@ -1527,3 +1589,110 @@ def test_multimode_complex(self, array, expected, dtype): # Complex numbers are sorted by their magnitude result = Series(array, dtype=dtype).mode() tm.assert_series_equal(result, expected) + + +class TestTimedelta64: + """ + For timedelta64-valued ExtensionArrays/Indexes/Series/DataFrames. + """ + + @pytest.mark.parametrize( + "value", + [Timedelta(-(10**15) + 1), Timedelta(10**15 + 1)], + ) + def test_single_elem_sum_retains_ns_precision_over_expected_range( + self, + value: Timedelta, + td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + ): + td64_arraylike = wrap_value(value, td64_arraylike_type) + result = td64_arraylike.sum() + + assert result == value + + @pytest.mark.parametrize( + "value", + [ + Timedelta.min + Timedelta(512), + Timedelta(-(10**16) - 1), + Timedelta(10**16 + 1), + Timedelta.max - Timedelta(512), + ], + ) + def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( + self, + value: Timedelta, + td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + ): + """ + The computation involves int->float conversion, so there can be loss of + precision. + """ + td64_arraylike = wrap_value(value, td64_arraylike_type) + result = td64_arraylike.sum() + + assert result != value + # assert np.isclose(result, value) + + @pytest.mark.parametrize( + ["value", "expected_exs"], + [ + (Timedelta.min, does_not_raise()), + (Timedelta.min + Timedelta(511), does_not_raise()), + (Timedelta.max - Timedelta(511), raises_overflow_error()), + (Timedelta.max, raises_overflow_error()), + ], + ) + def test_single_elem_sum_fails_for_large_values( + self, + value: Timedelta, + expected_exs: AbstractContextManager, + td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + ): + td64_arraylike = wrap_value(value, td64_arraylike_type) + with expected_exs: + result = td64_arraylike.sum() + # for large negative values, sum() doesn't raise but does return NaT + assert result is NaT + + @pytest.mark.parametrize( + ("values", "expected_exs"), + ( + ([Timedelta.min] * 2, raises_value_error()), + ([Timedelta.min, Timedelta(-1025)], raises_value_error()), + ([Timedelta.min, Timedelta(-1024)], does_not_raise()), + ([Timedelta.min, Timedelta(-1)], does_not_raise()), + ([Timedelta.max, Timedelta(1)], raises_overflow_error()), + ([Timedelta.max, Timedelta(1024)], raises_overflow_error()), + ([Timedelta.max, Timedelta(1025)], raises_value_error()), + ([Timedelta.max] * 2, raises_value_error()), + ), + ) + def test_arraylike_sum_usually_raises_for_overflow( + self, + values: List[Timedelta], + expected_exs: AbstractContextManager, + td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + ): + td64_arraylike = wrap_value(values, td64_arraylike_type) + with expected_exs: + result = td64_arraylike.sum() + # for small negative overflows, sum() doesn't raise but does return NaT + assert result is NaT + + @pytest.mark.parametrize( + "values", + ( + [Timedelta.min] * 2, + [Timedelta.min, Timedelta(-1)], + [Timedelta.max, Timedelta(1)], + [Timedelta.max] * 2, + ), + ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], + ) + def test_df_sum_returns_nat_for_all_overflows(self, values: List[Timedelta]): + td64_df = wrap_value(values, DataFrame) + result = td64_df.sum() + expected = Series(NaT, index=[0], dtype="timedelta64[ns]") + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 3277afe1c4511..47cd2a53d89bb 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -1,5 +1,3 @@ -from hypothesis import given -import hypothesis.strategies as st import numpy as np import pytest @@ -53,32 +51,6 @@ def test_td64_sum_empty(skipna): assert result == pd.Timedelta(0) -@given(st.integers(min_value=0, max_value=10**15).map(pd.Timedelta)) -def test_td64_summation_retains_ns_precision_over_expected_range(value: pd.Timedelta): - result = Series(value).sum() - - assert result == value - - -@given( - st.integers( - min_value=10**15, - max_value=pd.Timedelta.max.value - 2**9, - ) - .filter(lambda i: int(np.float64(i)) != i) - .map(pd.Timedelta) -) -def test_td64_summation_loses_ns_precision_if_float_conversion_rounds( - value: pd.Timedelta, -): - """ - The computation involves int->float conversion, so there can be loss of precision. - """ - result = Series(value).sum() - - assert result != value - - def test_prod_numpy16_bug(): ser = Series([1.0, 1.0, 1.0], index=range(3)) result = ser.prod() diff --git a/pandas/tests/test_timedelta64_overflow.py b/pandas/tests/test_timedelta64_overflow.py deleted file mode 100644 index c4e90c52cea6a..0000000000000 --- a/pandas/tests/test_timedelta64_overflow.py +++ /dev/null @@ -1,269 +0,0 @@ -""" -Check overflow behavior of operations on timedelta-valued -ExtensionArrays/Indexes/Series/DataFrames. -""" - -from contextlib import ( - AbstractContextManager, - nullcontext, -) -from functools import partial -from typing import ( - List, - Type, - Union, -) - -import pytest - -from pandas._libs.lib import is_list_like -from pandas.errors import OutOfBoundsDatetime - -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - NaT, - Series, - Timedelta, - TimedeltaIndex, - Timestamp, - array, -) -import pandas._testing as tm -from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - TimedeltaArray, -) - -td64_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) -td64_box_types = td64_types[slice(1, None)] -td64_arraylike_types = td64_types[slice(1, 4)] -dt64_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) - -TD64_TYPE = Union[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame] -TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame] -TD64_ARRAYLIKE_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series] -DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame] - - -TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" -TD64_OVERFLOW_MSG = "|".join( - [ - "int too big to convert", - "Python int too large to convert to C long", - "Overflow in int64 addition", - ] -) - -does_not_raise = nullcontext -raises_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) -raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) - - -def wrap_value(value, cls): - """ - Return value wrapped in a container of given cls, or as-is if cls is a scalar. - """ - if issubclass(cls, (Timedelta, Timestamp)): - return cls(value) - - if issubclass(cls, ExtensionArray): - box_cls = array - elif issubclass(cls, Index): - box_cls = Index - else: - box_cls = cls - - if not is_list_like(value): - value = [value] - return tm.box_expected(value, box_cls, transpose=False) - - -@pytest.fixture(name="td64_type", params=td64_types, scope="module") -def fixture_td64_type(request) -> Type[TD64_TYPE]: - return request.param - - -@pytest.fixture(name="td64_arraylike_type", params=td64_arraylike_types, scope="module") -def fixture_td64_arraylike_type(request) -> Type[TD64_ARRAYLIKE_TYPE]: - return request.param - - -@pytest.fixture(name="td64_box_type", params=td64_box_types, scope="module") -def fixture_td64_box_type(request) -> Type[TD64_BOX_TYPE]: - return request.param - - -@pytest.fixture(name="dt64_type", params=dt64_types, scope="module") -def fixture_dt64_type(request) -> Type[DT64_TYPE]: - return request.param - - -@pytest.fixture(name="max_td64") -def fixture_max_td64(td64_box_type: Type[TD64_BOX_TYPE]) -> TD64_BOX_TYPE: - """ - A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to - Timestamp.max. - """ - return wrap_value(Timedelta.max, td64_box_type) - - -@pytest.fixture( - name="positive_td64", - params=[Timedelta(1), Timedelta(1024), Timedelta.max], - ids=["1ns", "1024ns", "td_max"], -) -def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE: - """ - A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. - """ - value = request.param - return wrap_value(value, td64_type) - - -class TestBoxReductionMethods: - """ - For timedelta64-valued ExtensionArrays/Indexes/Series/DataFrames. - """ - - @pytest.mark.parametrize( - ["value", "expected_exs"], - [ - (Timedelta.min, does_not_raise()), - (Timedelta.min + Timedelta(511), does_not_raise()), - (Timedelta.max - Timedelta(511), raises_overflow_error()), - (Timedelta.max, raises_overflow_error()), - ], - ) - def test_arraylike_sum_fails_with_large_single_elem( - self, - value: Timedelta, - expected_exs: AbstractContextManager, - td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], - ): - td64_arraylike = wrap_value(value, td64_arraylike_type) - with expected_exs: - result = td64_arraylike.sum() - # for large negative values, sum() doesn't raise but does return NaT - assert result is NaT - - @pytest.mark.parametrize( - ("values", "expected_exs"), - ( - ([Timedelta.min] * 2, raises_value_error()), - ([Timedelta.min, Timedelta(-1025)], raises_value_error()), - ([Timedelta.min, Timedelta(-1024)], does_not_raise()), - ([Timedelta.min, Timedelta(-1)], does_not_raise()), - ([Timedelta.max, Timedelta(1)], raises_overflow_error()), - ([Timedelta.max, Timedelta(1024)], raises_overflow_error()), - ([Timedelta.max, Timedelta(1025)], raises_value_error()), - ([Timedelta.max] * 2, raises_value_error()), - ), - ) - def test_arraylike_sum_usually_raises_for_overflow( - self, - values: List[Timedelta], - expected_exs: AbstractContextManager, - td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], - ): - td64_arraylike = wrap_value(values, td64_arraylike_type) - with expected_exs: - result = td64_arraylike.sum() - # for small negative overflows, sum() doesn't raise but does return NaT - assert result is NaT - - @pytest.mark.parametrize( - "values", - ( - [Timedelta.min] * 2, - [Timedelta.min, Timedelta(-1)], - [Timedelta.max, Timedelta(1)], - [Timedelta.max] * 2, - ), - ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], - ) - def test_df_sum_returns_nat_for_all_overflows(self, values: List[Timedelta]): - td64_df = wrap_value(values, DataFrame) - result = td64_df.sum() - expected = Series(NaT, index=[0], dtype="timedelta64[ns]") - - tm.assert_series_equal(result, expected) - - -class TestBinaryOps: - """ - Operations between timedelta64-valued ExtensionArrays/Indexes/Series/DataFrames, and - a numeric/timelike scalar or timelike-valued ExtensionArray/Index/Series/DataFrame. - """ - - def test_add_raises_if_result_would_overflow( - self, - max_td64: TD64_TYPE, - positive_td64: TD64_BOX_TYPE, - ): - with raises_overflow_error(): - max_td64 + positive_td64 - - with raises_overflow_error(): - positive_td64 + max_td64 - - @pytest.mark.parametrize( - ["rval", "expected_exs"], - [ - (Timedelta(1), does_not_raise()), - (Timedelta(2), raises_overflow_error()), - (Timedelta.max, raises_overflow_error()), - ], - ) - def test_sub_raises_if_result_would_overflow( - self, - max_td64: TD64_BOX_TYPE, - rval: Timedelta, - expected_exs: AbstractContextManager, - td64_type: Type[TD64_TYPE], - ): - rvalue = wrap_value(rval, td64_type) - min_td64 = -1 * max_td64 - - with expected_exs: - min_td64 - rvalue - - with expected_exs: - -1 * rvalue - max_td64 - - def test_add_dt64_raises_if_result_would_overflow( - self, - max_td64: TD64_BOX_TYPE, - dt64_type: Type[DT64_TYPE], - ): - max_dt64 = wrap_value(Timestamp.max, dt64_type) - ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) - - with pytest.raises(ex, match=msg): - max_td64 + max_dt64 - - with pytest.raises(ex, match=msg): - max_dt64 + max_td64 - - def test_sub_td64_raises_if_result_would_overflow( - self, - max_td64: TD64_BOX_TYPE, - dt64_type: Type[DT64_TYPE], - ): - min_dt64 = wrap_value(Timestamp.min, dt64_type) - ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) - - with pytest.raises(ex, match=msg): - min_dt64 - max_td64 - - @pytest.mark.xfail(reason="Not implemented") - def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE): - with raises_overflow_error(): - max_td64 * 1.01 - - with raises_overflow_error(): - 1.01 * max_td64 From f2e0ba4d8b8f75cfe61ef13bcf944b93977f2023 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 27 Apr 2022 16:20:30 -0700 Subject: [PATCH 14/21] dedupe test setup helpers --- pandas/_testing/__init__.py | 27 ++++++- pandas/tests/arithmetic/test_timedelta64.py | 75 ++++++------------- pandas/tests/reductions/test_reductions.py | 82 +++++---------------- 3 files changed, 68 insertions(+), 116 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0a62ee956be61..01c7bad3d2d1a 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1,9 +1,10 @@ from __future__ import annotations -import collections +import collections.abc from datetime import datetime from decimal import Decimal from functools import wraps +from inspect import isclass import operator import os import re @@ -276,6 +277,30 @@ def box_expected(expected, box_cls, transpose=True): return expected +def wrap_value(value, cls, transpose=False): + """ + If cls is a scalar type, return value as an instance of it, otherwise return value + wrapped in the box type indicated by cls. + + Designed to play nicely with box_expected (and the box_with_array fixture). + """ + if isclass(cls) and not issubclass(cls, pd.core.arraylike.OpsMixin): + return cls(value) + + if cls in (np.array, np.ndarray): + pass + elif cls is pd.array or issubclass(cls, ExtensionArray): + cls = pd.array + elif issubclass(cls, pd.Index): + cls = pd.Index + + if not isinstance( + value, (collections.abc.Sequence, pd.core.arraylike.OpsMixin, np.ndarray) + ): + value = [value] + return box_expected(value, cls, transpose) + + def to_array(obj): """ Similar to pd.array, but does not cast numpy dtypes to nullable dtypes. diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index ef85e5684b2d3..345d9436c578f 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -17,7 +17,6 @@ import numpy as np import pytest -from pandas._libs.lib import is_list_like from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, @@ -43,7 +42,6 @@ ) from pandas.core.arrays import ( DatetimeArray, - ExtensionArray, TimedeltaArray, ) from pandas.tests.arithmetic.common import ( @@ -52,12 +50,11 @@ get_upcast_box, ) -TD64_TYPE = Union[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame] TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame] +TD64_TYPE = Union[Timedelta, TD64_BOX_TYPE] DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame] -TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" TD64_OVERFLOW_MSG = "|".join( [ "int too big to convert", @@ -66,9 +63,9 @@ ] ) + does_not_raise = nullcontext -raises_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) -raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) +td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) def assert_dtype(obj, expected_dtype): @@ -92,25 +89,6 @@ def get_expected_name(box, names): return exname -def wrap_value(value, cls): - """ - Return value wrapped in a container of given cls, or as-is if cls is a scalar. - """ - if not issubclass(cls, pd.core.arraylike.OpsMixin): - return cls(value) - - if issubclass(cls, ExtensionArray): - box_cls = pd.array - elif issubclass(cls, pd.Index): - box_cls = pd.Index - else: - box_cls = cls - - if not is_list_like(value): - value = [value] - return tm.box_expected(value, box_cls, transpose=False) - - def get_result_type( td64_type: Type[TD64_TYPE], dt64_type: Type[DT64_TYPE], @@ -119,8 +97,8 @@ def get_result_type( Expected result when adding, subtracting timedelta64-valued box and datetime64-valued box or scalar. """ - dt64 = wrap_value(Timestamp.now(), dt64_type) - td64 = wrap_value(Timedelta(0), td64_type) + dt64 = tm.wrap_value(Timestamp.now(), dt64_type) + td64 = tm.wrap_value(Timedelta(0), td64_type) return type(dt64 + td64) @@ -133,15 +111,6 @@ def fixture_td64_type(request) -> Type[TD64_TYPE]: return request.param -@pytest.fixture( - name="td64_box_type", - params=[TimedeltaArray, TimedeltaIndex, Series, DataFrame], - scope="module", -) -def fixture_td64_box_type(request) -> Type[TD64_BOX_TYPE]: - return request.param - - @pytest.fixture( name="dt64_type", params=[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame], @@ -152,12 +121,12 @@ def fixture_dt64_type(request) -> Type[DT64_TYPE]: @pytest.fixture(name="max_td64") -def fixture_max_td64(td64_box_type: Type[TD64_BOX_TYPE]) -> TD64_BOX_TYPE: +def fixture_max_td64(box_with_array) -> TD64_BOX_TYPE: """ A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to Timestamp.max. """ - return wrap_value(Timedelta.max, td64_box_type) + return tm.wrap_value(Timedelta.max, box_with_array) @pytest.fixture( @@ -170,7 +139,7 @@ def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE: A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. """ value = request.param - return wrap_value(value, td64_type) + return tm.wrap_value(value, td64_type) # ------------------------------------------------------------------ @@ -2191,18 +2160,18 @@ def test_add_raises_if_result_would_overflow( max_td64: TD64_TYPE, positive_td64: TD64_BOX_TYPE, ): - with raises_overflow_error(): + with td64_overflow_error(): max_td64 + positive_td64 - with raises_overflow_error(): + with td64_overflow_error(): positive_td64 + max_td64 @pytest.mark.parametrize( ["rval", "expected_exs"], [ (Timedelta(1), does_not_raise()), - (Timedelta(2), raises_overflow_error()), - (Timedelta.max, raises_overflow_error()), + (Timedelta(2), td64_overflow_error()), + (Timedelta.max, td64_overflow_error()), ], ) def test_sub_raises_if_result_would_overflow( @@ -2212,7 +2181,7 @@ def test_sub_raises_if_result_would_overflow( expected_exs: AbstractContextManager, td64_type: Type[TD64_TYPE], ): - rvalue = wrap_value(rval, td64_type) + rvalue = tm.wrap_value(rval, td64_type) min_td64 = -1 * max_td64 with expected_exs: @@ -2230,10 +2199,10 @@ class TestNumericScalarMulDiv: @pytest.mark.xfail(reason="Not implemented") def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE): - with raises_overflow_error(): + with td64_overflow_error(): max_td64 * 1.01 - with raises_overflow_error(): + with td64_overflow_error(): 1.01 * max_td64 @@ -2243,13 +2212,13 @@ class TestAddSubDatetime64: datetime64 scalar or datetime64-valued ExtensionArray/Index/Series/DataFrame. """ - def test_add(self, td64_box_type: Type[TD64_BOX_TYPE], dt64_type: Type[DT64_TYPE]): + def test_add(self, box_with_array, dt64_type: Type[DT64_TYPE]): # GH: 35897 - dt64 = wrap_value(Timestamp(2020, 1, 2), dt64_type) - td64_box = wrap_value(Timedelta(hours=3), td64_box_type) + dt64 = tm.wrap_value(Timestamp(2020, 1, 2), dt64_type) + td64_box = tm.wrap_value(Timedelta(hours=3), box_with_array) - expected_type = get_result_type(td64_box_type, dt64_type) - expected = wrap_value(Timestamp(2020, 1, 2, 3), expected_type) + expected_type = get_result_type(box_with_array, dt64_type) + expected = tm.wrap_value(Timestamp(2020, 1, 2, 3), expected_type) result = dt64 + td64_box tm.assert_equal(result, expected) @@ -2259,7 +2228,7 @@ def test_add_dt64_raises_if_result_would_overflow( max_td64: TD64_BOX_TYPE, dt64_type: Type[DT64_TYPE], ): - max_dt64 = wrap_value(Timestamp.max, dt64_type) + max_dt64 = tm.wrap_value(Timestamp.max, dt64_type) ex = (OutOfBoundsDatetime, OverflowError) msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) @@ -2274,7 +2243,7 @@ def test_sub_td64_raises_if_result_would_overflow( max_td64: TD64_BOX_TYPE, dt64_type: Type[DT64_TYPE], ): - min_dt64 = wrap_value(Timestamp.min, dt64_type) + min_dt64 = tm.wrap_value(Timestamp.min, dt64_type) ex = (OutOfBoundsDatetime, OverflowError) msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 1bf153763ed09..23595dbe9ef2a 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -7,17 +7,11 @@ timedelta, ) from functools import partial -from typing import ( - List, - Type, - Union, -) +from typing import List import numpy as np import pytest -from pandas._libs.lib import is_list_like - import pandas as pd from pandas import ( Categorical, @@ -32,7 +26,6 @@ Timedelta, TimedeltaIndex, Timestamp, - array, date_range, isna, timedelta_range, @@ -40,13 +33,6 @@ ) import pandas._testing as tm from pandas.core import nanops -from pandas.core.arrays import ( - ExtensionArray, - TimedeltaArray, -) - -TD64_ARRAYLIKE_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series] - TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" TD64_OVERFLOW_MSG = "|".join( @@ -59,17 +45,8 @@ does_not_raise = nullcontext -raises_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) -raises_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) - - -@pytest.fixture( - name="td64_arraylike_type", - params=(TimedeltaArray, TimedeltaIndex, Series), - scope="module", -) -def fixture_td64_arraylike_type(request) -> Type[TD64_ARRAYLIKE_TYPE]: - return request.param +td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) +td64_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) def get_objs(): @@ -91,25 +68,6 @@ def get_objs(): return objs -def wrap_value(value, cls): - """ - Return value wrapped in a box of given cls, or as-is if cls is a scalar. - """ - if not issubclass(cls, pd.core.arraylike.OpsMixin): - return cls(value) - - if issubclass(cls, ExtensionArray): - box_cls = array - elif issubclass(cls, Index): - box_cls = Index - else: - box_cls = cls - - if not is_list_like(value): - value = [value] - return tm.box_expected(value, box_cls, transpose=False) - - objs = get_objs() @@ -1603,9 +1561,9 @@ class TestTimedelta64: def test_single_elem_sum_retains_ns_precision_over_expected_range( self, value: Timedelta, - td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + index_or_series_or_array, ): - td64_arraylike = wrap_value(value, td64_arraylike_type) + td64_arraylike = tm.wrap_value(value, index_or_series_or_array) result = td64_arraylike.sum() assert result == value @@ -1622,13 +1580,13 @@ def test_single_elem_sum_retains_ns_precision_over_expected_range( def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( self, value: Timedelta, - td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + index_or_series_or_array, ): """ The computation involves int->float conversion, so there can be loss of precision. """ - td64_arraylike = wrap_value(value, td64_arraylike_type) + td64_arraylike = tm.wrap_value(value, index_or_series_or_array) result = td64_arraylike.sum() assert result != value @@ -1639,17 +1597,17 @@ def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( [ (Timedelta.min, does_not_raise()), (Timedelta.min + Timedelta(511), does_not_raise()), - (Timedelta.max - Timedelta(511), raises_overflow_error()), - (Timedelta.max, raises_overflow_error()), + (Timedelta.max - Timedelta(511), td64_overflow_error()), + (Timedelta.max, td64_overflow_error()), ], ) def test_single_elem_sum_fails_for_large_values( self, value: Timedelta, expected_exs: AbstractContextManager, - td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + index_or_series_or_array, ): - td64_arraylike = wrap_value(value, td64_arraylike_type) + td64_arraylike = tm.wrap_value(value, index_or_series_or_array) with expected_exs: result = td64_arraylike.sum() # for large negative values, sum() doesn't raise but does return NaT @@ -1658,23 +1616,23 @@ def test_single_elem_sum_fails_for_large_values( @pytest.mark.parametrize( ("values", "expected_exs"), ( - ([Timedelta.min] * 2, raises_value_error()), - ([Timedelta.min, Timedelta(-1025)], raises_value_error()), + ([Timedelta.min] * 2, td64_value_error()), + ([Timedelta.min, Timedelta(-1025)], td64_value_error()), ([Timedelta.min, Timedelta(-1024)], does_not_raise()), ([Timedelta.min, Timedelta(-1)], does_not_raise()), - ([Timedelta.max, Timedelta(1)], raises_overflow_error()), - ([Timedelta.max, Timedelta(1024)], raises_overflow_error()), - ([Timedelta.max, Timedelta(1025)], raises_value_error()), - ([Timedelta.max] * 2, raises_value_error()), + ([Timedelta.max, Timedelta(1)], td64_overflow_error()), + ([Timedelta.max, Timedelta(1024)], td64_overflow_error()), + ([Timedelta.max, Timedelta(1025)], td64_value_error()), + ([Timedelta.max] * 2, td64_value_error()), ), ) def test_arraylike_sum_usually_raises_for_overflow( self, values: List[Timedelta], expected_exs: AbstractContextManager, - td64_arraylike_type: Type[TD64_ARRAYLIKE_TYPE], + index_or_series_or_array, ): - td64_arraylike = wrap_value(values, td64_arraylike_type) + td64_arraylike = tm.wrap_value(values, index_or_series_or_array) with expected_exs: result = td64_arraylike.sum() # for small negative overflows, sum() doesn't raise but does return NaT @@ -1691,7 +1649,7 @@ def test_arraylike_sum_usually_raises_for_overflow( ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], ) def test_df_sum_returns_nat_for_all_overflows(self, values: List[Timedelta]): - td64_df = wrap_value(values, DataFrame) + td64_df = tm.wrap_value(values, DataFrame) result = td64_df.sum() expected = Series(NaT, index=[0], dtype="timedelta64[ns]") From e03a902c65ea88b467b851f37f63ca9d8ddae93e Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 27 Apr 2022 17:00:29 -0700 Subject: [PATCH 15/21] obey the linting gods --- pandas/_testing/__init__.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 908b192f9a2f1..e002e5f66f5cf 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations -import collections.abc +import collections +from collections import abc from datetime import datetime from decimal import Decimal from functools import wraps @@ -294,12 +295,10 @@ def wrap_value(value, cls, transpose=False): pass elif cls is pd.array or issubclass(cls, ExtensionArray): cls = pd.array - elif issubclass(cls, pd.Index): - cls = pd.Index + elif issubclass(cls, Index): + cls = Index - if not isinstance( - value, (collections.abc.Sequence, pd.core.arraylike.OpsMixin, np.ndarray) - ): + if not isinstance(value, (abc.Sequence, pd.core.arraylike.OpsMixin, np.ndarray)): value = [value] return box_expected(value, cls, transpose) From 15ef8342f28d03bb793036eb5258e704efb44c10 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Thu, 28 Apr 2022 09:37:04 -0700 Subject: [PATCH 16/21] adjust for platform/env-specific overflow behavior --- pandas/tests/reductions/test_reductions.py | 32 ++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 23595dbe9ef2a..a7ef01b2ae851 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -7,6 +7,7 @@ timedelta, ) from functools import partial +import os from typing import List import numpy as np @@ -48,6 +49,18 @@ td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) td64_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) +# TODO: more robust platform/env detection? +xfail_on_arm = pytest.mark.xfail( + os.environ.get("CIRCLECI") == "true", + reason="ints wrap on arm?", + raises=AssertionError, +) +xfail_with_array_data_manager = pytest.mark.xfail( + os.environ.get("PANDAS_DATA_MANAGER") == "array", + reason="unclear", + raises=(ValueError, OverflowError), +) + def get_objs(): indexes = [ @@ -1590,7 +1603,7 @@ def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( result = td64_arraylike.sum() assert result != value - # assert np.isclose(result, value) + assert np.isclose(result.value, value.value) @pytest.mark.parametrize( ["value", "expected_exs"], @@ -1641,14 +1654,23 @@ def test_arraylike_sum_usually_raises_for_overflow( @pytest.mark.parametrize( "values", ( - [Timedelta.min] * 2, + pytest.param([Timedelta.min] * 2, marks=xfail_with_array_data_manager), [Timedelta.min, Timedelta(-1)], - [Timedelta.max, Timedelta(1)], - [Timedelta.max] * 2, + pytest.param( + [Timedelta.max, Timedelta(1)], + marks=[xfail_on_arm, xfail_with_array_data_manager], + ), + pytest.param( + [Timedelta.max] * 2, + marks=[xfail_on_arm, xfail_with_array_data_manager], + ), ), ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], ) - def test_df_sum_returns_nat_for_all_overflows(self, values: List[Timedelta]): + def test_df_sum_usually_returns_nat_for_overflows(self, values: List[Timedelta]): + """ + Special case behavior for some values, for some platforms/configs. + """ td64_df = tm.wrap_value(values, DataFrame) result = td64_df.sum() expected = Series(NaT, index=[0], dtype="timedelta64[ns]") From 13d198ebe8169dacd0e4c6c79d817110396e1155 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 10:02:36 -0700 Subject: [PATCH 17/21] use newer type hint syntax --- pandas/tests/arithmetic/test_timedelta64.py | 62 ++++++++++++--------- pandas/tests/reductions/test_reductions.py | 7 ++- 2 files changed, 40 insertions(+), 29 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 345d9436c578f..c80f312d18ada 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1,5 +1,7 @@ # Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. +from __future__ import annotations + from contextlib import ( AbstractContextManager, nullcontext, @@ -9,10 +11,6 @@ timedelta, ) from functools import partial -from typing import ( - Type, - Union, -) import numpy as np import pytest @@ -50,11 +48,6 @@ get_upcast_box, ) -TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame] -TD64_TYPE = Union[Timedelta, TD64_BOX_TYPE] -DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame] - - TD64_OVERFLOW_MSG = "|".join( [ "int too big to convert", @@ -90,9 +83,9 @@ def get_expected_name(box, names): def get_result_type( - td64_type: Type[TD64_TYPE], - dt64_type: Type[DT64_TYPE], -) -> Type[DT64_TYPE]: + td64_type: type(TimedeltaArray | TimedeltaIndex | Series | DataFrame), + dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), +) -> type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame): """ Expected result when adding, subtracting timedelta64-valued box and datetime64-valued box or scalar. @@ -107,7 +100,9 @@ def get_result_type( params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame], scope="module", ) -def fixture_td64_type(request) -> Type[TD64_TYPE]: +def fixture_td64_type( + request, +) -> type(Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame): return request.param @@ -116,12 +111,16 @@ def fixture_td64_type(request) -> Type[TD64_TYPE]: params=[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame], scope="module", ) -def fixture_dt64_type(request) -> Type[DT64_TYPE]: +def fixture_dt64_type( + request, +) -> type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame): return request.param @pytest.fixture(name="max_td64") -def fixture_max_td64(box_with_array) -> TD64_BOX_TYPE: +def fixture_max_td64( + box_with_array, +) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame: """ A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to Timestamp.max. @@ -134,7 +133,10 @@ def fixture_max_td64(box_with_array) -> TD64_BOX_TYPE: params=[Timedelta(1), Timedelta(1024), Timedelta.max], ids=["1ns", "1024ns", "td_max"], ) -def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE: +def fixture_positive_td64( + request, + td64_type: type(Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame), +) -> Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame: """ A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. """ @@ -2157,8 +2159,8 @@ class TestAddSub: def test_add_raises_if_result_would_overflow( self, - max_td64: TD64_TYPE, - positive_td64: TD64_BOX_TYPE, + max_td64: Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame, + positive_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, ): with td64_overflow_error(): max_td64 + positive_td64 @@ -2176,10 +2178,12 @@ def test_add_raises_if_result_would_overflow( ) def test_sub_raises_if_result_would_overflow( self, - max_td64: TD64_BOX_TYPE, + max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, rval: Timedelta, expected_exs: AbstractContextManager, - td64_type: Type[TD64_TYPE], + td64_type: type( + Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame + ), ): rvalue = tm.wrap_value(rval, td64_type) min_td64 = -1 * max_td64 @@ -2198,7 +2202,9 @@ class TestNumericScalarMulDiv: """ @pytest.mark.xfail(reason="Not implemented") - def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE): + def test_scalar_mul_raises_if_result_would_overflow( + self, max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame + ): with td64_overflow_error(): max_td64 * 1.01 @@ -2212,7 +2218,11 @@ class TestAddSubDatetime64: datetime64 scalar or datetime64-valued ExtensionArray/Index/Series/DataFrame. """ - def test_add(self, box_with_array, dt64_type: Type[DT64_TYPE]): + def test_add( + self, + box_with_array, + dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), + ): # GH: 35897 dt64 = tm.wrap_value(Timestamp(2020, 1, 2), dt64_type) td64_box = tm.wrap_value(Timedelta(hours=3), box_with_array) @@ -2225,8 +2235,8 @@ def test_add(self, box_with_array, dt64_type: Type[DT64_TYPE]): def test_add_dt64_raises_if_result_would_overflow( self, - max_td64: TD64_BOX_TYPE, - dt64_type: Type[DT64_TYPE], + max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), ): max_dt64 = tm.wrap_value(Timestamp.max, dt64_type) ex = (OutOfBoundsDatetime, OverflowError) @@ -2240,8 +2250,8 @@ def test_add_dt64_raises_if_result_would_overflow( def test_sub_td64_raises_if_result_would_overflow( self, - max_td64: TD64_BOX_TYPE, - dt64_type: Type[DT64_TYPE], + max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), ): min_dt64 = tm.wrap_value(Timestamp.min, dt64_type) ex = (OutOfBoundsDatetime, OverflowError) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index a7ef01b2ae851..22acf7284443a 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from contextlib import ( AbstractContextManager, nullcontext, @@ -8,7 +10,6 @@ ) from functools import partial import os -from typing import List import numpy as np import pytest @@ -1641,7 +1642,7 @@ def test_single_elem_sum_fails_for_large_values( ) def test_arraylike_sum_usually_raises_for_overflow( self, - values: List[Timedelta], + values: list[Timedelta], expected_exs: AbstractContextManager, index_or_series_or_array, ): @@ -1667,7 +1668,7 @@ def test_arraylike_sum_usually_raises_for_overflow( ), ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], ) - def test_df_sum_usually_returns_nat_for_overflows(self, values: List[Timedelta]): + def test_df_sum_usually_returns_nat_for_overflows(self, values: list[Timedelta]): """ Special case behavior for some values, for some platforms/configs. """ From b37aa4debeab4bf0218932ad70349cf692d91948 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 10:16:12 -0700 Subject: [PATCH 18/21] update tests to reflect recent changes --- pandas/tests/reductions/test_reductions.py | 26 ++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 22acf7284443a..0f36862f7224c 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1607,25 +1607,23 @@ def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( assert np.isclose(result.value, value.value) @pytest.mark.parametrize( - ["value", "expected_exs"], - [ - (Timedelta.min, does_not_raise()), - (Timedelta.min + Timedelta(511), does_not_raise()), - (Timedelta.max - Timedelta(511), td64_overflow_error()), - (Timedelta.max, td64_overflow_error()), - ], + "value", + ( + Timedelta.min, + Timedelta.min + Timedelta(511), + Timedelta.max - Timedelta(511), + Timedelta.max, + ), ) def test_single_elem_sum_fails_for_large_values( self, value: Timedelta, - expected_exs: AbstractContextManager, index_or_series_or_array, ): td64_arraylike = tm.wrap_value(value, index_or_series_or_array) - with expected_exs: - result = td64_arraylike.sum() - # for large negative values, sum() doesn't raise but does return NaT - assert result is NaT + result = td64_arraylike.sum() + + assert result is NaT @pytest.mark.parametrize( ("values", "expected_exs"), @@ -1634,8 +1632,8 @@ def test_single_elem_sum_fails_for_large_values( ([Timedelta.min, Timedelta(-1025)], td64_value_error()), ([Timedelta.min, Timedelta(-1024)], does_not_raise()), ([Timedelta.min, Timedelta(-1)], does_not_raise()), - ([Timedelta.max, Timedelta(1)], td64_overflow_error()), - ([Timedelta.max, Timedelta(1024)], td64_overflow_error()), + ([Timedelta.max, Timedelta(1)], does_not_raise()), + ([Timedelta.max, Timedelta(1024)], does_not_raise()), ([Timedelta.max, Timedelta(1025)], td64_value_error()), ([Timedelta.max] * 2, td64_value_error()), ), From cd59647a23c1f883fb24155e4156a2dcd0bb1edd Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 11:55:00 -0700 Subject: [PATCH 19/21] remove scalar-box tests, use existing fixtures --- pandas/_testing/__init__.py | 31 +--- pandas/tests/arithmetic/test_timedelta64.py | 181 +++++++++----------- pandas/tests/reductions/test_reductions.py | 22 +-- 3 files changed, 99 insertions(+), 135 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index e002e5f66f5cf..399b2db1c4c65 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations import collections -from collections import abc from datetime import datetime from decimal import Decimal from functools import wraps @@ -247,19 +246,21 @@ def box_expected(expected, box_cls, transpose=True): Parameters ---------- expected : np.ndarray, Index, Series - box_cls : {Index, Series, DataFrame} + box_cls : {Index, Series, DataFrame, pd.array, ExtensionArray} Returns ------- subclass of box_cls """ - if box_cls is pd.array: + if box_cls is pd.array or ( + isclass(box_cls) and issubclass(box_cls, ExtensionArray) + ): if isinstance(expected, RangeIndex): # pd.array would return an IntegerArray expected = PandasArray(np.asarray(expected._values)) else: expected = pd.array(expected) - elif box_cls is Index: + elif isclass(box_cls) and issubclass(box_cls, Index): expected = Index._with_infer(expected) elif box_cls is Series: expected = Series(expected) @@ -281,28 +282,6 @@ def box_expected(expected, box_cls, transpose=True): return expected -def wrap_value(value, cls, transpose=False): - """ - If cls is a scalar type, return value as an instance of it, otherwise return value - wrapped in the box type indicated by cls. - - Designed to play nicely with box_expected (and the box_with_array fixture). - """ - if isclass(cls) and not issubclass(cls, pd.core.arraylike.OpsMixin): - return cls(value) - - if cls in (np.array, np.ndarray): - pass - elif cls is pd.array or issubclass(cls, ExtensionArray): - cls = pd.array - elif issubclass(cls, Index): - cls = Index - - if not isinstance(value, (abc.Sequence, pd.core.arraylike.OpsMixin, np.ndarray)): - value = [value] - return box_expected(value, cls, transpose) - - def to_array(obj): """ Similar to pd.array, but does not cast numpy dtypes to nullable dtypes. diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index c80f312d18ada..d25e4730b51d2 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -48,7 +48,7 @@ get_upcast_box, ) -TD64_OVERFLOW_MSG = "|".join( +TIMEDELTA_OVERFLOW_MSG = "|".join( [ "int too big to convert", "Python int too large to convert to C long", @@ -58,7 +58,7 @@ does_not_raise = nullcontext -td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) +td_overflow_error = partial(pytest.raises, OverflowError, match=TIMEDELTA_OVERFLOW_MSG) def assert_dtype(obj, expected_dtype): @@ -83,65 +83,56 @@ def get_expected_name(box, names): def get_result_type( - td64_type: type(TimedeltaArray | TimedeltaIndex | Series | DataFrame), - dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), -) -> type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame): + td_type: type(TimedeltaArray | TimedeltaIndex | Series | DataFrame), + dt_type: type(DatetimeArray | DatetimeIndex | Series | DataFrame), +) -> type(DatetimeArray | DatetimeIndex | Series | DataFrame): """ - Expected result when adding, subtracting timedelta64-valued box and - datetime64-valued box or scalar. + Expected result for add/sub between Timestamp-valued and Timedelta-valued boxes. """ - dt64 = tm.wrap_value(Timestamp.now(), dt64_type) - td64 = tm.wrap_value(Timedelta(0), td64_type) - return type(dt64 + td64) - - -@pytest.fixture( - name="td64_type", - params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame], - scope="module", -) -def fixture_td64_type( - request, -) -> type(Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame): - return request.param - - -@pytest.fixture( - name="dt64_type", - params=[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame], - scope="module", -) -def fixture_dt64_type( - request, -) -> type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame): - return request.param - - -@pytest.fixture(name="max_td64") -def fixture_max_td64( + result_types = { + (DatetimeArray, TimedeltaArray): DatetimeArray, + (DatetimeArray, TimedeltaIndex): DatetimeIndex, + (DatetimeArray, Series): Series, + (DatetimeArray, DataFrame): DataFrame, + (DatetimeIndex, TimedeltaArray): DatetimeIndex, + (DatetimeIndex, TimedeltaIndex): DatetimeIndex, + (DatetimeIndex, Series): Series, + (DatetimeIndex, DataFrame): DataFrame, + (Series, TimedeltaArray): Series, + (Series, TimedeltaIndex): Series, + (Series, Series): Series, + (Series, DataFrame): DataFrame, + } + + return result_types.get((dt_type, td_type), DataFrame) + + +@pytest.fixture(name="td_max_box") +def fixture_td_max_box( box_with_array, ) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame: """ A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to Timestamp.max. """ - return tm.wrap_value(Timedelta.max, box_with_array) + return tm.box_expected((Timedelta.max,), box_with_array) @pytest.fixture( - name="positive_td64", + name="positive_td_box", params=[Timedelta(1), Timedelta(1024), Timedelta.max], ids=["1ns", "1024ns", "td_max"], ) -def fixture_positive_td64( +def fixture_positive_td_box( request, - td64_type: type(Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame), -) -> Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame: + box_with_array, +) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame: """ - A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. + A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to the + same positive Timestamp. """ - value = request.param - return tm.wrap_value(value, td64_type) + value = (request.param,) + return tm.box_expected(value, box_with_array) # ------------------------------------------------------------------ @@ -2152,110 +2143,104 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array): class TestAddSub: """ - Addition/subtraction between a timedelta64-valued - ExtensionArrays/Indexes/Series/DataFrames, and a timedelta64 scalar or - timedelta64-valued ExtensionArray/Index/Series/DataFrame. + Add/sub between 2 Timestamp-valued ExtensionArrays/Indexes/Series/DataFrames. """ def test_add_raises_if_result_would_overflow( self, - max_td64: Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame, - positive_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + positive_td_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, ): - with td64_overflow_error(): - max_td64 + positive_td64 + with td_overflow_error(): + td_max_box + positive_td_box - with td64_overflow_error(): - positive_td64 + max_td64 + with td_overflow_error(): + positive_td_box + td_max_box @pytest.mark.parametrize( - ["rval", "expected_exs"], + ["positive_td", "expected_exs"], [ + # can't use positive_td_box fixture b/c errors vary (Timedelta(1), does_not_raise()), - (Timedelta(2), td64_overflow_error()), - (Timedelta.max, td64_overflow_error()), + (Timedelta(2), td_overflow_error()), + (Timedelta.max, td_overflow_error()), ], ) def test_sub_raises_if_result_would_overflow( self, - max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, - rval: Timedelta, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + positive_td: Timedelta, expected_exs: AbstractContextManager, - td64_type: type( - Timedelta | TimedeltaArray | TimedeltaIndex | Series | DataFrame - ), + box_with_array, ): - rvalue = tm.wrap_value(rval, td64_type) - min_td64 = -1 * max_td64 + positive_td_box = tm.box_expected((positive_td,), box_with_array) + td_min_box = -1 * td_max_box with expected_exs: - min_td64 - rvalue + td_min_box - positive_td_box with expected_exs: - -1 * rvalue - max_td64 + -1 * positive_td_box - td_max_box class TestNumericScalarMulDiv: """ - Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame and a + Operations on Timedelta-valued ExtensionArray/Index/Series/DataFrame and a numeric scalar. """ - @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.xfail(reason="Not implemented", raises=pytest.fail.Exception) def test_scalar_mul_raises_if_result_would_overflow( - self, max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame + self, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, ): - with td64_overflow_error(): - max_td64 * 1.01 + with td_overflow_error(): + td_max_box * 1.01 - with td64_overflow_error(): - 1.01 * max_td64 + with td_overflow_error(): + 1.01 * td_max_box -class TestAddSubDatetime64: +class TestAddSubTimestampBox: """ - Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame, and a - datetime64 scalar or datetime64-valued ExtensionArray/Index/Series/DataFrame. + Add/sub between Timedelta-valued and Timestamp-valued + ExtensionArrays/Indexes/Series/DataFrames. """ - def test_add( - self, - box_with_array, - dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), - ): + def test_add(self, box_with_array, box_with_array2): # GH: 35897 - dt64 = tm.wrap_value(Timestamp(2020, 1, 2), dt64_type) - td64_box = tm.wrap_value(Timedelta(hours=3), box_with_array) + td_box = tm.box_expected((Timedelta(hours=3),), box_with_array) + dt_box = tm.box_expected((Timestamp(2020, 1, 2),), box_with_array2) - expected_type = get_result_type(box_with_array, dt64_type) - expected = tm.wrap_value(Timestamp(2020, 1, 2, 3), expected_type) - result = dt64 + td64_box + expected_type = get_result_type(type(td_box), type(dt_box)) + expected = tm.box_expected((Timestamp(2020, 1, 2, 3),), expected_type) + result = dt_box + td_box tm.assert_equal(result, expected) - def test_add_dt64_raises_if_result_would_overflow( + def test_add_raises_if_result_would_overflow( self, - max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, - dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + box_with_array, ): - max_dt64 = tm.wrap_value(Timestamp.max, dt64_type) + dt_max_box = tm.box_expected((Timestamp.max,), box_with_array) ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) + msg = "|".join([TIMEDELTA_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) with pytest.raises(ex, match=msg): - max_td64 + max_dt64 + td_max_box + dt_max_box with pytest.raises(ex, match=msg): - max_dt64 + max_td64 + dt_max_box + td_max_box - def test_sub_td64_raises_if_result_would_overflow( + def test_sub_raises_if_result_would_overflow( self, - max_td64: TimedeltaArray | TimedeltaIndex | Series | DataFrame, - dt64_type: type(Timestamp | DatetimeArray | DatetimeIndex | Series | DataFrame), + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + box_with_array, ): - min_dt64 = tm.wrap_value(Timestamp.min, dt64_type) + dt_min_box = tm.box_expected((Timestamp.min,), box_with_array) ex = (OutOfBoundsDatetime, OverflowError) - msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) + msg = "|".join([TIMEDELTA_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) with pytest.raises(ex, match=msg): - min_dt64 - max_td64 + dt_min_box - td_max_box diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 0f36862f7224c..21334a98748fb 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1563,9 +1563,9 @@ def test_multimode_complex(self, array, expected, dtype): tm.assert_series_equal(result, expected) -class TestTimedelta64: +class TestTimedelta: """ - For timedelta64-valued ExtensionArrays/Indexes/Series/DataFrames. + For Timedelta-valued ExtensionArrays/Indexes/Series/DataFrames. """ @pytest.mark.parametrize( @@ -1577,8 +1577,8 @@ def test_single_elem_sum_retains_ns_precision_over_expected_range( value: Timedelta, index_or_series_or_array, ): - td64_arraylike = tm.wrap_value(value, index_or_series_or_array) - result = td64_arraylike.sum() + td_arraylike = tm.box_expected((value,), index_or_series_or_array) + result = td_arraylike.sum() assert result == value @@ -1600,8 +1600,8 @@ def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( The computation involves int->float conversion, so there can be loss of precision. """ - td64_arraylike = tm.wrap_value(value, index_or_series_or_array) - result = td64_arraylike.sum() + td_arraylike = tm.box_expected((value,), index_or_series_or_array) + result = td_arraylike.sum() assert result != value assert np.isclose(result.value, value.value) @@ -1620,8 +1620,8 @@ def test_single_elem_sum_fails_for_large_values( value: Timedelta, index_or_series_or_array, ): - td64_arraylike = tm.wrap_value(value, index_or_series_or_array) - result = td64_arraylike.sum() + td_arraylike = tm.box_expected((value,), index_or_series_or_array) + result = td_arraylike.sum() assert result is NaT @@ -1644,9 +1644,9 @@ def test_arraylike_sum_usually_raises_for_overflow( expected_exs: AbstractContextManager, index_or_series_or_array, ): - td64_arraylike = tm.wrap_value(values, index_or_series_or_array) + td_arraylike = tm.box_expected(values, index_or_series_or_array) with expected_exs: - result = td64_arraylike.sum() + result = td_arraylike.sum() # for small negative overflows, sum() doesn't raise but does return NaT assert result is NaT @@ -1670,7 +1670,7 @@ def test_df_sum_usually_returns_nat_for_overflows(self, values: list[Timedelta]) """ Special case behavior for some values, for some platforms/configs. """ - td64_df = tm.wrap_value(values, DataFrame) + td64_df = tm.box_expected(values, DataFrame, transpose=False) result = td64_df.sum() expected = Series(NaT, index=[0], dtype="timedelta64[ns]") From 05f4137ce2de5af53a821a06f7eb692a73847d0b Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 14:10:50 -0700 Subject: [PATCH 20/21] DRY up some td64 tests --- pandas/tests/arithmetic/test_timedelta64.py | 182 +++++--------------- pandas/tests/reductions/test_reductions.py | 60 +++++-- 2 files changed, 84 insertions(+), 158 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index d25e4730b51d2..53c7824562dbb 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -82,10 +82,7 @@ def get_expected_name(box, names): return exname -def get_result_type( - td_type: type(TimedeltaArray | TimedeltaIndex | Series | DataFrame), - dt_type: type(DatetimeArray | DatetimeIndex | Series | DataFrame), -) -> type(DatetimeArray | DatetimeIndex | Series | DataFrame): +def get_result_type(td_type, dt_type): """ Expected result for add/sub between Timestamp-valued and Timedelta-valued boxes. """ @@ -396,8 +393,6 @@ def test_subtraction_ops(self): msg = "cannot subtract a datelike from a TimedeltaArray" with pytest.raises(TypeError, match=msg): tdi - dt - with pytest.raises(TypeError, match=msg): - tdi - dti msg = r"unsupported operand type\(s\) for -" with pytest.raises(TypeError, match=msg): @@ -518,23 +513,6 @@ def _check(result, expected): expected = tm.box_expected(expected, box_with_array) tm.assert_equal(result, expected) - def test_dti_tdi_numeric_ops(self): - # These are normally union/diff set-like ops - tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") - dti = pd.date_range("20130101", periods=3, name="bar") - - result = tdi - tdi - expected = TimedeltaIndex(["0 days", NaT, "0 days"], name="foo") - tm.assert_index_equal(result, expected) - - result = tdi + tdi - expected = TimedeltaIndex(["2 days", NaT, "4 days"], name="foo") - tm.assert_index_equal(result, expected) - - result = dti - tdi # name will be reset - expected = DatetimeIndex(["20121231", NaT, "20130101"]) - tm.assert_index_equal(result, expected) - def test_addition_ops(self): # with datetimes/timedelta and tdi/dti tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") @@ -573,14 +551,6 @@ def test_addition_ops(self): # this is a union! # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) - result = tdi + dti # name will be reset - expected = DatetimeIndex(["20130102", NaT, "20130105"]) - tm.assert_index_equal(result, expected) - - result = dti + tdi # name will be reset - expected = DatetimeIndex(["20130102", NaT, "20130105"]) - tm.assert_index_equal(result, expected) - result = dt + td expected = Timestamp("20130102") assert result == expected @@ -631,25 +601,6 @@ def test_timedelta_tick_arithmetic(self): result3 = result3._with_freq(None) tm.assert_index_equal(result2, result3) - def test_tda_add_sub_index(self): - # Check that TimedeltaArray defers to Index on arithmetic ops - tdi = TimedeltaIndex(["1 days", NaT, "2 days"]) - tda = tdi.array - - dti = pd.date_range("1999-12-31", periods=3, freq="D") - - result = tda + dti - expected = tdi + dti - tm.assert_index_equal(result, expected) - - result = tda + tdi - expected = tdi + tdi - tm.assert_index_equal(result, expected) - - result = tda - tdi - expected = tdi - tdi - tm.assert_index_equal(result, expected) - def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): # Result should be cast back to DatetimeArray box = box_with_array @@ -762,22 +713,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): else: assert res[1] is NaT - def test_tdi_add_overflow(self): - # These should not overflow! - exp = TimedeltaIndex([NaT]) - result = pd.to_timedelta([NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(["4 days", NaT]) - result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([NaT, NaT, "5 hours"]) - result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta( - ["7 seconds", NaT, "4 hours"] - ) - tm.assert_index_equal(result, exp) - class TestTimedeltaArraylikeAddSubOps: # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ @@ -815,11 +750,6 @@ def test_timedelta_ops_with_missing_values(self): actual = scalar2 - scalar1 assert actual == scalar1 - actual = s1 + s1 - tm.assert_series_equal(actual, s2) - actual = s2 - s1 - tm.assert_series_equal(actual, s1) - actual = s1 + scalar1 tm.assert_series_equal(actual, s2) actual = scalar1 + s1 @@ -853,20 +783,6 @@ def test_timedelta_ops_with_missing_values(self): actual = s2 - NaT tm.assert_series_equal(actual, sn) - actual = s1 + df1 - tm.assert_frame_equal(actual, df2) - actual = s2 - df1 - tm.assert_frame_equal(actual, df1) - actual = df1 + s1 - tm.assert_frame_equal(actual, df2) - actual = df2 - s1 - tm.assert_frame_equal(actual, df1) - - actual = df1 + df1 - tm.assert_frame_equal(actual, df2) - actual = df2 - df1 - tm.assert_frame_equal(actual, df1) - actual = df1 + scalar1 tm.assert_frame_equal(actual, df2) actual = df2 - scalar1 @@ -1090,37 +1006,6 @@ def test_td64arr_add_datetime64_nat(self, box_with_array): tm.assert_equal(tdser + other, expected) tm.assert_equal(other + tdser, expected) - def test_td64arr_sub_dt64_array(self, box_with_array): - dti = pd.date_range("2016-01-01", periods=3) - tdi = TimedeltaIndex(["-1 Day"] * 3) - dtarr = dti.values - expected = DatetimeIndex(dtarr) - tdi - - tdi = tm.box_expected(tdi, box_with_array) - expected = tm.box_expected(expected, box_with_array) - - msg = "cannot subtract a datelike from" - with pytest.raises(TypeError, match=msg): - tdi - dtarr - - # TimedeltaIndex.__rsub__ - result = dtarr - tdi - tm.assert_equal(result, expected) - - def test_td64arr_add_dt64_array(self, box_with_array): - dti = pd.date_range("2016-01-01", periods=3) - tdi = TimedeltaIndex(["-1 Day"] * 3) - dtarr = dti.values - expected = DatetimeIndex(dtarr) + tdi - - tdi = tm.box_expected(tdi, box_with_array) - expected = tm.box_expected(expected, box_with_array) - - result = tdi + dtarr - tm.assert_equal(result, expected) - result = dtarr + tdi - tm.assert_equal(result, expected) - # ------------------------------------------------------------------ # Invalid __add__/__sub__ operations @@ -1225,27 +1110,6 @@ def test_td64arr_addsub_integer_array_no_freq(self, box_with_array): # ------------------------------------------------------------------ # Operations with timedelta-like others - def test_td64arr_add_sub_td64_array(self, box_with_array): - box = box_with_array - dti = pd.date_range("2016-01-01", periods=3) - tdi = dti - dti.shift(1) - tdarr = tdi.values - - expected = 2 * tdi - tdi = tm.box_expected(tdi, box) - expected = tm.box_expected(expected, box) - - result = tdi + tdarr - tm.assert_equal(result, expected) - result = tdarr + tdi - tm.assert_equal(result, expected) - - expected_sub = 0 * tdi - result = tdi - tdarr - tm.assert_equal(result, expected_sub) - result = tdarr - tdi - tm.assert_equal(result, expected_sub) - def test_td64arr_add_sub_tdi(self, box_with_array, names): # GH#17250 make sure result dtype is correct # GH#19043 make sure names are propagated correctly @@ -2209,13 +2073,22 @@ class TestAddSubTimestampBox: def test_add(self, box_with_array, box_with_array2): # GH: 35897 - td_box = tm.box_expected((Timedelta(hours=3),), box_with_array) - dt_box = tm.box_expected((Timestamp(2020, 1, 2),), box_with_array2) - + td_box = tm.box_expected( + (Timedelta(hours=3), Timedelta(hours=3), NaT, NaT), + box_with_array, + ) + dt_box = tm.box_expected( + (Timestamp(2020, 1, 2), NaT, Timestamp(2020, 1, 2), NaT), + box_with_array2, + ) expected_type = get_result_type(type(td_box), type(dt_box)) - expected = tm.box_expected((Timestamp(2020, 1, 2, 3),), expected_type) + expected = tm.box_expected( + (Timestamp(2020, 1, 2, 3), NaT, NaT, NaT), + expected_type, + ) result = dt_box + td_box + assert isinstance(result, expected_type) tm.assert_equal(result, expected) def test_add_raises_if_result_would_overflow( @@ -2233,6 +2106,33 @@ def test_add_raises_if_result_would_overflow( with pytest.raises(ex, match=msg): dt_max_box + td_max_box + def test_sub(self, box_with_array, box_with_array2): + td_box = tm.box_expected( + (Timedelta(hours=3), Timedelta(hours=3), NaT, NaT), + box_with_array, + ) + dt_box = tm.box_expected( + (Timestamp(2020, 1, 2, 6), NaT, Timestamp(2020, 1, 2, 6), NaT), + box_with_array2, + ) + expected_type = get_result_type(type(td_box), type(dt_box)) + expected = tm.box_expected( + (Timestamp(2020, 1, 2, 3), NaT, NaT, NaT), + expected_type, + ) + result = dt_box - td_box + + assert isinstance(result, expected_type) + tm.assert_equal(result, expected) + + def test_sub_dt_box_from_td_box_raises(self, box_with_array, box_with_array2): + td_box = tm.box_expected((Timedelta(hours=3),), box_with_array) + dt_box = tm.box_expected((Timestamp(2020, 1, 2),), box_with_array2) + msg = "cannot subtract a datelike from a TimedeltaArray" + + with pytest.raises(TypeError, match=msg): + td_box - dt_box + def test_sub_raises_if_result_would_overflow( self, td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 21334a98748fb..7599f6e469386 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -51,15 +51,25 @@ td64_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) # TODO: more robust platform/env detection? -xfail_on_arm = pytest.mark.xfail( - os.environ.get("CIRCLECI") == "true", - reason="ints wrap on arm?", +on_arm = os.environ.get("CIRCLECI") == "true" +using_array_data_mgr = os.environ.get("PANDAS_DATA_MANAGER") == "array" + +xfail_returns_nat = partial( + pytest.mark.xfail, + reason="returns NaT", + raises=AssertionError, + strict=True, +) +xfail_ints_wrap = pytest.mark.xfail( + reason="ints wrap", raises=AssertionError, + strict=True, ) -xfail_with_array_data_manager = pytest.mark.xfail( +xfail_value_overflow_error = pytest.mark.xfail( os.environ.get("PANDAS_DATA_MANAGER") == "array", reason="unclear", raises=(ValueError, OverflowError), + strict=True, ) @@ -1606,16 +1616,27 @@ def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( assert result != value assert np.isclose(result.value, value.value) + @xfail_returns_nat(condition=not on_arm) @pytest.mark.parametrize( "value", ( - Timedelta.min, - Timedelta.min + Timedelta(511), - Timedelta.max - Timedelta(511), - Timedelta.max, + pytest.param(Timedelta.min, marks=xfail_returns_nat(condition=on_arm)), + pytest.param( + Timedelta.min + Timedelta(511), + marks=xfail_returns_nat(condition=on_arm), + ), + pytest.param( + Timedelta.max - Timedelta(511), + marks=pytest.mark.xfail( + on_arm, + reason="returns Timedelta.max", + raises=AssertionError, + ), + ), + pytest.param(Timedelta.max), ), ) - def test_single_elem_sum_fails_for_large_values( + def test_single_elem_sum_works_near_boundaries( self, value: Timedelta, index_or_series_or_array, @@ -1623,7 +1644,7 @@ def test_single_elem_sum_fails_for_large_values( td_arraylike = tm.box_expected((value,), index_or_series_or_array) result = td_arraylike.sum() - assert result is NaT + assert result == value @pytest.mark.parametrize( ("values", "expected_exs"), @@ -1653,18 +1674,23 @@ def test_arraylike_sum_usually_raises_for_overflow( @pytest.mark.parametrize( "values", ( - pytest.param([Timedelta.min] * 2, marks=xfail_with_array_data_manager), - [Timedelta.min, Timedelta(-1)], pytest.param( - [Timedelta.max, Timedelta(1)], - marks=[xfail_on_arm, xfail_with_array_data_manager], + (Timedelta.min,) * 2, + marks=xfail_value_overflow_error(condition=using_array_data_mgr), + ), + (Timedelta.min, Timedelta(-1)), + pytest.param( + (Timedelta.max, Timedelta(1)), marks=xfail_ints_wrap(condition=on_arm) ), pytest.param( - [Timedelta.max] * 2, - marks=[xfail_on_arm, xfail_with_array_data_manager], + (Timedelta.max,) * 2, + marks=( + xfail_ints_wrap(condition=on_arm), + xfail_value_overflow_error(condition=using_array_data_mgr), + ), ), ), - ids=["double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"], + ids=("double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"), ) def test_df_sum_usually_returns_nat_for_overflows(self, values: list[Timedelta]): """ From 7a6a8cf08b2447a11eeae3cd115f8e0bf7cead12 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 15:18:44 -0700 Subject: [PATCH 21/21] platform-specific fixes --- pandas/tests/reductions/test_reductions.py | 54 +++++++++++----------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 7599f6e469386..5c77ed998981e 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1,9 +1,5 @@ from __future__ import annotations -from contextlib import ( - AbstractContextManager, - nullcontext, -) from datetime import ( datetime, timedelta, @@ -46,27 +42,30 @@ ) -does_not_raise = nullcontext -td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) -td64_value_error = partial(pytest.raises, ValueError, match=TD64_VALUE_ERROR_MSG) - # TODO: more robust platform/env detection? on_arm = os.environ.get("CIRCLECI") == "true" using_array_data_mgr = os.environ.get("PANDAS_DATA_MANAGER") == "array" +xfail_does_not_raise = partial( + pytest.mark.xfail, + reason="should raise exception", + raises=pytest.fail.Exception, + strict=True, +) xfail_returns_nat = partial( pytest.mark.xfail, reason="returns NaT", raises=AssertionError, strict=True, ) -xfail_ints_wrap = pytest.mark.xfail( +xfail_ints_wrap = partial( + pytest.mark.xfail, reason="ints wrap", raises=AssertionError, strict=True, ) -xfail_value_overflow_error = pytest.mark.xfail( - os.environ.get("PANDAS_DATA_MANAGER") == "array", +xfail_value_overflow_error = partial( + pytest.mark.xfail, reason="unclear", raises=(ValueError, OverflowError), strict=True, @@ -1647,29 +1646,32 @@ def test_single_elem_sum_works_near_boundaries( assert result == value @pytest.mark.parametrize( - ("values", "expected_exs"), + "values", ( - ([Timedelta.min] * 2, td64_value_error()), - ([Timedelta.min, Timedelta(-1025)], td64_value_error()), - ([Timedelta.min, Timedelta(-1024)], does_not_raise()), - ([Timedelta.min, Timedelta(-1)], does_not_raise()), - ([Timedelta.max, Timedelta(1)], does_not_raise()), - ([Timedelta.max, Timedelta(1024)], does_not_raise()), - ([Timedelta.max, Timedelta(1025)], td64_value_error()), - ([Timedelta.max] * 2, td64_value_error()), + (Timedelta.min, Timedelta.min), + (Timedelta.min, Timedelta(-1025)), + pytest.param( + (Timedelta.min, Timedelta(-1024)), + marks=xfail_does_not_raise(), + ), + pytest.param((Timedelta.min, Timedelta(-1)), marks=xfail_does_not_raise()), + pytest.param((Timedelta.max, Timedelta(1)), marks=xfail_does_not_raise()), + pytest.param( + (Timedelta.max, Timedelta(1024)), + marks=xfail_does_not_raise(), + ), + (Timedelta.max, Timedelta(1025)), + (Timedelta.max, Timedelta.max), ), ) def test_arraylike_sum_usually_raises_for_overflow( self, - values: list[Timedelta], - expected_exs: AbstractContextManager, + values: tuple[Timedelta], index_or_series_or_array, ): td_arraylike = tm.box_expected(values, index_or_series_or_array) - with expected_exs: - result = td_arraylike.sum() - # for small negative overflows, sum() doesn't raise but does return NaT - assert result is NaT + with pytest.raises(ValueError, match=TD64_VALUE_ERROR_MSG): + td_arraylike.sum() @pytest.mark.parametrize( "values",