From 02bc891bf2b1d9f39ed89314398d4cce7257240c Mon Sep 17 00:00:00 2001 From: Nicola De Mitri Date: Thu, 15 Sep 2022 08:23:44 +0100 Subject: [PATCH 1/5] FIX: suppress invalid data warnings for handling NaNs in DataRange1D --- chaco/data_range_1d.py | 14 ++++++++---- chaco/tests/test_datarange_1d.py | 39 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/chaco/data_range_1d.py b/chaco/data_range_1d.py index 66f070ec9..41d3624a0 100644 --- a/chaco/data_range_1d.py +++ b/chaco/data_range_1d.py @@ -16,7 +16,7 @@ # Major library imports from math import ceil, floor, log -from numpy import compress, inf, isinf, isnan, ndarray +from numpy import compress, inf, isinf, isnan, ndarray, errstate # Enthought library imports from traits.api import Bool, CFloat, Enum, Float, Property, Trait, Callable @@ -123,9 +123,15 @@ def mask_data(self, data): Implements AbstractDataRange. """ - return (data.view(ndarray) >= self._low_value) & ( - data.view(ndarray) <= self._high_value - ) + with errstate(invalid="ignore"): + # The data array may contain NaNs. These are strictly invalid for + # comparison and Numpy would emit a warning. Since we are happy + # with the defaul behavior (NaNs are "False" in the mask), we + # silence the warning. + mask = (data.view(ndarray) >= self._low_value) & ( + data.view(ndarray) <= self._high_value + ) + return mask def bound_data(self, data): """Returns a tuple of indices for the start and end of the first run diff --git a/chaco/tests/test_datarange_1d.py b/chaco/tests/test_datarange_1d.py index a40e4ae60..4757944ae 100644 --- a/chaco/tests/test_datarange_1d.py +++ b/chaco/tests/test_datarange_1d.py @@ -9,6 +9,7 @@ # Thanks for using Enthought open source! import unittest +import warnings from numpy import arange, array, zeros, inf from numpy.testing import assert_equal @@ -17,6 +18,8 @@ from chaco.api import DataRange1D, ArrayDataSource +NAN = float("nan") + class Foo(HasTraits): """ @@ -232,6 +235,15 @@ def test_clip_data(self): r = DataRange1D(low=2.0, high=2.5) assert_equal(len(r.clip_data(ary)), 0) + # Test the case with nans. Additionally require that no warnings are + # emitted. + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + r = DataRange1D(low=2.0, high=10.0) + ary = array([1, 3, NAN, 9.8, 10.2, 12]) + assert_equal(r.clip_data(ary), array([3.0, 9.8])) + self.assertEqual(len(w), 0) + def test_mask_data(self): r = DataRange1D(low=2.0, high=10.0) ary = array([1, 3, 4, 9.8, 10.2, 12]) @@ -246,6 +258,25 @@ def test_mask_data(self): r = DataRange1D(low=2.0, high=2.5) assert_equal(r.mask_data(ary), zeros(len(ary))) + def test_mask_data_contains_nans(self): + # Given + r = DataRange1D(low=2.0, high=10.0) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + # When + has_nans = array([1, 3, 9.8, NAN, 12]) + # Then + assert_equal(r.mask_data(has_nans), array([0, 1, 1, 0, 0], "b")) + + # When + all_nans = array([NAN, NAN, NAN]) + # Then + assert_equal(r.mask_data(all_nans), array([0, 0, 0], "b")) + + # Then (treating nans should come with no warnings) + self.assertEqual(len(w), 0) + def test_bound_data(self): r = DataRange1D(low=2.9, high=6.1) ary = arange(10) @@ -256,6 +287,14 @@ def test_bound_data(self): bounds = r.bound_data(ary) assert_equal(bounds, (7, 11)) + # test data with nan (expected: nan breaks a run) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + ary = array([1, 2, 3, 4, NAN, 6, 7]) + bounds = r.bound_data(ary) + assert_equal(bounds, (2, 3)) + self.assertEqual(len(w), 0) + def test_custom_bounds_func(self): def custom_func(low, high, margin, tight_bounds): assert_equal(low, 0.0) From ff500df2bb075fe05a18af812519eb21d8fb02f8 Mon Sep 17 00:00:00 2001 From: Nicola De Mitri Date: Thu, 15 Sep 2022 08:59:05 +0100 Subject: [PATCH 2/5] STY: import order and unused imports --- chaco/data_range_1d.py | 2 +- chaco/data_range_2d.py | 4 +--- chaco/tests/test_datarange_1d.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/chaco/data_range_1d.py b/chaco/data_range_1d.py index 41d3624a0..2d1b1124e 100644 --- a/chaco/data_range_1d.py +++ b/chaco/data_range_1d.py @@ -16,7 +16,7 @@ # Major library imports from math import ceil, floor, log -from numpy import compress, inf, isinf, isnan, ndarray, errstate +from numpy import compress, errstate, inf, isinf, isnan, ndarray # Enthought library imports from traits.api import Bool, CFloat, Enum, Float, Property, Trait, Callable diff --git a/chaco/data_range_2d.py b/chaco/data_range_2d.py index 436bd769f..587ecd794 100644 --- a/chaco/data_range_2d.py +++ b/chaco/data_range_2d.py @@ -12,16 +12,14 @@ Defines the DataRange2D class. """ -from numpy import compress, inf, transpose +from numpy import compress, transpose # Enthought library imports from traits.api import ( - Any, Bool, CFloat, Instance, Property, - Trait, Tuple, observe, ) diff --git a/chaco/tests/test_datarange_1d.py b/chaco/tests/test_datarange_1d.py index 4757944ae..1cc06d22d 100644 --- a/chaco/tests/test_datarange_1d.py +++ b/chaco/tests/test_datarange_1d.py @@ -258,7 +258,7 @@ def test_mask_data(self): r = DataRange1D(low=2.0, high=2.5) assert_equal(r.mask_data(ary), zeros(len(ary))) - def test_mask_data_contains_nans(self): + def test_mask_data_containing_nans(self): # Given r = DataRange1D(low=2.0, high=10.0) with warnings.catch_warnings(record=True) as w: From 7ed98ebded824c81554057c9d266a5846c74095f Mon Sep 17 00:00:00 2001 From: Nicola De Mitri Date: Thu, 15 Sep 2022 09:38:46 +0100 Subject: [PATCH 3/5] MAINT: defensive errstate() context in DataRange2D --- chaco/data_range_2d.py | 11 ++++++++--- chaco/tests/test_datarange_2d.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/chaco/data_range_2d.py b/chaco/data_range_2d.py index 587ecd794..0a1093c21 100644 --- a/chaco/data_range_2d.py +++ b/chaco/data_range_2d.py @@ -12,7 +12,7 @@ Defines the DataRange2D class. """ -from numpy import compress, transpose +from numpy import compress, errstate, transpose # Enthought library imports from traits.api import ( @@ -94,8 +94,13 @@ def mask_data(self, data): Implements AbstractDataRange. """ x_points, y_points = transpose(data) - x_mask = (x_points >= self.low[0]) & (x_points <= self.high[0]) - y_mask = (y_points >= self.low[1]) & (y_points <= self.high[1]) + with errstate(invalid="ignore"): + # Silence possible warnings that may result from NaNs being in the + # arrays. It is not strictly needed because apparently comparisons + # on arrays resulting from transpose() don't trigger the warning + # in the first place. + x_mask = (x_points >= self.low[0]) & (x_points <= self.high[0]) + y_mask = (y_points >= self.low[1]) & (y_points <= self.high[1]) return x_mask & y_mask def bound_data(self, data): diff --git a/chaco/tests/test_datarange_2d.py b/chaco/tests/test_datarange_2d.py index f75de9ba2..519308260 100644 --- a/chaco/tests/test_datarange_2d.py +++ b/chaco/tests/test_datarange_2d.py @@ -9,12 +9,15 @@ # Thanks for using Enthought open source! import unittest +import warnings from numpy import alltrue, arange, array, ravel, transpose, zeros, inf, isinf from numpy.testing import assert_equal, assert_ from chaco.api import DataRange2D, GridDataSource, PointDataSource +NAN = float("nan") + class DataRange2DTestCase(unittest.TestCase): def test_empty_range(self): @@ -205,6 +208,35 @@ def test_mask_data(self): r = DataRange2D(low=[2.0, 5.0], high=[2.5, 9.0]) assert_equal(r.mask_data(ary), zeros(len(ary))) + def test_mask_data_containing_nans(self): + # Given + r = DataRange2D(low=[2.0, 3.0], high=[12.0, 13.0]) + ary_1 = array( + [ + [NAN, 1.0], + [NAN, 4.0], + [NAN, NAN], + [25.1, NAN], + [5.0, 6.0], + [12.5, 6.0] + ] + ) + expected_mask_1 = array([0, 0, 0, 0, 1, 0], "b") + ary_2 = array([[NAN, NAN], [NAN, NAN]]) + expected_mask_2 = array([0, 0], "b") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + # When + mask_1 = r.mask_data(ary_1) + mask_2 = r.mask_data(ary_2) + + # Then + assert_equal(mask_1, expected_mask_1) + assert_equal(mask_2, expected_mask_2) + self.assertEqual(len(w), 0) + def assert_close_(desired, actual): diff_allowed = 1e-5 From a091e94fa0d8966c32c92dedb5a55bb8479719f8 Mon Sep 17 00:00:00 2001 From: Nicola De Mitri Date: Thu, 15 Sep 2022 09:53:16 +0100 Subject: [PATCH 4/5] DOC: there is no difference between the 1D and 2D case --- chaco/data_range_1d.py | 8 ++++---- chaco/data_range_2d.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/chaco/data_range_1d.py b/chaco/data_range_1d.py index 2d1b1124e..a607dcc05 100644 --- a/chaco/data_range_1d.py +++ b/chaco/data_range_1d.py @@ -124,10 +124,10 @@ def mask_data(self, data): Implements AbstractDataRange. """ with errstate(invalid="ignore"): - # The data array may contain NaNs. These are strictly invalid for - # comparison and Numpy would emit a warning. Since we are happy - # with the defaul behavior (NaNs are "False" in the mask), we - # silence the warning. + # Running under context because the data array may contain NaNs. + # These are strictly invalid for comparison and Numpy would emit + # a warning. Since we are happy with the default behavior (NaNs + # become "False" in the mask), we silence the warning. mask = (data.view(ndarray) >= self._low_value) & ( data.view(ndarray) <= self._high_value ) diff --git a/chaco/data_range_2d.py b/chaco/data_range_2d.py index 0a1093c21..929d9c1cc 100644 --- a/chaco/data_range_2d.py +++ b/chaco/data_range_2d.py @@ -95,10 +95,10 @@ def mask_data(self, data): """ x_points, y_points = transpose(data) with errstate(invalid="ignore"): - # Silence possible warnings that may result from NaNs being in the - # arrays. It is not strictly needed because apparently comparisons - # on arrays resulting from transpose() don't trigger the warning - # in the first place. + # Running under context because the data array may contain NaNs. + # These are strictly invalid for comparison and Numpy would emit + # a warning. Since we are happy with the default behavior (NaNs + # become "False" in the mask), we silence the warning. x_mask = (x_points >= self.low[0]) & (x_points <= self.high[0]) y_mask = (y_points >= self.low[1]) & (y_points <= self.high[1]) return x_mask & y_mask From 8cbebdfadf4618e27aeea8cca5a08da0dcef62b9 Mon Sep 17 00:00:00 2001 From: Nicola De Mitri Date: Thu, 15 Sep 2022 10:23:25 +0100 Subject: [PATCH 5/5] DOC: mention caveats for warning testing --- chaco/tests/test_datarange_1d.py | 5 +++++ chaco/tests/test_datarange_2d.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/chaco/tests/test_datarange_1d.py b/chaco/tests/test_datarange_1d.py index 1cc06d22d..52910dedb 100644 --- a/chaco/tests/test_datarange_1d.py +++ b/chaco/tests/test_datarange_1d.py @@ -275,6 +275,11 @@ def test_mask_data_containing_nans(self): assert_equal(r.mask_data(all_nans), array([0, 0, 0], "b")) # Then (treating nans should come with no warnings) + # NOTE: This assertion may pass because the warning has been correctly + # silenced by us (useful test), but it may also pass because the + # warning has been inactivated by the "only warn once" Python rule + # (test ineffective, false negative). Clearing the registry only for + # test purposes is not feasible: https://bugs.python.org/issue21724 self.assertEqual(len(w), 0) def test_bound_data(self): diff --git a/chaco/tests/test_datarange_2d.py b/chaco/tests/test_datarange_2d.py index 519308260..761878424 100644 --- a/chaco/tests/test_datarange_2d.py +++ b/chaco/tests/test_datarange_2d.py @@ -235,6 +235,12 @@ def test_mask_data_containing_nans(self): # Then assert_equal(mask_1, expected_mask_1) assert_equal(mask_2, expected_mask_2) + + # This assertion may pass because the warning has been correctly + # silenced by us (useful test), but it may also pass because the + # warning has been inactivated by the "only warn once" Python rule + # (test ineffective, false negative). Clearing the registry only for + # test purposes is not feasible: https://bugs.python.org/issue21724 self.assertEqual(len(w), 0)