From 50808c9fe4bc4cbe2796ac3359e5427791e4875d Mon Sep 17 00:00:00 2001 From: DavidLP Date: Tue, 5 Dec 2017 14:22:55 +0100 Subject: [PATCH 1/2] PRJ: stick to PEP8! --- .landscape.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.landscape.yaml b/.landscape.yaml index 396c5333..d3f5a12f 100644 --- a/.landscape.yaml +++ b/.landscape.yaml @@ -1 +1 @@ -max-line-length: 999 + From 6bbe81341b252a7abe890b17dc6db951e163ef3f Mon Sep 17 00:00:00 2001 From: DavidLP Date: Tue, 5 Dec 2017 14:23:26 +0100 Subject: [PATCH 2/2] ENH: add fuzzing testing with hypothesis --- .travis.yml | 2 +- appveyor.yml | 2 +- requirements.txt | 3 +- .../testing/test_analysis_utils.py | 248 ++++++++++++------ 4 files changed, 175 insertions(+), 80 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1ab068bb..646a2927 100644 --- a/.travis.yml +++ b/.travis.yml @@ -54,7 +54,7 @@ before_install: - conda info -a - conda create -q -n tba python="$TRAVIS_PYTHON_VERSION" pip - source activate tba - - conda install numpy cython pytables scipy matplotlib nose numba mock pytest-cov pyyaml + - conda install numpy cython pytables scipy matplotlib nose numba mock pytest-cov hypothesis pyyaml - pip install --upgrade pip - pip install progressbar-latest xvfbwrapper coverage python-coveralls pixel_clusterizer pylandau pytest - pip install -r requirements_docs.txt diff --git a/appveyor.yml b/appveyor.yml index d8b672e1..e976cd3a 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,7 +12,7 @@ init: - ps: Start-FileDownload 'http://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi' C:\VCForPython27.msi; echo "Done" - cmd: msiexec /i C:\VCForPython27.msi /quiet /qn - set PATH=C:\Miniconda-x64;C:\Miniconda-x64\\Scripts;%PATH% # Miniconda is already installed on appveyor: https://github.com/appveyor/ci/issues/359 - - conda install --yes numpy cython pytables scipy matplotlib nose numba mock pyyaml + - conda install --yes numpy cython pytables scipy 
matplotlib nose numba mock hypothesis pyyaml - pip install progressbar-latest pixel_clusterizer pylandau - conda info -a - conda list diff --git a/requirements.txt b/requirements.txt index a4ad8ddd..ab8d4101 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ pylandau>2.0.0 # for charge deposition simulation progressbar-latest # to show a progress bar numexpr # for fast c compiled loops on numpy arrays dill # better serialization needed for multithreading -numpydoc # To use numpy docstring for gui tooltips \ No newline at end of file +numpydoc # To use numpy docstring for gui tooltips +hypothesis # fuzzing testing \ No newline at end of file diff --git a/testbeam_analysis/testing/test_analysis_utils.py b/testbeam_analysis/testing/test_analysis_utils.py index f605e08a..3f6c6a28 100644 --- a/testbeam_analysis/testing/test_analysis_utils.py +++ b/testbeam_analysis/testing/test_analysis_utils.py @@ -7,6 +7,11 @@ import tables as tb import numpy as np +from hypothesis import given, seed +import hypothesis.extra.numpy as nps +import hypothesis.strategies as st +from hypothesis.extra.numpy import unsigned_integer_dtypes + from testbeam_analysis.cpp import data_struct from testbeam_analysis.tools import analysis_utils, test_tools @@ -23,119 +28,208 @@ def setUpClass(cls): def tearDownClass(cls): # remove created files pass - def test_analysis_utils_get_events_in_both_arrays(self): # check compiled get_events_in_both_arrays function - event_numbers = np.array([[0, 0, 2, 2, 2, 4, 5, 5, 6, 7, 7, 7, 8], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int64) - event_numbers_2 = np.array([1, 1, 1, 2, 2, 2, 4, 4, 4, 7], dtype=np.int64) - result = analysis_utils.get_events_in_both_arrays(event_numbers[0], event_numbers_2) - self.assertListEqual([2, 4, 7], result.tolist()) + @given(nps.arrays(np.int64, + shape=nps.array_shapes(max_dims=1, max_side=32000), + elements=st.integers(0, 2 ** 16 - 1))) + def test_get_events_in_both_arrays_fuzzing(self, arr): + ''' 
Check get_events_in_both_arrays function''' + + event_numbers = np.sort(arr) + + result = analysis_utils.get_events_in_both_arrays(event_numbers, + event_numbers) + + def numpy_solution(event_numbers, event_numbers_2): + ''' Slow numpy solution to check against ''' + return np.unique(event_numbers[np.in1d(event_numbers, event_numbers_2)]) + + self.assertListEqual(numpy_solution(event_numbers, + event_numbers).tolist(), + result.tolist()) - def test_analysis_utils_get_max_events_in_both_arrays(self): # check compiled get_max_events_in_both_arrays function + def test_analysis_utils_get_max_events_in_both_arrays(self): + ''' Check compiled get_max_events_in_both_arrays function''' # Test 1 - event_numbers = np.array([[0, 0, 1, 1, 2], [0, 0, 0, 0, 0]], dtype=np.int64) + event_numbers = np.array([[0, 0, 1, 1, 2], + [0, 0, 0, 0, 0]], + dtype=np.int64) event_numbers_2 = np.array([0, 3, 3, 4], dtype=np.int64) - result = analysis_utils.get_max_events_in_both_arrays(event_numbers[0], event_numbers_2) + result = analysis_utils.get_max_events_in_both_arrays(event_numbers[0], + event_numbers_2) self.assertListEqual([0, 0, 1, 1, 2, 3, 3, 4], result.tolist()) # Test 2 event_numbers = np.array([1, 1, 2, 4, 5, 6, 7], dtype=np.int64) event_numbers_2 = np.array([0, 3, 3, 4], dtype=np.int64) - result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2) + result = analysis_utils.get_max_events_in_both_arrays(event_numbers, + event_numbers_2) self.assertListEqual([0, 1, 1, 2, 3, 3, 4, 5, 6, 7], result.tolist()) # Test 3 event_numbers = np.array([1, 1, 2, 4, 5, 6, 7], dtype=np.int64) event_numbers_2 = np.array([6, 7, 9, 10], dtype=np.int64) - result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2) + result = analysis_utils.get_max_events_in_both_arrays(event_numbers, + event_numbers_2) self.assertListEqual([1, 1, 2, 4, 5, 6, 7, 9, 10], result.tolist()) # Test 4 event_numbers = np.array([1, 1, 2, 4, 5, 6, 7, 10, 10], dtype=np.int64) 
event_numbers_2 = np.array([1, 6, 7, 9, 10], dtype=np.int64) - result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2) + result = analysis_utils.get_max_events_in_both_arrays( + event_numbers, event_numbers_2) self.assertListEqual([1, 1, 2, 4, 5, 6, 7, 9, 10, 10], result.tolist()) # Test 5 event_numbers = np.array([1, 1, 2, 4, 5, 6, 7, 10, 10], dtype=np.int64) event_numbers_2 = np.array([1, 1, 1, 6, 7, 9, 10], dtype=np.int64) - result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2) - self.assertListEqual([1, 1, 1, 2, 4, 5, 6, 7, 9, 10, 10], result.tolist()) - - def test_map_cluster(self): # check the compiled function against result - clusters = np.zeros((20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable)) - result = np.zeros((20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable)) + result = analysis_utils.get_max_events_in_both_arrays(event_numbers, + event_numbers_2) + self.assertListEqual([1, 1, 1, 2, 4, 5, 6, 7, 9, 10, 10], + result.tolist()) + + def test_map_cluster(self): + ''' Check the compiled function against result ''' + # Create result + result = np.zeros( + (20, ), + dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable)) result["mean_column"] = np.nan result["mean_row"] = np.nan result["charge"] = np.nan - result[1]["event_number"], result[3]["event_number"], result[7]["event_number"], result[8]["event_number"], result[9]["event_number"] = 1, 2, 4, 4, 19 - result[0]["mean_column"], result[1]["mean_column"], result[3]["mean_column"], result[7]["mean_column"], result[8]["mean_column"], result[9]["mean_column"] = 1, 2, 3, 5, 6, 20 - result[0]["mean_row"], result[1]["mean_row"], result[3]["mean_row"], result[7]["mean_row"], result[8]["mean_row"], result[9]["mean_row"] = 0, 0, 0, 0, 0, 0 - result[0]["charge"], result[1]["charge"], result[3]["charge"], result[7]["charge"], result[8]["charge"], result[9]["charge"] = 0, 0, 0, 0, 0, 0 + (result[1]["event_number"], 
result[3]["event_number"], result[7]["event_number"], + result[8]["event_number"], result[9]["event_number"]) = (1, 2, 4, 4, 19) + + (result[0]["mean_column"], result[1]["mean_column"], + result[3]["mean_column"], result[7]["mean_column"], + result[8]["mean_column"], result[9]["mean_column"]) = (1, 2, 3, 5, 6, 20) + + (result[0]["mean_row"], result[1]["mean_row"], + result[3]["mean_row"], result[7]["mean_row"], + result[8]["mean_row"], result[9]["mean_row"]) = (0, 0, 0, 0, 0, 0) + + (result[0]["charge"], result[1]["charge"], result[3]["charge"], + result[7]["charge"], result[8]["charge"], result[9]["charge"]) = (0, 0, 0, 0, 0, 0) + # Create data + clusters = np.zeros( + (20, ), + dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable)) for index, cluster in enumerate(clusters): cluster['mean_column'] = index + 1 cluster["event_number"] = index clusters[3]["event_number"] = 2 clusters[5]["event_number"] = 4 - common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4], dtype=np.int64) + common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4], + dtype=np.int64) - data_equal = test_tools.nan_equal(first_array=analysis_utils.map_cluster(common_event_number, clusters), - second_array=result[:common_event_number.shape[0]]) + data_equal = test_tools.nan_equal( + first_array=analysis_utils.map_cluster( + common_event_number, clusters), + second_array=result[:common_event_number.shape[0]]) self.assertTrue(data_equal) - def test_analysis_utils_in1d_events(self): # check compiled get_in1d_sorted function - event_numbers = np.array([[0, 0, 2, 2, 2, 4, 5, 5, 6, 7, 7, 7, 8], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int64) - event_numbers_2 = np.array([1, 1, 1, 2, 2, 2, 4, 4, 4, 7], dtype=np.int64) - result = event_numbers[0][analysis_utils.in1d_events(event_numbers[0], event_numbers_2)] + def test_analysis_utils_in1d_events(self): + ''' Check compiled get_in1d_sorted function ''' + event_numbers = np.array([[0, 0, 2, 2, 2, 4, 5, 5, 6, 7, 7, 7, 8], + [0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0]], + dtype=np.int64) + event_numbers_2 = np.array([1, 1, 1, 2, 2, 2, 4, 4, 4, 7], + dtype=np.int64) + result = event_numbers[0][analysis_utils.in1d_events(event_numbers[0], + event_numbers_2)] self.assertListEqual([2, 2, 2, 4, 7, 7, 7], result.tolist()) - def test_1d_index_histograming(self): # check compiled hist_2D_index function - x = np.random.randint(0, 100, 100) - shape = (100, ) - array_fast = analysis_utils.hist_1d_index(x, shape=shape) - array = np.histogram(x, bins=shape[0], range=(0, shape[0]))[0] - shape = (5, ) # shape that is too small for the indices to trigger exception - exception_ok = False - try: - array_fast = analysis_utils.hist_1d_index(x, shape=shape) - except IndexError: - exception_ok = True - except: # other exception that should not occur - pass - self.assertTrue(exception_ok & np.all(array == array_fast)) - - def test_2d_index_histograming(self): # check compiled hist_2D_index function - x, y = np.random.randint(0, 100, 100), np.random.randint(0, 100, 100) - shape = (100, 100) - array_fast = analysis_utils.hist_2d_index(x, y, shape=shape) - array = np.histogram2d(x, y, bins=shape, range=[[0, shape[0]], [0, shape[1]]])[0] - shape = (5, 200) # shape that is too small for the indices to trigger exception - exception_ok = False - try: - array_fast = analysis_utils.hist_2d_index(x, y, shape=shape) - except IndexError: - exception_ok = True - except: # other exception that should not occur - pass - self.assertTrue(exception_ok & np.all(array == array_fast)) - - def test_3d_index_histograming(self): # check compiled hist_3D_index function - with tb.open_file(analysis_utils.get_data('fixtures/analysis_utils/hist_data.h5', - output=os.path.join(testing_path, 'fixtures/analysis_utils/hist_data.h5')), - mode="r") as in_file_h5: - xyz = in_file_h5.root.HistDataXYZ[:] - x, y, z = xyz[0], xyz[1], xyz[2] - shape = (100, 100, 100) - array_fast = analysis_utils.hist_3d_index(x, y, z, shape=shape) - array = 
np.histogramdd(np.column_stack((x, y, z)), bins=shape, range=[[0, shape[0] - 1], [0, shape[1] - 1], [0, shape[2] - 1]])[0] - shape = (50, 200, 200) # shape that is too small for the indices to trigger exception - exception_ok = False - try: - array_fast = analysis_utils.hist_3d_index(x, y, z, shape=shape) - except IndexError: - exception_ok = True - except: # other exception that should not occur - pass - self.assertTrue(exception_ok & np.all(array == array_fast)) + def test_1d_index_histograming(self): + ''' Check jitted hist_1D_index function ''' + + # Shape that is too small for the indices to trigger exception + x = np.linspace(0, 100, 100) + shape = (5, ) + with self.assertRaises(IndexError): + analysis_utils.hist_1d_index(x, shape=shape) + + @given(nps.arrays(unsigned_integer_dtypes(), + shape=nps.array_shapes(max_dims=1, max_side=32000), + elements=st.integers(0, 2 ** 16 - 1))) + def test_1d_index_hist_fuzzing(self, x): + # Set maximum shape from maximum value + shape = (np.max(x) + 1, ) + # Cast to uint32 needed since python + # does sometimes upcast to int64 or float64 + shape_numpy = ((shape[0]).astype(np.uint32), ) + + array_fast = analysis_utils.hist_1d_index(x, shape=shape_numpy) + + + array = np.histogram(x.astype(np.uint32), + bins=shape_numpy[0], + range=(0, shape_numpy[0]))[0] + self.assertTrue(np.all(array == array_fast)) + + @given(nps.arrays(unsigned_integer_dtypes(), + shape=(2, 32000), + elements=st.integers(0, 2 ** 8))) + def test_2d_index_hist_fuzzing(self, arr): + # Set maximum shape from maximum value + x, y = arr[0, :], arr[1, :] + shape = (x.max() + 1, y.max() + 1) + # Cast to uint32 needed since python + # does sometimes upcast to int64 or float64 + shape_numpy = ((shape[0]).astype(np.uint32), + (shape[1]).astype(np.uint32)) + + array_fast = analysis_utils.hist_2d_index(x, y, + shape=shape_numpy) + + array = np.histogram2d(x, y, bins=shape, + range=[[0, shape[0]], [0, shape[1]]])[0] + self.assertTrue(np.all(array == array_fast)) + + def 
test_2d_index_histograming(self): + ''' Check jitted hist_2D_index exception ''' + x, y = np.linspace(0, 100, 100), np.linspace(0, 100, 100) + + with self.assertRaises(IndexError): + analysis_utils.hist_2d_index(x, y, shape=(5, 200)) + with self.assertRaises(IndexError): + analysis_utils.hist_2d_index(x, y, shape=(200, 5)) + + @given(nps.arrays(unsigned_integer_dtypes(), + shape=(3, 32000), + elements=st.integers(0, 2 ** 8))) + def test_3d_index_hist_fuzzing(self, arr): + ''' Fuzzing jitted hist_3D_index function ''' + x, y, z = arr[0, :], arr[1, :], arr[2, :] + shape = (x.max() + 1, y.max() + 1, z.max() + 1) + # Cast to uint32 needed since python + # does sometimes upcast to int64 or float64 + shape_numpy = ((shape[0]).astype(np.uint32), + (shape[1]).astype(np.uint32), + (shape[2]).astype(np.uint32)) + + array_fast = analysis_utils.hist_3d_index(x, y, z, + shape=shape_numpy) + + array = np.histogramdd(np.column_stack((x, y, z)), bins=shape, + range=[[0, shape_numpy[0] - 1], + [0, shape_numpy[1] - 1], + [0, shape_numpy[2] - 1]])[0] + self.assertTrue(np.all(array == array_fast)) + + def test_3d_index_histograming(self): + ''' Check jitted hist_3D_index exceptions ''' + # Shape that is too small for the indices to trigger exception + x = y = z = np.linspace(0, 100, 100) + + with self.assertRaises(IndexError): + analysis_utils.hist_3d_index(x, y, z, shape=(200, 200, 99)) + + with self.assertRaises(IndexError): + analysis_utils.hist_3d_index(x, y, z, shape=(99, 200, 200)) + + with self.assertRaises(IndexError): + analysis_utils.hist_3d_index(x, y, z, shape=(200, 99, 200)) if __name__ == '__main__': import logging - logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - [%(levelname)-8s] (%(threadName)-10s) %(message)s") + logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - [%(levelname)-8s] (%(threadName)-10s) %(message)s") suite = unittest.TestLoader().loadTestsFromTestCase(TestAnalysisUtils)
unittest.TextTestRunner(verbosity=2).run(suite)