From 50808c9fe4bc4cbe2796ac3359e5427791e4875d Mon Sep 17 00:00:00 2001
From: DavidLP <pohl@physik.uni-bonn.de>
Date: Tue, 5 Dec 2017 14:22:55 +0100
Subject: [PATCH 1/2] PRJ: stick to PEP8!

---
 .landscape.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.landscape.yaml b/.landscape.yaml
index 396c5333..d3f5a12f 100644
--- a/.landscape.yaml
+++ b/.landscape.yaml
@@ -1 +1 @@
-max-line-length: 999
+

From 6bbe81341b252a7abe890b17dc6db951e163ef3f Mon Sep 17 00:00:00 2001
From: DavidLP <pohl@physik.uni-bonn.de>
Date: Tue, 5 Dec 2017 14:23:26 +0100
Subject: [PATCH 2/2] ENH: add fuzz testing with hypothesis

---
 .travis.yml                                   |   2 +-
 appveyor.yml                                  |   2 +-
 requirements.txt                              |   3 +-
 .../testing/test_analysis_utils.py            | 248 ++++++++++++------
 4 files changed, 175 insertions(+), 80 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 1ab068bb..646a2927 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,7 +54,7 @@ before_install:
   - conda info -a
   - conda create -q -n tba python="$TRAVIS_PYTHON_VERSION" pip
   - source activate tba
-  - conda install numpy cython pytables scipy matplotlib nose numba mock pytest-cov pyyaml
+  - conda install numpy cython pytables scipy matplotlib nose numba mock pytest-cov hypothesis pyyaml
   - pip install --upgrade pip
   - pip install progressbar-latest xvfbwrapper coverage python-coveralls pixel_clusterizer pylandau pytest
   - pip install -r requirements_docs.txt
diff --git a/appveyor.yml b/appveyor.yml
index d8b672e1..e976cd3a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -12,7 +12,7 @@ init:
   - ps: Start-FileDownload 'http://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi' C:\VCForPython27.msi; echo "Done"
   - cmd: msiexec /i C:\VCForPython27.msi /quiet /qn
   - set PATH=C:\Miniconda-x64;C:\Miniconda-x64\\Scripts;%PATH%  # Miniconda is already installed on appveyor: https://github.com/appveyor/ci/issues/359
-  - conda install --yes numpy cython pytables scipy matplotlib nose numba mock pyyaml
+  - conda install --yes numpy cython pytables scipy matplotlib nose numba mock hypothesis pyyaml
   - pip install progressbar-latest pixel_clusterizer pylandau
   - conda info -a
   - conda list
diff --git a/requirements.txt b/requirements.txt
index a4ad8ddd..ab8d4101 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,5 @@ pylandau>2.0.0  # for charge deposition simulation
 progressbar-latest  # to show a progress bar
 numexpr  # for fast c compiled loops on numpy arrays
 dill  # better serialization needed for multithreading
-numpydoc  # To use numpy docstring for gui tooltips
\ No newline at end of file
+numpydoc  # To use numpy docstring for gui tooltips
+hypothesis  # fuzz testing
\ No newline at end of file
diff --git a/testbeam_analysis/testing/test_analysis_utils.py b/testbeam_analysis/testing/test_analysis_utils.py
index f605e08a..3f6c6a28 100644
--- a/testbeam_analysis/testing/test_analysis_utils.py
+++ b/testbeam_analysis/testing/test_analysis_utils.py
@@ -7,6 +7,11 @@
 import tables as tb
 import numpy as np
 
+from hypothesis import given, seed
+import hypothesis.extra.numpy as nps
+import hypothesis.strategies as st
+from hypothesis.extra.numpy import unsigned_integer_dtypes
+
 from testbeam_analysis.cpp import data_struct
 from testbeam_analysis.tools import analysis_utils, test_tools
 
@@ -23,119 +28,208 @@ def setUpClass(cls):
     def tearDownClass(cls):  # remove created files
         pass
 
-    def test_analysis_utils_get_events_in_both_arrays(self):  # check compiled get_events_in_both_arrays function
-        event_numbers = np.array([[0, 0, 2, 2, 2, 4, 5, 5, 6, 7, 7, 7, 8], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int64)
-        event_numbers_2 = np.array([1, 1, 1, 2, 2, 2, 4, 4, 4, 7], dtype=np.int64)
-        result = analysis_utils.get_events_in_both_arrays(event_numbers[0], event_numbers_2)
-        self.assertListEqual([2, 4, 7], result.tolist())
+    @given(nps.arrays(np.int64,
+                      shape=nps.array_shapes(max_dims=1, max_side=32000),
+                      elements=st.integers(0, 2 ** 16 - 1)))
+    def test_get_events_in_both_arrays_fuzzing(self, arr):
+        ''' Check get_events_in_both_arrays function'''
+
+        event_numbers = np.sort(arr)
+
+        result = analysis_utils.get_events_in_both_arrays(event_numbers,
+                                                          event_numbers)
+
+        def numpy_solution(event_numbers, event_numbers_2):
+            ''' Slow numpy solution to check against '''
+            return np.unique(event_numbers[np.in1d(event_numbers, event_numbers_2)])
+
+        self.assertListEqual(numpy_solution(event_numbers,
+                                            event_numbers).tolist(),
+                             result.tolist())
 
-    def test_analysis_utils_get_max_events_in_both_arrays(self):  # check compiled get_max_events_in_both_arrays function
+    def test_analysis_utils_get_max_events_in_both_arrays(self):
+        ''' Check compiled get_max_events_in_both_arrays function'''
         # Test 1
-        event_numbers = np.array([[0, 0, 1, 1, 2], [0, 0, 0, 0, 0]], dtype=np.int64)
+        event_numbers = np.array([[0, 0, 1, 1, 2],
+                                  [0, 0, 0, 0, 0]],
+                                 dtype=np.int64)
         event_numbers_2 = np.array([0, 3, 3, 4], dtype=np.int64)
-        result = analysis_utils.get_max_events_in_both_arrays(event_numbers[0], event_numbers_2)
+        result = analysis_utils.get_max_events_in_both_arrays(event_numbers[0],
+                                                              event_numbers_2)
         self.assertListEqual([0, 0, 1, 1, 2, 3, 3, 4], result.tolist())
         # Test 2
         event_numbers = np.array([1, 1, 2, 4, 5, 6, 7], dtype=np.int64)
         event_numbers_2 = np.array([0, 3, 3, 4], dtype=np.int64)
-        result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2)
+        result = analysis_utils.get_max_events_in_both_arrays(event_numbers,
+                                                              event_numbers_2)
         self.assertListEqual([0, 1, 1, 2, 3, 3, 4, 5, 6, 7], result.tolist())
         # Test 3
         event_numbers = np.array([1, 1, 2, 4, 5, 6, 7], dtype=np.int64)
         event_numbers_2 = np.array([6, 7, 9, 10], dtype=np.int64)
-        result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2)
+        result = analysis_utils.get_max_events_in_both_arrays(event_numbers,
+                                                              event_numbers_2)
         self.assertListEqual([1, 1, 2, 4, 5, 6, 7, 9, 10], result.tolist())
         # Test 4
         event_numbers = np.array([1, 1, 2, 4, 5, 6, 7, 10, 10], dtype=np.int64)
         event_numbers_2 = np.array([1, 6, 7, 9, 10], dtype=np.int64)
-        result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2)
+        result = analysis_utils.get_max_events_in_both_arrays(
+            event_numbers, event_numbers_2)
         self.assertListEqual([1, 1, 2, 4, 5, 6, 7, 9, 10, 10], result.tolist())
         # Test 5
         event_numbers = np.array([1, 1, 2, 4, 5, 6, 7, 10, 10], dtype=np.int64)
         event_numbers_2 = np.array([1, 1, 1, 6, 7, 9, 10], dtype=np.int64)
-        result = analysis_utils.get_max_events_in_both_arrays(event_numbers, event_numbers_2)
-        self.assertListEqual([1, 1, 1, 2, 4, 5, 6, 7, 9, 10, 10], result.tolist())
-
-    def test_map_cluster(self):  # check the compiled function against result
-        clusters = np.zeros((20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
-        result = np.zeros((20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
+        result = analysis_utils.get_max_events_in_both_arrays(event_numbers,
+                                                              event_numbers_2)
+        self.assertListEqual([1, 1, 1, 2, 4, 5, 6, 7, 9, 10, 10],
+                             result.tolist())
+
+    def test_map_cluster(self):
+        ''' Check compiled map_cluster against the expected result '''
+        # Create result
+        result = np.zeros(
+            (20, ),
+            dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
         result["mean_column"] = np.nan
         result["mean_row"] = np.nan
         result["charge"] = np.nan
-        result[1]["event_number"], result[3]["event_number"], result[7]["event_number"], result[8]["event_number"], result[9]["event_number"] = 1, 2, 4, 4, 19
-        result[0]["mean_column"], result[1]["mean_column"], result[3]["mean_column"], result[7]["mean_column"], result[8]["mean_column"], result[9]["mean_column"] = 1, 2, 3, 5, 6, 20
-        result[0]["mean_row"], result[1]["mean_row"], result[3]["mean_row"], result[7]["mean_row"], result[8]["mean_row"], result[9]["mean_row"] = 0, 0, 0, 0, 0, 0
-        result[0]["charge"], result[1]["charge"], result[3]["charge"], result[7]["charge"], result[8]["charge"], result[9]["charge"] = 0, 0, 0, 0, 0, 0
+        (result[1]["event_number"], result[3]["event_number"], result[7]["event_number"],
+         result[8]["event_number"], result[9]["event_number"]) = (1, 2, 4, 4, 19)
+
+        (result[0]["mean_column"], result[1]["mean_column"],
+         result[3]["mean_column"], result[7]["mean_column"],
+         result[8]["mean_column"], result[9]["mean_column"]) = (1, 2, 3, 5, 6, 20)
+
+        (result[0]["mean_row"], result[1]["mean_row"],
+         result[3]["mean_row"], result[7]["mean_row"],
+         result[8]["mean_row"], result[9]["mean_row"]) = (0, 0, 0, 0, 0, 0)
+
+        (result[0]["charge"], result[1]["charge"], result[3]["charge"],
+         result[7]["charge"], result[8]["charge"], result[9]["charge"]) = (0, 0, 0, 0, 0, 0)
 
+        # Create data
+        clusters = np.zeros(
+            (20, ),
+            dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
         for index, cluster in enumerate(clusters):
             cluster['mean_column'] = index + 1
             cluster["event_number"] = index
         clusters[3]["event_number"] = 2
         clusters[5]["event_number"] = 4
 
-        common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4], dtype=np.int64)
+        common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4],
+                                       dtype=np.int64)
 
-        data_equal = test_tools.nan_equal(first_array=analysis_utils.map_cluster(common_event_number, clusters),
-                                          second_array=result[:common_event_number.shape[0]])
+        data_equal = test_tools.nan_equal(
+            first_array=analysis_utils.map_cluster(
+                common_event_number, clusters),
+            second_array=result[:common_event_number.shape[0]])
         self.assertTrue(data_equal)
 
-    def test_analysis_utils_in1d_events(self):  # check compiled get_in1d_sorted function
-        event_numbers = np.array([[0, 0, 2, 2, 2, 4, 5, 5, 6, 7, 7, 7, 8], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.int64)
-        event_numbers_2 = np.array([1, 1, 1, 2, 2, 2, 4, 4, 4, 7], dtype=np.int64)
-        result = event_numbers[0][analysis_utils.in1d_events(event_numbers[0], event_numbers_2)]
+    def test_analysis_utils_in1d_events(self):
+        ''' Check compiled in1d_events function '''
+        event_numbers = np.array([[0, 0, 2, 2, 2, 4, 5, 5, 6, 7, 7, 7, 8],
+                                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
+                                 dtype=np.int64)
+        event_numbers_2 = np.array([1, 1, 1, 2, 2, 2, 4, 4, 4, 7],
+                                   dtype=np.int64)
+        result = event_numbers[0][analysis_utils.in1d_events(event_numbers[0],
+                                                             event_numbers_2)]
         self.assertListEqual([2, 2, 2, 4, 7, 7, 7], result.tolist())
 
-    def test_1d_index_histograming(self):  # check compiled hist_2D_index function
-        x = np.random.randint(0, 100, 100)
-        shape = (100, )
-        array_fast = analysis_utils.hist_1d_index(x, shape=shape)
-        array = np.histogram(x, bins=shape[0], range=(0, shape[0]))[0]
-        shape = (5, )  # shape that is too small for the indices to trigger exception
-        exception_ok = False
-        try:
-            array_fast = analysis_utils.hist_1d_index(x, shape=shape)
-        except IndexError:
-            exception_ok = True
-        except:  # other exception that should not occur
-            pass
-        self.assertTrue(exception_ok & np.all(array == array_fast))
-
-    def test_2d_index_histograming(self):  # check compiled hist_2D_index function
-        x, y = np.random.randint(0, 100, 100), np.random.randint(0, 100, 100)
-        shape = (100, 100)
-        array_fast = analysis_utils.hist_2d_index(x, y, shape=shape)
-        array = np.histogram2d(x, y, bins=shape, range=[[0, shape[0]], [0, shape[1]]])[0]
-        shape = (5, 200)  # shape that is too small for the indices to trigger exception
-        exception_ok = False
-        try:
-            array_fast = analysis_utils.hist_2d_index(x, y, shape=shape)
-        except IndexError:
-            exception_ok = True
-        except:  # other exception that should not occur
-            pass
-        self.assertTrue(exception_ok & np.all(array == array_fast))
-
-    def test_3d_index_histograming(self):  # check compiled hist_3D_index function
-        with tb.open_file(analysis_utils.get_data('fixtures/analysis_utils/hist_data.h5',
-                                                  output=os.path.join(testing_path, 'fixtures/analysis_utils/hist_data.h5')),
-                          mode="r") as in_file_h5:
-            xyz = in_file_h5.root.HistDataXYZ[:]
-            x, y, z = xyz[0], xyz[1], xyz[2]
-            shape = (100, 100, 100)
-            array_fast = analysis_utils.hist_3d_index(x, y, z, shape=shape)
-            array = np.histogramdd(np.column_stack((x, y, z)), bins=shape, range=[[0, shape[0] - 1], [0, shape[1] - 1], [0, shape[2] - 1]])[0]
-            shape = (50, 200, 200)  # shape that is too small for the indices to trigger exception
-            exception_ok = False
-            try:
-                array_fast = analysis_utils.hist_3d_index(x, y, z, shape=shape)
-            except IndexError:
-                exception_ok = True
-            except:  # other exception that should not occur
-                pass
-            self.assertTrue(exception_ok & np.all(array == array_fast))
+    def test_1d_index_histograming(self):
+        ''' Check jitted hist_1D_index exception '''
+
+        # Shape that is too small for the indices to trigger exception
+        x = np.linspace(0, 100, 100)
+        shape = (5, )
+        with self.assertRaises(IndexError):
+            analysis_utils.hist_1d_index(x, shape=shape)
+
+    @given(nps.arrays(unsigned_integer_dtypes(),
+                      shape=nps.array_shapes(max_dims=1, max_side=32000),
+                      elements=st.integers(0, 2 ** 16 - 1)))
+    def test_1d_index_hist_fuzzing(self, x):
+        # Set maximum shape from maximum value
+        shape = (np.max(x) + 1, )
+        # Cast to uint32 is needed since the value is
+        # sometimes upcast to int64 or float64
+        shape_numpy = ((shape[0]).astype(np.uint32), )
+
+        array_fast = analysis_utils.hist_1d_index(x, shape=shape_numpy)
+
+        
+        array = np.histogram(x.astype(np.uint32),
+                             bins=shape_numpy[0],
+                             range=(0, shape_numpy[0]))[0]
+        self.assertTrue(np.all(array == array_fast))
+
+    @given(nps.arrays(unsigned_integer_dtypes(),
+                      shape=(2, 32000),
+                      elements=st.integers(0, 2 ** 8)))
+    def test_2d_index_hist_fuzzing(self, arr):
+        # Set maximum shape from maximum value
+        x, y = arr[0, :], arr[1, :]
+        shape = (x.max() + 1, y.max() + 1)
+        # Cast to uint32 is needed since the values are
+        # sometimes upcast to int64 or float64
+        shape_numpy = ((shape[0]).astype(np.uint32),
+                       (shape[1]).astype(np.uint32))
+
+        array_fast = analysis_utils.hist_2d_index(x, y,
+                                                  shape=shape_numpy)
+
+        array = np.histogram2d(x, y, bins=shape,
+                               range=[[0, shape[0]], [0, shape[1]]])[0]
+        self.assertTrue(np.all(array == array_fast))
+
+    def test_2d_index_histograming(self):
+        ''' Check jitted hist_2D_index exception '''
+        x, y = np.linspace(0, 100, 100), np.linspace(0, 100, 100)
+
+        with self.assertRaises(IndexError):
+            analysis_utils.hist_2d_index(x, y, shape=(5, 200))
+        with self.assertRaises(IndexError):
+            analysis_utils.hist_2d_index(x, y, shape=(200, 5))
+
+    @given(nps.arrays(unsigned_integer_dtypes(),
+                      shape=(3, 32000),
+                      elements=st.integers(0, 2 ** 8)))
+    def test_3d_index_hist_fuzzing(self, arr):
+        ''' Fuzzing jitted hist_3D_index function '''
+        x, y, z = arr[0, :], arr[1, :], arr[2, :]
+        shape = (x.max() + 1, y.max() + 1, z.max() + 1)
+        # Cast to uint32 is needed since the values are
+        # sometimes upcast to int64 or float64
+        shape_numpy = ((shape[0]).astype(np.uint32),
+                       (shape[1]).astype(np.uint32),
+                       (shape[2]).astype(np.uint32))
+
+        array_fast = analysis_utils.hist_3d_index(x, y, z,
+                                                  shape=shape_numpy)
+
+        array = np.histogramdd(np.column_stack((x, y, z)), bins=shape,
+                               range=[[0, shape_numpy[0] - 1],
+                                      [0, shape_numpy[1] - 1],
+                                      [0, shape_numpy[2] - 1]])[0]
+        self.assertTrue(np.all(array == array_fast))
+
+    def test_3d_index_histograming(self):
+        ''' Check jitted hist_3D_index exceptions '''
+        # Shape that is too small for the indices to trigger exception
+        x = y = z = np.linspace(0, 100, 100)
+
+        with self.assertRaises(IndexError):
+            analysis_utils.hist_3d_index(x, y, z, shape=(200, 200, 99))
+
+        with self.assertRaises(IndexError):
+            analysis_utils.hist_3d_index(x, y, z, shape=(99, 200, 200))
+
+        with self.assertRaises(IndexError):
+            analysis_utils.hist_3d_index(x, y, z, shape=(200, 99, 200))
 
 if __name__ == '__main__':
     import logging
-    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - [%(levelname)-8s] (%(threadName)-10s) %(message)s")
+    logging.basicConfig(
+        level=logging.INFO, format="%(asctime)s - %(name)s - [%(levelname)-8s] (%(threadName)-10s) %(message)s")
     suite = unittest.TestLoader().loadTestsFromTestCase(TestAnalysisUtils)
     unittest.TextTestRunner(verbosity=2).run(suite)