diff --git a/CHANGELOG.md b/CHANGELOG.md index f5c2db8a..42bec8d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +* Added functions to get and assert the midpoint values of bins in a histogram (@nkorinek, #184) * Created tests for the autograde module (@nkorinek, #105) * Created tests for the vector module (@nkorinek, #209) * Created functions to test point geometries in VectorTester (@nkorinek, #176) diff --git a/examples/plot_histogram_testing.py b/examples/plot_histogram_testing.py deleted file mode 100644 index 454e406a..00000000 --- a/examples/plot_histogram_testing.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Testing Histograms -================== - -These are some examples of using the basic functionality of MatPlotCheck -to test histogram plots in Python. - -""" - -################################################################################ -# Setup -# ----- -# You will start by importing the required packages and plotting a histogram. - -import matplotlib.pyplot as plt -import matplotcheck.base as mpc -import matplotcheck.notebook as nb -import numpy as np - - -data = np.exp(np.arange(0, 5, 0.01)) - -fig, ax = plt.subplots() -ax.hist(data, bins=5, color="gold") - -plot_1_hold = nb.convert_axes(plt, which_axes="current") - -################################################################################ -# Testing the Histogram -# --------------------- -# Now you can make a PlotTester object and test the histogram. We'll test both -# the number of bins and the values of those bins. - -############################################################################### -# -# .. note:: -# Throughout this vignette, the term `bin value` is used to describe the -# number of datapoints that fall within a bin. 
In other words, a bin's value -# is equal to the height of the bar correspondign to that bin. For example, -# the value of the first bin in the above histogram is 341. Note that the -# height of the first bar is also 341. - -plot_tester_1 = mpc.PlotTester(plot_1_hold) - -plot_tester_1.assert_num_bins(5) - -expected_bin_values = [341, 68, 40, 28, 23] -plot_tester_1.assert_bin_values(expected_bin_values) -################################################################################ -# And we can also run some tests that will fail. -# -try: - plot_tester_1.assert_num_bins(6) -except AssertionError as message: - print("AssertionError:", message) - -try: - plot_tester_1.assert_bin_values([1, 4, 1, 3, 4]) -except AssertionError as message: - print("AssertionError:", message) - -################################################################################ -# Determining Expected Values -# --------------------------- -# With a histogram, you may not know the values you expect to find for each bin -# before you begin testing. (More simply, you probably know how you expect a -# histogram to look and how you expect it to be made. But you might not know -# the exact height of each bar in that histogram.) In this case, matplotcheck -# provides a method for extracting the bin values from an existing histogram: -# ``get_bin_values()``. -# -# To use this, you can create a histogram however you think it should be created -# (this will be called the expected histogram) and use it as a reference. Then -# you can extract the bin values from it (called the expected values). These -# expected values can be used to test whether another histogram (e.g. a -# student's histogram) also contains the expected values. -# -# For this example, you will start by creating a histogram that will serve as -# your expected histogram, and then extracting the expected values from it. To -# do this, you need to create a `PlotTester` object from it and use the -# ``get_bin_values()`` method. 
- -expected_data = np.sin(np.arange(0, 2 * np.pi, np.pi / 50)) - -fig, ax = plt.subplots() -ax.hist(expected_data, bins=8, color="gold") - -expected_plot_hold = nb.convert_axes(plt, which_axes="current") -plot_tester_expected = mpc.PlotTester(expected_plot_hold) -print(plot_tester_expected.get_bin_values()) - -################################################################################ -# Great! Now you know the bin values that you expect to see when you test a -# plot. -# -# Now you can create another histogram (our testing histogram) and check -# whether it matches the expected histogram (i.e. check wether its bin values -# match the expected bin values). - -# Create and plot the testing histogram -testing_data = np.sin(np.arange(2 * np.pi, 4 * np.pi, np.pi / 50)) -fig, ax = plt.subplots() -ax.hist(testing_data, bins=8, color="orange") -testing_plot_hold = nb.convert_axes(plt, which_axes="current") - -# Testing the histogram against the expected bin values -plot_tester_testing = mpc.PlotTester(testing_plot_hold) -plot_tester_testing.assert_bin_values( - [23.0, 10.0, 8.0, 9.0, 9.0, 8.0, 10.0, 23.0] -) - -################################################################################ -# Since ``assert_bin_values()`` did not raise an ``AssertionError``, you know -# that the test passed. This means the testing histogram had the same bin values -# as the expected histogram. - -############################################################################### -# -# .. note:: -# In this example, you have created the expected histogram and the testing -# histogram in the same file. Normally you would create the expected histogram -# in one location, extract the expected bin values from it, and use those to -# test the testing histogram in another location (e.g. within a student's -# homework assignment.) 
- - -################################################################################ -# Testing with Tolerances -# ----------------------- -# In some cases, you might want to run a test that doesn't require the bin -# values to match exactly. For this, you can use the ``tolerance`` argument of -# the ``assert_bin_values()`` method. -# -# You will start by making two histograms with slightly different data and -# storing the plots with ``nb.convert_axes()``. The gold plot will serve as the -# expected plot, and the orange plot will serve as the testing plot. - -expected_data = 0.1 * np.power(np.arange(0, 10, 0.1), 2) -bins = np.arange(0, 10, 1) - -fig1, ax2 = plt.subplots() -ax2.hist(expected_data, color="gold", bins=bins) - -expected_plot_2_hold = nb.convert_axes(plt, which_axes="current") - -################################################################################ - -test_data = 0.1995 * np.power(np.arange(0, 10, 0.1), 1.7) - -fig2, ax2 = plt.subplots() -ax2.hist(test_data, color="orange", bins=bins) - -testing_plot_2_hold = nb.convert_axes(plt, which_axes="current") - -################################################################################ -# Now you will create a `PlotTester` object for each plot. This allows you to -# extract the expected bin values from the expected plot and allows you to -# test the testing plot. - -plot_tester_expected_2 = mpc.PlotTester(expected_plot_2_hold) -bins_expected_2 = plot_tester_expected_2.get_bin_values() - -plot_tester_testing_2 = mpc.PlotTester(testing_plot_2_hold) - -################################################################################ -# You'll notice that the test (orange) plot differs somewhat from the -# expected (gold) plot, but still has a similar shape and similar bin -# values. -# -# If you test it without the ``tolerance`` argument, the assertion will fail. 
- -try: - plot_tester_testing_2.assert_bin_values(bins_expected_2) -except AssertionError as message: - print("AssertionError:", message) - -################################################################################ -# However, if you set a tolerance, the assertion can pass. Here you will test it -# with ``tolerance=0.2``. - -plot_tester_testing_2.assert_bin_values(bins_expected_2, tolerance=0.2) - -################################################################################ -# Because no ``AssertionError`` is raised, you know that the test passed with -# a tolerance of 0.2. However, the test will not pass with a tolerance that is -# too small; the test will fail with ``tolerance=0.1``. - -try: - plot_tester_testing_2.assert_bin_values(bins_expected_2, tolerance=0.1) -except AssertionError as message: - print("AssertionError:", message) - -############################################################################### -# -# .. note:: -# When using tolerances, the ``tolerance`` argument is taken as a relative -# tolerance. For more information, see the documentation for the -# ``base.assert_bin_heights()`` method. diff --git a/examples/plot_test_histogram.py b/examples/plot_test_histogram.py new file mode 100644 index 00000000..5db81094 --- /dev/null +++ b/examples/plot_test_histogram.py @@ -0,0 +1,261 @@ +""" +Test Histogram Plots with Matplotcheck +====================================== + +Below you will find some examples of how to use MatPlotCheck +to test histogram plots created with Matplotlib in Python. + +""" + +############################################################################### +# Setup +# ----- +# You will start by importing the required packages and plotting a histogram. +# Once you have created your plot, you will create a Matplotcheck +# ``PlotTester`` object by providing the Matplotlib axis object to +# ``PlotTester``.
+ +import matplotlib.pyplot as plt +import matplotcheck.base as mpc +import numpy as np + + +data = np.exp(np.arange(0, 5, 0.01)) + +fig, ax = plt.subplots() +ax.hist(data, bins=5, color="gold") + +# Create a Matplotcheck PlotTester object +plot_tester_1 = mpc.PlotTester(ax) + +############################################################################### +# Test a Histogram Plot +# --------------------- +# Once you have created a PlotTester object, you are ready to test various +# parts of your plot. Below, you test both +# the number of bins and the values associated with those bins. + +############################################################################### +# +# .. note:: +# Throughout this vignette, the term `bin value` is used to describe the +# number of datapoints that fall within a bin. In other words, a bin's value +# is equal to the height of the bar corresponding to that bin. For example, +# the value of the first bin in the above histogram is 341. Note that the +# height of the first bar is also 341. + +# Test that the histogram plot has 5 bins +plot_tester_1.assert_num_bins(5) + +# Test that the histogram bin values (the height of each bin) is as expected +expected_bin_values = [341, 68, 40, 28, 23] +plot_tester_1.assert_bin_values(expected_bin_values) + +############################################################################### +# And you can also run some tests that will fail. +# +try: + plot_tester_1.assert_num_bins(6) +except AssertionError as message: + print("AssertionError:", message) + +try: + plot_tester_1.assert_bin_values([1, 4, 1, 3, 4]) +except AssertionError as message: + print("AssertionError:", message) + +############################################################################### +# Determining Expected Values +# --------------------------- +# You can use the MatPlotCheck ``get_bin_values()`` method to extract the bin +# values that are expected for a plot. 
This is helpful if you are using a tool +# like nbgrader to create the expected plot outcomes in a homework +# assignment. +# +# To extract bin values from an expected plot you first create the expected +# histogram plot that you will use to grade your assignment (or that you expect +# as an outcome from a test). Next, you create a PlotTester object from that +# plot. Finally, you call the ``get_bin_values()`` method to grab the expected +# bin values from that plot. +# +# The steps outlined above are implemented below. + +expected_data = np.sin(np.arange(0, 2 * np.pi, np.pi / 50)) + +# Create the expected plot +fig, ax = plt.subplots() +ax.hist(expected_data, bins=8, color="gold") + +# Create a Matplotcheck PlotTester object from the axis object +plot_tester_expected = mpc.PlotTester(ax) +# Get bin values from the expected plot +print(plot_tester_expected.get_bin_values()) + +############################################################################### +# This example assumes that you are creating tests for a student +# assignment. Once you have created the PlotTester object for the expected +# plot (this is the answer to the assignment that you expect the student to +# come to), +# you can then test the student plot to see if it matches expected bin values. +# Below, another plot is created that represents the student submitted plot. + +# Create and plot the student submitted histogram +data = np.sin(np.arange(2 * np.pi, 4 * np.pi, np.pi / 50)) +fig, ax = plt.subplots() +ax.hist(data, bins=8, color="orange") + +# Test the student submitted histogram bin values against the expected +# bin values (the correct answer to the assigned plot) +plot_tester_testing = mpc.PlotTester(ax) +plot_tester_testing.assert_bin_values( + [23.0, 10.0, 8.0, 9.0, 9.0, 8.0, 10.0, 23.0] +) + +############################################################################### +# Above, ``assert_bin_values()`` did not raise an ``AssertionError``.
This +# means that the test passed and the student submitted plot has the correct +# histogram bins. +# + +############################################################################### +# +# .. note:: +# In this example, you created the expected histogram (the homework answer) +# and the student submitted histogram in the same file. +# + +############################################################################### +# Testing with Tolerances +# ----------------------- +# In some cases, you might want to run a test that doesn't require the bin +# values to match exactly. For example, it might be ok if the values are +# a few tenths off. To allow for some "wiggle room" in the expected answer, +# you can use the ``tolerance`` parameter of the ``assert_bin_values()`` +# method. +# +# You will start by making two histograms with slightly different data. The +# gold plot will serve as the expected plot, and the orange plot will serve as +# the testing plot. +# +# You will then create a `PlotTester` object for each plot. This allows you to +# extract the expected bin values from the expected plot and use those values +# to test the testing plot.
+ +expected_data = 0.1 * np.power(np.arange(0, 10, 0.1), 2) +bins = np.arange(0, 10, 1) + +fig1, ax1 = plt.subplots() +ax1.hist(expected_data, color="gold", bins=bins) + +# Create plot tester object +plot_tester_expected_1 = mpc.PlotTester(ax1) +# Get expected bin values +bins_expected_1 = plot_tester_expected_1.get_bin_values() + +############################################################################### + +test_data = 0.1995 * np.power(np.arange(0, 10, 0.1), 1.7) +fig2, ax2 = plt.subplots() +ax2.hist(test_data, color="orange", bins=bins) +# Create plot tester object +plot_tester_testing_2 = mpc.PlotTester(ax2) + +############################################################################### +# You'll notice that the test (orange) plot differs somewhat from the +# expected (gold) plot, but still has a similar shape and similar bin +# values. +# +# If you test it without the ``tolerance`` argument, the assertion will fail. + +try: + plot_tester_testing_2.assert_bin_values(bins_expected_1) +except AssertionError as message: + print("AssertionError:", message) + +############################################################################### +# However, if you set a tolerance, the assertion can pass. Here you will test +# it with ``tolerance=0.2``. + +plot_tester_testing_2.assert_bin_values(bins_expected_1, tolerance=0.2) + +############################################################################### +# Because no ``AssertionError`` is raised, you know that the test passed with +# a tolerance of 0.2. However, the test will not pass with a tolerance that is +# too small; the test will fail with ``tolerance=0.1``. + +try: + plot_tester_testing_2.assert_bin_values(bins_expected_1, tolerance=0.1) +except AssertionError as message: + print("AssertionError:", message) + +############################################################################### +# +# .. note:: +# When using tolerances, the ``tolerance`` argument is taken as a relative +# tolerance. 
For more information, see the documentation for the +# ``base.assert_bin_values()`` method. + +############################################################################### +# Test Histogram Midpoints +# ------------------------ +# So far, you have tested the histogram values as well as the number of bins +# the histogram has. It may also be useful to test that the data bins cover +# the range of values that they were expected to. In order to do this, you can +# test the midpoints of each bin to ensure that the data covered by each +# bin is as expected. This is tested very similarly to the bin values. +# Simply provide ``assert_bin_midpoints()`` with a list of the expected +# midpoints, and it will check whether they are accurate. In order to obtain +# the midpoints in a PlotTester object, you can use ``get_bin_midpoints()``, +# much like ``get_bin_values()``. +# +# For this example, you will plot the sine data from the bin values example +# again and create a plot tester object from that histogram. + +fig, ax = plt.subplots() +ax.hist(data, bins=8, color="gold") + +# If you were running this in a notebook, the commented out line below would +# store the matplotlib object. However, in this example, you can just grab the +# axes object directly. + +# midpoints_plot_hold = nb.convert_axes(plt, which_axes="current") + +plot_tester_expected_3 = mpc.PlotTester(ax) +print(plot_tester_expected_3.get_bin_midpoints()) + +############################################################################### +# You got the values from the plot tester object! As you can see, the values +# that were collected are the midpoints for the values each histogram bin +# covers. Now you can confirm that they are indeed correct with an +# assertion test.
+ +try: + plot_tester_expected_3.assert_bin_midpoints( + [-0.875, -0.625, -0.375, -0.125, 0.125, 0.375, 0.625, 0.875] + ) +except AssertionError as message: + print("AssertionError:", message) + +############################################################################### +# Here you can see that this will fail when given incorrect values. + +try: + plot_tester_expected_3.assert_bin_midpoints( + [-0.75, -0.5, -0.25, -0, 0.25, 0.5, 0.75, 1] + ) +except AssertionError as message: + print("AssertionError:", message) + +############################################################################### +# +# .. note:: +# Keep in mind this test is for the midpoints of the range that each bin +# covers. So if a bin covers all data that's in between 0 and 1, then the +# value given for that bin will be 0.5, not 0 or 1. + + +# .. note:: +# If you are working on tests for Jupyter notebooks, you can call the +# line below to capture the student cell in a notebook. Then you can +# use that object for testing. +# testing_plot_2_hold = nb.convert_axes(plt, which_axes="current").
diff --git a/matplotcheck/base.py b/matplotcheck/base.py index 29be8d9b..6bdcc12c 100644 --- a/matplotcheck/base.py +++ b/matplotcheck/base.py @@ -1206,10 +1206,22 @@ def get_bin_values(self): Int : The number of bins in the histogram""" - bin_values = self.get_xy(xtime=False)["y"].tolist() + bin_values = self.get_xy()["y"].tolist() return bin_values + def get_bin_midpoints(self): + """Returns the midpoint value of each bin in a histogram + + Returns + ------- + list : + The x value (midpoint) of each bin, as read from ``get_xy()``""" + + bin_midpoints = self.get_xy()["x"].tolist() + + return bin_midpoints + def assert_bin_values( self, bin_values, @@ -1272,3 +1284,44 @@ def assert_bin_values( ) except AssertionError: raise AssertionError(message) + + def assert_bin_midpoints( + self, + bin_midpoints, + message="Did not find expected bin midpoints in plot", + ): + """ + Asserts that the middle values of histogram bins match `bin_midpoints`. + + Parameters + ---------- + bin_midpoints : list + A list of numbers representing the expected middles of bin values + covered by each consecutive bin (i.e. the midpoint of the bars in + the histogram). + message : string + The error message to be displayed if the bin midpoint values do + not match `bin_midpoints` + + Raises + ------ + AssertionError + if the midpoints of histogram bins do not match `bin_midpoints` + """ + + plot_bin_midpoints = self.get_bin_midpoints() + + if not isinstance(bin_midpoints, list): + raise ValueError( + "Need to submit a list for expected bin midpoints."
+ ) + + if len(plot_bin_midpoints) != len(bin_midpoints): + raise ValueError("Bin midpoints lists lengths do not match.") + + try: + np.testing.assert_array_max_ulp( + np.array(plot_bin_midpoints), np.array(bin_midpoints) + ) + except AssertionError: + raise AssertionError(message) diff --git a/matplotcheck/tests/test_base_data.py b/matplotcheck/tests/test_base_data.py index a6416806..a9cea54d 100644 --- a/matplotcheck/tests/test_base_data.py +++ b/matplotcheck/tests/test_base_data.py @@ -375,3 +375,80 @@ def test_assert_bin_values_tolerance_fails(pt_hist_overlapping): pt_hist_overlapping.assert_bin_values(bin_values, tolerance=0.09) plt.close() + + +def test_assert_bin_midpoints_pass(pt_hist): + """Test that bin midpoints are correct""" + bins = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5] + pt_hist.assert_bin_midpoints(bins) + + plt.close() + + +def test_assert_bin_midpoints_fail(pt_hist): + """Test that bin midpoints fail when incorrect""" + bins = [2, 3, 4, 5, 6, 7] + with pytest.raises(AssertionError, match="Did not find expected bin midp"): + pt_hist.assert_bin_midpoints(bins) + + plt.close() + + +def test_assert_bin_midpoints_fails_wrong_type(pt_hist): + """Test that bin midpoints fails when not handed a list""" + bins = (2.5, 3.5, 4.5, 5.5, 6.5, 7.5) + with pytest.raises(ValueError, match="Need to submit a list for expected"): + pt_hist.assert_bin_midpoints(bins) + + plt.close() + + +def test_assert_bin_midpoints_fails_wrong_length(pt_hist): + """Test that bin midpoints fails when handed a list with the wrong + length""" + bins = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8] + with pytest.raises(ValueError, match="Bin midpoints lists lengths do not"): + pt_hist.assert_bin_midpoints(bins) + + plt.close() + + +def test_assert_bin_midpoints_fail_custom_message(pt_hist): + """Test that correct error message is thrown when bin midpoints fail""" + bins = [2, 3, 4, 5, 6, 7] + with pytest.raises(AssertionError, match="Test Message"): + pt_hist.assert_bin_midpoints(bins, message="Test
Message") + + plt.close() + + +def test_assert_bin_midpoints_overlap_pass(pt_hist_overlapping): + """Test that bin midpoints are correct with overlapping histograms""" + bins = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5] + pt_hist_overlapping.assert_bin_midpoints(bins) + + plt.close() + + +def test_assert_bin_midpoints_overlap_fail(pt_hist_overlapping): + """Test that bin midpoints fail with overlapping histograms when + incorrect""" + bins = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7] + with pytest.raises( + AssertionError, match="Did not find expected bin midpo" + ): + pt_hist_overlapping.assert_bin_midpoints(bins) + + plt.close() + + +def test_assert_bin_midpoints_overlap_length_fail(pt_hist_overlapping): + """Test that bin midpoints fail with overlapping histograms when + incorrect length""" + bins = [2.5, 3.5, 4.5, 5.5, 6.5, 7.5] + with pytest.raises( + ValueError, match="Bin midpoints lists lengths do not match" + ): + pt_hist_overlapping.assert_bin_midpoints(bins) + + plt.close()