From f04ca26c899a2a19cd4d749741f4d4af5eda2054 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 22 Apr 2026 08:18:04 -0700 Subject: [PATCH] Fix cupy zonal_stats silently ignoring nodata_values=0 (#1227) `_stats_cupy` used `if nodata_values:` so passing 0 (a common sentinel) fell through the else branch and kept the zero cells in. Every other backend uses `is not None` and dropped them, so the same call gave different answers depending on where it ran. Switch to `is not None`. Adds a regression test that runs the same input through all four backends and asserts the results match, plus a zonal entry in the security-sweep state file. --- .claude/sweep-security-state.json | 7 ++++++ xrspatial/tests/test_zonal.py | 39 +++++++++++++++++++++++++++++++ xrspatial/zonal.py | 4 +++- 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index 1daad95f..dfc45825 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -26,6 +26,13 @@ "severity_max": "HIGH", "categories_found": [1, 2], "notes": "HIGH: unbounded out/written allocation in _run_numpy/_run_cupy driven by user-supplied width/height/resolution (no cap). MEDIUM (unfixed): _build_row_csr_numba total=row_ptr[height] is int32 and can wrap for very tall rasters with many long edges." + }, + "zonal": { + "last_inspected": "2026-04-22", + "issue": 1227, + "severity_max": "HIGH", + "categories_found": [1, 2, 6], + "notes": "HIGH (fixed #1227): _stats_cupy used `if nodata_values:` (truthy) so nodata_values=0 silently skipped the filter on the cupy backend, producing wrong stats vs every other backend. MEDIUM (unfixed): _strides uses np.int32 for stride indices — can wrap for arrays > ~2B elements in the numpy path. MEDIUM (unfixed): hypsometric_integral() skips _validate_raster on zones/values; _regions_numpy has no memory guard (numpy-only path, bounded by caller-allocated input). MEDIUM (unfixed): _stats_numpy return_type='xarray.DataArray' allocates np.full((n_stats, values.size)) with no guard." } } } diff --git a/xrspatial/tests/test_zonal.py b/xrspatial/tests/test_zonal.py index b81642f9..f790d2b6 100644 --- a/xrspatial/tests/test_zonal.py +++ b/xrspatial/tests/test_zonal.py @@ -801,6 +801,45 @@ def test_stats_nodata_wipes_zone(backend): check_results(backend, df_result, expected) +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +def test_stats_nodata_values_zero_across_backends_1227(backend): + """Regression: nodata_values=0 must be filtered on every backend. + + The cupy path previously used `if nodata_values:` (truthiness), so passing + nodata_values=0 silently skipped the filter and zeros were included in + the per-zone statistics. Every other backend used `is not None` and + dropped the zeros correctly — that divergence is what this test pins. + """ + if 'cupy' in backend and not has_cuda_and_cupy(): + pytest.skip("Requires CUDA and CuPy") + if 'dask' in backend and not dask_array_available(): + pytest.skip("Requires Dask") + + # Zone 1: one zero (should be dropped) and three 10s -> mean=10, count=3. + # Zone 2: one zero (should be dropped) and three 20s -> mean=20, count=3. + zones_data = np.array([[1, 1, 2, 2], + [1, 1, 2, 2]], dtype=float) + values_data = np.array([[0.0, 10.0, 0.0, 20.0], + [10.0, 10.0, 20.0, 20.0]]) + + zones = create_test_raster(zones_data, backend, chunks=(2, 2)) + values = create_test_raster(values_data, backend, chunks=(2, 2)) + + funcs = ['mean', 'sum', 'count'] + df_result = stats( + zones=zones, values=values, + stats_funcs=funcs, nodata_values=0, + ) + + expected = { + 'zone': [1, 2], + 'mean': [10.0, 20.0], + 'sum': [30.0, 60.0], + 'count': [3, 3], + } + check_results(backend, df_result, expected) + + @pytest.mark.skipif(not dask_array_available(), reason="Requires Dask") @pytest.mark.parametrize("backend", ['dask+numpy', 'dask+cupy']) def test_stats_zone_in_subset_of_blocks(backend): diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index 62558445..9db5e3d6 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -544,7 +544,9 @@ def _stats_cupy( values_by_zone = values[sorted_indices] # filter out values that are non-finite or values equal to nodata_values - if nodata_values: + # Note: use `is not None` instead of truthiness so that nodata_values=0 + # (a common sentinel) still triggers the filter, matching the numpy path. + if nodata_values is not None: filter_values = cupy.isfinite(values_by_zone) & ( values_by_zone != nodata_values) else: