From b627c89de89871480a9e137065f4ab98f6d5a4af Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 30 Mar 2026 11:56:05 -0700 Subject: [PATCH 1/2] Add NaN-skipping to focal_stats CUDA kernels (#1092) All focal_stats CUDA kernels (_focal_mean_cuda, _focal_sum_cuda, _focal_std_cuda, _focal_var_cuda, _focal_range_cuda, _focal_min_cuda, _focal_max_cuda) now skip NaN neighbors with `if v != v: continue`, matching the numpy path which uses np.nanmean/nansum/nanstd/etc. Previously, NaN propagated through arithmetic, giving different results on GPU vs CPU when input contained NaN. --- xrspatial/focal.py | 55 +++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 6e3783f2..e0074289 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -608,13 +608,13 @@ def _focal_min_cuda(data, kernel, out): if 0 <= ii < rows and 0 <= jj < cols: v = data[ii, jj] + if v != v: # NaN check + continue if (not found) or (v < m): m = v found = True - # With your mask containing the center, found should be True. - # But keep a safe fallback anyway. - out[i, j] = m if found else data[i, j] + out[i, j] = m if found else math.nan @cuda.jit @@ -636,20 +636,20 @@ def _focal_max_cuda(data, kernel, out): for h in range(kernel.shape[1]): w = kernel[k, h] if w == 0: - continue # mask says "ignore this neighbor" + continue ii = i + k - dr jj = j + h - dc if 0 <= ii < rows and 0 <= jj < cols: v = data[ii, jj] + if v != v: # NaN check + continue if (not found) or (v > m): m = v found = True - # With your mask containing the center (1), found should always be True. - # But keep this for safety. - out[i, j] = m if found else data[i, j] + out[i, j] = m if found else math.nan def _focal_range_cupy(data, kernel): @@ -684,6 +684,8 @@ def _focal_range_cuda(data, kernel, out): if 0 <= ii < rows and 0 <= jj < cols: v = data[ii, jj] + if v != v: # NaN check + continue if not found: mx = v mn = v @@ -694,7 +696,7 @@ def _focal_range_cuda(data, kernel, out): if v < mn: mn = v - out[i, j] = (mx - mn) if found else 0.0 + out[i, j] = (mx - mn) if found else math.nan @cuda.jit @@ -716,29 +718,29 @@ def _focal_std_cuda(data, kernel, out): for h in range(kernel.shape[1]): w = kernel[k, h] if w == 0: - continue # mask says ignore + continue ii = i + k - dr jj = j + h - dc if 0 <= ii < rows and 0 <= jj < cols: x = data[ii, jj] + if x != x: # NaN check + continue w_sum += w sum_wx += w * x sum_wx2 += w * x * x - # With your mask including the center, w_sum should be > 0. Guard anyway. if w_sum > 0.0: mean = sum_wx / w_sum var = (sum_wx2 / w_sum) - (mean * mean) - # Numerical safety (tiny negative due to floating point) if var < 0.0: var = 0.0 out[i, j] = math.sqrt(var) else: - out[i, j] = 0.0 + out[i, j] = math.nan @cuda.jit @@ -760,13 +762,15 @@ def _focal_var_cuda(data, kernel, out): for h in range(kernel.shape[1]): w = kernel[k, h] if w == 0: - continue # mask says ignore + continue ii = i + k - dr jj = j + h - dc if 0 <= ii < rows and 0 <= jj < cols: x = data[ii, jj] + if x != x: # NaN check + continue w_sum += w sum_wx += w * x sum_wx2 += w * x * x @@ -775,13 +779,12 @@ def _focal_var_cuda(data, kernel, out): mean = sum_wx / w_sum var = (sum_wx2 / w_sum) - (mean * mean) - # numerical guard for tiny negative due to float rounding if var < 0.0: var = 0.0 out[i, j] = var else: - out[i, j] = 0.0 + out[i, j] = math.nan @cuda.jit @@ -852,19 +855,24 @@ def _focal_sum_cuda(data, kernel, out): dc = kernel.shape[1] // 2 s = 0.0 + found = False for k in range(kernel.shape[0]): for h in range(kernel.shape[1]): w = kernel[k, h] if w == 0: - continue # mask says ignore + continue ii = i + k - dr jj = j + h - dc if 0 <= ii < rows and 0 <= jj < cols: - s += w * data[ii, jj] + v = data[ii, jj] + if v != v: # NaN check + continue + s += w * v + found = True - out[i, j] = s + out[i, j] = s if found else math.nan def _focal_stats_func_cupy(data, kernel, func=_focal_max_cuda): @@ -894,21 +902,22 @@ def _focal_mean_cuda(data, kernel, out): for h in range(kernel.shape[1]): w = kernel[k, h] if w == 0: - continue # mask says ignore + continue ii = i + k - dr jj = j + h - dc if 0 <= ii < rows and 0 <= jj < cols: - s += w * data[ii, jj] + v = data[ii, jj] + if v != v: # NaN check + continue + s += w * v w_sum += w - # With your mask including the center, w_sum should be > 0. - # Guard anyway to avoid divide-by-zero. if w_sum > 0.0: out[i, j] = s / w_sum else: - out[i, j] = data[i, j] + out[i, j] = math.nan def _focal_stats_cupy(agg, kernel, stats_funcs): From a5cb6c397463be2a899ace744898d7b2a0a69d8b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 30 Mar 2026 11:58:59 -0700 Subject: [PATCH 2/2] Add NaN tests for focal_stats CUDA kernels (#1092) - test_focal_stats_nan_handling_1092: verifies all 7 stats (mean, sum, min, max, std, var, range) skip NaN neighbors across all 4 backends. - test_focal_stats_all_nan_window_1092: all-NaN window gives NaN for mean/min/max and 0 for sum (matching numpy nansum behavior). - Fixed sum kernel to return 0 (not NaN) for all-NaN windows, matching numpy nansum semantics. --- xrspatial/focal.py | 4 +- xrspatial/tests/test_focal.py | 102 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 3 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index e0074289..ba5740e8 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -855,7 +855,6 @@ def _focal_sum_cuda(data, kernel, out): dc = kernel.shape[1] // 2 s = 0.0 - found = False for k in range(kernel.shape[0]): for h in range(kernel.shape[1]): w = kernel[k, h] @@ -870,9 +869,8 @@ def _focal_sum_cuda(data, kernel, out): if v != v: # NaN check continue s += w * v - found = True - out[i, j] = s if found else math.nan + out[i, j] = s # nansum: 0 when all NaN (matches numpy) def _focal_stats_func_cupy(data, kernel, func=_focal_max_cuda): diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 5517713d..e38c6fa7 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -505,6 +505,108 @@ def test_focal_stats_dask_cupy(): equal_nan=True, rtol=1e-4) +# --- focal_stats NaN handling (issue-1092) -------------------------------- + +@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy']) +def test_focal_stats_nan_handling_1092(backend): + """All backends should skip NaN neighbors, not propagate them. + + Regression test for #1092: CUDA kernels propagated NaN through + arithmetic instead of skipping. + """ + from xrspatial.tests.general_checks import has_cuda_and_cupy + if 'cupy' in backend and not has_cuda_and_cupy(): + pytest.skip("Requires CUDA and CuPy") + if 'dask' in backend and da is None: + pytest.skip("Requires Dask") + + data = np.array([ + [1.0, np.nan, 3.0], + [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0], + ]) + kernel = custom_kernel(np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])) + + agg = create_test_raster(data, backend=backend, chunks=(3, 3)) + result = focal_stats(agg, kernel, + stats_funcs=['mean', 'sum', 'min', 'max', 'std', 'var', 'range']) + + if hasattr(result.data, 'compute'): + result = result.compute() + + def _val(stat, r, c): + d = result.sel(stats=stat).data + if hasattr(d, 'get'): + d = d.get() + return float(np.asarray(d)[r, c]) + + # Center pixel (1,1): kernel hits [NaN, 4, 5, 6, 8] -> skip NaN -> [4,5,6,8] + center_vals = np.array([4.0, 5.0, 6.0, 8.0]) + atol = 1e-3 # float32 tolerance + + mean_val = _val('mean', 1, 1) + sum_val = _val('sum', 1, 1) + min_val = _val('min', 1, 1) + max_val = _val('max', 1, 1) + std_val = _val('std', 1, 1) + var_val = _val('var', 1, 1) + range_val = _val('range', 1, 1) + + assert abs(mean_val - np.nanmean(center_vals)) < atol, f"mean={mean_val}" + assert abs(sum_val - np.nansum(center_vals)) < atol, f"sum={sum_val}" + assert abs(min_val - np.nanmin(center_vals)) < atol, f"min={min_val}" + assert abs(max_val - np.nanmax(center_vals)) < atol, f"max={max_val}" + assert abs(std_val - np.nanstd(center_vals)) < atol, f"std={std_val}" + assert abs(var_val - np.nanvar(center_vals)) < atol, f"var={var_val}" + assert abs(range_val - (np.nanmax(center_vals) - np.nanmin(center_vals))) < atol, ( + f"range={range_val}" + ) + + # Top-left corner (0,0): kernel hits [NaN, 4, 1] (cross pattern) + # NaN is from data[0,1] (up direction is OOB, left is OOB) + # Wait: the cross kernel at (0,0) covers: + # up=(-1,0)=OOB, down=(1,0)=4, left=(0,-1)=OOB, right=(0,1)=NaN, center=(0,0)=1 + # So valid values = [1, 4], NaN is skipped + corner_vals = np.array([1.0, 4.0]) + mean_corner = _val('mean', 0, 0) + assert abs(mean_corner - np.nanmean(corner_vals)) < atol, ( + f"corner mean={mean_corner}" + ) + + +@pytest.mark.parametrize("backend", ['numpy', 'cupy']) +def test_focal_stats_all_nan_window_1092(backend): + """A pixel whose entire kernel window is NaN should produce NaN.""" + from xrspatial.tests.general_checks import has_cuda_and_cupy + if 'cupy' in backend and not has_cuda_and_cupy(): + pytest.skip("Requires CUDA and CuPy") + + data = np.array([ + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, 1.0], + ]) + kernel = custom_kernel(np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])) + + agg = create_test_raster(data, backend=backend) + result = focal_stats(agg, kernel, stats_funcs=['mean', 'sum', 'min', 'max']) + + if hasattr(result.data, 'compute'): + result = result.compute() + + def _val(stat, r, c): + d = result.sel(stats=stat).data + if hasattr(d, 'get'): + d = d.get() + return float(np.asarray(d)[r, c]) + + # Center pixel (1,1): kernel hits [NaN, NaN, NaN, NaN, NaN] -> all NaN + assert np.isnan(_val('mean', 1, 1)) + assert _val('sum', 1, 1) == 0.0 # nansum of all-NaN = 0 (numpy behavior) + assert np.isnan(_val('min', 1, 1)) + assert np.isnan(_val('max', 1, 1)) + + # --- focal variety (issue-1040) ------------------------------------------ def _variety_reference_data():