From 0ddf978d3a882fcd318055073bc9767502fda394 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 22 Apr 2026 17:00:02 -0700 Subject: [PATCH] Reject integer-dtyped input in perlin() (#1232) perlin() writes float noise into the input buffer in place, then normalizes by ptp. With an integer buffer the float values cast to 0, ptp ends up 0, and the div-by-zero produces NaN/Inf that casts back to INT_MIN on every pixel -- a silently wrong result. Add an np.issubdtype(agg.dtype, np.floating) check in perlin() that raises ValueError, and a parametrized regression test covering int8 through uint32 plus a float64 sanity check. Also records the perlin sweep result in .claude/sweep-security-state.json, including the remaining MEDIUM follow-up (no zero-ptp guard in the normalization step). --- .claude/sweep-security-state.json | 7 +++++++ xrspatial/perlin.py | 10 ++++++++++ xrspatial/tests/test_perlin.py | 27 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index b49d6393..5d474478 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -40,6 +40,13 @@ "severity_max": "HIGH", "categories_found": [1], "notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance." + }, + "perlin": { + "last_inspected": "2026-04-22", + "issue": 1232, + "severity_max": "HIGH", + "categories_found": [6], + "notes": "HIGH (fixed #1232): perlin() accepted integer-dtyped DataArrays via _validate_raster, but all four backends write float noise into the input buffer in place, then normalize by ptp. With integer storage the float values cast to 0, ptp=0, and the div-by-zero produced NaN/Inf that cast back to INT_MIN on every pixel. Fixed by adding an np.issubdtype(agg.dtype, np.floating) check in perlin() that raises ValueError. MEDIUM (unfixed follow-up): _perlin_numpy/_perlin_cupy/_perlin_dask_numpy/_perlin_dask_cupy all divide by ptp/(max-min) with no zero guard, so degenerate inputs like freq=(0,0) still emit NaN through the normalization step. GPU kernels have bounds guards, shared memory is fixed-size 512 int32 (not user-influenced), cuda.syncthreads() is present after the cooperative load. No file I/O." } } } diff --git a/xrspatial/perlin.py b/xrspatial/perlin.py index d2fddae5..e263f02d 100644 --- a/xrspatial/perlin.py +++ b/xrspatial/perlin.py @@ -330,6 +330,16 @@ def perlin(agg: xr.DataArray, """ _validate_raster(agg, func_name='perlin', name='agg') + # perlin writes float noise into the raster in place, then normalizes + # by ptp. With an integer buffer the float values cast to 0, ptp is 0, + # and the normalization divides by zero, corrupting every pixel to + # INT_MIN. Reject non-float dtypes up front with a clear error. + if not np.issubdtype(agg.dtype, np.floating): + raise ValueError( + f"perlin(): `agg` must have a floating-point dtype " + f"(float32 or float64), got {agg.dtype}" + ) + mapper = ArrayTypeFunctionMapping( numpy_func=_perlin_numpy, cupy_func=_perlin_cupy, diff --git a/xrspatial/tests/test_perlin.py b/xrspatial/tests/test_perlin.py index 8e566d16..8942e645 100644 --- a/xrspatial/tests/test_perlin.py +++ b/xrspatial/tests/test_perlin.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import xarray as xr from xrspatial import perlin @@ -91,3 +92,29 @@ def test_perlin_dask_gpu(): perlin_cupy.data.get(), perlin_dask_cupy.data.compute().get(), rtol=1e-4, atol=1e-4, equal_nan=True ) + + +@pytest.mark.parametrize( + "dtype", + [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32], +) +def test_perlin_rejects_integer_dtype(dtype): + # Regression for issue #1232: integer-dtyped input silently produced + # INT_MIN everywhere because the float noise was written in place and + # then normalized by a zero ptp. We now raise ValueError instead. + data = np.zeros((20, 20), dtype=dtype) + raster = xr.DataArray(data, dims=['y', 'x']) + with pytest.raises(ValueError, match="floating-point dtype"): + perlin(raster) + + +def test_perlin_float64_input(): + # float64 should still work (not just float32). + data = np.zeros((20, 20), dtype=np.float64) + raster = xr.DataArray(data, dims=['y', 'x']) + result = perlin(raster) + assert result.dtype == np.float64 + assert np.isfinite(result.data).all() + # Normalized to [0, 1] + assert result.data.min() >= 0.0 + assert result.data.max() <= 1.0