From 0ddf978d3a882fcd318055073bc9767502fda394 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 22 Apr 2026 17:00:02 -0700
Subject: [PATCH] Reject integer-dtyped input in perlin() (#1232)

perlin() writes float noise into the input buffer in place, then
normalizes by ptp.  With an integer buffer the float values cast to 0,
ptp ends up 0, and the div-by-zero produces NaN/Inf that casts back to
INT_MIN on every pixel -- a silently wrong result.

Add an np.issubdtype(agg.dtype, np.floating) check in perlin() that
raises ValueError, plus a parametrized regression test covering
int8-int64 and uint8-uint32 and a float64 sanity check.

Also record the perlin sweep result in
.claude/sweep-security-state.json, including the remaining MEDIUM
follow-up (no zero-ptp guard in the normalization step).
---
 .claude/sweep-security-state.json |  7 +++++++
 xrspatial/perlin.py               | 10 ++++++++++
 xrspatial/tests/test_perlin.py    | 27 +++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json
index b49d6393..5d474478 100644
--- a/.claude/sweep-security-state.json
+++ b/.claude/sweep-security-state.json
@@ -40,6 +40,13 @@
       "severity_max": "HIGH",
       "categories_found": [1],
       "notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance."
+    },
+    "perlin": {
+      "last_inspected": "2026-04-22",
+      "issue": 1232,
+      "severity_max": "HIGH",
+      "categories_found": [6],
+      "notes": "HIGH (fixed #1232): perlin() accepted integer-dtyped DataArrays via _validate_raster, but all four backends write float noise into the input buffer in place, then normalize by ptp. With integer storage the float values cast to 0, ptp=0, and the div-by-zero produced NaN/Inf that cast back to INT_MIN on every pixel. Fixed by adding an np.issubdtype(agg.dtype, np.floating) check in perlin() that raises ValueError. MEDIUM (unfixed follow-up): _perlin_numpy/_perlin_cupy/_perlin_dask_numpy/_perlin_dask_cupy all divide by ptp/(max-min) with no zero guard, so degenerate inputs like freq=(0,0) still emit NaN through the normalization step. GPU kernels have bounds guards, shared memory is fixed-size 512 int32 (not user-influenced), cuda.syncthreads() is present after the cooperative load. No file I/O."
     }
   }
 }
diff --git a/xrspatial/perlin.py b/xrspatial/perlin.py
index d2fddae5..e263f02d 100644
--- a/xrspatial/perlin.py
+++ b/xrspatial/perlin.py
@@ -330,6 +330,16 @@ def perlin(agg: xr.DataArray,
     """
     _validate_raster(agg, func_name='perlin', name='agg')
 
+    # perlin writes float noise into the raster in place, then normalizes
+    # by ptp.  With an integer buffer the float values cast to 0, ptp is 0,
+    # and the normalization divides by zero, corrupting every pixel to
+    # INT_MIN.  Reject non-float dtypes up front with a clear error.
+    if not np.issubdtype(agg.dtype, np.floating):
+        raise ValueError(
+            f"perlin(): `agg` must have a floating-point dtype "
+            f"(float32 or float64), got {agg.dtype}"
+        )
+
     mapper = ArrayTypeFunctionMapping(
         numpy_func=_perlin_numpy,
         cupy_func=_perlin_cupy,
diff --git a/xrspatial/tests/test_perlin.py b/xrspatial/tests/test_perlin.py
index 8e566d16..8942e645 100644
--- a/xrspatial/tests/test_perlin.py
+++ b/xrspatial/tests/test_perlin.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 import xarray as xr
 
 from xrspatial import perlin
@@ -91,3 +92,29 @@ def test_perlin_dask_gpu():
         perlin_cupy.data.get(), perlin_dask_cupy.data.compute().get(),
         rtol=1e-4, atol=1e-4, equal_nan=True
     )
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32],
+)
+def test_perlin_rejects_integer_dtype(dtype):
+    # Regression for issue #1232: integer-dtyped input silently produced
+    # INT_MIN everywhere because the float noise was written in place and
+    # then normalized by a zero ptp.  We now raise ValueError instead.
+    data = np.zeros((20, 20), dtype=dtype)
+    raster = xr.DataArray(data, dims=['y', 'x'])
+    with pytest.raises(ValueError, match="floating-point dtype"):
+        perlin(raster)
+
+
+def test_perlin_float64_input():
+    # float64 should still work (not just float32).
+    data = np.zeros((20, 20), dtype=np.float64)
+    raster = xr.DataArray(data, dims=['y', 'x'])
+    result = perlin(raster)
+    assert result.dtype == np.float64
+    assert np.isfinite(result.data).all()
+    # Normalized to [0, 1]
+    assert result.data.min() >= 0.0
+    assert result.data.max() <= 1.0