From d03610e27d6caedd1afc4e1e49c5b40565b36ffc Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 07:06:12 -0700 Subject: [PATCH 1/4] sweep-security state: geotiff pass 17 (2026-05-13) MEDIUM Cat 1 (JPEG bomb pre-check gap) --- .claude/sweep-security-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-security-state.csv b/.claude/sweep-security-state.csv index bfe45a06..d45abd48 100644 --- a/.claude/sweep-security-state.csv +++ b/.claude/sweep-security-state.csv @@ -18,7 +18,7 @@ fire,2026-04-25,,,,,"Clean. Despite the module's size hint, fire.py is purely pe flood,2026-05-03,1437,MEDIUM,3,,Re-audit 2026-05-03. MEDIUM Cat 3 fixed in PR #1438 (travel_time and flood_depth_vegetation now validate mannings_n DataArray values are finite and strictly positive via _validate_mannings_n_dataarray helper). No remaining unfixed findings. Other categories clean: every allocation is same-shape as input; no flat index math; NaN propagation explicit in every backend; tan_slope clamped by _TAN_MIN; no CUDA kernels; no file I/O; every public API calls _validate_raster on DataArray inputs. focal,2026-04-27,1284,HIGH,1,,"HIGH (fixed PR #1286): apply(), focal_stats(), and hotspots() accepted unbounded user-supplied kernels via custom_kernel(), which only checks shape parity. The kernel-size guard from #1241 (_check_kernel_memory) only ran inside circle_kernel/annulus_kernel, so a (50001, 50001) custom kernel on a 10x10 raster allocated ~10 GB on the kernel itself plus a much larger padded raster before any work -- same shape as the bilateral DoS in #1236. Fixed by adding _check_kernel_vs_raster_memory in focal.py and wiring it into apply(), focal_stats(), and hotspots() after custom_kernel() validation. All 134 focal tests + 19 bilateral tests pass. 
No other findings: 10 CUDA kernels all have proper bounds + stencil guards; _validate_raster called on every public entry point; hotspots already raises ZeroDivisionError on constant-value rasters; _focal_variety_cuda uses a fixed-size local buffer (silent truncation but bounded); _focal_std_cuda/_focal_var_cuda clamp the catastrophic-cancellation case via if var < 0.0: var = 0.0; no file I/O." geodesic,2026-04-27,1283,HIGH,1,,"HIGH (fixed PR #1285): slope(method='geodesic') and aspect(method='geodesic') stack a (3, H, W) float64 array (data, lat, lon) before dispatch with no memory check. A large lat/lon-tagged raster passed to either function would OOM. Fixed by adding _check_geodesic_memory(rows, cols) in xrspatial/geodesic.py (mirrors morphology._check_kernel_memory): budgets 56 bytes/cell (24 stacked float64 + 4 float32 output + 24 padded copy + slack) and raises MemoryError when > 50% of available RAM; called from slope.py and aspect.py inside the geodesic branch before dispatch. No other findings: 6 CUDA kernels all have bounds guards (e.g. _run_gpu_geodesic_aspect at geodesic.py:395), custom 16x16 thread blocks avoid register spill, no shared memory, _validate_raster runs upstream in slope/aspect, all backends cast to float32, slope_mag < 1e-7 flat threshold prevents arctan2 NaN propagation, curvature correction uses hardcoded WGS84 R." -geotiff,2026-05-12,1737,HIGH,1,,"Re-audit pass 16 2026-05-12 (deep-sweep p3). NEW HIGH (Cat 1, fixed PR pending against #1737): VRT _resample_nearest allocated (dr.y_size, dr.x_size) before the clip was taken. A crafted can declare values orders of magnitude larger than the VRT's rasterXSize/rasterYSize; the output buffer was bounded by _check_dimensions but the resample intermediate was not. Tracing: a 10x10 source with DstRect 50000x50000 on a 100x100 VRT extent allocated ~2.5 GB inside np.repeat. Fixed by checking dr.x_size * dr.y_size against max_pixels in _vrt.py:read_vrt() before _resample_nearest runs. 
Mirrors the _check_dimensions pattern from _reader.py. Six new tests in test_vrt_dstrect_resample_cap_1737.py cover the huge-X, huge-Y, legitimate, max_pixels override, at-cap, and negative cases. All 201 existing VRT tests still pass. Other categories verified clean (no new findings): Cat 1 (allocations): _check_dimensions covers the public window / HTTP / dask paths; MAX_TILE_BYTES_DEFAULT (256 MiB) caps per-tile / per-strip compressed bytes locally and over HTTP (PR #1668); LERC blob-header pre-check, JP2K SIZ pre-check, deflate/zstd/lz4/packbits caps still in place; Cat 2 (overflow): _check_dimensions catches before alloc; int64 in CUDA tile offsets; Cat 3 (NaN logic): NaN-aware paths via #1597, #1630; Cat 4 (GPU bounds): all CUDA kernels (_byte_swap_lanes_kernel, _lzw_decode_tiles_kernel, _inflate_tiles_kernel, _predictor_decode_kernel_u8/u16/u32/u64, _fp_predictor_decode_kernel, _assemble_tiles_kernel, _extract_tiles_kernel, _predictor_encode_kernel_u8/u16/u32/u64, _fp_predictor_encode_kernel) have bounds guards; shared memory sizes fixed; Cat 5 (path): _MmapCache realpath, VRT path containment #1671 with XRSPATIAL_VRT_ALLOWED_ROOTS opt-in; tempfile.mkstemp + os.replace for writer; SSRF defenses for _HTTPSource via #1664; Cat 6 (dtype): _validate_dtype_cast in __init__.py; _NATIVE_ORDER byte-swap. XML payloads gated via _safe_xml.safe_fromstring with DOCTYPE rejection (#1579). _compression.py uses tempfile.mkstemp without dir= so the JP2K temp file lands in the system tmpdir (TMPDIR / TEMP), which is the documented safe default. No new findings beyond #1737." +geotiff,2026-05-13,,MEDIUM,1,,"Re-audit pass 17 2026-05-13 (deep-sweep s2). 
NEW MEDIUM (Cat 1): jpeg_decompress (_compression.py:1042-1066) hands attacker-controlled JPEG bytes to Pillow without consulting the declared tile width/height/samples; a tile-size mismatch lets a small JPEG payload allocate up to Pillow's MAX_IMAGE_PIXELS*2 (~178M pixels, ~500 MB RGB) before the downstream chunk.size != expected check fires. Asymmetric with the JP2K SIZ pre-check and LERC blob-info pre-check. Pillow's default DecompressionBombError is a partial guard so severity is MEDIUM. Other categories verified clean: Cat 2-6 same coverage as pass 16 audit; JPEG2000 / LERC / deflate / zstd / lz4 / packbits / LZW caps still in place; VRT _resample_nearest DstRect cap (#1737) merged; VRT path containment + DOCTYPE rejection in _safe_xml; CUDA kernels have bounds guards; mmap cache uses realpath; SSRF defenses on _HTTPSource." glcm,2026-04-24,1257,HIGH,1,,"HIGH (fixed #1257): glcm_texture() validated window_size only as >= 3 and distance only as >= 1, with no upper bound on either. _glcm_numba_kernel iterates range(r-half, r+half+1) for every pixel, so window_size=1_000_001 on a 10x10 raster ran ~10^14 loop iterations with all neighbors failing the interior bounds check (CPU DoS). On the dask backends depth = window_size // 2 + distance drove map_overlap padding, so a huge window also caused oversize per-chunk allocations (memory DoS). Fixed by adding max_val caps in the public entrypoint: window_size <= max(3, min(rows, cols)) and distance <= max(1, window_size // 2). One cap covers every backend because cupy and dask+cupy call through to the CPU kernel after cupy.asnumpy. No other HIGH findings: levels is already capped at 256 so the per-pixel np.zeros((levels, levels)) matrix in the kernel is bounded to 512 KB. No CUDA kernels. No file I/O. Quantization clips to [0, levels-1] before the kernel and NaN maps to -1 which the kernel filters with i_val >= 0. Entropy log(p) and correlation p / (std_i * std_j) are both guarded. 
All four backends use _validate_raster and cast to float64 before quantizing. MEDIUM (unfixed, Cat 1): the per-pixel np.zeros((levels, levels)) allocation inside the hot loop is a perf issue (levels=256 -> 512 KB alloc+free per pixel) but not a security issue because levels is bounded. Could be hoisted out of the loop or replaced with an in-place clear, but that is an efficiency concern, not security." gpu_rtx,2026-04-29,1308,HIGH,1,,"HIGH (fixed #1308 / PR #1310): hillshade_rtx (gpu_rtx/hillshade.py:184) and viewshed_gpu (gpu_rtx/viewshed.py:269) allocated cupy device buffers sized by raster shape with no memory check. create_triangulation (mesh_utils.py:23-24) adds verts (12 B/px) + triangles (24 B/px) = 36 B/px; hillshade_rtx adds d_rays(32) + d_hits(16) + d_aux(12) + d_output(4) = 64 B/px (100 B/px total); viewshed_gpu adds d_rays(32) + d_hits(16) + d_visgrid(4) + d_vsrays(32) = 84 B/px (120 B/px total). A 30000x30000 raster asked for 90-108 GB of VRAM before cupy surfaced an opaque allocator error. Fixed by adding gpu_rtx/_memory.py with _available_gpu_memory_bytes() and _check_gpu_memory(func_name, h, w) helpers (cost_distance #1262 / sky_view_factor #1299 pattern, 120 B/px budget covers worst case, raises MemoryError when required > 50% of free VRAM, skips silently when memGetInfo() unavailable). Wired into both entry points after the cupy.ndarray type check and before create_triangulation. 9 new tests in test_gpu_rtx_memory.py (5 helper-unit + 4 end-to-end gated on has_rtx). All 81 existing hillshade/viewshed tests still pass. Cat 4 clean: all CUDA kernels (hillshade.py:25/62/106, viewshed.py:32/74/116, mesh_utils.py:50) have bounds guards; no shared memory, no syncthreads needed. MEDIUM not fixed (Cat 6): hillshade_rtx and viewshed_gpu do not call _validate_raster directly but parent hillshade() (hillshade.py:252) and viewshed() (viewshed.py:1707) already validate, so input validation runs before the gpu_rtx entry point - defense-in-depth, not exploitable. 
MEDIUM not fixed (Cat 2): mesh_utils.py:64-68 cast mesh_map_index to int32 in the triangle index buffer; overflows at H*W > 2.1B vertices (~46341x46341+) but the new memory guard rejects rasters that large first - documentation/clarity item rather than exploitable. MEDIUM not fixed (Cat 3): mesh_utils.py:19 scale = maxDim / maxH divides by zero on an all-zero raster, propagating inf/NaN into mesh vertex z-coords; separate follow-up. LOW not fixed (Cat 5): mesh_utils.write() opens user-supplied path without canonicalization but its only call site (mesh_utils.py:38-39) sits behind if False: in create_triangulation, not reachable in production." hillshade,2026-04-27,,,,,"Clean. Cat 1: only allocation is the output np.empty(data.shape) at line 32 (cupy at line 165) and a _pad_array with hardcoded depth=1 (line 62) -- bounded by caller, no user-controlled amplifier. Azimuth/altitude are scalars and don't drive size. Cat 2: numba kernel uses range(1, rows-1) with simple (y, x) indexing; numba range loops promote to int64. Cat 3: math.sqrt(1.0 + xx_plus_yy) is always >= 1.0 (no neg sqrt, no div-by-zero); NaN elevation propagates correctly through dz_dx/dz_dy -> shaded -> output (the shaded < 0.0 / shaded > 1.0 clamps don't fire on NaN). Azimuth validated to [0, 360], altitude to [0, 90]. Cat 4: _gpu_calc_numba (line 107) guards both grid bounds and 3x3 stencil reads via i > 0 and i < shape[0]-1 and j > 0 and j < shape[1]-1; no shared memory. Cat 5: no file I/O. Cat 6: hillshade() calls _validate_raster (line 252) and _validate_scalar for both azimuth (253) and angle_altitude (254); all four backend paths cast to float32; tests parametrize int32/int64/float32/float64." 
From d87e7806b4e551d2d0760e5b0d53d6cefc6da24a Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 07:10:19 -0700 Subject: [PATCH 2/4] geotiff: pre-decode bomb cap for JPEG codec (#1792) jpeg_decompress accepted width/height/samples but never consulted them before letting Pillow allocate the decoded buffer. A crafted TIFF could declare a small tile (256x256, ~196 KiB expected) while shipping a JPEG payload whose SOF marker declared a much larger image, allocating up to ~500 MB per tile before Pillow's own DecompressionBombError fires at 178M pixels. Mirror the JP2K SIZ pre-check (#1625) and LERC blob-info pre-check: parse the JPEG SOF marker without decoding pixels, and reject when declared output exceeds expected * 1.05 + 1 bytes. Default kwargs (width=0, height=0) disable the cap so direct callers and round-trip tests keep working. Tests cover (1) a SOF-rewritten JPEG raising on the bomb cap, (2) a legitimate JPEG decoding normally, (3) the no-cap fallback for direct callers, and (4) a JPEG without a parseable SOF deferring to Pillow. --- xrspatial/geotiff/_compression.py | 116 +++++++++++++++ .../geotiff/tests/test_decompression_caps.py | 134 ++++++++++++++++++ 2 files changed, 250 insertions(+) diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index 3be9400c..2258c629 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -1039,6 +1039,109 @@ def _splice_jpeg_tables(tile_data: bytes, return tile_data[:2] + tables_body + tile_data[2:] +# JPEG Start-Of-Frame marker codes: 0xFFC0..0xFFCF *except* DHT (0xC4), +# JPG (0xC8), and DAC (0xCC). The payload format is the same across every +# SOF variant: 2-byte segment length, 1-byte sample precision, 2-byte +# image height (big-endian), 2-byte image width (big-endian), 1-byte +# component count. 
+_JPEG_SOF_CODES = frozenset( + {0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, + 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF} +) + + +def _read_jpeg_sof(data: bytes) -> tuple[int, int, int] | None: + """Return ``(height, width, components)`` from the first JPEG SOF marker. + + Scans *data* for the first Start-Of-Frame marker without decoding any + pixel data. Returns ``None`` when the buffer is too short, when the + SOI marker is missing, when a length-prefixed segment runs off the + end, or when no SOF is found before EOI/end-of-buffer. The caller + treats a ``None`` return as "could not determine declared size" and + falls back to Pillow's own guard. + + Used by :func:`jpeg_decompress` to enforce a pre-decode bomb cap + analogous to the JP2K SIZ pre-check and the LERC ``getLercBlobInfo`` + pre-check. + """ + n = len(data) + if n < 4 or data[0] != 0xFF or data[1] != 0xD8: + return None + i = 2 + while i + 3 < n: + if data[i] != 0xFF: + return None + # Skip JPEG fill bytes (0xFF padding). + marker = data[i + 1] + while marker == 0xFF and i + 2 < n: + i += 1 + marker = data[i + 1] + # Standalone markers without payload: SOI, EOI, RSTm, TEM. + # Encountering EOI before SOF means the stream is malformed for + # this purpose; caller falls back to Pillow. + if marker == 0xD9: # EOI + return None + if marker in (0xD8,) or 0xD0 <= marker <= 0xD7 or marker == 0x01: + i += 2 + continue + if marker in _JPEG_SOF_CODES: + if i + 9 >= n: + return None + height = (data[i + 5] << 8) | data[i + 6] + width = (data[i + 7] << 8) | data[i + 8] + components = data[i + 9] + return height, width, components + # Length-prefixed segment: payload length includes the two length + # bytes themselves but not the marker. 
+ if i + 3 >= n: + return None + seg_len = (data[i + 2] << 8) | data[i + 3] + if seg_len < 2: + return None + i += 2 + seg_len + return None + + +def _check_jpeg_bomb(data: bytes, expected_width: int, expected_height: int, + expected_samples: int) -> None: + """Reject JPEG blobs whose SOF dimensions exceed the expected tile size. + + The caller passes the TIFF-declared tile width/height/samples; we + parse the JPEG SOF marker to discover the JPEG's own declared + dimensions and refuse to decode when the projected output exceeds + the same ``expected * 1.05 + 1`` margin used by every other codec + wrapper. Skipping when any expected value is non-positive matches + the convention from the other codecs: callers that don't supply a + size fall back to Pillow's built-in ``DecompressionBombError`` + guard. + + A return of ``None`` from :func:`_read_jpeg_sof` is treated as + "could not determine declared size"; in that case we defer to + Pillow rather than raising, so legitimate streams with unusual + framing still decode. + """ + if expected_width <= 0 or expected_height <= 0 or expected_samples <= 0: + return + info = _read_jpeg_sof(data) + if info is None: + return + declared_h, declared_w, declared_components = info + # JPEG component count of 1 (grey) or 3 (YCbCr/RGB) is the common + # case; cap at the caller-declared sample count regardless. Most + # 4-component JPEGs in TIFFs ship CMYK, but the bomb check just + # needs an upper bound on bytes per pixel. + expected_pixels = expected_width * expected_height * expected_samples + declared_pixels = declared_w * declared_h * max(declared_components, 1) + cap = _max_output_with_margin(expected_pixels) + if declared_pixels > cap: + raise ValueError( + f"jpeg decode would exceed expected size: declared output is " + f"{declared_pixels} bytes ({declared_w}x{declared_h}x" + f"{declared_components}), cap is {cap} (expected " + f"{expected_pixels}). Likely a decompression bomb." 
+ ) + + def jpeg_decompress(data: bytes, width: int = 0, height: int = 0, samples: int = 1, jpeg_tables: bytes | None = None) -> bytes: """Decompress JPEG tile/strip data. Requires Pillow. @@ -1048,6 +1151,13 @@ def jpeg_decompress(data: bytes, width: int = 0, height: int = 0, data : bytes Raw JPEG bytes from one TIFF strip or tile. May be a fragment when ``jpeg_tables`` is supplied (GDAL tiled JPEG). + width, height, samples : int, optional + TIFF-declared tile dimensions. When all three are positive the + wrapper inspects the JPEG SOF marker before decoding and raises + ``ValueError`` when the JPEG's declared output exceeds + ``width * height * samples * 1.05 + 1`` bytes + (decompression-bomb guard). Defaults of 0/0/1 disable the cap + for direct callers and round-trip tests. jpeg_tables : bytes, optional Contents of TIFF tag 347 (JPEGTables). If supplied, the shared DQT/DHT segments are spliced into ``data`` before decoding so @@ -1060,6 +1170,12 @@ def jpeg_decompress(data: bytes, width: int = 0, height: int = 0, import io if jpeg_tables: data = _splice_jpeg_tables(data, jpeg_tables) + # Pre-decode bomb cap (issue #1792). Pillow's built-in + # DecompressionBombError fires at ~178M pixels (~500 MB RGB) which + # is well above a typical tile's expected size; without this + # per-tile pre-check a single malicious tile can allocate hundreds + # of MB before the downstream chunk.size != expected check fires. + _check_jpeg_bomb(data, width, height, samples) img = Image.open(io.BytesIO(data)) # libjpeg already converts YCbCr->RGB during decode, so rely on the # mode Pillow returns. Calling .convert() unnecessarily would copy. 
diff --git a/xrspatial/geotiff/tests/test_decompression_caps.py b/xrspatial/geotiff/tests/test_decompression_caps.py index 09f49326..f5310ba7 100644 --- a/xrspatial/geotiff/tests/test_decompression_caps.py +++ b/xrspatial/geotiff/tests/test_decompression_caps.py @@ -49,6 +49,7 @@ def _module_available(name: str) -> bool: _HAS_LZ4 = _module_available("lz4.frame") _HAS_LERC = _module_available("lerc") _HAS_GLYMUR = _module_available("glymur") +_HAS_PILLOW = _module_available("PIL") # --------------------------------------------------------------------------- @@ -473,3 +474,136 @@ def __getitem__(self, _): _compression.jpeg2000_decompress( blob, width=64, height=64, samples=1, expected_size=arr.nbytes) + + +# --------------------------------------------------------------------------- +# JPEG (issue #1792) +# --------------------------------------------------------------------------- +# +# Pillow has its own DecompressionBombError, but it fires only at +# ~178M pixels (~500 MB RGB). A malicious TIFF can declare a small tile +# (e.g. 256x256 RGB, ~196 KiB expected) while shipping a JPEG payload +# whose SOF marker declares a much larger image; that lets ~500 MB +# allocate per tile before the downstream chunk.size != expected +# reshape check fires. ``jpeg_decompress`` now parses the JPEG SOF +# marker and raises before handing the blob to Pillow when the +# declared output exceeds ``expected * 1.05 + 1`` bytes. See +# https://github.com/xarray-contrib/xarray-spatial/issues/1792 . + +def _forge_jpeg_with_sof_dimensions(real_h: int, real_w: int, + real_c: int, + declared_h: int, + declared_w: int) -> bytes: + """Build a real JPEG and rewrite the SOF marker's H/W fields. + + Pillow encodes a small valid image so the bytestream is a complete + JPEG; we then overwrite the height/width fields in the SOF segment + so the SOF claims a much larger image than the payload can decode. 
+ The decoder never gets the chance to fail on the mismatch because + the pre-decode cap fires first -- which is the property under test. + """ + from PIL import Image + import io + mode = 'RGB' if real_c == 3 else 'L' + img = Image.new(mode, (real_w, real_h), color=0) + buf = io.BytesIO() + img.save(buf, format='JPEG', quality=75) + data = bytearray(buf.getvalue()) + # Find SOF0..SOF3,SOF5..SOF7,SOF9..SOF11,SOF13..SOF15 marker. + sof_codes = { + 0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, + 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF, + } + i = 2 + n = len(data) + while i + 3 < n: + if data[i] != 0xFF: + raise AssertionError("forged JPEG lost marker alignment") + marker = data[i + 1] + if marker in sof_codes: + # SOF: 0xFF Cx | len(2) | precision(1) | H(2) | W(2) | components(1) + data[i + 5] = (declared_h >> 8) & 0xFF + data[i + 6] = declared_h & 0xFF + data[i + 7] = (declared_w >> 8) & 0xFF + data[i + 8] = declared_w & 0xFF + return bytes(data) + if marker == 0xD9 or 0xD0 <= marker <= 0xD7 or marker == 0x01: + i += 2 + continue + seg_len = (data[i + 2] << 8) | data[i + 3] + i += 2 + seg_len + raise AssertionError("forged JPEG has no SOF marker") + + +@pytest.mark.skipif(not _HAS_PILLOW, reason="Pillow not installed") +class TestJpegDirect: + def test_jpeg_bomb_raises(self): + """A JPEG whose SOF dimensions exceed the per-tile cap must raise. + + The forged JPEG payload itself is small (a real 16x16 image with + the SOF marker rewritten to declare 8000x8000x3). The wrapper + is given a per-tile expected size of 32x32x3 = 3072 bytes; the + declared output 8000*8000*3 = 192_000_000 bytes is well above + the ``3072 * 1.05 + 1`` cap and the decode must be refused. 
+ """ + blob = _forge_jpeg_with_sof_dimensions( + real_h=16, real_w=16, real_c=3, + declared_h=8000, declared_w=8000, + ) + from xrspatial.geotiff._compression import jpeg_decompress + with pytest.raises(ValueError, match="exceed"): + jpeg_decompress(blob, width=32, height=32, samples=3) + + def test_jpeg_legitimate_passes(self): + """A JPEG whose SOF dimensions match the expected tile size passes.""" + from PIL import Image + import io + img = Image.new('RGB', (32, 32), color=(10, 20, 30)) + buf = io.BytesIO() + img.save(buf, format='JPEG', quality=90) + from xrspatial.geotiff._compression import jpeg_decompress + out = jpeg_decompress(buf.getvalue(), width=32, height=32, samples=3) + arr = np.frombuffer(out, dtype=np.uint8).reshape(32, 32, 3) + assert arr.shape == (32, 32, 3) + + def test_jpeg_no_cap_when_size_kwargs_default(self): + """Backward-compat: omitting size kwargs falls back to Pillow's guard. + + Direct callers and round-trip tests pass no dimensions; the + pre-check must be a no-op so those keep working. + """ + blob = _forge_jpeg_with_sof_dimensions( + real_h=16, real_w=16, real_c=3, + declared_h=64, declared_w=64, + ) + from xrspatial.geotiff._compression import jpeg_decompress + # With no dimension kwargs, the cap is disabled. The forged JPEG + # declares 64x64 but encodes only 16x16 of payload -- libjpeg + # raises on the truncation; the bomb cap is what we're checking + # is *not* the source of any exception here. Catch whatever + # Pillow raises and assert it isn't our bomb message. + from PIL import Image as _Img # noqa: F401 + try: + jpeg_decompress(blob) + except ValueError as exc: + assert "Likely a decompression bomb" not in str(exc) + except Exception: + # libjpeg/Pillow errors are acceptable -- we only care that + # the bomb cap did not fire. + pass + + def test_jpeg_malformed_falls_through_to_pillow(self): + """A JPEG without a parseable SOF defers to Pillow's own guard. 
+ + We don't want the pre-check to misclassify weird-but-valid + streams; if the helper can't read the SOF it should return + ``None`` and let Pillow raise its own error. + """ + # SOI followed by EOI -- a syntactically valid but empty stream + # with no SOF marker. + blob = bytes([0xFF, 0xD8, 0xFF, 0xD9]) + from xrspatial.geotiff._compression import jpeg_decompress + # No SOF -> bomb cap returns None -> Pillow raises on the empty + # stream. + with pytest.raises(Exception): + jpeg_decompress(blob, width=32, height=32, samples=3) From 5ec8ef0ecf281d8fa4767cc38c8537ecc7d17ed2 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 07:10:48 -0700 Subject: [PATCH 3/4] sweep-security state: record issue #1792 for geotiff JPEG bomb cap --- .claude/sweep-security-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-security-state.csv b/.claude/sweep-security-state.csv index d45abd48..5f97061b 100644 --- a/.claude/sweep-security-state.csv +++ b/.claude/sweep-security-state.csv @@ -18,7 +18,7 @@ fire,2026-04-25,,,,,"Clean. Despite the module's size hint, fire.py is purely pe flood,2026-05-03,1437,MEDIUM,3,,Re-audit 2026-05-03. MEDIUM Cat 3 fixed in PR #1438 (travel_time and flood_depth_vegetation now validate mannings_n DataArray values are finite and strictly positive via _validate_mannings_n_dataarray helper). No remaining unfixed findings. Other categories clean: every allocation is same-shape as input; no flat index math; NaN propagation explicit in every backend; tan_slope clamped by _TAN_MIN; no CUDA kernels; no file I/O; every public API calls _validate_raster on DataArray inputs. focal,2026-04-27,1284,HIGH,1,,"HIGH (fixed PR #1286): apply(), focal_stats(), and hotspots() accepted unbounded user-supplied kernels via custom_kernel(), which only checks shape parity. 
The kernel-size guard from #1241 (_check_kernel_memory) only ran inside circle_kernel/annulus_kernel, so a (50001, 50001) custom kernel on a 10x10 raster allocated ~10 GB on the kernel itself plus a much larger padded raster before any work -- same shape as the bilateral DoS in #1236. Fixed by adding _check_kernel_vs_raster_memory in focal.py and wiring it into apply(), focal_stats(), and hotspots() after custom_kernel() validation. All 134 focal tests + 19 bilateral tests pass. No other findings: 10 CUDA kernels all have proper bounds + stencil guards; _validate_raster called on every public entry point; hotspots already raises ZeroDivisionError on constant-value rasters; _focal_variety_cuda uses a fixed-size local buffer (silent truncation but bounded); _focal_std_cuda/_focal_var_cuda clamp the catastrophic-cancellation case via if var < 0.0: var = 0.0; no file I/O." geodesic,2026-04-27,1283,HIGH,1,,"HIGH (fixed PR #1285): slope(method='geodesic') and aspect(method='geodesic') stack a (3, H, W) float64 array (data, lat, lon) before dispatch with no memory check. A large lat/lon-tagged raster passed to either function would OOM. Fixed by adding _check_geodesic_memory(rows, cols) in xrspatial/geodesic.py (mirrors morphology._check_kernel_memory): budgets 56 bytes/cell (24 stacked float64 + 4 float32 output + 24 padded copy + slack) and raises MemoryError when > 50% of available RAM; called from slope.py and aspect.py inside the geodesic branch before dispatch. No other findings: 6 CUDA kernels all have bounds guards (e.g. _run_gpu_geodesic_aspect at geodesic.py:395), custom 16x16 thread blocks avoid register spill, no shared memory, _validate_raster runs upstream in slope/aspect, all backends cast to float32, slope_mag < 1e-7 flat threshold prevents arctan2 NaN propagation, curvature correction uses hardcoded WGS84 R." -geotiff,2026-05-13,,MEDIUM,1,,"Re-audit pass 17 2026-05-13 (deep-sweep s2). 
NEW MEDIUM (Cat 1): jpeg_decompress (_compression.py:1042-1066) hands attacker-controlled JPEG bytes to Pillow without consulting the declared tile width/height/samples; a tile-size mismatch lets a small JPEG payload allocate up to Pillow's MAX_IMAGE_PIXELS*2 (~178M pixels, ~500 MB RGB) before the downstream chunk.size != expected check fires. Asymmetric with the JP2K SIZ pre-check and LERC blob-info pre-check. Pillow's default DecompressionBombError is a partial guard so severity is MEDIUM. Other categories verified clean: Cat 2-6 same coverage as pass 16 audit; JPEG2000 / LERC / deflate / zstd / lz4 / packbits / LZW caps still in place; VRT _resample_nearest DstRect cap (#1737) merged; VRT path containment + DOCTYPE rejection in _safe_xml; CUDA kernels have bounds guards; mmap cache uses realpath; SSRF defenses on _HTTPSource." +geotiff,2026-05-13,1792,MEDIUM,1,,"Re-audit pass 17 2026-05-13 (deep-sweep s2). NEW MEDIUM (Cat 1): jpeg_decompress (_compression.py:1042-1066) hands attacker-controlled JPEG bytes to Pillow without consulting the declared tile width/height/samples; a tile-size mismatch lets a small JPEG payload allocate up to Pillow's MAX_IMAGE_PIXELS*2 (~178M pixels, ~500 MB RGB) before the downstream chunk.size != expected check fires. Asymmetric with the JP2K SIZ pre-check and LERC blob-info pre-check. Pillow's default DecompressionBombError is a partial guard so severity is MEDIUM. Other categories verified clean: Cat 2-6 same coverage as pass 16 audit; JPEG2000 / LERC / deflate / zstd / lz4 / packbits / LZW caps still in place; VRT _resample_nearest DstRect cap (#1737) merged; VRT path containment + DOCTYPE rejection in _safe_xml; CUDA kernels have bounds guards; mmap cache uses realpath; SSRF defenses on _HTTPSource." glcm,2026-04-24,1257,HIGH,1,,"HIGH (fixed #1257): glcm_texture() validated window_size only as >= 3 and distance only as >= 1, with no upper bound on either. 
_glcm_numba_kernel iterates range(r-half, r+half+1) for every pixel, so window_size=1_000_001 on a 10x10 raster ran ~10^14 loop iterations with all neighbors failing the interior bounds check (CPU DoS). On the dask backends depth = window_size // 2 + distance drove map_overlap padding, so a huge window also caused oversize per-chunk allocations (memory DoS). Fixed by adding max_val caps in the public entrypoint: window_size <= max(3, min(rows, cols)) and distance <= max(1, window_size // 2). One cap covers every backend because cupy and dask+cupy call through to the CPU kernel after cupy.asnumpy. No other HIGH findings: levels is already capped at 256 so the per-pixel np.zeros((levels, levels)) matrix in the kernel is bounded to 512 KB. No CUDA kernels. No file I/O. Quantization clips to [0, levels-1] before the kernel and NaN maps to -1 which the kernel filters with i_val >= 0. Entropy log(p) and correlation p / (std_i * std_j) are both guarded. All four backends use _validate_raster and cast to float64 before quantizing. MEDIUM (unfixed, Cat 1): the per-pixel np.zeros((levels, levels)) allocation inside the hot loop is a perf issue (levels=256 -> 512 KB alloc+free per pixel) but not a security issue because levels is bounded. Could be hoisted out of the loop or replaced with an in-place clear, but that is an efficiency concern, not security." gpu_rtx,2026-04-29,1308,HIGH,1,,"HIGH (fixed #1308 / PR #1310): hillshade_rtx (gpu_rtx/hillshade.py:184) and viewshed_gpu (gpu_rtx/viewshed.py:269) allocated cupy device buffers sized by raster shape with no memory check. create_triangulation (mesh_utils.py:23-24) adds verts (12 B/px) + triangles (24 B/px) = 36 B/px; hillshade_rtx adds d_rays(32) + d_hits(16) + d_aux(12) + d_output(4) = 64 B/px (100 B/px total); viewshed_gpu adds d_rays(32) + d_hits(16) + d_visgrid(4) + d_vsrays(32) = 84 B/px (120 B/px total). A 30000x30000 raster asked for 90-108 GB of VRAM before cupy surfaced an opaque allocator error. 
Fixed by adding gpu_rtx/_memory.py with _available_gpu_memory_bytes() and _check_gpu_memory(func_name, h, w) helpers (cost_distance #1262 / sky_view_factor #1299 pattern, 120 B/px budget covers worst case, raises MemoryError when required > 50% of free VRAM, skips silently when memGetInfo() unavailable). Wired into both entry points after the cupy.ndarray type check and before create_triangulation. 9 new tests in test_gpu_rtx_memory.py (5 helper-unit + 4 end-to-end gated on has_rtx). All 81 existing hillshade/viewshed tests still pass. Cat 4 clean: all CUDA kernels (hillshade.py:25/62/106, viewshed.py:32/74/116, mesh_utils.py:50) have bounds guards; no shared memory, no syncthreads needed. MEDIUM not fixed (Cat 6): hillshade_rtx and viewshed_gpu do not call _validate_raster directly but parent hillshade() (hillshade.py:252) and viewshed() (viewshed.py:1707) already validate, so input validation runs before the gpu_rtx entry point - defense-in-depth, not exploitable. MEDIUM not fixed (Cat 2): mesh_utils.py:64-68 cast mesh_map_index to int32 in the triangle index buffer; overflows at H*W > 2.1B vertices (~46341x46341+) but the new memory guard rejects rasters that large first - documentation/clarity item rather than exploitable. MEDIUM not fixed (Cat 3): mesh_utils.py:19 scale = maxDim / maxH divides by zero on an all-zero raster, propagating inf/NaN into mesh vertex z-coords; separate follow-up. LOW not fixed (Cat 5): mesh_utils.write() opens user-supplied path without canonicalization but its only call site (mesh_utils.py:38-39) sits behind if False: in create_triangulation, not reachable in production." hillshade,2026-04-27,,,,,"Clean. Cat 1: only allocation is the output np.empty(data.shape) at line 32 (cupy at line 165) and a _pad_array with hardcoded depth=1 (line 62) -- bounded by caller, no user-controlled amplifier. Azimuth/altitude are scalars and don't drive size. 
Cat 2: numba kernel uses range(1, rows-1) with simple (y, x) indexing; numba range loops promote to int64. Cat 3: math.sqrt(1.0 + xx_plus_yy) is always >= 1.0 (no neg sqrt, no div-by-zero); NaN elevation propagates correctly through dz_dx/dz_dy -> shaded -> output (the shaded < 0.0 / shaded > 1.0 clamps don't fire on NaN). Azimuth validated to [0, 360], altitude to [0, 90]. Cat 4: _gpu_calc_numba (line 107) guards both grid bounds and 3x3 stencil reads via i > 0 and i < shape[0]-1 and j > 0 and j < shape[1]-1; no shared memory. Cat 5: no file I/O. Cat 6: hillshade() calls _validate_raster (line 252) and _validate_scalar for both azimuth (253) and angle_altitude (254); all four backend paths cast to float32; tests parametrize int32/int64/float32/float64." From e14d83c950809886d80d5149b9066c1d07a0541b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 07:59:06 -0700 Subject: [PATCH 4/4] geotiff: validate JPEG SOF segment length and clarify bomb-check units (#1792) Addresses Copilot review on PR #1793. 1. ``_read_jpeg_sof`` now parses the SOF segment length and refuses the segment when ``seg_len < 8`` or the segment runs past EOF. Pre-fix, a truncated SOF could push the fixed-offset H/W/components reads past the segment boundary into later bytes, producing misleading dimensions. 2. Renamed ``expected_pixels``/``declared_pixels`` to ``expected_bytes``/``declared_bytes`` since they're compared against ``_max_output_with_margin()`` (byte cap) and reported as bytes in the error message. Updated the surrounding comment to describe the actual computation (caller-declared expected vs SOF- declared output) instead of the inaccurate "regardless of components" wording. 3. ``test_jpeg_bomb_raises`` now matches the full diagnostic ``"jpeg decode would exceed.*Likely a decompression bomb"`` so a regression that swaps in a different error path (numeric overflow, Pillow's own DecompressionBombError) fails the test instead of silently passing. 4. 
Added two tests for the new SOF length-validation branch: truncated segment lengths and below-minimum lengths both return None. --- xrspatial/geotiff/_compression.py | 38 ++++++++++---- .../geotiff/tests/test_decompression_caps.py | 50 ++++++++++++++++++- 2 files changed, 76 insertions(+), 12 deletions(-) diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index 2258c629..f1ce488b 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -1085,7 +1085,19 @@ def _read_jpeg_sof(data: bytes) -> tuple[int, int, int] | None: i += 2 continue if marker in _JPEG_SOF_CODES: - if i + 9 >= n: + # SOF is a length-prefixed segment; validate the declared + # segment length before reading the fixed fields so a + # truncated/malformed segment header is rejected as "unknown + # size" rather than silently reading past the segment into + # later bytes. The fields we need sit at byte offsets 5..9 + # from the marker, inside the declared seg_len bytes. + if i + 3 >= n: + return None + seg_len = (data[i + 2] << 8) | data[i + 3] + # SOF requires at least: 2 (length) + 1 (precision) + 2 (h) + # + 2 (w) + 1 (components) = 8 bytes; the segment must also + # fit inside the buffer. + if seg_len < 8 or i + 2 + seg_len > n: return None height = (data[i + 5] << 8) | data[i + 6] width = (data[i + 7] << 8) | data[i + 8] @@ -1126,19 +1138,23 @@ def _check_jpeg_bomb(data: bytes, expected_width: int, expected_height: int, if info is None: return declared_h, declared_w, declared_components = info - # JPEG component count of 1 (grey) or 3 (YCbCr/RGB) is the common - # case; cap at the caller-declared sample count regardless. Most - # 4-component JPEGs in TIFFs ship CMYK, but the bomb check just - # needs an upper bound on bytes per pixel. 
- expected_pixels = expected_width * expected_height * expected_samples - declared_pixels = declared_w * declared_h * max(declared_components, 1) - cap = _max_output_with_margin(expected_pixels) - if declared_pixels > cap: + # JPEG components ship as 8-bit samples in every TIFF JPEG we + # support, so ``width * height * components`` equals the post-decode + # byte count exactly. JPEG-12 (12-bit precision) would round up to + # 2 bytes per sample, but tifffile/Pillow doesn't surface those on + # the read path so we treat samples as bytes here. The expected + # side uses what the *caller* declared (TIFF tile size and + # samples-per-pixel), so a JPEG claiming extra components is + # rejected as a bomb. + expected_bytes = expected_width * expected_height * expected_samples + declared_bytes = declared_w * declared_h * max(declared_components, 1) + cap = _max_output_with_margin(expected_bytes) + if declared_bytes > cap: raise ValueError( f"jpeg decode would exceed expected size: declared output is " - f"{declared_pixels} bytes ({declared_w}x{declared_h}x" + f"{declared_bytes} bytes ({declared_w}x{declared_h}x" f"{declared_components}), cap is {cap} (expected " - f"{expected_pixels}). Likely a decompression bomb." + f"{expected_bytes}). Likely a decompression bomb." ) diff --git a/xrspatial/geotiff/tests/test_decompression_caps.py b/xrspatial/geotiff/tests/test_decompression_caps.py index f5310ba7..b3aae94e 100644 --- a/xrspatial/geotiff/tests/test_decompression_caps.py +++ b/xrspatial/geotiff/tests/test_decompression_caps.py @@ -551,7 +551,14 @@ def test_jpeg_bomb_raises(self): declared_h=8000, declared_w=8000, ) from xrspatial.geotiff._compression import jpeg_decompress - with pytest.raises(ValueError, match="exceed"): + # Match the full diagnostic so a regression that swaps in a + # different error path (e.g. 
Pillow's own DecompressionBombError + # with a different wording, or a numeric overflow before the + # explicit guard) fails the test instead of silently passing. + with pytest.raises( + ValueError, + match=r"jpeg decode would exceed.*Likely a decompression bomb", + ): jpeg_decompress(blob, width=32, height=32, samples=3) def test_jpeg_legitimate_passes(self): @@ -607,3 +614,44 @@ def test_jpeg_malformed_falls_through_to_pillow(self): # stream. with pytest.raises(Exception): jpeg_decompress(blob, width=32, height=32, samples=3) + + def test_jpeg_sof_with_truncated_segment_length_returns_none(self): + """A SOF segment whose declared length runs past EOF returns None. + + Without segment-length validation, ``_read_jpeg_sof`` would happily + read height/width/components at fixed offsets even when those + offsets pointed past the segment. The pre-check now demands + ``seg_len >= 8`` and ``i + 2 + seg_len <= n`` before reading; + truncated SOFs are treated as "unknown size" and the bomb cap + defers to Pillow. + """ + from xrspatial.geotiff._compression import _read_jpeg_sof + + # SOI | SOF0 | seg_len=64 (advertises 64 bytes of segment, but + # the buffer ends after only 8 bytes of segment data). + # Truncation -> _read_jpeg_sof must return None. + truncated = bytes([ + 0xFF, 0xD8, # SOI + 0xFF, 0xC0, # SOF0 + 0x00, 0x40, # seg_len = 64 (lies; buf is short) + 0x08, # precision = 8 + 0x10, 0x00, # height = 4096 + 0x10, 0x00, # width = 4096 + 0x03, # components = 3 + ]) + assert _read_jpeg_sof(truncated) is None + + def test_jpeg_sof_with_segment_length_below_minimum_returns_none(self): + """A SOF segment whose declared length is < 8 returns None.""" + from xrspatial.geotiff._compression import _read_jpeg_sof + + too_small = bytes([ + 0xFF, 0xD8, # SOI + 0xFF, 0xC0, # SOF0 + 0x00, 0x04, # seg_len = 4 (too small for SOF) + 0x08, + 0x10, 0x00, + 0x10, 0x00, + 0x03, + ]) + assert _read_jpeg_sof(too_small) is None