From 3e08f34bb94113f88004e77337bd5e59cdb4ea81 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 10:13:49 -0700 Subject: [PATCH] geotiff: per-tile dim check uses default cap, not caller budget (#1823) PR #1803 forwarded the caller's max_pixels to read_to_array inside read_vrt's source loop so a tiny VRT output cannot force a huge source decode (#1796). The output-window check at the source read enforces that correctly. A separate per-tile dimension check at the same call sites also consumed the caller's max_pixels, so a caller setting max_pixels as an output budget (e.g. 10_000) failed the per-tile sanity check on any normal source whose default tile size is 256x256 (= 65_536 pixels). Use MAX_PIXELS_DEFAULT for the per-tile dim check at the two call sites in _read_tiles (local) and _read_tiles_cog_http (HTTP). The output-window check at the same functions continues to enforce the user-supplied max_pixels, preserving the #1796 protection. --- xrspatial/geotiff/_reader.py | 19 ++- .../tests/test_vrt_source_tile_check_1823.py | 113 ++++++++++++++++++ 2 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index b3da32bd..c174c3fc 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1560,9 +1560,14 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, raise ValueError( f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}") - # Reject crafted tile dims that would force huge per-tile allocations. - # A single tile's decoded bytes must also fit under the pixel budget. - _check_dimensions(tw, th, samples, max_pixels) + # Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the + # TIFF header against malformed values; it is not the caller's output + # budget. The output-window check below uses ``max_pixels`` and is + # what enforces the user's per-call memory cap. The source-read path + # under ``read_vrt`` (#1796) relies on that output check to honour a + # small caller ``max_pixels`` against a normal-tile source; see + # #1823. + _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT) # Per-tile compressed-byte cap (issue #1664). Same env var as the # HTTP path. mmap slicing is bounded by the file size, but the slice @@ -2016,10 +2021,14 @@ def _fetch_decode_cog_http_tiles( # A windowed HTTP read of a multi-billion-pixel COG only allocates # the window, so capping the full image would reject legitimate # tiled reads. The full-image cap still applies for whole-file - # reads (window is None). The single-tile budget always applies. + # reads (window is None). The per-tile dim check below guards the + # TIFF header against absurd ``TileWidth`` / ``TileLength`` values + # (e.g. 2**31) and uses ``MAX_PIXELS_DEFAULT`` so a caller's small + # ``max_pixels`` -- intended as an output-window budget -- does not + # reject normal 256x256 tiles. See #1823. if window is None: _check_dimensions(width, height, samples, max_pixels) - _check_dimensions(tw, th, samples, max_pixels) + _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT) # Reject malformed TIFFs whose declared tile grid exceeds the supplied # TileOffsets length. See issue #1219. diff --git a/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py b/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py new file mode 100644 index 00000000..b02dd2fb --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py @@ -0,0 +1,113 @@ +"""Regression tests for #1823. + +PR #1803 forwarded the caller's ``max_pixels`` to ``read_to_array`` inside +the VRT source loop so that a tiny VRT output could not force a huge +source decode (#1796). The output-window check enforces that. A separate +per-tile dimension check was incorrectly using the same ``max_pixels`` +value, so a caller setting ``max_pixels`` as an output budget (e.g. +10,000) would also fail the per-tile sanity check on every normal source +whose default tile size is 256x256 (= 65,536 pixels). + +The #1796 protection remains: the output-window check still catches a +tiny VRT output that asks for a large source window. +""" +from __future__ import annotations + +import os +import tempfile + +import numpy as np +import pytest + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._reader import PixelSafetyLimitError +from xrspatial.geotiff._vrt import read_vrt + + +def _write_normal_tile_source(td: str) -> str: + """10x10 uint8 source -- ``to_geotiff`` pads to a 256x256 tile.""" + src = os.path.join(td, 'src.tif') + to_geotiff(np.zeros((10, 10), dtype=np.uint8), src, compression='none') + return src + + +def _write_vrt(td: str, *, dst_x_size: int, dst_y_size: int, + raster_x: int = 100, raster_y: int = 100, + src_x_size: int = 10, src_y_size: int = 10) -> str: + vrt = os.path.join(td, 'mosaic.vrt') + xml = ( + f'\n' + f' \n' + f' \n' + f' src.tif\n' + f' 1\n' + f' \n' + f' \n' + f' \n' + f' \n' + f'\n' + ) + with open(vrt, 'w') as f: + f.write(xml) + return vrt + + +class TestPerTileCheckDoesNotUseCallerBudget: + """Per-tile dim sanity must not reject normal 256x256 source tiles + when the caller's ``max_pixels`` is a small output-budget value.""" + + def test_normal_tile_source_with_small_max_pixels(self): + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_normal_tile_source(td) + vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100) + arr, _ = read_vrt(vrt, max_pixels=10_000) + assert arr.shape == (100, 100) + + def test_normal_tile_source_with_tiny_max_pixels(self): + """An output budget below a single tile must still succeed when + the requested output window itself fits.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_normal_tile_source(td) + # Output 5x5 = 25 pixels; max_pixels = 100 fits 25 with room. + vrt = _write_vrt(td, dst_x_size=5, dst_y_size=5, + raster_x=5, raster_y=5) + arr, _ = read_vrt(vrt, max_pixels=100) + assert arr.shape == (5, 5) + + +class TestOutputWindowCheckStillEnforced: + """The output-window check at the source read still rejects an + over-budget read; the #1796 protection is preserved.""" + + def test_output_window_exceeds_max_pixels_still_rejected(self): + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + src = os.path.join(td, 'src.tif') + to_geotiff(np.arange(16, dtype=np.uint8).reshape(4, 4), + src, compression='none') + vrt = _write_vrt(td, dst_x_size=1, dst_y_size=1, + raster_x=1, raster_y=1, + src_x_size=4, src_y_size=4) + # SrcRect 4x4 = 16 pixels > max_pixels=1 → output check fires. + with pytest.raises(ValueError, match="exceed"): + read_vrt(vrt, max_pixels=1) + + +class TestPerTileCheckStillRejectsCraftedHeader: + """A pathological ``TileWidth``/``TileLength`` must still fail at + the per-tile sanity check, which uses ``MAX_PIXELS_DEFAULT``.""" + + def test_per_tile_check_caps_at_default(self, monkeypatch): + """Lower ``MAX_PIXELS_DEFAULT`` to verify the per-tile call site + is wired to it (rather than to the caller's budget).""" + from xrspatial.geotiff import _reader as reader_mod + + monkeypatch.setattr(reader_mod, "MAX_PIXELS_DEFAULT", 100) + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_normal_tile_source(td) + vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100) + # 256x256 tile > patched MAX_PIXELS_DEFAULT=100 → per-tile + # check fires regardless of caller's max_pixels (1e9 here). + with pytest.raises(PixelSafetyLimitError, match="65,536"): + read_vrt(vrt, max_pixels=1_000_000_000)