From 05f6f0b682d4bb355dd6f41fe5b733540a978e30 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 08:47:00 -0700 Subject: [PATCH 1/3] geotiff: close HTTP source on tile-fetch exception (closes #1816) --- xrspatial/geotiff/_reader.py | 48 ++-- .../test_cog_http_close_on_error_1816.py | 227 ++++++++++++++++++ 2 files changed, 255 insertions(+), 20 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index b3da32bd..3f99b955 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1935,26 +1935,34 @@ def _read_cog_http(url: str, overview_level: int | None = None, f"band={band} out of range for " f"{ifd.samples_per_pixel}-band file.") - arr = _fetch_decode_cog_http_tiles( - source, header, ifd, max_pixels=max_pixels, window=window) - source.close() - - # Mirror the local-path band selection in ``read_to_array``: extract - # the requested band only after the array is materialised so the - # multi-band tile decode can populate every plane first. ``band`` - # outside the valid range raises ``IndexError`` the same as numpy. - if arr.ndim == 3 and ifd.samples_per_pixel > 1 and band is not None: - arr = arr[:, :, band] - - # Apply Orientation tag (274) so HTTP reads return the same pixel - # order and transform as the local-file path. Only the full-read - # branch reaches here; the windowed-read branch is rejected above - # for non-default orientation. See issue #1717. - if ifd.orientation != 1: - arr, geo_info = _apply_orientation_with_geo( - arr, geo_info, ifd.orientation) - - arr = _apply_photometric_miniswhite(arr, ifd) + # Issue #1816: wrap the tile fetch and post-processing in try/finally + # so ``source.close()`` runs even when the fetch/decode or the + # orientation/photometric step raises. ``_HTTPSource.close()`` is a + # no-op today, but a future resource-holding source would leak on + # the error path without this. The explicit ``source.close()`` calls + # in the validation block above stay as-is; ``close()`` is idempotent. + try: + arr = _fetch_decode_cog_http_tiles( + source, header, ifd, max_pixels=max_pixels, window=window) + + # Mirror the local-path band selection in ``read_to_array``: extract + # the requested band only after the array is materialised so the + # multi-band tile decode can populate every plane first. ``band`` + # outside the valid range raises ``IndexError`` the same as numpy. + if arr.ndim == 3 and ifd.samples_per_pixel > 1 and band is not None: + arr = arr[:, :, band] + + # Apply Orientation tag (274) so HTTP reads return the same pixel + # order and transform as the local-file path. Only the full-read + # branch reaches here; the windowed-read branch is rejected above + # for non-default orientation. See issue #1717. + if ifd.orientation != 1: + arr, geo_info = _apply_orientation_with_geo( + arr, geo_info, ifd.orientation) + + arr = _apply_photometric_miniswhite(arr, ifd) + finally: + source.close() return arr, geo_info diff --git a/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py b/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py new file mode 100644 index 00000000..dc97c82b --- /dev/null +++ b/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py @@ -0,0 +1,227 @@ +"""Regression tests for issue #1816. + +``_read_cog_http`` called ``source.close()`` only on the success path: +the fetch/decode step plus the post-processing block (band slice, +orientation, photometric inversion) ran outside any ``try/finally``, +so a raise from any of them skipped the close. ``_HTTPSource.close()`` +is currently a no-op (a module-level urllib3 pool, not a per-source +resource), so the leak is latent rather than active, but the structure +needs the guard so a future resource-holding source does not silently +leak. + +These tests pin the close-on-error contract by injecting a wrapper +around ``_HTTPSource`` that records every ``close()`` call, then +exercising both the happy path and a tile-fetch failure path. +""" +from __future__ import annotations + +import http.server +import socketserver +import threading + +import numpy as np +import pytest + +from xrspatial.geotiff import _reader as reader_mod +from xrspatial.geotiff._reader import _read_cog_http +from xrspatial.geotiff._writer import write + + +# --------------------------------------------------------------------------- +# Loopback HTTP server with Range support (mirrors #1695 pattern). +# --------------------------------------------------------------------------- + +class _RangeHandler(http.server.BaseHTTPRequestHandler): + payload: bytes = b'' + + def do_GET(self): # noqa: N802 + rng = self.headers.get('Range') + if rng and rng.startswith('bytes='): + spec = rng[len('bytes='):] + start_s, _, end_s = spec.partition('-') + start = int(start_s) + end = int(end_s) if end_s else len(self.payload) - 1 + chunk = self.payload[start:end + 1] + self.send_response(206) + self.send_header('Content-Type', 'application/octet-stream') + self.send_header( + 'Content-Range', + f'bytes {start}-{start + len(chunk) - 1}/{len(self.payload)}', + ) + self.send_header('Content-Length', str(len(chunk))) + self.end_headers() + self.wfile.write(chunk) + return + self.send_response(200) + self.send_header('Content-Type', 'application/octet-stream') + self.send_header('Content-Length', str(len(self.payload))) + self.end_headers() + self.wfile.write(self.payload) + + def log_message(self, *_args, **_kwargs): + pass + + +def _serve(payload: bytes): + handler_cls = type( + 'RangeHandler1816', (_RangeHandler,), {'payload': payload} + ) + httpd = socketserver.TCPServer(('127.0.0.1', 0), handler_cls) + port = httpd.server_address[1] + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + return f'http://127.0.0.1:{port}/cog.tif', httpd, thread + + +def _stop(httpd): + httpd.shutdown() + httpd.server_close() + + +@pytest.fixture(autouse=True) +def _allow_loopback(monkeypatch): + """The HTTP source rejects loopback by default after #1664.""" + monkeypatch.setenv('XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS', '1') + + +# --------------------------------------------------------------------------- +# Close-tracking wrapper installed via monkeypatch on the _HTTPSource +# constructor used inside _read_cog_http. +# --------------------------------------------------------------------------- + +class _CloseTracker: + """Delegates every attribute to a real ``_HTTPSource`` while + recording ``close()`` calls. Used to verify that ``_read_cog_http`` + closes the source on both the success and the failure path. + """ + + def __init__(self, real): + self._real = real + self.close_count = 0 + + def __getattr__(self, name): + return getattr(self._real, name) + + def close(self): + self.close_count += 1 + return self._real.close() + + +def _install_tracker(monkeypatch): + """Replace ``_HTTPSource`` in ``_reader`` with a factory that wraps + each instance in a ``_CloseTracker`` and stashes the trackers on a + list so the test can inspect them afterwards. + """ + trackers: list[_CloseTracker] = [] + real_cls = reader_mod._HTTPSource + + def factory(url, *args, **kwargs): + tracker = _CloseTracker(real_cls(url, *args, **kwargs)) + trackers.append(tracker) + return tracker + + monkeypatch.setattr(reader_mod, '_HTTPSource', factory) + return trackers + + +# --------------------------------------------------------------------------- +# Fixture: a real single-band COG served over loopback. +# --------------------------------------------------------------------------- + +@pytest.fixture +def single_band_cog(tmp_path): + arr = np.arange(32 * 32, dtype=np.float32).reshape(32, 32) + path = str(tmp_path / 'tmp_1816_single.tif') + write(arr, path, compression='deflate', tiled=True, tile_size=16, + cog=True) + with open(path, 'rb') as f: + payload = f.read() + return path, payload, arr + + +# --------------------------------------------------------------------------- +# Happy path: close called exactly once after full post-processing. +# --------------------------------------------------------------------------- + +def test_http_source_closed_on_success(single_band_cog, monkeypatch): + """A successful ``_read_cog_http`` closes the source exactly once. + + Establishes the baseline so the failure-path test below isn't just + catching an unrelated regression in the success path. + """ + _path, payload, expected = single_band_cog + trackers = _install_tracker(monkeypatch) + url, httpd, _ = _serve(payload) + try: + arr, _geo = _read_cog_http(url) + np.testing.assert_array_equal(arr, expected) + finally: + _stop(httpd) + + assert len(trackers) == 1, ( + f"expected one _HTTPSource construction, got {len(trackers)}") + assert trackers[0].close_count == 1, ( + f"expected close() called once, got {trackers[0].close_count}") + + +# --------------------------------------------------------------------------- +# Failure path: tile fetch raises, source still closed. +# --------------------------------------------------------------------------- + +def test_http_source_closed_when_tile_fetch_raises( + single_band_cog, monkeypatch): + """When ``_fetch_decode_cog_http_tiles`` raises, ``_read_cog_http`` + still closes the source. Before the fix, ``source.close()`` ran + only on the success path, so any exception in the fetch/decode + bypassed the close. + """ + _path, payload, _expected = single_band_cog + trackers = _install_tracker(monkeypatch) + + def boom(*_args, **_kwargs): + raise OSError("simulated tile fetch failure") + + monkeypatch.setattr( + reader_mod, '_fetch_decode_cog_http_tiles', boom) + + url, httpd, _ = _serve(payload) + try: + with pytest.raises(OSError, match="simulated tile fetch failure"): + _read_cog_http(url) + finally: + _stop(httpd) + + assert len(trackers) == 1 + assert trackers[0].close_count == 1, ( + "source.close() was not called on the exception path; " + "the try/finally guard in _read_cog_http is missing or broken") + + +# --------------------------------------------------------------------------- +# Failure path: post-processing (orientation) raises, source still closed. +# --------------------------------------------------------------------------- + +def test_http_source_closed_when_post_processing_raises( + single_band_cog, monkeypatch): + """An exception from the orientation/photometric step also runs + through ``finally``. Guards against a future regression that moves + ``source.close()`` back between the fetch and the post-processing. + """ + _path, payload, _expected = single_band_cog + trackers = _install_tracker(monkeypatch) + + def boom(*_args, **_kwargs): + raise RuntimeError("simulated photometric failure") + + monkeypatch.setattr( + reader_mod, '_apply_photometric_miniswhite', boom) + + url, httpd, _ = _serve(payload) + try: + with pytest.raises(RuntimeError, match="simulated photometric"): + _read_cog_http(url) + finally: + _stop(httpd) + + assert len(trackers) == 1 + assert trackers[0].close_count == 1 From 6b3d703caab7da011516760227d9d55289e01710 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 10:16:48 -0700 Subject: [PATCH 2/3] geotiff: address Copilot review comments on #1819 --- xrspatial/geotiff/_reader.py | 143 +++++++++--------- .../test_cog_http_close_on_error_1816.py | 6 +- 2 files changed, 76 insertions(+), 73 deletions(-) diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 3f99b955..2727d586 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1867,81 +1867,82 @@ def _read_cog_http(url: str, overview_level: int | None = None, (array, geo_info) tuple """ source = _HTTPSource(url) - header, ifd, geo_info, header_bytes = _parse_cog_http_meta( - source, overview_level=overview_level) - - # Mirror the local-path orientation guard in ``read_to_array``: a - # windowed read against a non-default Orientation tag (274) has - # ambiguous semantics (does the window refer to file pixels or to - # display pixels?) and the HTTP path does not yet implement - # ``_apply_orientation``. Reject the combination here so HTTP and - # local reads agree on the contract for oriented TIFFs instead of - # silently returning a different region or pixel order. See PR - # #1680 review feedback on issue #1669. - if ifd.orientation != 1 and window is not None: - source.close() - raise ValueError( - f"Orientation tag (274) is {ifd.orientation}; windowed reads " - f"(window=...) and dask-chunked reads (chunks=...) are not " - f"supported for non-default orientation. Read the full " - f"array first, then slice." - ) - - # Validate ``window`` against the selected IFD's extent before the - # tile fetch is built. Without this, the helper silently clamps an - # out-of-bounds window and returns a smaller array, mismatching - # ``open_geotiff``'s caller-built coord arrays. Mirrors the - # local-path validator in ``read_to_array`` (#1634). - if window is not None: - w_r0, w_c0, w_r1, w_c1 = window - if (w_r0 < 0 or w_c0 < 0 - or w_r1 > ifd.height or w_c1 > ifd.width - or w_r0 >= w_r1 or w_c0 >= w_c1): - source.close() - raise ValueError( - f"window={window} is outside the source extent " - f"({ifd.height}x{ifd.width}) or has non-positive size.") - - # Validate ``band`` against the selected IFD's sample count before - # the tile fetch. Without this, ``band=-1`` silently picks the last - # channel via numpy negative indexing and ``band>=samples_per_pixel`` - # leaks a raw numpy ``IndexError``; on a single-band file ``band=N`` - # (N != 0) is dropped on the floor because the post-decode slice - # below is gated on ``arr.ndim == 3 and samples_per_pixel > 1``. - # Mirrors the local-path validator in ``read_to_array`` so all - # backends agree on the contract: 0-based non-negative index only. - # ``source.close()`` is called for symmetry with the success-path - # teardown below; it is a no-op on ``_HTTPSource`` today (the - # urllib3 ``PoolManager`` is shared module-level, not per-source) - # but a future resource-holding source will need it. See issue #1695. - if band is not None: - # Reject ``bool`` (and ``np.bool_``) up front; ``isinstance(True, int)`` - # is True in Python so ``True < samples_per_pixel`` evaluates without - # raising and silently reads band 1. ``np.bool_`` is not a subclass of - # ``bool`` so it needs its own check to match the VRT path's - # rejection. See #1786. - if isinstance(band, (bool, np.bool_)): + # Issue #1816: wrap everything after the ``_HTTPSource`` construction + # in try/finally so ``source.close()`` runs even when header parsing, + # validation, fetch/decode, or orientation/photometric post-processing + # raises. ``_HTTPSource.close()`` is a no-op today, but a future + # resource-holding source would leak on the error path without this. + # ``close()`` is idempotent, so the explicit pre-raise ``source.close()`` + # calls in the validation blocks below stay as-is. + try: + header, ifd, geo_info, header_bytes = _parse_cog_http_meta( + source, overview_level=overview_level) + + # Mirror the local-path orientation guard in ``read_to_array``: a + # windowed read against a non-default Orientation tag (274) has + # ambiguous semantics (does the window refer to file pixels or to + # display pixels?) and the HTTP path does not yet implement + # ``_apply_orientation``. Reject the combination here so HTTP and + # local reads agree on the contract for oriented TIFFs instead of + # silently returning a different region or pixel order. See PR + # #1680 review feedback on issue #1669. + if ifd.orientation != 1 and window is not None: source.close() raise ValueError( - f"band must be a non-negative int, got {band!r}") - if ifd.samples_per_pixel <= 1: - if band != 0: + f"Orientation tag (274) is {ifd.orientation}; windowed reads " + f"(window=...) and dask-chunked reads (chunks=...) are not " + f"supported for non-default orientation. Read the full " + f"array first, then slice." + ) + + # Validate ``window`` against the selected IFD's extent before the + # tile fetch is built. Without this, the helper silently clamps an + # out-of-bounds window and returns a smaller array, mismatching + # ``open_geotiff``'s caller-built coord arrays. Mirrors the + # local-path validator in ``read_to_array`` (#1634). + if window is not None: + w_r0, w_c0, w_r1, w_c1 = window + if (w_r0 < 0 or w_c0 < 0 + or w_r1 > ifd.height or w_c1 > ifd.width + or w_r0 >= w_r1 or w_c0 >= w_c1): + source.close() + raise ValueError( + f"window={window} is outside the source extent " + f"({ifd.height}x{ifd.width}) or has non-positive size.") + + # Validate ``band`` against the selected IFD's sample count before + # the tile fetch. Without this, ``band=-1`` silently picks the last + # channel via numpy negative indexing and ``band>=samples_per_pixel`` + # leaks a raw numpy ``IndexError``; on a single-band file ``band=N`` + # (N != 0) is dropped on the floor because the post-decode slice + # below is gated on ``arr.ndim == 3 and samples_per_pixel > 1``. + # Mirrors the local-path validator in ``read_to_array`` so all + # backends agree on the contract: 0-based non-negative index only. + # ``source.close()`` is called for symmetry with the success-path + # teardown below; it is a no-op on ``_HTTPSource`` today (the + # urllib3 ``PoolManager`` is shared module-level, not per-source) + # but a future resource-holding source will need it. See issue #1695. + if band is not None: + # Reject ``bool`` (and ``np.bool_``) up front; ``isinstance(True, int)`` + # is True in Python so ``True < samples_per_pixel`` evaluates without + # raising and silently reads band 1. ``np.bool_`` is not a subclass of + # ``bool`` so it needs its own check to match the VRT path's + # rejection. See #1786. + if isinstance(band, (bool, np.bool_)): + source.close() + raise ValueError( + f"band must be a non-negative int, got {band!r}") + if ifd.samples_per_pixel <= 1: + if band != 0: + source.close() + raise IndexError( + f"band={band} requested on a single-band file.") + elif not 0 <= band < ifd.samples_per_pixel: source.close() raise IndexError( - f"band={band} requested on a single-band file.") - elif not 0 <= band < ifd.samples_per_pixel: - source.close() - raise IndexError( - f"band={band} out of range for " - f"{ifd.samples_per_pixel}-band file.") - - # Issue #1816: wrap the tile fetch and post-processing in try/finally - # so ``source.close()`` runs even when the fetch/decode or the - # orientation/photometric step raises. ``_HTTPSource.close()`` is a - # no-op today, but a future resource-holding source would leak on - # the error path without this. The explicit ``source.close()`` calls - # in the validation block above stay as-is; ``close()`` is idempotent. - try: + f"band={band} out of range for " + f"{ifd.samples_per_pixel}-band file.") + arr = _fetch_decode_cog_http_tiles( source, header, ifd, max_pixels=max_pixels, window=window) diff --git a/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py b/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py index dc97c82b..cd00779a 100644 --- a/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py +++ b/xrspatial/geotiff/tests/test_cog_http_close_on_error_1816.py @@ -169,7 +169,8 @@ def test_http_source_closed_on_success(single_band_cog, monkeypatch): # --------------------------------------------------------------------------- def test_http_source_closed_when_tile_fetch_raises( - single_band_cog, monkeypatch): + single_band_cog, monkeypatch, +): """When ``_fetch_decode_cog_http_tiles`` raises, ``_read_cog_http`` still closes the source. Before the fix, ``source.close()`` ran only on the success path, so any exception in the fetch/decode @@ -202,7 +203,8 @@ def boom(*_args, **_kwargs): # --------------------------------------------------------------------------- def test_http_source_closed_when_post_processing_raises( - single_band_cog, monkeypatch): + single_band_cog, monkeypatch, +): """An exception from the orientation/photometric step also runs through ``finally``. Guards against a future regression that moves ``source.close()`` back between the fetch and the post-processing. From ed6a5155517201b70030fd0b164624152ef9da68 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 10:13:49 -0700 Subject: [PATCH 3/3] geotiff: per-tile dim check uses default cap, not caller budget (#1823) PR #1803 forwarded the caller's max_pixels to read_to_array inside read_vrt's source loop so a tiny VRT output cannot force a huge source decode (#1796). The output-window check at the source read enforces that correctly. A separate per-tile dimension check at the same call sites also consumed the caller's max_pixels, so a caller setting max_pixels as an output budget (e.g. 10_000) failed the per-tile sanity check on any normal source whose default tile size is 256x256 (= 65_536 pixels). Use MAX_PIXELS_DEFAULT for the per-tile dim check at the two call sites in _read_tiles (local) and _read_tiles_cog_http (HTTP). The output-window check at the same functions continues to enforce the user-supplied max_pixels, preserving the #1796 protection. --- xrspatial/geotiff/_reader.py | 19 ++- .../tests/test_vrt_source_tile_check_1823.py | 113 ++++++++++++++++++ 2 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 2727d586..cf7235a4 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1560,9 +1560,14 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, raise ValueError( f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}") - # Reject crafted tile dims that would force huge per-tile allocations. - # A single tile's decoded bytes must also fit under the pixel budget. - _check_dimensions(tw, th, samples, max_pixels) + # Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the + # TIFF header against malformed values; it is not the caller's output + # budget. The output-window check below uses ``max_pixels`` and is + # what enforces the user's per-call memory cap. The source-read path + # under ``read_vrt`` (#1796) relies on that output check to honour a + # small caller ``max_pixels`` against a normal-tile source; see + # #1823. + _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT) # Per-tile compressed-byte cap (issue #1664). Same env var as the # HTTP path. mmap slicing is bounded by the file size, but the slice @@ -2025,10 +2030,14 @@ def _fetch_decode_cog_http_tiles( # A windowed HTTP read of a multi-billion-pixel COG only allocates # the window, so capping the full image would reject legitimate # tiled reads. The full-image cap still applies for whole-file - # reads (window is None). The single-tile budget always applies. + # reads (window is None). The per-tile dim check below guards the + # TIFF header against absurd ``TileWidth`` / ``TileLength`` values + # (e.g. 2**31) and uses ``MAX_PIXELS_DEFAULT`` so a caller's small + # ``max_pixels`` -- intended as an output-window budget -- does not + # reject normal 256x256 tiles. See #1823. if window is None: _check_dimensions(width, height, samples, max_pixels) - _check_dimensions(tw, th, samples, max_pixels) + _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT) # Reject malformed TIFFs whose declared tile grid exceeds the supplied # TileOffsets length. See issue #1219. diff --git a/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py b/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py new file mode 100644 index 00000000..b02dd2fb --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py @@ -0,0 +1,113 @@ +"""Regression tests for #1823. + +PR #1803 forwarded the caller's ``max_pixels`` to ``read_to_array`` inside +the VRT source loop so that a tiny VRT output could not force a huge +source decode (#1796). The output-window check enforces that. A separate +per-tile dimension check was incorrectly using the same ``max_pixels`` +value, so a caller setting ``max_pixels`` as an output budget (e.g. +10,000) would also fail the per-tile sanity check on every normal source +whose default tile size is 256x256 (= 65,536 pixels). + +The #1796 protection remains: the output-window check still catches a +tiny VRT output that asks for a large source window. +""" +from __future__ import annotations + +import os +import tempfile + +import numpy as np +import pytest + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._reader import PixelSafetyLimitError +from xrspatial.geotiff._vrt import read_vrt + + +def _write_normal_tile_source(td: str) -> str: + """10x10 uint8 source -- ``to_geotiff`` pads to a 256x256 tile.""" + src = os.path.join(td, 'src.tif') + to_geotiff(np.zeros((10, 10), dtype=np.uint8), src, compression='none') + return src + + +def _write_vrt(td: str, *, dst_x_size: int, dst_y_size: int, + raster_x: int = 100, raster_y: int = 100, + src_x_size: int = 10, src_y_size: int = 10) -> str: + vrt = os.path.join(td, 'mosaic.vrt') + xml = ( + f'\n' + f' \n' + f' \n' + f' src.tif\n' + f' 1\n' + f' \n' + f' \n' + f' \n' + f' \n' + f'\n' + ) + with open(vrt, 'w') as f: + f.write(xml) + return vrt + + +class TestPerTileCheckDoesNotUseCallerBudget: + """Per-tile dim sanity must not reject normal 256x256 source tiles + when the caller's ``max_pixels`` is a small output-budget value.""" + + def test_normal_tile_source_with_small_max_pixels(self): + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_normal_tile_source(td) + vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100) + arr, _ = read_vrt(vrt, max_pixels=10_000) + assert arr.shape == (100, 100) + + def test_normal_tile_source_with_tiny_max_pixels(self): + """An output budget below a single tile must still succeed when + the requested output window itself fits.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_normal_tile_source(td) + # Output 5x5 = 25 pixels; max_pixels = 100 fits 25 with room. + vrt = _write_vrt(td, dst_x_size=5, dst_y_size=5, + raster_x=5, raster_y=5) + arr, _ = read_vrt(vrt, max_pixels=100) + assert arr.shape == (5, 5) + + +class TestOutputWindowCheckStillEnforced: + """The output-window check at the source read still rejects an + over-budget read; the #1796 protection is preserved.""" + + def test_output_window_exceeds_max_pixels_still_rejected(self): + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + src = os.path.join(td, 'src.tif') + to_geotiff(np.arange(16, dtype=np.uint8).reshape(4, 4), + src, compression='none') + vrt = _write_vrt(td, dst_x_size=1, dst_y_size=1, + raster_x=1, raster_y=1, + src_x_size=4, src_y_size=4) + # SrcRect 4x4 = 16 pixels > max_pixels=1 → output check fires. + with pytest.raises(ValueError, match="exceed"): + read_vrt(vrt, max_pixels=1) + + +class TestPerTileCheckStillRejectsCraftedHeader: + """A pathological ``TileWidth``/``TileLength`` must still fail at + the per-tile sanity check, which uses ``MAX_PIXELS_DEFAULT``.""" + + def test_per_tile_check_caps_at_default(self, monkeypatch): + """Lower ``MAX_PIXELS_DEFAULT`` to verify the per-tile call site + is wired to it (rather than to the caller's budget).""" + from xrspatial.geotiff import _reader as reader_mod + + monkeypatch.setattr(reader_mod, "MAX_PIXELS_DEFAULT", 100) + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_normal_tile_source(td) + vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100) + # 256x256 tile > patched MAX_PIXELS_DEFAULT=100 → per-tile + # check fires regardless of caller's max_pixels (1e9 here). + with pytest.raises(PixelSafetyLimitError, match="65,536"): + read_vrt(vrt, max_pixels=1_000_000_000)