diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py index c1e27405..6c58d797 100644 --- a/xrspatial/geotiff/_gpu_decode.py +++ b/xrspatial/geotiff/_gpu_decode.py @@ -3090,26 +3090,37 @@ def gpu_compress_tiles(d_image, tile_width, tile_height, def _block_reduce_2d_gpu(arr2d, method, nodata=None): """2x block-reduce a single 2D CuPy plane using *method*. + Output dimensions follow GDAL's ceil semantics (5x5 -> 3x3) so the + overview pyramid covers the full source extent for odd-sized rasters. + Odd inputs are NaN-padded along the trailing edge (float-promoted for + integer dtypes) so the 2x2 block reshape works and the residual block + is reduced via the same nan-aware aggregations (issue #2105). Mirrors + the CPU helper :func:`xrspatial.geotiff._writer._block_reduce_2d` so + the two backends produce identical overviews. + When ``nodata`` is supplied and ``arr2d`` is a float dtype, cells that equal the sentinel are masked back to NaN before the reduction so the - ``cupy.nan*`` aggregation routines correctly skip them. Mirrors the - CPU helper :func:`xrspatial.geotiff._writer._block_reduce_2d` so the - two backends produce identical overviews when ``nodata`` is set. + ``cupy.nan*`` aggregation routines correctly skip them. """ import cupy import numpy as np h, w = arr2d.shape - h2 = (h // 2) * 2 - w2 = (w // 2) * 2 - cropped = arr2d[:h2, :w2] - oh, ow = h2 // 2, w2 // 2 + # Ceil semantics match GDAL and the CPU mirror in _writer. + oh, ow = (h + 1) // 2, (w + 1) // 2 + h2, w2 = 2 * oh, 2 * ow + need_pad = (h2, w2) != (h, w) if method == 'nearest': - return cropped[::2, ::2].copy() + # Top-left pixel of each 2x2 block; direct stride keeps the + # trailing row/col for odd-sized inputs (issue #2105). The + # ``.copy()`` materialises a contiguous device buffer so + # downstream nodata-mask rewrites don't touch ``arr2d``. + return arr2d[::2, ::2].copy() if method == 'mode': - # Mode is expensive on GPU; fall back to CPU + # Mode is expensive on GPU; fall back to CPU. The CPU helper + # now handles odd-sized inputs natively. cpu_arr = arr2d.get() from ._writer import _block_reduce_2d cpu_result = _block_reduce_2d(cpu_arr, 'mode', nodata=nodata) @@ -3120,15 +3131,26 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None): # factors with the same prefilter=False NaN-safety the CPU # helper uses for issue #1623. Fall back to CPU so cubic on # the GPU writer path produces the same overview bytes as the - # CPU writer and so the sentinel handling matches. + # CPU writer and so the sentinel handling matches. The CPU + # helper handles odd-sized inputs via edge-replicate padding. cpu_arr = arr2d.get() from ._writer import _block_reduce_2d cpu_result = _block_reduce_2d(cpu_arr, 'cubic', nodata=nodata) return cupy.asarray(cpu_result) - # Block reshape for mean/min/max/median + # Block reshape for mean/min/max/median. Odd-sized inputs get a + # NaN-padded trailing edge so the residual block reduces correctly; + # the nan-aware reductions exclude the padded cells. The padded + # buffer keeps the source float dtype (float32 stays float32) so an + # odd-shape GPU read does not pay a 2x device-memory cost for a + # silent promote to float64. if arr2d.dtype.kind == 'f': - blocks = cropped.reshape(oh, 2, ow, 2) + if need_pad: + padded = cupy.full((h2, w2), np.nan, dtype=arr2d.dtype) + padded[:h, :w] = arr2d + blocks = padded.reshape(oh, 2, ow, 2) + else: + blocks = arr2d.reshape(oh, 2, ow, 2) # Mask the sentinel back to NaN so cupy.nanmean and friends # honour it as missing-data (issue #1613). Match the upstream # NaN->sentinel rewrite gate so ``nodata=+/-inf`` is masked here. @@ -3143,7 +3165,12 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None): blocks = cupy.where( mask, cupy.float64('nan'), blocks) else: - blocks = cropped.astype(cupy.float64).reshape(oh, 2, ow, 2) + if need_pad: + blocks = cupy.full((h2, w2), cupy.float64('nan'), dtype=cupy.float64) + blocks[:h, :w] = arr2d + blocks = blocks.reshape(oh, 2, ow, 2) + else: + blocks = arr2d.astype(cupy.float64).reshape(oh, 2, ow, 2) # Integer GPU mirror of the CPU sentinel-mask: without it, # cupy.nanmean / nanmin / nanmax / nanmedian average the sentinel # value into surrounding valid cells and produce overview pixels @@ -3158,8 +3185,18 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None): info = np.iinfo(arr2d.dtype) if info.min <= nodata_int <= info.max: sentinel = np.dtype(str(arr2d.dtype)).type(nodata_int) - int_blocks = cropped.reshape(oh, 2, ow, 2) - mask = int_blocks == sentinel + if need_pad: + # Compute the integer-width sentinel mask before + # padding, so a 64-bit sentinel near INT64_MAX (not + # exactly representable in float64) still matches. + # The padded edge already holds NaN in ``blocks`` and + # stays False here so the where below leaves it alone. + int_mask = cupy.zeros((h2, w2), dtype=bool) + int_mask[:h, :w] = arr2d == sentinel + mask = int_mask.reshape(oh, 2, ow, 2) + else: + int_blocks = arr2d.reshape(oh, 2, ow, 2) + mask = int_blocks == sentinel if bool(mask.any().item()): blocks = cupy.where( mask, cupy.float64('nan'), blocks) diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 80ad9087..97df2704 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -432,9 +432,44 @@ def _resolve_int_nodata(dtype, nodata): return None +def _replicate_pad_2d(src, h2, w2): + """Pad ``src`` to ``(h2, w2)`` by replicating its trailing row/col. + + Used by the cubic overview branch to extend an odd-sized input to + even dimensions so a 0.5-zoom lands on the GDAL ceil shape and the + spline kernel sees a valid neighbourhood at the trailing edge. The + helper is dtype-preserving (no float promotion) so integer inputs + stay integer through the pad. Returns ``src`` unchanged when no + padding is needed. + """ + h, w = src.shape + if (h, w) == (h2, w2): + return src + padded = np.empty((h2, w2), dtype=src.dtype) + padded[:h, :w] = src + if h2 != h: + padded[h:, :w] = src[h - 1:h, :] + if w2 != w: + padded[:h, w:] = src[:, w - 1:w] + if h2 != h and w2 != w: + padded[h:, w:] = src[h - 1, w - 1] + return padded + + def _block_reduce_2d(arr2d, method, nodata=None): """2x block-reduce a single 2D plane using *method*. + Output dimensions follow GDAL's ceil semantics (5x5 -> 3x3): every + base pixel contributes to the overview. Inputs with an odd height + or width are padded with NaN (or float-promoted and NaN-padded for + integer dtypes) along the trailing edge so the 2x2 block reshape + still works; the nan-aware aggregations then exclude the padded + cells from the residual blocks. ``nearest`` strides directly, + ``cubic`` replicates the last row/col so the spline kernel sees a + valid neighbourhood at the trailing edge, and ``mode`` ignores + NaN-padded cells when counting. Even-sized inputs take the + existing fast path with no padding overhead (issue #2105). + When ``nodata`` is supplied, cells that equal the sentinel are treated as NaN during the reduction so the ``nan*`` aggregation routines correctly skip them. The float branch keeps any all- @@ -458,14 +493,23 @@ def _block_reduce_2d(arr2d, method, nodata=None): well-defined (issue #1975). """ h, w = arr2d.shape - h2 = (h // 2) * 2 - w2 = (w // 2) * 2 - cropped = arr2d[:h2, :w2] - oh, ow = h2 // 2, w2 // 2 + # GDAL-style ceil semantics keep the trailing row/col when the input + # is odd-sized. Floor-and-crop drops those pixels and produces an + # overview that no longer covers the source extent (issue #2105). + oh, ow = (h + 1) // 2, (w + 1) // 2 + h2, w2 = 2 * oh, 2 * ow + need_pad = (h2, w2) != (h, w) if method == 'nearest': - # Top-left pixel of each 2x2 block - return cropped[::2, ::2].copy() + # Top-left pixel of each 2x2 block. For odd-sized inputs the + # trailing 1x2 / 2x1 / 1x1 residual block's top-left is still + # the source pixel at (2*i, 2*j), so direct striding gives the + # correct ceil-shape output without padding. The trailing + # ``.copy()`` materialises the strided view so the caller owns + # a contiguous buffer (downstream nodata-mask rewrites mutate + # the result in place); without it those writes would also + # touch ``arr2d``. + return arr2d[::2, ::2].copy() if method == 'cubic': try: @@ -495,6 +539,17 @@ def _block_reduce_2d(arr2d, method, nodata=None): # NaN can carry through the spline; the result is rewritten # back to the sentinel and rounded before casting to the source # integer dtype (issue #1975, integer mirror of #1623). + # + # Odd-sized inputs are first edge-replicated to even dimensions + # so the 0.5 zoom factor lands on the GDAL ceil shape and the + # spline sees a valid neighbourhood at the trailing edge. + # Replicating sentinel values is fine: they are masked to NaN + # in the steps below alongside any in-bounds sentinel, and the + # post-zoom NaN->sentinel restore stamps the trailing overview + # pixels back to the sentinel as expected (issue #2105). The + # ``_replicate_pad_2d`` helper is module-level so the cubic + # path does not redefine a closure on every call. + if (nodata is not None and arr2d.dtype.kind == 'f' and not np.isnan(nodata)): @@ -505,7 +560,9 @@ def _block_reduce_2d(arr2d, method, nodata=None): if sentinel is not None: mask = arr2d == sentinel if mask.any(): - masked = np.where(mask, np.float64('nan'), arr2d) + src = _replicate_pad_2d(arr2d, h2, w2) + src_mask = src == sentinel + masked = np.where(src_mask, np.float64('nan'), src) with warnings.catch_warnings(): warnings.simplefilter('ignore', RuntimeWarning) result = zoom(masked, 0.5, order=3, @@ -515,13 +572,18 @@ def _block_reduce_2d(arr2d, method, nodata=None): result = result.copy() result[nan_mask] = float(nodata) return result.astype(arr2d.dtype) + # No sentinel pixels present; fall through to the + # default cubic path below so the spline runs without + # the prefilter=False NaN-safety branch. nodata_int = _resolve_int_nodata(arr2d.dtype, nodata) if nodata_int is not None: sentinel = arr2d.dtype.type(nodata_int) mask = arr2d == sentinel if mask.any(): - masked = np.where(mask, np.float64('nan'), - arr2d.astype(np.float64)) + src = _replicate_pad_2d(arr2d, h2, w2) + src_mask = src == sentinel + masked = np.where(src_mask, np.float64('nan'), + src.astype(np.float64)) with warnings.catch_warnings(): warnings.simplefilter('ignore', RuntimeWarning) result = zoom(masked, 0.5, order=3, @@ -531,7 +593,10 @@ def _block_reduce_2d(arr2d, method, nodata=None): result = np.where(nan_mask, float(nodata_int), result) return np.round(result).astype(arr2d.dtype) - return zoom(arr2d, 0.5, order=3).astype(arr2d.dtype) + # No sentinel pixels present in this integer array; fall + # through to the default cubic path below. + src = _replicate_pad_2d(arr2d, h2, w2) + return zoom(src, 0.5, order=3).astype(arr2d.dtype) if method == 'mode': # Most-common value per 2x2 block (useful for classified rasters). @@ -539,7 +604,28 @@ def _block_reduce_2d(arr2d, method, nodata=None): # how many cells equal it. argmax picks the leftmost max-count # position, which (post-sort) is the smallest tied value, matching # the prior np.unique+argmax tie-break behavior ("lowest wins"). - blocks = cropped.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4) + # + # Odd-sized inputs are float-promoted and NaN-padded so the + # residual block has at least one valid cell plus NaN cells that + # never match themselves (NaN != NaN). Counts for the NaN cells + # stay at zero, so argmax picks a valid cell, and numpy's sort + # places NaN at the end so the "lowest valid wins" tie-break + # still holds (issue #2105). The result is cast back to the + # source dtype. + if need_pad: + padded = np.full((h2, w2), np.nan, dtype=np.float64) + padded[:h, :w] = arr2d + blocks = padded.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4) + srt = np.sort(blocks, axis=-1) + counts = np.zeros(srt.shape, dtype=np.int8) + for i in range(4): + counts[..., i] = np.sum(srt == srt[..., i:i + 1], axis=-1) + pick = np.argmax(counts, axis=-1) + picked = np.take_along_axis(srt, pick[..., None], axis=-1).squeeze(-1) + if arr2d.dtype.kind in ('i', 'u'): + return np.round(picked).astype(arr2d.dtype) + return picked.astype(arr2d.dtype) + blocks = arr2d.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4) srt = np.sort(blocks, axis=-1) counts = np.empty_like(srt, dtype=np.int8) for i in range(4): @@ -547,9 +633,20 @@ def _block_reduce_2d(arr2d, method, nodata=None): pick = np.argmax(counts, axis=-1) return np.take_along_axis(srt, pick[..., None], axis=-1).squeeze(-1) - # Block reshape for mean/min/max/median + # Block reshape for mean/min/max/median. Odd-sized inputs are padded + # with NaN along the trailing edge so the reshape works on every + # input size; the padded cells are naturally excluded by the + # nan-aware aggregations below (issue #2105). The padded buffer + # keeps the source's float dtype (float32 inputs stay float32) so + # an odd-shape read does not pay a 2x intermediate-memory cost for + # a silent promote to float64. if arr2d.dtype.kind == 'f': - blocks = cropped.reshape(oh, 2, ow, 2) + if need_pad: + padded = np.full((h2, w2), np.nan, dtype=arr2d.dtype) + padded[:h, :w] = arr2d + blocks = padded.reshape(oh, 2, ow, 2) + else: + blocks = arr2d.reshape(oh, 2, ow, 2) # When a sentinel was used in place of NaN by an upstream # NaN-to-sentinel rewrite, mask it back to NaN here so nanmean / # nanmin / nanmax / nanmedian honour the missing-data semantic. @@ -569,28 +666,48 @@ def _block_reduce_2d(arr2d, method, nodata=None): # array so the caller's input is not mutated. blocks = np.where(mask, np.float64('nan'), blocks) else: - blocks = cropped.astype(np.float64).reshape(oh, 2, ow, 2) - # Integer rasters with a sentinel need the same NaN-mask the float - # branch above applies: without it, nanmean / nanmin / nanmax / - # nanmedian average the sentinel value into surrounding valid - # cells and produce overview pixels that are neither the sentinel - # nor any real measurement. The read-side int-to-NaN mask in - # ``open_geotiff`` only catches exact sentinel hits, so the - # poisoned values survive as silent garbage at every zoom level - # above 0. Gate on the sentinel being representable in the - # source integer dtype (mirrors ``_int_nodata_in_range`` in - # ``_reader.py``) so an out-of-range sentinel pair like - # ``uint16`` + ``GDAL_NODATA="-9999"`` stays a no-op rather than - # tripping ``OverflowError`` on the dtype cast. + if need_pad: + blocks = np.full((h2, w2), np.nan, dtype=np.float64) + blocks[:h, :w] = arr2d + blocks = blocks.reshape(oh, 2, ow, 2) + else: + blocks = arr2d.astype(np.float64).reshape(oh, 2, ow, 2) + # Integer rasters with a sentinel still need the NaN-mask the + # float branch above applies. The sentinel-equal cells in the + # original integer array are computed at the dtype's native + # width below (so a 64-bit sentinel near INT64_MAX still + # matches, which a float-cast comparison would miss) and the + # mask is then broadcast into the float64 ``blocks`` view. + # Without this, nanmean / nanmin / nanmax / nanmedian average + # the sentinel value into surrounding valid cells and produce + # overview pixels that the read side cannot remap because they + # no longer equal the sentinel. Gate on the sentinel being + # representable in the source integer dtype (mirrors + # ``_int_nodata_in_range`` in ``_reader.py``) so an out-of- + # range sentinel pair like ``uint16`` + ``GDAL_NODATA=-9999`` + # stays a no-op rather than tripping ``OverflowError`` on the + # dtype cast. nodata_int = _resolve_int_nodata(arr2d.dtype, nodata) if nodata_int is not None: sentinel = arr2d.dtype.type(nodata_int) - # Compare against the original integer block view so the - # equality runs at the integer's native width (avoids any - # float-cast rounding on adjacent values). The boolean - # mask broadcasts into the float64 block layout below. - int_blocks = cropped.reshape(oh, 2, ow, 2) - mask = int_blocks == sentinel + if need_pad: + # Compute the sentinel mask at the integer's native width + # before padding, so a 64-bit sentinel near INT64_MAX (which + # is not exactly representable in float64) still matches. + # The mask is then padded with False to the same (h2, w2) + # shape as the float-promoted blocks view, so the residual + # padded cells (already NaN in ``blocks``) are not also + # rewritten via the where below. + int_mask = np.zeros((h2, w2), dtype=bool) + int_mask[:h, :w] = arr2d == sentinel + mask = int_mask.reshape(oh, 2, ow, 2) + else: + # Compare against the original integer block view so the + # equality runs at the integer's native width (avoids any + # float-cast rounding on adjacent values). The boolean + # mask broadcasts into the float64 block layout below. + int_blocks = arr2d.reshape(oh, 2, ow, 2) + mask = int_blocks == sentinel if mask.any(): blocks = np.where(mask, np.float64('nan'), blocks) diff --git a/xrspatial/geotiff/tests/test_cog_overview_ceil_2105.py b/xrspatial/geotiff/tests/test_cog_overview_ceil_2105.py new file mode 100644 index 00000000..48d0fc4d --- /dev/null +++ b/xrspatial/geotiff/tests/test_cog_overview_ceil_2105.py @@ -0,0 +1,330 @@ +"""COG overviews use ceil semantics for odd-sized rasters (issue #2105). + +Before the fix, ``_block_reduce_2d`` floored both dimensions to an even +multiple and cropped the trailing row/col before reducing. A 5x5 input +became a 4x4 crop and then a 2x2 overview, silently dropping the bottom +row and right column. GDAL's overview generator uses ceil semantics +(5x5 -> 3x3) so the residual edge cells still contribute. + +These tests pin the contract that every base pixel reaches the overview, +across all resampling methods, on both even and odd input shapes, and +with the nodata sentinel honoured along the trailing edge. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest + +from xrspatial.geotiff._writer import _block_reduce_2d + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# --------------------------------------------------------------------------- +# Output-shape contract: ceil((h+1)/2, (w+1)/2) for every method and dtype. +# --------------------------------------------------------------------------- +@pytest.mark.parametrize( + "shape,expected", + [ + ((5, 5), (3, 3)), + ((5, 4), (3, 2)), + ((4, 5), (2, 3)), + ((7, 3), (4, 2)), + ((1, 1), (1, 1)), + ((1, 5), (1, 3)), + ((5, 1), (3, 1)), + ((4, 4), (2, 2)), + ((6, 6), (3, 3)), + ], +) +@pytest.mark.parametrize( + "method", ["nearest", "mean", "min", "max", "median", "mode"] +) +def test_ceil_output_shape_float(shape, expected, method): + arr = np.arange(shape[0] * shape[1], dtype=np.float32).reshape(shape) + out = _block_reduce_2d(arr, method) + assert out.shape == expected + + +@pytest.mark.parametrize( + "shape,expected", + [ + ((5, 5), (3, 3)), + ((3, 7), (2, 4)), + ((1, 1), (1, 1)), + ], +) +@pytest.mark.parametrize("method", ["nearest", "mean", "min", "max", "median", "mode"]) +def test_ceil_output_shape_int(shape, expected, method): + arr = np.arange(shape[0] * shape[1], dtype=np.int16).reshape(shape) + out = _block_reduce_2d(arr, method) + assert out.shape == expected + assert out.dtype == arr.dtype + + +def test_ceil_output_shape_cubic_float(): + pytest.importorskip("scipy") + arr = np.arange(25, dtype=np.float32).reshape(5, 5) + out = _block_reduce_2d(arr, "cubic") + assert out.shape == (3, 3) + + +def test_ceil_output_shape_cubic_int(): + pytest.importorskip("scipy") + arr = np.arange(25, dtype=np.int16).reshape(5, 5) + out = _block_reduce_2d(arr, "cubic") + assert out.shape == (3, 3) + assert out.dtype == arr.dtype + + +# --------------------------------------------------------------------------- +# Trailing-edge pixel values: the last row/col of the source must reach the +# overview rather than being dropped. +# --------------------------------------------------------------------------- +def test_nearest_5x5_preserves_trailing_pixels(): + arr = np.arange(25, dtype=np.float64).reshape(5, 5) + out = _block_reduce_2d(arr, "nearest") + # Nearest = top-left of every 2x2 block. With ceil, that's arr[::2, ::2]. + assert out.shape == (3, 3) + np.testing.assert_array_equal(out, arr[::2, ::2]) + # The trailing row/col of the source IS represented in the overview. + assert out[-1, -1] == arr[4, 4] + + +def test_mean_5x5_trailing_residual_block_uses_valid_cell(): + # Residual at row 4, col 4 is a 1x1 block containing arr[4,4] alone. + arr = np.zeros((5, 5), dtype=np.float32) + arr[4, 4] = 100.0 + out = _block_reduce_2d(arr, "mean") + assert out.shape == (3, 3) + # The corner residual is a 1x1 block, so its mean is the single pixel. + assert out[2, 2] == pytest.approx(100.0) + + +def test_max_5x5_residual_block_uses_valid_cell(): + arr = np.zeros((5, 5), dtype=np.float32) + arr[4, :] = 9.0 # trailing row should reach overview[2, :] + arr[:, 4] = 7.0 + out = _block_reduce_2d(arr, "max") + assert out.shape == (3, 3) + # Bottom overview row picks max of (arr[4, 2*j:2*j+2]) -> 9.0 everywhere. + np.testing.assert_array_equal(out[2, :2], [9.0, 9.0]) + # Right column gets max from (arr[2*i:2*i+2, 4]) -> 7.0 except corner. + assert out[0, 2] == 7.0 + assert out[1, 2] == 7.0 + # arr[4, 4] = 7.0 (set by the trailing-column sweep, after row sweep). + assert out[2, 2] == 7.0 + + +def test_min_5x5_residual_block_uses_valid_cell(): + arr = np.full((5, 5), 10.0, dtype=np.float32) + arr[4, 4] = -1.0 + out = _block_reduce_2d(arr, "min") + assert out[2, 2] == -1.0 + + +def test_median_5x5_residual_block_uses_valid_cell(): + arr = np.full((5, 5), 5.0, dtype=np.float32) + arr[4, 4] = 99.0 + out = _block_reduce_2d(arr, "median") + # 1x1 residual: median is the single value. + assert out[2, 2] == pytest.approx(99.0) + + +def test_mode_5x5_residual_block_picks_valid_cell(): + arr = np.array( + [[1, 1, 2, 2, 3], + [1, 1, 2, 2, 3], + [4, 4, 5, 5, 6], + [4, 4, 5, 5, 6], + [7, 7, 8, 8, 9]], + dtype=np.int16, + ) + out = _block_reduce_2d(arr, "mode") + assert out.shape == (3, 3) + # Trailing 1x1 block at (4,4) is just the value 9. + assert out[2, 2] == 9 + # Trailing column (rows 0..1 / 2..3, col 4) is 1x1 blocks containing 3 / 6. + assert out[0, 2] == 3 + assert out[1, 2] == 6 + # Trailing row (col 0..1 / 2..3, row 4) is 1x2 blocks: [7,7] -> 7, [8,8] -> 8. + assert out[2, 0] == 7 + assert out[2, 1] == 8 + + +def test_cubic_5x5_covers_source_extent(): + pytest.importorskip("scipy") + # Smoothly varying ramp so cubic interpolation is well-behaved. + arr = np.arange(25, dtype=np.float32).reshape(5, 5) + out = _block_reduce_2d(arr, "cubic") + assert out.shape == (3, 3) + # Output should not be entirely zero/NaN, and trailing corner should + # roughly reflect the high source values around (4, 4). + assert np.isfinite(out).all() + assert out[2, 2] > out[0, 0] + + +# --------------------------------------------------------------------------- +# Sentinel masking still works on odd-sized inputs. +# --------------------------------------------------------------------------- +def test_mean_5x5_with_nodata_excludes_sentinel_in_residual(): + sentinel = -9999.0 + arr = np.full((5, 5), 1.0, dtype=np.float32) + arr[4, 4] = sentinel + out = _block_reduce_2d(arr, "mean", nodata=sentinel) + # The 1x1 trailing residual is all-sentinel -> all-NaN block, which + # the post-overview rewrite (in the caller) handles. Here we just + # confirm the sentinel did not bias the reduction: out[2, 2] is NaN, + # not (1.0 + sentinel)/2 or similar. + assert np.isnan(out[2, 2]) + # Other overview cells with at least one valid neighbour stay valid. + assert out[0, 0] == pytest.approx(1.0) + + +def test_min_int_5x5_with_nodata_does_not_select_sentinel_in_residual(): + sentinel = -9999 + arr = np.full((5, 5), 10, dtype=np.int16) + # Trailing column has a sentinel + valid cell in 2x1 residual blocks. + arr[0, 4] = sentinel + arr[2, 4] = sentinel + arr[4, 4] = sentinel + out = _block_reduce_2d(arr, "min", nodata=sentinel) + # The 2x1 residual at (0..1, 4) is [-9999, 10] -> min ignoring sentinel = 10. + assert out[0, 2] == 10 + assert out[1, 2] == 10 + # The 1x1 residual at (4, 4) is sentinel -> rewritten to sentinel. + assert out[2, 2] == sentinel + + +def test_int64_sentinel_near_max_masks_in_padded_branch(): + # INT64_MAX is not exactly representable in float64: float(INT64_MAX) + # rounds up to 2**63, which would miss the sentinel if the mask were + # computed against the float-padded view. The reader must compute the + # mask at native integer width before padding. + sentinel = np.iinfo(np.int64).max + arr = np.full((5, 5), 10, dtype=np.int64) + arr[0, 0] = sentinel + # Pad branch fires because shape (5, 5) is odd. + out = _block_reduce_2d(arr, "min", nodata=sentinel) + # Top-left 2x2 block has 1 sentinel + 3 valid 10s. nanmin -> 10 + # (sentinel masked out). If the mask missed the sentinel, the int64 + # value would be cast to float and the float min would pick up the + # sentinel's value or produce noise; either way out[0,0] would not + # be 10. + assert out[0, 0] == 10 + + +def test_uint64_sentinel_near_max_masks_in_padded_branch(): + # UINT64_MAX = 2**64 - 1 is also not exactly representable in float64 + # (float(UINT64_MAX) rounds up to 2**64). The native-width mask path + # must catch the sentinel for unsigned 64-bit dtypes too. + sentinel = np.iinfo(np.uint64).max + arr = np.full((5, 5), 10, dtype=np.uint64) + arr[0, 0] = sentinel + out = _block_reduce_2d(arr, "min", nodata=sentinel) + assert out[0, 0] == 10 + + +def test_float32_padded_branch_keeps_source_dtype(): + # The padded mean/min/max/median branch used to allocate a float64 + # NaN buffer regardless of the source dtype, doubling intermediate + # memory for an odd-shape float32 read. Verify the helper now keeps + # the source dtype across the pad so a float32 input round-trips as + # float32. The contract is checked end-to-end via the output dtype. + arr = np.arange(25, dtype=np.float32).reshape(5, 5) + out = _block_reduce_2d(arr, "mean") + assert out.dtype == np.float32 + # And the values still match what a manual ceil-mean would produce + # for the top-left 2x2 block. + top_left_mean = float(arr[:2, :2].mean()) + assert out[0, 0] == pytest.approx(top_left_mean) + + +def test_max_int_5x5_with_nodata_does_not_select_sentinel_in_residual(): + sentinel = -9999 + arr = np.full((5, 5), 10, dtype=np.int16) + arr[4, 4] = sentinel + out = _block_reduce_2d(arr, "max", nodata=sentinel) + # The 1x1 corner block is all-sentinel -> sentinel. + assert out[2, 2] == sentinel + # Adjacent 2x1 residual (rows 4, cols 0..1) has valid values only. + assert out[2, 0] == 10 + assert out[2, 1] == 10 + + +# --------------------------------------------------------------------------- +# Even-sized inputs keep the existing fast-path semantics. +# --------------------------------------------------------------------------- +@pytest.mark.parametrize( + "method", ["nearest", "mean", "min", "max", "median", "mode"] +) +def test_even_input_matches_legacy_2x2_behaviour(method): + rng = np.random.default_rng(2105) + arr = rng.integers(0, 100, size=(6, 8)).astype(np.int16) + out = _block_reduce_2d(arr, method) + assert out.shape == (3, 4) + # Spot-check a single block matches a direct reduction. + block = arr[0:2, 0:2] + if method == "nearest": + assert out[0, 0] == block[0, 0] + elif method == "mean": + # Integer outputs are rounded after the float reduction. + assert out[0, 0] == int(round(block.astype(np.float64).mean())) + elif method == "min": + assert out[0, 0] == block.min() + elif method == "max": + assert out[0, 0] == block.max() + elif method == "median": + assert out[0, 0] == int(round(float(np.median(block)))) + elif method == "mode": + # Lowest-value tie-break for unique cells. + vals, counts = np.unique(block, return_counts=True) + expected = vals[np.argmax(counts)] + assert out[0, 0] == expected + + +# --------------------------------------------------------------------------- +# GPU mirror: identical shape and identical values on odd-sized inputs. +# --------------------------------------------------------------------------- +@_gpu_only +@pytest.mark.parametrize( + "method", ["nearest", "mean", "min", "max", "median"] +) +@pytest.mark.parametrize("shape", [(5, 5), (5, 4), (4, 5), (7, 3)]) +def test_gpu_block_reduce_matches_cpu_on_odd_shapes(method, shape): + import cupy + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + rng = np.random.default_rng(2105) + arr = rng.random(shape, dtype=np.float32) + cpu_out = _block_reduce_2d(arr, method) + gpu_out = _block_reduce_2d_gpu(cupy.asarray(arr), method).get() + assert gpu_out.shape == cpu_out.shape + np.testing.assert_allclose(gpu_out, cpu_out, equal_nan=True, rtol=1e-6) + + +@_gpu_only +def test_gpu_block_reduce_int_5x5_with_nodata(): + import cupy + from xrspatial.geotiff._gpu_decode import _block_reduce_2d_gpu + + sentinel = -9999 + arr = np.full((5, 5), 10, dtype=np.int16) + arr[4, 4] = sentinel + cpu_out = _block_reduce_2d(arr, "max", nodata=sentinel) + gpu_out = _block_reduce_2d_gpu(cupy.asarray(arr), "max", nodata=sentinel).get() + np.testing.assert_array_equal(gpu_out, cpu_out) diff --git a/xrspatial/geotiff/tests/test_mode_overview_perf.py b/xrspatial/geotiff/tests/test_mode_overview_perf.py index a24517cd..5b794727 100644 --- a/xrspatial/geotiff/tests/test_mode_overview_perf.py +++ b/xrspatial/geotiff/tests/test_mode_overview_perf.py @@ -14,24 +14,22 @@ def _mode_resample_reference(arr2d): - """Original per-pixel reference implementation (pre-vectorization). + """Per-pixel mode reference using GDAL ceil semantics (issue #2105). - Copied verbatim from the loop body that used ``np.unique`` per 2x2 - block, with the same crop-to-even-dims behavior as the production - function. + Walks each ceil-shaped output block over the source array, takes the + intersection of the 2x2 block window with the actual source extent, + and picks the most-frequent value with the "lowest wins" tie-break + that the vectorized production path also uses. """ h, w = arr2d.shape - h2 = (h // 2) * 2 - w2 = (w // 2) * 2 - cropped = arr2d[:h2, :w2] - oh, ow = h2 // 2, w2 // 2 - blocks = ( - cropped.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4) - ) + oh, ow = (h + 1) // 2, (w + 1) // 2 out = np.empty((oh, ow), dtype=arr2d.dtype) for r in range(oh): for c in range(ow): - vals, counts = np.unique(blocks[r, c], return_counts=True) + r0, r1 = 2 * r, min(2 * r + 2, h) + c0, c1 = 2 * c, min(2 * c + 2, w) + window = arr2d[r0:r1, c0:c1].ravel() + vals, counts = np.unique(window, return_counts=True) out[r, c] = vals[counts.argmax()] return out @@ -48,12 +46,6 @@ def test_bit_exact_match_reference(dtype, shape): hi = min(info.max, 7) arr = rng.integers(lo, hi + 1, size=shape, dtype=dtype) - # Skip pathological 1x1 / 1xN / Nx1 cases that crop to empty. - h2 = (shape[0] // 2) * 2 - w2 = (shape[1] // 2) * 2 - if h2 == 0 or w2 == 0: - return - expected = _mode_resample_reference(arr) actual = _block_reduce_2d(arr, 'mode')