xarray-contrib · brendancol · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py
@@ -3090,26 +3090,37 @@ def gpu_compress_tiles(d_image, tile_width, tile_height,
 def _block_reduce_2d_gpu(arr2d, method, nodata=None):
     """2x block-reduce a single 2D CuPy plane using *method*.
 
+    Output dimensions follow GDAL's ceil semantics (5x5 -> 3x3) so the
+    overview pyramid covers the full source extent for odd-sized rasters.
+    Odd inputs are NaN-padded along the trailing edge (float-promoted for
+    integer dtypes) so the 2x2 block reshape works and the residual block
+    is reduced via the same nan-aware aggregations (issue #2105). Mirrors
+    the CPU helper :func:`xrspatial.geotiff._writer._block_reduce_2d` so
+    the two backends produce identical overviews.
+
     When ``nodata`` is supplied and ``arr2d`` is a float dtype, cells that
     equal the sentinel are masked back to NaN before the reduction so the
-    ``cupy.nan*`` aggregation routines correctly skip them. Mirrors the
-    CPU helper :func:`xrspatial.geotiff._writer._block_reduce_2d` so the
-    two backends produce identical overviews when ``nodata`` is set.
+    ``cupy.nan*`` aggregation routines correctly skip them.
     """
     import cupy
     import numpy as np
 
     h, w = arr2d.shape
-    h2 = (h // 2) * 2
-    w2 = (w // 2) * 2
-    cropped = arr2d[:h2, :w2]
-    oh, ow = h2 // 2, w2 // 2
+    # Ceil semantics match GDAL and the CPU mirror in _writer.
+    oh, ow = (h + 1) // 2, (w + 1) // 2
+    h2, w2 = 2 * oh, 2 * ow
+    need_pad = (h2, w2) != (h, w)
 
     if method == 'nearest':
-        return cropped[::2, ::2].copy()
+        # Top-left pixel of each 2x2 block; direct stride keeps the
+        # trailing row/col for odd-sized inputs (issue #2105). The
+        # ``.copy()`` materialises a contiguous device buffer so
+        # downstream nodata-mask rewrites don't touch ``arr2d``.
+        return arr2d[::2, ::2].copy()
 
     if method == 'mode':
-        # Mode is expensive on GPU; fall back to CPU
+        # Mode is expensive on GPU; fall back to CPU. The CPU helper
+        # now handles odd-sized inputs natively.
         cpu_arr = arr2d.get()
         from ._writer import _block_reduce_2d
         cpu_result = _block_reduce_2d(cpu_arr, 'mode', nodata=nodata)
@@ -3120,15 +3131,26 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None):
         # factors with the same prefilter=False NaN-safety the CPU
         # helper uses for issue #1623. Fall back to CPU so cubic on
         # the GPU writer path produces the same overview bytes as the
-        # CPU writer and so the sentinel handling matches.
+        # CPU writer and so the sentinel handling matches. The CPU
+        # helper handles odd-sized inputs via edge-replicate padding.
         cpu_arr = arr2d.get()
         from ._writer import _block_reduce_2d
         cpu_result = _block_reduce_2d(cpu_arr, 'cubic', nodata=nodata)
         return cupy.asarray(cpu_result)
 
-    # Block reshape for mean/min/max/median
+    # Block reshape for mean/min/max/median. Odd-sized inputs get a
+    # NaN-padded trailing edge so the residual block reduces correctly;
+    # the nan-aware reductions exclude the padded cells. The padded
+    # buffer keeps the source float dtype (float32 stays float32) so an
+    # odd-shape GPU read does not pay a 2x device-memory cost for a
+    # silent promote to float64.
     if arr2d.dtype.kind == 'f':
-        blocks = cropped.reshape(oh, 2, ow, 2)
+        if need_pad:
+            padded = cupy.full((h2, w2), np.nan, dtype=arr2d.dtype)
+            padded[:h, :w] = arr2d
+            blocks = padded.reshape(oh, 2, ow, 2)
+        else:
+            blocks = arr2d.reshape(oh, 2, ow, 2)
         # Mask the sentinel back to NaN so cupy.nanmean and friends
         # honour it as missing-data (issue #1613). Match the upstream
         # NaN->sentinel rewrite gate so ``nodata=+/-inf`` is masked here.
@@ -3143,7 +3165,12 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None):
                     blocks = cupy.where(
                         mask, cupy.float64('nan'), blocks)
     else:
-        blocks = cropped.astype(cupy.float64).reshape(oh, 2, ow, 2)
+        if need_pad:
+            blocks = cupy.full((h2, w2), cupy.float64('nan'), dtype=cupy.float64)
+            blocks[:h, :w] = arr2d
+            blocks = blocks.reshape(oh, 2, ow, 2)
+        else:
+            blocks = arr2d.astype(cupy.float64).reshape(oh, 2, ow, 2)
         # Integer GPU mirror of the CPU sentinel-mask: without it,
         # cupy.nanmean / nanmin / nanmax / nanmedian average the sentinel
         # value into surrounding valid cells and produce overview pixels
@@ -3158,8 +3185,18 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None):
             info = np.iinfo(arr2d.dtype)
             if info.min <= nodata_int <= info.max:
                 sentinel = np.dtype(str(arr2d.dtype)).type(nodata_int)
-                int_blocks = cropped.reshape(oh, 2, ow, 2)
-                mask = int_blocks == sentinel
+                if need_pad:
+                    # Compute the integer-width sentinel mask before
+                    # padding, so a 64-bit sentinel near INT64_MAX (not
+                    # exactly representable in float64) still matches.
+                    # The padded edge already holds NaN in ``blocks`` and
+                    # stays False here so the where below leaves it alone.
+                    int_mask = cupy.zeros((h2, w2), dtype=bool)
+                    int_mask[:h, :w] = arr2d == sentinel
+                    mask = int_mask.reshape(oh, 2, ow, 2)
+                else:
+                    int_blocks = arr2d.reshape(oh, 2, ow, 2)
+                    mask = int_blocks == sentinel
                 if bool(mask.any().item()):
                     blocks = cupy.where(
                         mask, cupy.float64('nan'), blocks)

diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py
@@ -432,9 +432,44 @@ def _resolve_int_nodata(dtype, nodata):
     return None
 
 
+def _replicate_pad_2d(src, h2, w2):
+    """Pad ``src`` to ``(h2, w2)`` by replicating its trailing row/col.
+
+    Used by the cubic overview branch to extend an odd-sized input to
+    even dimensions so a 0.5-zoom lands on the GDAL ceil shape and the
+    spline kernel sees a valid neighbourhood at the trailing edge. The
+    helper is dtype-preserving (no float promotion) so integer inputs
+    stay integer through the pad. Returns ``src`` unchanged when no
+    padding is needed.
+    """
+    h, w = src.shape
+    if (h, w) == (h2, w2):
+        return src
+    padded = np.empty((h2, w2), dtype=src.dtype)
+    padded[:h, :w] = src
+    if h2 != h:
+        padded[h:, :w] = src[h - 1:h, :]
+    if w2 != w:
+        padded[:h, w:] = src[:, w - 1:w]
+    if h2 != h and w2 != w:
+        padded[h:, w:] = src[h - 1, w - 1]
+    return padded
+
+
 def _block_reduce_2d(arr2d, method, nodata=None):
     """2x block-reduce a single 2D plane using *method*.
 
+    Output dimensions follow GDAL's ceil semantics (5x5 -> 3x3): every
+    base pixel contributes to the overview. Inputs with an odd height
+    or width are padded with NaN (or float-promoted and NaN-padded for
+    integer dtypes) along the trailing edge so the 2x2 block reshape
+    still works; the nan-aware aggregations then exclude the padded
+    cells from the residual blocks. ``nearest`` strides directly,
+    ``cubic`` replicates the last row/col so the spline kernel sees a
+    valid neighbourhood at the trailing edge, and ``mode`` ignores
+    NaN-padded cells when counting. Even-sized inputs take the
+    existing fast path with no padding overhead (issue #2105).
+
     When ``nodata`` is supplied, cells that equal the sentinel are
     treated as NaN during the reduction so the ``nan*`` aggregation
     routines correctly skip them. The float branch keeps any all-
@@ -458,14 +493,23 @@ def _block_reduce_2d(arr2d, method, nodata=None):
     well-defined (issue #1975).
     """
     h, w = arr2d.shape
-    h2 = (h // 2) * 2
-    w2 = (w // 2) * 2
-    cropped = arr2d[:h2, :w2]
-    oh, ow = h2 // 2, w2 // 2
+    # GDAL-style ceil semantics keep the trailing row/col when the input
+    # is odd-sized. Floor-and-crop drops those pixels and produces an
+    # overview that no longer covers the source extent (issue #2105).
+    oh, ow = (h + 1) // 2, (w + 1) // 2
+    h2, w2 = 2 * oh, 2 * ow
+    need_pad = (h2, w2) != (h, w)
 
     if method == 'nearest':
-        # Top-left pixel of each 2x2 block
-        return cropped[::2, ::2].copy()
+        # Top-left pixel of each 2x2 block. For odd-sized inputs the
+        # trailing 1x2 / 2x1 / 1x1 residual block's top-left is still
+        # the source pixel at (2*i, 2*j), so direct striding gives the
+        # correct ceil-shape output without padding. The trailing
+        # ``.copy()`` materialises the strided view so the caller owns
+        # a contiguous buffer (downstream nodata-mask rewrites mutate
+        # the result in place); without it those writes would also
+        # touch ``arr2d``.
+        return arr2d[::2, ::2].copy()
 
     if method == 'cubic':
         try:
@@ -495,6 +539,17 @@ def _block_reduce_2d(arr2d, method, nodata=None):
         # NaN can carry through the spline; the result is rewritten
         # back to the sentinel and rounded before casting to the source
         # integer dtype (issue #1975, integer mirror of #1623).
+        #
+        # Odd-sized inputs are first edge-replicated to even dimensions
+        # so the 0.5 zoom factor lands on the GDAL ceil shape and the
+        # spline sees a valid neighbourhood at the trailing edge.
+        # Replicating sentinel values is fine: they are masked to NaN
+        # in the steps below alongside any in-bounds sentinel, and the
+        # post-zoom NaN->sentinel restore stamps the trailing overview
+        # pixels back to the sentinel as expected (issue #2105). The
+        # ``_replicate_pad_2d`` helper is module-level so the cubic
+        # path does not redefine a closure on every call.
+
         if (nodata is not None
                 and arr2d.dtype.kind == 'f'
                 and not np.isnan(nodata)):
@@ -505,7 +560,9 @@ def _block_reduce_2d(arr2d, method, nodata=None):
             if sentinel is not None:
                 mask = arr2d == sentinel
                 if mask.any():
-                    masked = np.where(mask, np.float64('nan'), arr2d)
+                    src = _replicate_pad_2d(arr2d, h2, w2)
+                    src_mask = src == sentinel
+                    masked = np.where(src_mask, np.float64('nan'), src)
                     with warnings.catch_warnings():
                         warnings.simplefilter('ignore', RuntimeWarning)
                         result = zoom(masked, 0.5, order=3,
@@ -515,13 +572,18 @@ def _block_reduce_2d(arr2d, method, nodata=None):
                         result = result.copy()
                         result[nan_mask] = float(nodata)
                     return result.astype(arr2d.dtype)
+                # No sentinel pixels present; fall through to the
+                # default cubic path below so the spline runs without
+                # the prefilter=False NaN-safety branch.
         nodata_int = _resolve_int_nodata(arr2d.dtype, nodata)
         if nodata_int is not None:
             sentinel = arr2d.dtype.type(nodata_int)
             mask = arr2d == sentinel
             if mask.any():
-                masked = np.where(mask, np.float64('nan'),
-                                  arr2d.astype(np.float64))
+                src = _replicate_pad_2d(arr2d, h2, w2)
+                src_mask = src == sentinel
+                masked = np.where(src_mask, np.float64('nan'),
+                                  src.astype(np.float64))
                 with warnings.catch_warnings():
                     warnings.simplefilter('ignore', RuntimeWarning)
                     result = zoom(masked, 0.5, order=3,
@@ -531,25 +593,60 @@ def _block_reduce_2d(arr2d, method, nodata=None):
                     result = np.where(nan_mask, float(nodata_int),
                                       result)
                 return np.round(result).astype(arr2d.dtype)
-        return zoom(arr2d, 0.5, order=3).astype(arr2d.dtype)
+            # No sentinel pixels present in this integer array; fall
+            # through to the default cubic path below.
+        src = _replicate_pad_2d(arr2d, h2, w2)
+        return zoom(src, 0.5, order=3).astype(arr2d.dtype)
 
     if method == 'mode':
         # Most-common value per 2x2 block (useful for classified rasters).
         # Vectorized: sort each 4-cell block, then for each position count
         # how many cells equal it. argmax picks the leftmost max-count
         # position, which (post-sort) is the smallest tied value, matching
         # the prior np.unique+argmax tie-break behavior ("lowest wins").
-        blocks = cropped.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4)
+        #
+        # Odd-sized inputs are float-promoted and NaN-padded so the
+        # residual block has at least one valid cell plus NaN cells that
+        # never match themselves (NaN != NaN). Counts for the NaN cells
+        # stay at zero, so argmax picks a valid cell, and numpy's sort
+        # places NaN at the end so the "lowest valid wins" tie-break
+        # still holds (issue #2105). The result is cast back to the
+        # source dtype.
+        if need_pad:
+            padded = np.full((h2, w2), np.nan, dtype=np.float64)
+            padded[:h, :w] = arr2d
+            blocks = padded.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4)
+            srt = np.sort(blocks, axis=-1)
+            counts = np.zeros(srt.shape, dtype=np.int8)
+            for i in range(4):
+                counts[..., i] = np.sum(srt == srt[..., i:i + 1], axis=-1)
+            pick = np.argmax(counts, axis=-1)
+            picked = np.take_along_axis(srt, pick[..., None], axis=-1).squeeze(-1)
+            if arr2d.dtype.kind in ('i', 'u'):
+                return np.round(picked).astype(arr2d.dtype)
+            return picked.astype(arr2d.dtype)
+        blocks = arr2d.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4)
         srt = np.sort(blocks, axis=-1)
         counts = np.empty_like(srt, dtype=np.int8)
         for i in range(4):
             counts[..., i] = np.sum(srt == srt[..., i:i + 1], axis=-1)
         pick = np.argmax(counts, axis=-1)
         return np.take_along_axis(srt, pick[..., None], axis=-1).squeeze(-1)
 
-    # Block reshape for mean/min/max/median
+    # Block reshape for mean/min/max/median. Odd-sized inputs are padded
+    # with NaN along the trailing edge so the reshape works on every
+    # input size; the padded cells are naturally excluded by the
+    # nan-aware aggregations below (issue #2105). The padded buffer
+    # keeps the source's float dtype (float32 inputs stay float32) so
+    # an odd-shape read does not pay a 2x intermediate-memory cost for
+    # a silent promote to float64.
     if arr2d.dtype.kind == 'f':
-        blocks = cropped.reshape(oh, 2, ow, 2)
+        if need_pad:
+            padded = np.full((h2, w2), np.nan, dtype=arr2d.dtype)
+            padded[:h, :w] = arr2d
+            blocks = padded.reshape(oh, 2, ow, 2)
+        else:
+            blocks = arr2d.reshape(oh, 2, ow, 2)
         # When a sentinel was used in place of NaN by an upstream
         # NaN-to-sentinel rewrite, mask it back to NaN here so nanmean /
         # nanmin / nanmax / nanmedian honour the missing-data semantic.
@@ -569,28 +666,48 @@ def _block_reduce_2d(arr2d, method, nodata=None):
                     # array so the caller's input is not mutated.
                     blocks = np.where(mask, np.float64('nan'), blocks)
     else:
-        blocks = cropped.astype(np.float64).reshape(oh, 2, ow, 2)
-        # Integer rasters with a sentinel need the same NaN-mask the float
-        # branch above applies: without it, nanmean / nanmin / nanmax /
-        # nanmedian average the sentinel value into surrounding valid
-        # cells and produce overview pixels that are neither the sentinel
-        # nor any real measurement. The read-side int-to-NaN mask in
-        # ``open_geotiff`` only catches exact sentinel hits, so the
-        # poisoned values survive as silent garbage at every zoom level
-        # above 0. Gate on the sentinel being representable in the
-        # source integer dtype (mirrors ``_int_nodata_in_range`` in
-        # ``_reader.py``) so an out-of-range sentinel pair like
-        # ``uint16`` + ``GDAL_NODATA="-9999"`` stays a no-op rather than
-        # tripping ``OverflowError`` on the dtype cast.
+        if need_pad:
+            blocks = np.full((h2, w2), np.nan, dtype=np.float64)
+            blocks[:h, :w] = arr2d
+            blocks = blocks.reshape(oh, 2, ow, 2)
+        else:
+            blocks = arr2d.astype(np.float64).reshape(oh, 2, ow, 2)
+        # Integer rasters with a sentinel still need the NaN-mask the
+        # float branch above applies. The sentinel-equal cells in the
+        # original integer array are computed at the dtype's native
+        # width below (so a 64-bit sentinel near INT64_MAX still
+        # matches, which a float-cast comparison would miss) and the
+        # mask is then broadcast into the float64 ``blocks`` view.
+        # Without this, nanmean / nanmin / nanmax / nanmedian average
+        # the sentinel value into surrounding valid cells and produce
+        # overview pixels that the read side cannot remap because they
+        # no longer equal the sentinel. Gate on the sentinel being
+        # representable in the source integer dtype (mirrors
+        # ``_int_nodata_in_range`` in ``_reader.py``) so an out-of-
+        # range sentinel pair like ``uint16`` + ``GDAL_NODATA=-9999``
+        # stays a no-op rather than tripping ``OverflowError`` on the
+        # dtype cast.
         nodata_int = _resolve_int_nodata(arr2d.dtype, nodata)
         if nodata_int is not None:
             sentinel = arr2d.dtype.type(nodata_int)
-            # Compare against the original integer block view so the
-            # equality runs at the integer's native width (avoids any
-            # float-cast rounding on adjacent values). The boolean
-            # mask broadcasts into the float64 block layout below.
-            int_blocks = cropped.reshape(oh, 2, ow, 2)
-            mask = int_blocks == sentinel
+            if need_pad:
+                # Compute the sentinel mask at the integer's native width
+                # before padding, so a 64-bit sentinel near INT64_MAX (which
+                # is not exactly representable in float64) still matches.
+                # The mask is then padded with False to the same (h2, w2)
+                # shape as the float-promoted blocks view, so the residual
+                # padded cells (already NaN in ``blocks``) are not also
+                # rewritten via the where below.
+                int_mask = np.zeros((h2, w2), dtype=bool)
+                int_mask[:h, :w] = arr2d == sentinel
+                mask = int_mask.reshape(oh, 2, ow, 2)
+            else:
+                # Compare against the original integer block view so the
+                # equality runs at the integer's native width (avoids any
+                # float-cast rounding on adjacent values). The boolean
+                # mask broadcasts into the float64 block layout below.
+                int_blocks = arr2d.reshape(oh, 2, ow, 2)
+                mask = int_blocks == sentinel
             if mask.any():
                 blocks = np.where(mask, np.float64('nan'), blocks)