Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 52 additions & 15 deletions xrspatial/geotiff/_gpu_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3090,26 +3090,37 @@ def gpu_compress_tiles(d_image, tile_width, tile_height,
def _block_reduce_2d_gpu(arr2d, method, nodata=None):
"""2x block-reduce a single 2D CuPy plane using *method*.

Output dimensions follow GDAL's ceil semantics (5x5 -> 3x3) so the
overview pyramid covers the full source extent for odd-sized rasters.
Odd inputs are NaN-padded along the trailing edge (float-promoted for
integer dtypes) so the 2x2 block reshape works and the residual block
is reduced via the same nan-aware aggregations (issue #2105). Mirrors
the CPU helper :func:`xrspatial.geotiff._writer._block_reduce_2d` so
the two backends produce identical overviews.

When ``nodata`` is supplied and ``arr2d`` is a float dtype, cells that
equal the sentinel are masked back to NaN before the reduction so the
``cupy.nan*`` aggregation routines correctly skip them. Mirrors the
CPU helper :func:`xrspatial.geotiff._writer._block_reduce_2d` so the
two backends produce identical overviews when ``nodata`` is set.
``cupy.nan*`` aggregation routines correctly skip them.
"""
import cupy
import numpy as np

h, w = arr2d.shape
h2 = (h // 2) * 2
w2 = (w // 2) * 2
cropped = arr2d[:h2, :w2]
oh, ow = h2 // 2, w2 // 2
# Ceil semantics match GDAL and the CPU mirror in _writer.
oh, ow = (h + 1) // 2, (w + 1) // 2
h2, w2 = 2 * oh, 2 * ow
need_pad = (h2, w2) != (h, w)

if method == 'nearest':
return cropped[::2, ::2].copy()
# Top-left pixel of each 2x2 block; direct stride keeps the
# trailing row/col for odd-sized inputs (issue #2105). The
# ``.copy()`` materialises a contiguous device buffer so
# downstream nodata-mask rewrites don't touch ``arr2d``.
return arr2d[::2, ::2].copy()

if method == 'mode':
# Mode is expensive on GPU; fall back to CPU
# Mode is expensive on GPU; fall back to CPU. The CPU helper
# now handles odd-sized inputs natively.
cpu_arr = arr2d.get()
from ._writer import _block_reduce_2d
cpu_result = _block_reduce_2d(cpu_arr, 'mode', nodata=nodata)
Expand All @@ -3120,15 +3131,26 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None):
# factors with the same prefilter=False NaN-safety the CPU
# helper uses for issue #1623. Fall back to CPU so cubic on
# the GPU writer path produces the same overview bytes as the
# CPU writer and so the sentinel handling matches.
# CPU writer and so the sentinel handling matches. The CPU
# helper handles odd-sized inputs via edge-replicate padding.
cpu_arr = arr2d.get()
from ._writer import _block_reduce_2d
cpu_result = _block_reduce_2d(cpu_arr, 'cubic', nodata=nodata)
return cupy.asarray(cpu_result)

# Block reshape for mean/min/max/median
# Block reshape for mean/min/max/median. Odd-sized inputs get a
# NaN-padded trailing edge so the residual block reduces correctly;
# the nan-aware reductions exclude the padded cells. The padded
# buffer keeps the source float dtype (float32 stays float32) so an
# odd-shape GPU read does not pay a 2x device-memory cost for a
# silent promote to float64.
if arr2d.dtype.kind == 'f':
blocks = cropped.reshape(oh, 2, ow, 2)
if need_pad:
padded = cupy.full((h2, w2), np.nan, dtype=arr2d.dtype)
padded[:h, :w] = arr2d
blocks = padded.reshape(oh, 2, ow, 2)
else:
blocks = arr2d.reshape(oh, 2, ow, 2)
# Mask the sentinel back to NaN so cupy.nanmean and friends
# honour it as missing-data (issue #1613). Match the upstream
# NaN->sentinel rewrite gate so ``nodata=+/-inf`` is masked here.
Expand All @@ -3143,7 +3165,12 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None):
blocks = cupy.where(
mask, cupy.float64('nan'), blocks)
else:
blocks = cropped.astype(cupy.float64).reshape(oh, 2, ow, 2)
if need_pad:
blocks = cupy.full((h2, w2), cupy.float64('nan'), dtype=cupy.float64)
blocks[:h, :w] = arr2d
blocks = blocks.reshape(oh, 2, ow, 2)
else:
blocks = arr2d.astype(cupy.float64).reshape(oh, 2, ow, 2)
# Integer GPU mirror of the CPU sentinel-mask: without it,
# cupy.nanmean / nanmin / nanmax / nanmedian average the sentinel
# value into surrounding valid cells and produce overview pixels
Expand All @@ -3158,8 +3185,18 @@ def _block_reduce_2d_gpu(arr2d, method, nodata=None):
info = np.iinfo(arr2d.dtype)
if info.min <= nodata_int <= info.max:
sentinel = np.dtype(str(arr2d.dtype)).type(nodata_int)
int_blocks = cropped.reshape(oh, 2, ow, 2)
mask = int_blocks == sentinel
if need_pad:
# Compute the integer-width sentinel mask before
# padding, so a 64-bit sentinel near INT64_MAX (not
# exactly representable in float64) still matches.
# The padded edge already holds NaN in ``blocks`` and
# stays False here so the where below leaves it alone.
int_mask = cupy.zeros((h2, w2), dtype=bool)
int_mask[:h, :w] = arr2d == sentinel
mask = int_mask.reshape(oh, 2, ow, 2)
else:
int_blocks = arr2d.reshape(oh, 2, ow, 2)
mask = int_blocks == sentinel
if bool(mask.any().item()):
blocks = cupy.where(
mask, cupy.float64('nan'), blocks)
Expand Down
181 changes: 149 additions & 32 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,44 @@ def _resolve_int_nodata(dtype, nodata):
return None


def _replicate_pad_2d(src, h2, w2):
"""Pad ``src`` to ``(h2, w2)`` by replicating its trailing row/col.

Used by the cubic overview branch to extend an odd-sized input to
even dimensions so a 0.5-zoom lands on the GDAL ceil shape and the
spline kernel sees a valid neighbourhood at the trailing edge. The
helper is dtype-preserving (no float promotion) so integer inputs
stay integer through the pad. Returns ``src`` unchanged when no
padding is needed.
"""
h, w = src.shape
if (h, w) == (h2, w2):
return src
padded = np.empty((h2, w2), dtype=src.dtype)
padded[:h, :w] = src
if h2 != h:
padded[h:, :w] = src[h - 1:h, :]
if w2 != w:
padded[:h, w:] = src[:, w - 1:w]
if h2 != h and w2 != w:
padded[h:, w:] = src[h - 1, w - 1]
return padded


def _block_reduce_2d(arr2d, method, nodata=None):
"""2x block-reduce a single 2D plane using *method*.

Output dimensions follow GDAL's ceil semantics (5x5 -> 3x3): every
base pixel contributes to the overview. Inputs with an odd height
or width are padded with NaN (or float-promoted and NaN-padded for
integer dtypes) along the trailing edge so the 2x2 block reshape
still works; the nan-aware aggregations then exclude the padded
cells from the residual blocks. ``nearest`` strides directly,
``cubic`` replicates the last row/col so the spline kernel sees a
valid neighbourhood at the trailing edge, and ``mode`` ignores
NaN-padded cells when counting. Even-sized inputs take the
existing fast path with no padding overhead (issue #2105).

When ``nodata`` is supplied, cells that equal the sentinel are
treated as NaN during the reduction so the ``nan*`` aggregation
routines correctly skip them. The float branch keeps any all-
Expand All @@ -458,14 +493,23 @@ def _block_reduce_2d(arr2d, method, nodata=None):
well-defined (issue #1975).
"""
h, w = arr2d.shape
h2 = (h // 2) * 2
w2 = (w // 2) * 2
cropped = arr2d[:h2, :w2]
oh, ow = h2 // 2, w2 // 2
# GDAL-style ceil semantics keep the trailing row/col when the input
# is odd-sized. Floor-and-crop drops those pixels and produces an
# overview that no longer covers the source extent (issue #2105).
oh, ow = (h + 1) // 2, (w + 1) // 2
h2, w2 = 2 * oh, 2 * ow
need_pad = (h2, w2) != (h, w)

if method == 'nearest':
# Top-left pixel of each 2x2 block
return cropped[::2, ::2].copy()
# Top-left pixel of each 2x2 block. For odd-sized inputs the
# trailing 1x2 / 2x1 / 1x1 residual block's top-left is still
# the source pixel at (2*i, 2*j), so direct striding gives the
# correct ceil-shape output without padding. The trailing
# ``.copy()`` materialises the strided view so the caller owns
# a contiguous buffer (downstream nodata-mask rewrites mutate
# the result in place); without it those writes would also
# touch ``arr2d``.
return arr2d[::2, ::2].copy()

if method == 'cubic':
try:
Expand Down Expand Up @@ -495,6 +539,17 @@ def _block_reduce_2d(arr2d, method, nodata=None):
# NaN can carry through the spline; the result is rewritten
# back to the sentinel and rounded before casting to the source
# integer dtype (issue #1975, integer mirror of #1623).
#
# Odd-sized inputs are first edge-replicated to even dimensions
# so the 0.5 zoom factor lands on the GDAL ceil shape and the
# spline sees a valid neighbourhood at the trailing edge.
# Replicating sentinel values is fine: they are masked to NaN
# in the steps below alongside any in-bounds sentinel, and the
# post-zoom NaN->sentinel restore stamps the trailing overview
# pixels back to the sentinel as expected (issue #2105). The
# ``_replicate_pad_2d`` helper is module-level so the cubic
# path does not redefine a closure on every call.

if (nodata is not None
and arr2d.dtype.kind == 'f'
and not np.isnan(nodata)):
Expand All @@ -505,7 +560,9 @@ def _block_reduce_2d(arr2d, method, nodata=None):
if sentinel is not None:
mask = arr2d == sentinel
if mask.any():
masked = np.where(mask, np.float64('nan'), arr2d)
src = _replicate_pad_2d(arr2d, h2, w2)
src_mask = src == sentinel
masked = np.where(src_mask, np.float64('nan'), src)
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
result = zoom(masked, 0.5, order=3,
Expand All @@ -515,13 +572,18 @@ def _block_reduce_2d(arr2d, method, nodata=None):
result = result.copy()
result[nan_mask] = float(nodata)
return result.astype(arr2d.dtype)
# No sentinel pixels present; fall through to the
# default cubic path below so the spline runs without
# the prefilter=False NaN-safety branch.
nodata_int = _resolve_int_nodata(arr2d.dtype, nodata)
if nodata_int is not None:
sentinel = arr2d.dtype.type(nodata_int)
mask = arr2d == sentinel
if mask.any():
masked = np.where(mask, np.float64('nan'),
arr2d.astype(np.float64))
src = _replicate_pad_2d(arr2d, h2, w2)
src_mask = src == sentinel
masked = np.where(src_mask, np.float64('nan'),
src.astype(np.float64))
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
result = zoom(masked, 0.5, order=3,
Expand All @@ -531,25 +593,60 @@ def _block_reduce_2d(arr2d, method, nodata=None):
result = np.where(nan_mask, float(nodata_int),
result)
return np.round(result).astype(arr2d.dtype)
return zoom(arr2d, 0.5, order=3).astype(arr2d.dtype)
# No sentinel pixels present in this integer array; fall
# through to the default cubic path below.
src = _replicate_pad_2d(arr2d, h2, w2)
return zoom(src, 0.5, order=3).astype(arr2d.dtype)

if method == 'mode':
# Most-common value per 2x2 block (useful for classified rasters).
# Vectorized: sort each 4-cell block, then for each position count
# how many cells equal it. argmax picks the leftmost max-count
# position, which (post-sort) is the smallest tied value, matching
# the prior np.unique+argmax tie-break behavior ("lowest wins").
blocks = cropped.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4)
#
# Odd-sized inputs are float-promoted and NaN-padded so the
# residual block has at least one valid cell plus NaN cells that
# never match themselves (NaN != NaN). Counts for the NaN cells
# stay at zero, so argmax picks a valid cell, and numpy's sort
# places NaN at the end so the "lowest valid wins" tie-break
# still holds (issue #2105). The result is cast back to the
# source dtype.
if need_pad:
padded = np.full((h2, w2), np.nan, dtype=np.float64)
padded[:h, :w] = arr2d
blocks = padded.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4)
srt = np.sort(blocks, axis=-1)
counts = np.zeros(srt.shape, dtype=np.int8)
for i in range(4):
counts[..., i] = np.sum(srt == srt[..., i:i + 1], axis=-1)
pick = np.argmax(counts, axis=-1)
picked = np.take_along_axis(srt, pick[..., None], axis=-1).squeeze(-1)
if arr2d.dtype.kind in ('i', 'u'):
return np.round(picked).astype(arr2d.dtype)
return picked.astype(arr2d.dtype)
blocks = arr2d.reshape(oh, 2, ow, 2).transpose(0, 2, 1, 3).reshape(oh, ow, 4)
srt = np.sort(blocks, axis=-1)
counts = np.empty_like(srt, dtype=np.int8)
for i in range(4):
counts[..., i] = np.sum(srt == srt[..., i:i + 1], axis=-1)
pick = np.argmax(counts, axis=-1)
return np.take_along_axis(srt, pick[..., None], axis=-1).squeeze(-1)

# Block reshape for mean/min/max/median
# Block reshape for mean/min/max/median. Odd-sized inputs are padded
# with NaN along the trailing edge so the reshape works on every
# input size; the padded cells are naturally excluded by the
# nan-aware aggregations below (issue #2105). The padded buffer
# keeps the source's float dtype (float32 inputs stay float32) so
# an odd-shape read does not pay a 2x intermediate-memory cost for
# a silent promote to float64.
if arr2d.dtype.kind == 'f':
blocks = cropped.reshape(oh, 2, ow, 2)
if need_pad:
padded = np.full((h2, w2), np.nan, dtype=arr2d.dtype)
padded[:h, :w] = arr2d
blocks = padded.reshape(oh, 2, ow, 2)
else:
blocks = arr2d.reshape(oh, 2, ow, 2)
# When a sentinel was used in place of NaN by an upstream
# NaN-to-sentinel rewrite, mask it back to NaN here so nanmean /
# nanmin / nanmax / nanmedian honour the missing-data semantic.
Expand All @@ -569,28 +666,48 @@ def _block_reduce_2d(arr2d, method, nodata=None):
# array so the caller's input is not mutated.
blocks = np.where(mask, np.float64('nan'), blocks)
else:
blocks = cropped.astype(np.float64).reshape(oh, 2, ow, 2)
# Integer rasters with a sentinel need the same NaN-mask the float
# branch above applies: without it, nanmean / nanmin / nanmax /
# nanmedian average the sentinel value into surrounding valid
# cells and produce overview pixels that are neither the sentinel
# nor any real measurement. The read-side int-to-NaN mask in
# ``open_geotiff`` only catches exact sentinel hits, so the
# poisoned values survive as silent garbage at every zoom level
# above 0. Gate on the sentinel being representable in the
# source integer dtype (mirrors ``_int_nodata_in_range`` in
# ``_reader.py``) so an out-of-range sentinel pair like
# ``uint16`` + ``GDAL_NODATA="-9999"`` stays a no-op rather than
# tripping ``OverflowError`` on the dtype cast.
if need_pad:
blocks = np.full((h2, w2), np.nan, dtype=np.float64)
blocks[:h, :w] = arr2d
blocks = blocks.reshape(oh, 2, ow, 2)
else:
blocks = arr2d.astype(np.float64).reshape(oh, 2, ow, 2)
# Integer rasters with a sentinel still need the NaN-mask the
# float branch above applies. The sentinel-equal cells in the
# original integer array are computed at the dtype's native
# width below (so a 64-bit sentinel near INT64_MAX still
# matches, which a float-cast comparison would miss) and the
# mask is then broadcast into the float64 ``blocks`` view.
# Without this, nanmean / nanmin / nanmax / nanmedian average
# the sentinel value into surrounding valid cells and produce
# overview pixels that the read side cannot remap because they
# no longer equal the sentinel. Gate on the sentinel being
# representable in the source integer dtype (mirrors
# ``_int_nodata_in_range`` in ``_reader.py``) so an out-of-
# range sentinel pair like ``uint16`` + ``GDAL_NODATA=-9999``
# stays a no-op rather than tripping ``OverflowError`` on the
# dtype cast.
nodata_int = _resolve_int_nodata(arr2d.dtype, nodata)
if nodata_int is not None:
sentinel = arr2d.dtype.type(nodata_int)
# Compare against the original integer block view so the
# equality runs at the integer's native width (avoids any
# float-cast rounding on adjacent values). The boolean
# mask broadcasts into the float64 block layout below.
int_blocks = cropped.reshape(oh, 2, ow, 2)
mask = int_blocks == sentinel
if need_pad:
# Compute the sentinel mask at the integer's native width
# before padding, so a 64-bit sentinel near INT64_MAX (which
# is not exactly representable in float64) still matches.
# The mask is then padded with False to the same (h2, w2)
# shape as the float-promoted blocks view, so the residual
# padded cells (already NaN in ``blocks``) are not also
# rewritten via the where below.
int_mask = np.zeros((h2, w2), dtype=bool)
int_mask[:h, :w] = arr2d == sentinel
mask = int_mask.reshape(oh, 2, ow, 2)
else:
# Compare against the original integer block view so the
# equality runs at the integer's native width (avoids any
# float-cast rounding on adjacent values). The boolean
# mask broadcasts into the float64 block layout below.
int_blocks = arr2d.reshape(oh, 2, ow, 2)
mask = int_blocks == sentinel
if mask.any():
blocks = np.where(mask, np.float64('nan'), blocks)

Expand Down
Loading
Loading