From 1efc51a119462214ebc829a473ba22f97827ef2f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 4 May 2026 12:52:32 -0700 Subject: [PATCH 1/3] Preserve input float dtype through resample() (#1467) resample() now picks its working dtype from the input: float64 in, float64 out; float32 in, float32 out; integer in, float32 out (since NaN-sentinel resampling needs a float type). Previously every input was cast to float64 internally and every output forced to float32, which silently dropped precision for float64 rasters. The four backend runners and their dask block helpers thread the working/output dtypes through. _maybe_astype skips the copy when the array already has the desired dtype. The aggregation kernels (numba @ngjit) keep their hardcoded float64 working buffer -- they accumulate in float64 and the runner casts the result down at the end, which is cheap and matches the existing behaviour for float32 aggregation accuracy. --- xrspatial/resample.py | 139 ++++++++++++++++++++++--------- xrspatial/tests/test_resample.py | 126 ++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 41 deletions(-) diff --git a/xrspatial/resample.py b/xrspatial/resample.py index 5eb93978..07f6d9b3 100644 --- a/xrspatial/resample.py +++ b/xrspatial/resample.py @@ -44,11 +44,42 @@ _INTERP_DEPTH = {'nearest': 1, 'bilinear': 1, 'cubic': 10} # Approximate working-set size per output cell for the eager backends: -# one float64 working buffer (8 B) plus a float32 output cell (4 B). -# scipy.ndimage.map_coordinates also allocates a temporary of the same -# size during higher-order spline evaluation; the 0.5 * available bound -# below leaves room for that. -_BYTES_PER_OUTPUT_CELL = 12 +# one float64 working buffer (8 B) plus a float64 output cell (8 B) in +# the worst case. scipy.ndimage.map_coordinates also allocates a +# temporary of the same size during higher-order spline evaluation; the +# 0.5 * available bound below leaves room for that. +_BYTES_PER_OUTPUT_CELL = 16 + + +# -- Working / output dtype selection ---------------------------------------- + +def _working_dtype(input_dtype): + """Pick the working float dtype for resampling. + + float64 inputs stay in float64 to preserve precision; everything else + (smaller floats, integers, bool) uses float32. + """ + dt = np.dtype(input_dtype) + if dt.kind == 'f' and dt.itemsize >= 8: + return np.float64 + return np.float32 + + +def _output_dtype(input_dtype): + """Pick the output dtype for resampling. + + Float inputs keep their dtype. Integer / bool inputs return float32 + because NaN-sentinel resampling needs a float type. + """ + dt = np.dtype(input_dtype) + if dt.kind == 'f': + return dt.type + return np.float32 + + +def _maybe_astype(arr, dtype): + """astype copy that no-ops when already at the requested dtype.""" + return arr if arr.dtype == np.dtype(dtype) else arr.astype(dtype) # -- Memory guard ------------------------------------------------------------ @@ -388,13 +419,13 @@ def _agg_mode(data, out_h, out_w): def _interp_block_np(block, global_in_h, global_in_w, global_out_h, global_out_w, cum_in_y, cum_in_x, cum_out_y, cum_out_x, - depth, order, block_info=None): + depth, order, work_dtype, out_dtype, block_info=None): """Interpolate one (possibly overlapped) numpy block.""" yi, xi = block_info[0]['chunk-location'] target_h = int(cum_out_y[yi + 1] - cum_out_y[yi]) target_w = int(cum_out_x[xi + 1] - cum_out_x[xi]) - block = block.astype(np.float64) + block = _maybe_astype(block, work_dtype) # Global output pixel indices for this chunk oy = np.arange(cum_out_y[yi], cum_out_y[yi + 1], dtype=np.float64) @@ -417,19 +448,19 @@ def _interp_block_np(block, global_in_h, global_in_w, result = _scipy_map_coords(block, coords, order=order, mode='nearest') else: filled = np.where(mask, 0.0, block) - weights = (~mask).astype(np.float64) + weights = (~mask).astype(block.dtype) z_data = _scipy_map_coords(filled, coords, order=order, mode='nearest') z_wt = _scipy_map_coords(weights, coords, order=order, mode='nearest') result = np.where(z_wt > 0.01, z_data / np.maximum(z_wt, 1e-10), np.nan) - return result.reshape(target_h, target_w).astype(np.float32) + return _maybe_astype(result.reshape(target_h, target_w), out_dtype) def _interp_block_cupy(block, global_in_h, global_in_w, global_out_h, global_out_w, cum_in_y, cum_in_x, cum_out_y, cum_out_x, - depth, order, block_info=None): + depth, order, work_dtype, out_dtype, block_info=None): """CuPy variant of :func:`_interp_block_np`.""" from cupyx.scipy.ndimage import map_coordinates as _cupy_map_coords @@ -437,7 +468,8 @@ def _interp_block_cupy(block, global_in_h, global_in_w, target_h = int(cum_out_y[yi + 1] - cum_out_y[yi]) target_w = int(cum_out_x[xi + 1] - cum_out_x[xi]) - block = block.astype(cupy.float64) + if block.dtype != cupy.dtype(work_dtype): + block = block.astype(work_dtype) oy = cupy.arange(int(cum_out_y[yi]), int(cum_out_y[yi + 1]), dtype=cupy.float64) @@ -459,25 +491,30 @@ def _interp_block_cupy(block, global_in_h, global_in_w, result = _cupy_map_coords(block, coords, order=order, mode='nearest') else: filled = cupy.where(mask, 0.0, block) - weights = (~mask).astype(cupy.float64) + weights = (~mask).astype(block.dtype) z_data = _cupy_map_coords(filled, coords, order=order, mode='nearest') z_wt = _cupy_map_coords(weights, coords, order=order, mode='nearest') result = cupy.where(z_wt > 0.01, z_data / cupy.maximum(z_wt, 1e-10), cupy.nan) - return result.reshape(target_h, target_w).astype(cupy.float32) + result = result.reshape(target_h, target_w) + if result.dtype != cupy.dtype(out_dtype): + result = result.astype(out_dtype) + return result def _agg_block_np(block, method, global_in_h, global_in_w, global_out_h, global_out_w, cum_in_y, cum_in_x, cum_out_y, cum_out_x, - depth_y, depth_x, block_info=None): + depth_y, depth_x, out_dtype, block_info=None): """Block-aggregate one (possibly overlapped) numpy chunk.""" yi, xi = block_info[0]['chunk-location'] target_h = int(cum_out_y[yi + 1] - cum_out_y[yi]) target_w = int(cum_out_x[xi + 1] - cum_out_x[xi]) - block = block.astype(np.float64) + # _AGG_FUNCS kernels are @ngjit-compiled with hard-coded float64 + # working buffers; cast accordingly so numba dispatch matches. + block = _maybe_astype(block, np.float64) # The overlapped block starts depth pixels before the original chunk in_y0 = int(cum_in_y[yi]) - depth_y in_x0 = int(cum_in_x[xi]) - depth_x @@ -495,20 +532,20 @@ def _agg_block_np(block, method, global_in_h, global_in_w, sub = block[gy0:gy1, gx0:gx1] out[lo_y, lo_x] = func(sub, 1, 1)[0, 0] - return out.astype(np.float32) + return _maybe_astype(out, out_dtype) def _agg_block_cupy(block, method, global_in_h, global_in_w, global_out_h, global_out_w, cum_in_y, cum_in_x, cum_out_y, cum_out_x, - depth_y, depth_x, block_info=None): + depth_y, depth_x, out_dtype, block_info=None): """Block-aggregate one cupy chunk (falls back to CPU).""" cpu = cupy.asnumpy(block) result = _agg_block_np( cpu, method, global_in_h, global_in_w, global_out_h, global_out_w, cum_in_y, cum_in_x, cum_out_y, cum_out_x, - depth_y, depth_x, block_info=block_info, + depth_y, depth_x, out_dtype, block_info=block_info, ) return cupy.asarray(result) @@ -516,23 +553,30 @@ def _agg_block_cupy(block, method, global_in_h, global_in_w, # -- Per-backend runners ----------------------------------------------------- def _run_numpy(data, scale_y, scale_x, method): - data = data.astype(np.float64) + work_dt = _working_dtype(data.dtype) + out_dt = _output_dtype(data.dtype) + data = _maybe_astype(data, work_dt) out_h, out_w = _output_shape(*data.shape, scale_y, scale_x) if method in INTERP_METHODS: - return _nan_aware_interp_np(data, out_h, out_w, - INTERP_METHODS[method]).astype(np.float32) + result = _nan_aware_interp_np(data, out_h, out_w, + INTERP_METHODS[method]) + return _maybe_astype(result, out_dt) - return _AGG_FUNCS[method](data, out_h, out_w).astype(np.float32) + result = _AGG_FUNCS[method](data, out_h, out_w) + return _maybe_astype(result, out_dt) def _run_cupy(data, scale_y, scale_x, method): - data = data.astype(cupy.float64) + work_dt = _working_dtype(data.dtype) + out_dt = _output_dtype(data.dtype) + data = data if data.dtype == cupy.dtype(work_dt) else data.astype(work_dt) out_h, out_w = _output_shape(*data.shape, scale_y, scale_x) if method in INTERP_METHODS: - return _nan_aware_interp_cupy(data, out_h, out_w, - INTERP_METHODS[method]).astype(cupy.float32) + result = _nan_aware_interp_cupy(data, out_h, out_w, + INTERP_METHODS[method]) + return result if result.dtype == cupy.dtype(out_dt) else result.astype(out_dt) # Aggregate: GPU reshape+reduce for integer factors, CPU fallback otherwise fy, fx = data.shape[0] / out_h, data.shape[1] / out_w @@ -544,11 +588,12 @@ def _run_cupy(data, scale_y, scale_x, method): reducer = {'average': cupy.nanmean, 'min': cupy.nanmin, 'max': cupy.nanmax}[method] - return reducer(reshaped, axis=(1, 3)).astype(cupy.float32) + result = reducer(reshaped, axis=(1, 3)) + return result if result.dtype == cupy.dtype(out_dt) else result.astype(out_dt) cpu = cupy.asnumpy(data) return cupy.asarray( - _AGG_FUNCS[method](cpu, out_h, out_w).astype(np.float32) + _maybe_astype(_AGG_FUNCS[method](cpu, out_h, out_w), out_dt) ) @@ -581,8 +626,11 @@ def _ensure_min_chunksize(data, min_size): def _run_dask_numpy(data, scale_y, scale_x, method): - data = data.astype(np.float64) - meta = np.array((), dtype=np.float32) + work_dt = _working_dtype(data.dtype) + out_dt = _output_dtype(data.dtype) + if data.dtype != np.dtype(work_dt): + data = data.astype(work_dt) + meta = np.array((), dtype=out_dt) if method in INTERP_METHODS: order = INTERP_METHODS[method] @@ -616,9 +664,10 @@ def _run_dask_numpy(data, scale_y, scale_x, method): global_out_h=global_out_h, global_out_w=global_out_w, cum_in_y=cum_in_y, cum_in_x=cum_in_x, cum_out_y=cum_out_y, cum_out_x=cum_out_x, - depth=depth, order=order) + depth=depth, order=order, + work_dtype=work_dt, out_dtype=out_dt) return da.map_blocks(fn, src, chunks=(out_y, out_x), - dtype=np.float32, meta=meta) + dtype=out_dt, meta=meta) import math min_size = max(_min_chunksize_for_scale(scale_y), @@ -658,14 +707,18 @@ def _run_dask_numpy(data, scale_y, scale_x, method): global_out_h=global_out_h, global_out_w=global_out_w, cum_in_y=cum_in_y, cum_in_x=cum_in_x, cum_out_y=cum_out_y, cum_out_x=cum_out_x, - depth_y=depth_y, depth_x=depth_x) + depth_y=depth_y, depth_x=depth_x, + out_dtype=out_dt) return da.map_blocks(fn, src, chunks=(out_y, out_x), - dtype=np.float32, meta=meta) + dtype=out_dt, meta=meta) def _run_dask_cupy(data, scale_y, scale_x, method): - data = data.astype(cupy.float64) - meta = cupy.array((), dtype=cupy.float32) + work_dt = _working_dtype(data.dtype) + out_dt = _output_dtype(data.dtype) + if data.dtype != cupy.dtype(work_dt): + data = data.astype(work_dt) + meta = cupy.array((), dtype=out_dt) if method in INTERP_METHODS: order = INTERP_METHODS[method] @@ -699,9 +752,10 @@ def _run_dask_cupy(data, scale_y, scale_x, method): global_out_h=global_out_h, global_out_w=global_out_w, cum_in_y=cum_in_y, cum_in_x=cum_in_x, cum_out_y=cum_out_y, cum_out_x=cum_out_x, - depth=depth, order=order) + depth=depth, order=order, + work_dtype=work_dt, out_dtype=out_dt) return da.map_blocks(fn, src, chunks=(out_y, out_x), - dtype=cupy.float32, meta=meta) + dtype=out_dt, meta=meta) import math min_size = max(_min_chunksize_for_scale(scale_y), @@ -733,9 +787,10 @@ def _run_dask_cupy(data, scale_y, scale_x, method): global_out_h=global_out_h, global_out_w=global_out_w, cum_in_y=cum_in_y, cum_in_x=cum_in_x, cum_out_y=cum_out_y, cum_out_x=cum_out_x, - depth_y=depth_y, depth_x=depth_x) + depth_y=depth_y, depth_x=depth_x, + out_dtype=out_dt) return da.map_blocks(fn, src, chunks=(out_y, out_x), - dtype=cupy.float32, meta=meta) + dtype=out_dt, meta=meta) # -- Public API -------------------------------------------------------------- @@ -776,8 +831,10 @@ def resample( Returns ------- xarray.DataArray - Resampled raster with updated coordinates, ``res`` attribute, - and float32 dtype. + Resampled raster with updated coordinates and ``res`` attribute. + Output dtype matches the input float dtype (float32 or float64); + integer inputs return float32 since NaN-sentinel resampling + requires a float type. """ _validate_raster(agg, func_name='resample', name='agg') diff --git a/xrspatial/tests/test_resample.py b/xrspatial/tests/test_resample.py index d77e3061..9fae316b 100644 --- a/xrspatial/tests/test_resample.py +++ b/xrspatial/tests/test_resample.py @@ -430,3 +430,129 @@ def test_dask_path_skips_guard(self, grid_4x4): # guard not to short-circuit a reasonable dask call. out = resample(dask_agg, scale_factor=100.0, method='nearest') assert out.shape == (400, 400) + + +# --------------------------------------------------------------------------- +# Dtype preservation (issue #1467) +# --------------------------------------------------------------------------- + +class TestDtypePreservation: + """resample() should preserve input float dtype rather than always + forcing float32. Integer input still becomes float32 because + NaN-sentinel resampling needs a float type.""" + + @pytest.mark.parametrize("method", + ['nearest', 'bilinear', 'cubic', 'average']) + def test_float64_input_keeps_float64(self, method): + data = np.linspace(0, 1, 64 * 64, + dtype=np.float64).reshape(64, 64) + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method=method) + assert out.dtype == np.float64 + + @pytest.mark.parametrize("method", + ['nearest', 'bilinear', 'cubic', 'average']) + def test_float32_input_keeps_float32(self, method): + data = np.linspace(0, 1, 64 * 64, + dtype=np.float32).reshape(64, 64) + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method=method) + assert out.dtype == np.float32 + + @pytest.mark.parametrize("dtype", [np.int16, np.int32, np.int64, + np.uint8, np.uint16]) + def test_integer_input_returns_float32(self, dtype): + data = np.arange(64 * 64, dtype=dtype).reshape(64, 64) + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method='nearest') + assert out.dtype == np.float32 + + def test_float64_precision_preserved(self): + # Use a smooth float64 signal whose downsampled values can be + # predicted analytically and require >float32 precision to verify. + y, x = np.mgrid[0:32, 0:32].astype(np.float64) + data = (y + x) * 1e-10 # values < float32 ULP at 1.0 + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + + out = resample(agg, scale_factor=0.5, method='nearest') + assert out.dtype == np.float64 + + # A float32 round-trip would round these tiny values to a + # quantised grid -- check we did NOT do that. + as_float32 = data.astype(np.float32).astype(np.float64) + # The float64 result should differ from a float32 round-trip + # at the indexed sample positions (i.e. precision retained). + sampled = data[1::2, 1::2] + sampled_f32 = as_float32[1::2, 1::2] + # They differ at the float32 quantisation level. + assert not np.array_equal(sampled, sampled_f32) + # And our nearest-neighbour result matches the float64 sampling + # to full precision, not the float32 quantised version. + np.testing.assert_allclose(out.values, sampled, atol=1e-12) + + @pytest.mark.skipif(not dask_array_available(), + reason="dask not installed") + @pytest.mark.parametrize("method", ['nearest', 'bilinear', 'average']) + def test_dask_float64_keeps_float64(self, method): + import dask.array as da + data = np.linspace(0, 1, 64 * 64, + dtype=np.float64).reshape(64, 64) + darr = da.from_array(data, chunks=16) + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + agg.data = darr + out = resample(agg, scale_factor=0.5, method=method) + assert out.dtype == np.float64 + assert out.compute().dtype == np.float64 + + @pytest.mark.skipif(not dask_array_available(), + reason="dask not installed") + @pytest.mark.parametrize("method", ['nearest', 'bilinear', 'average']) + def test_dask_float32_keeps_float32(self, method): + import dask.array as da + data = np.linspace(0, 1, 64 * 64, + dtype=np.float32).reshape(64, 64) + darr = da.from_array(data, chunks=16) + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + agg.data = darr + out = resample(agg, scale_factor=0.5, method=method) + assert out.dtype == np.float32 + assert out.compute().dtype == np.float32 + + @pytest.mark.skipif(not dask_array_available(), + reason="dask not installed") + def test_dask_int_input_returns_float32(self): + import dask.array as da + data = np.arange(64 * 64, dtype=np.int32).reshape(64, 64) + darr = da.from_array(data, chunks=16) + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + agg.data = darr + out = resample(agg, scale_factor=0.5, method='nearest') + assert out.dtype == np.float32 + assert out.compute().dtype == np.float32 + + def test_int_input_with_nodata_propagates_nan(self): + # Gated on whether resample() exposes a `nodata` parameter -- + # if it doesn't, skip the NaN-sentinel piece since there's no + # way to mark integer cells as missing. + import inspect + sig = inspect.signature(resample) + if 'nodata' not in sig.parameters: + pytest.skip("resample() has no nodata parameter (yet)") + + data = np.arange(64 * 64, dtype=np.int32).reshape(64, 64) + data[0, 0] = -9999 # sentinel + agg = create_test_raster(data, backend='numpy', + attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method='nearest', + nodata=-9999) + assert out.dtype == np.float32 + # nearest-neighbour at output (0,0) samples input near the + # sentinel cell; expect NaN somewhere in the top-left output. + assert np.isnan(out.values[:2, :2]).any() From 5c0b1ff3529a4e42dba4a8e0022c6e10dac787e5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 20:35:59 +0000 Subject: [PATCH 2/3] Fix float32 promotion and NaN test for int nodata; resolve merge conflicts Agent-Logs-Url: https://github.com/xarray-contrib/xarray-spatial/sessions/8246524b-ac66-4574-97b1-9f97fbe38126 Co-authored-by: brendancol <433221+brendancol@users.noreply.github.com> --- xrspatial/resample.py | 3 ++- xrspatial/tests/test_resample.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/xrspatial/resample.py b/xrspatial/resample.py index 198978bc..79af8495 100644 --- a/xrspatial/resample.py +++ b/xrspatial/resample.py @@ -1033,8 +1033,9 @@ def _apply_nodata_mask(agg, nodata): if nodata is None: return agg # Promote to float so NaN can be stored. xr.where keeps the backend. + # Integer / bool inputs become float32 (consistent with _output_dtype). if not np.issubdtype(agg.dtype, np.floating): - agg = agg.astype(np.float64) + agg = agg.astype(np.float32) if np.isnan(nodata): return agg # already-NaN sentinels need no replacement return agg.where(agg != nodata) diff --git a/xrspatial/tests/test_resample.py b/xrspatial/tests/test_resample.py index 52b7ed0f..713fdf3a 100644 --- a/xrspatial/tests/test_resample.py +++ b/xrspatial/tests/test_resample.py @@ -796,15 +796,16 @@ def test_int_input_with_nodata_propagates_nan(self): pytest.skip("resample() has no nodata parameter (yet)") data = np.arange(64 * 64, dtype=np.int32).reshape(64, 64) - data[0, 0] = -9999 # sentinel + # With scale_factor=0.5 and block-centered coords, output pixel (0, 0) + # samples from input coord ~(0.5, 0.5) which rounds to input (1, 1). + data[1, 1] = -9999 # sentinel at a position sampled by output (0, 0) agg = create_test_raster(data, backend='numpy', attrs={'res': (1.0, 1.0)}) out = resample(agg, scale_factor=0.5, method='nearest', nodata=-9999) assert out.dtype == np.float32 - # nearest-neighbour at output (0,0) samples input near the - # sentinel cell; expect NaN somewhere in the top-left output. - assert np.isnan(out.values[:2, :2]).any() + # output (0, 0) should be NaN because input (1, 1) was masked. + assert np.isnan(out.values[0, 0]) # --------------------------------------------------------------------------- From f67015d2b7bb417e9123f4b0691afedc3dd28f13 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 20:38:14 +0000 Subject: [PATCH 3/3] Remove duplicate TestTargetResolutionTuple class from merge Agent-Logs-Url: https://github.com/xarray-contrib/xarray-spatial/sessions/8246524b-ac66-4574-97b1-9f97fbe38126 Co-authored-by: brendancol <433221+brendancol@users.noreply.github.com> --- xrspatial/tests/test_resample.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/xrspatial/tests/test_resample.py b/xrspatial/tests/test_resample.py index 713fdf3a..10b5fa35 100644 --- a/xrspatial/tests/test_resample.py +++ b/xrspatial/tests/test_resample.py @@ -1018,18 +1018,3 @@ def test_explicit_nodata_overrides_attr(self): assert np.isnan(out.values[0, 0]) -# --------------------------------------------------------------------------- -# target_resolution as 2-tuple (issue #1466) -# --------------------------------------------------------------------------- - -class TestTargetResolutionTuple: - def test_tuple_resolution_independent_axes(self, grid_8x8): - # 8x8 grid with res=(1, 1) -> target (2, 4) -> output (4, 2). - out = resample(grid_8x8, target_resolution=(2.0, 4.0)) - assert out.shape == (4, 2) - - def test_tuple_resolution_matches_scale_factor(self, grid_8x8): - # target_resolution=(2.0, 2.0) should match scale_factor=0.5. - a = resample(grid_8x8, target_resolution=(2.0, 2.0), method='nearest') - b = resample(grid_8x8, scale_factor=0.5, method='nearest') - np.testing.assert_allclose(a.values, b.values)