From 9baca233888320dbd111c1b75df5498ee9c2e8c7 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 4 May 2026 07:38:48 -0700 Subject: [PATCH] Honor per-raster nodata sentinels in merge() (#1448) merge() previously detected nodata only from the first raster and reused that sentinel for every other input. Rasters with a different sentinel had their invalid pixels passed through reprojection as real data, producing silently wrong results. Detect each raster's own sentinel, canonicalize to NaN before the strategy merge, then convert back to the user-requested output nodata. Affects both _merge_inmemory and the dask _merge_block_adapter path. --- xrspatial/reproject/__init__.py | 61 ++++++++++++--- xrspatial/tests/test_reproject.py | 123 ++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 11 deletions(-) diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index ac9f93cd..397d3a11 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -1381,7 +1381,7 @@ def merge( "Could not detect target CRS. Pass target_crs explicitly." ) - # Detect nodata + # Detect output nodata (the sentinel the user asked for) nd = nodata if nodata is not None else _detect_nodata(rasters[0], nodata) # Gather source info for each raster @@ -1396,6 +1396,10 @@ def merge( sb = _source_bounds(r) ss = (r.sizes[r.dims[-2]], r.sizes[r.dims[-1]]) yd = _is_y_descending(r) + # Per-raster input nodata sentinel. Detected independently of the + # user-supplied output nodata so that mixed-sentinel inputs are + # canonicalized correctly during merge. + r_nd = _detect_nodata(r, None) raster_infos.append({ 'raster': r, 'src_crs': src_crs, @@ -1403,6 +1407,7 @@ def merge( 'src_shape': ss, 'y_desc': yd, 'src_wkt': src_crs.to_wkt(), + 'raster_nodata': r_nd, }) # Compute unified output grid @@ -1549,32 +1554,48 @@ def _merge_inmemory( Detects same-CRS tiles and uses fast direct placement instead of reprojection. + + Each raster is reprojected using its own nodata sentinel and then + canonicalized to NaN before the strategy merge so that mixed-sentinel + inputs do not leak invalid pixels into the mosaic. After merging, the + NaN canonical sentinel is converted back to the user-requested + output ``nodata``. """ from ._crs_utils import _crs_from_wkt tgt_crs = _crs_from_wkt(tgt_wkt) arrays = [] for info in raster_infos: + r_nd = info.get('raster_nodata', float('nan')) # Check if source CRS matches target (no reprojection needed) placed = None if info['src_crs'] == tgt_crs: placed = _place_same_crs( info['raster'].values, info['src_bounds'], info['src_shape'], info['y_desc'], - out_bounds, out_shape, nodata, + out_bounds, out_shape, r_nd, ) if placed is not None: - arrays.append(placed) + arr = placed else: - reprojected = _reproject_chunk_numpy( + arr = _reproject_chunk_numpy( info['raster'].values, info['src_bounds'], info['src_shape'], info['y_desc'], info['src_wkt'], tgt_wkt, out_bounds, out_shape, - resampling, nodata, 16, + resampling, r_nd, 16, ) - arrays.append(reprojected) - return _merge_arrays_numpy(arrays, nodata, strategy) + # Canonicalize this raster's sentinel to NaN before the merge so + # rasters with different sentinels merge correctly. + if not np.isnan(r_nd): + arr = np.asarray(arr, dtype=np.float64) + arr = np.where(arr == r_nd, np.nan, arr) + arrays.append(arr) + + merged = _merge_arrays_numpy(arrays, float('nan'), strategy) + if not np.isnan(nodata): + merged = np.where(np.isnan(merged), nodata, merged) + return merged def _merge_block_adapter( @@ -1583,9 +1604,14 @@ def _merge_block_adapter( src_wkt_list, tgt_wkt, out_bounds, out_shape, resampling, nodata, strategy, precision, - src_footprints_tgt, + src_footprints_tgt, raster_nodata_list, ): - """``map_blocks`` adapter for merge.""" + """``map_blocks`` adapter for merge. + + Each raster is reprojected with its own input nodata sentinel and + canonicalized to NaN before the strategy merge. The final result is + converted back to the user-requested output ``nodata``. + """ info = block_info[0] (row_start, row_end), (col_start, col_end) = info['array-location'] chunk_shape = (row_end - row_start, col_end - col_start) @@ -1598,18 +1624,27 @@ def _merge_block_adapter( if (src_footprints_tgt[i] is not None and not _bounds_overlap(cb, src_footprints_tgt[i])): continue + r_nd = raster_nodata_list[i] reprojected = _reproject_chunk_numpy( raster_data_list[i], src_bounds_list[i], src_shape_list[i], y_desc_list[i], src_wkt_list[i], tgt_wkt, cb, chunk_shape, - resampling, nodata, precision, + resampling, r_nd, precision, ) + if not np.isnan(r_nd): + reprojected = np.asarray(reprojected, dtype=np.float64) + reprojected = np.where( + reprojected == r_nd, np.nan, reprojected + ) arrays.append(reprojected) if not arrays: return np.full(chunk_shape, nodata, dtype=np.float64) - return _merge_arrays_numpy(arrays, nodata, strategy) + merged = _merge_arrays_numpy(arrays, float('nan'), strategy) + if not np.isnan(nodata): + merged = np.where(np.isnan(merged), nodata, merged) + return merged def _merge_dask( @@ -1629,6 +1664,9 @@ def _merge_dask( shape_list = [info['src_shape'] for info in raster_infos] ydesc_list = [info['y_desc'] for info in raster_infos] wkt_list = [info['src_wkt'] for info in raster_infos] + rnodata_list = [ + info.get('raster_nodata', float('nan')) for info in raster_infos + ] # Precompute source footprints in target CRS footprints = [ @@ -1653,6 +1691,7 @@ def _merge_dask( strategy=strategy, precision=16, src_footprints_tgt=footprints, + raster_nodata_list=rnodata_list, ) template = da.empty( diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 704469c4..9c09be62 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -718,6 +718,129 @@ def test_merge_dask(self): assert computed.shape[0] > 0 +class TestMergeMixedNodata: + """merge() must honor each raster's own nodata sentinel.""" + + def test_merge_mixed_nodata_sentinels(self): + """Raster A NaN sentinel, raster B -9999 sentinel. + + B's -9999 pixels must be recognized as nodata, not leaked as + real data into the merged output. + """ + from xrspatial.reproject import merge + + # Raster A: all valid, value 10 + a_data = np.full((16, 16), 10.0, dtype=np.float64) + a = _make_raster( + a_data, x_range=(-10, 0), y_range=(-5, 5), nodata=np.nan + ) + + # Raster B: half valid (=20), half -9999 sentinel + b_data = np.full((16, 16), 20.0, dtype=np.float64) + b_data[:, :8] = -9999.0 # left half is nodata + b = _make_raster( + b_data, x_range=(0, 10), y_range=(-5, 5), nodata=-9999.0 + ) + + result = merge([a, b], strategy='mean', resolution=1.0) + vals = result.values + + # The output should never contain -9999 as a data value. + # B's -9999 pixels were correctly recognized as nodata. + assert not np.any(vals == -9999.0), ( + "B's -9999 nodata pixels leaked into the merged output" + ) + + # B's right half (x > ~5) should still surface as 20. + x = result.coords['x'].values + right_mask = x > 6 + if right_mask.any(): + right = vals[:, right_mask] + valid = ~np.isnan(right) + if valid.any(): + np.testing.assert_allclose( + right[valid], 20.0, atol=1.0 + ) + + def test_merge_nan_then_int_sentinel(self): + """Mean strategy must not fold sentinel zeros into the average.""" + from xrspatial.reproject import merge + + a_data = np.full((8, 8), 10.0, dtype=np.float64) + a = _make_raster( + a_data, x_range=(-5, 5), y_range=(-5, 5), nodata=np.nan + ) + + # Raster B uses 0.0 as nodata sentinel + b_data = np.full((8, 8), 0.0, dtype=np.float64) + b = _make_raster( + b_data, x_range=(-5, 5), y_range=(-5, 5), nodata=0.0 + ) + + result = merge([a, b], strategy='mean', resolution=1.0) + vals = result.values + interior = vals[1:-1, 1:-1] + valid = ~np.isnan(interior) + if valid.any(): + # If B's zeros were treated as data, mean would be ~5. + # Treated as nodata, mean is just 10. + np.testing.assert_allclose( + interior[valid], 10.0, atol=1.0 + ) + + def test_merge_explicit_user_nodata_with_mixed_inputs(self): + """User-specified output nodata is independent of input sentinels.""" + from xrspatial.reproject import merge + + # Raster A: NaN nodata, all valid + a_data = np.full((16, 16), 10.0, dtype=np.float64) + a = _make_raster( + a_data, x_range=(-10, 0), y_range=(-5, 5), nodata=np.nan + ) + + # Raster B: -9999 nodata, half valid (=20) + b_data = np.full((16, 16), 20.0, dtype=np.float64) + b_data[:, :8] = -9999.0 + b = _make_raster( + b_data, x_range=(0, 10), y_range=(-5, 5), nodata=-9999.0 + ) + + result = merge( + [a, b], strategy='mean', resolution=1.0, nodata=-9999.0 + ) + vals = result.values + + # Output uses -9999 as the nodata sentinel, but data pixels must + # never be 0 from B's zero-sentinel test (different test). Here + # the only -9999 in the output should be true nodata regions + # (no overlap with any input). We verify B's right half surfaces + # as 20 (not -9999) and A's region surfaces as 10. + x = result.coords['x'].values + + right_mask = x > 6 + if right_mask.any(): + right = vals[:, right_mask] + data_mask = right != -9999.0 + if data_mask.any(): + np.testing.assert_allclose( + right[data_mask], 20.0, atol=1.0 + ) + + left_mask = x < -6 + if left_mask.any(): + left = vals[:, left_mask] + data_mask = left != -9999.0 + if data_mask.any(): + np.testing.assert_allclose( + left[data_mask], 10.0, atol=1.0 + ) + + # No NaN in the output -- user requested -9999 as the sentinel. + assert not np.any(np.isnan(vals)), ( + "user requested -9999 nodata but output contains NaN" + ) + + # --------------------------------------------------------------------------- # Accessor integration # ---------------------------------------------------------------------------