diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index 16f3c1ad..e0acd8f0 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -1401,7 +1401,7 @@ def merge( "Could not detect target CRS. Pass target_crs explicitly." ) - # Detect nodata + # Detect output nodata (the sentinel the user asked for) nd = nodata if nodata is not None else _detect_nodata(rasters[0], nodata) # Gather source info for each raster @@ -1416,6 +1416,10 @@ def merge( sb = _source_bounds(r) ss = (r.sizes[r.dims[-2]], r.sizes[r.dims[-1]]) yd = _is_y_descending(r) + # Per-raster input nodata sentinel. Detected independently of the + # user-supplied output nodata so that mixed-sentinel inputs are + # canonicalized correctly during merge. + r_nd = _detect_nodata(r, None) raster_infos.append({ 'raster': r, 'src_crs': src_crs, @@ -1423,6 +1427,7 @@ def merge( 'src_shape': ss, 'y_desc': yd, 'src_wkt': src_crs.to_wkt(), + 'raster_nodata': r_nd, }) # Compute unified output grid @@ -1569,32 +1574,48 @@ def _merge_inmemory( Detects same-CRS tiles and uses fast direct placement instead of reprojection. + + Each raster is reprojected using its own nodata sentinel and then + canonicalized to NaN before the strategy merge so that mixed-sentinel + inputs do not leak invalid pixels into the mosaic. After merging, the + NaN canonical sentinel is converted back to the user-requested + output ``nodata``. """ from ._crs_utils import _crs_from_wkt tgt_crs = _crs_from_wkt(tgt_wkt) arrays = [] for info in raster_infos: + r_nd = info.get('raster_nodata', float('nan')) # Check if source CRS matches target (no reprojection needed) placed = None if info['src_crs'] == tgt_crs: placed = _place_same_crs( info['raster'].values, info['src_bounds'], info['src_shape'], info['y_desc'], - out_bounds, out_shape, nodata, + out_bounds, out_shape, r_nd, ) if placed is not None: - arrays.append(placed) + arr = placed else: - reprojected = _reproject_chunk_numpy( + arr = _reproject_chunk_numpy( info['raster'].values, info['src_bounds'], info['src_shape'], info['y_desc'], info['src_wkt'], tgt_wkt, out_bounds, out_shape, - resampling, nodata, 16, + resampling, r_nd, 16, ) - arrays.append(reprojected) - return _merge_arrays_numpy(arrays, nodata, strategy) + # Canonicalize this raster's sentinel to NaN before the merge so + # rasters with different sentinels merge correctly. + if not np.isnan(r_nd): + arr = np.asarray(arr, dtype=np.float64) + arr = np.where(arr == r_nd, np.nan, arr) + arrays.append(arr) + + merged = _merge_arrays_numpy(arrays, float('nan'), strategy) + if not np.isnan(nodata): + merged = np.where(np.isnan(merged), nodata, merged) + return merged def _merge_block_adapter( @@ -1603,9 +1624,14 @@ def _merge_block_adapter( src_wkt_list, tgt_wkt, out_bounds, out_shape, resampling, nodata, strategy, precision, - src_footprints_tgt, same_crs_list, + src_footprints_tgt, raster_nodata_list, same_crs_list, ): - """``map_blocks`` adapter for merge.""" + """``map_blocks`` adapter for merge. + + Each raster is reprojected with its own input nodata sentinel and + canonicalized to NaN before the strategy merge. The final result is + converted back to the user-requested output ``nodata``. + """ info = block_info[0] (row_start, row_end), (col_start, col_end) = info['array-location'] chunk_shape = (row_end - row_start, col_end - col_start) @@ -1618,7 +1644,7 @@ def _merge_block_adapter( if (src_footprints_tgt[i] is not None and not _bounds_overlap(cb, src_footprints_tgt[i])): continue - + r_nd = raster_nodata_list[i] placed = None if same_crs_list[i]: # Same-CRS path: direct pixel placement (no resampling). @@ -1629,23 +1655,29 @@ def _merge_block_adapter( placed = _place_same_crs( np.asarray(src_data), src_bounds_list[i], src_shape_list[i], y_desc_list[i], - cb, chunk_shape, nodata, + cb, chunk_shape, r_nd, ) if placed is not None: - arrays.append(placed) + arr = placed else: - reprojected = _reproject_chunk_numpy( + arr = _reproject_chunk_numpy( raster_data_list[i], src_bounds_list[i], src_shape_list[i], y_desc_list[i], src_wkt_list[i], tgt_wkt, cb, chunk_shape, - resampling, nodata, precision, + resampling, r_nd, precision, ) - arrays.append(reprojected) + if not np.isnan(r_nd): + arr = np.asarray(arr, dtype=np.float64) + arr = np.where(arr == r_nd, np.nan, arr) + arrays.append(arr) if not arrays: return np.full(chunk_shape, nodata, dtype=np.float64) - return _merge_arrays_numpy(arrays, nodata, strategy) + merged = _merge_arrays_numpy(arrays, float('nan'), strategy) + if not np.isnan(nodata): + merged = np.where(np.isnan(merged), nodata, merged) + return merged def _merge_dask( @@ -1665,6 +1697,9 @@ def _merge_dask( shape_list = [info['src_shape'] for info in raster_infos] ydesc_list = [info['y_desc'] for info in raster_infos] wkt_list = [info['src_wkt'] for info in raster_infos] + rnodata_list = [ + info.get('raster_nodata', float('nan')) for info in raster_infos + ] # Precompute source footprints in target CRS footprints = [ @@ -1698,6 +1733,7 @@ def _merge_dask( strategy=strategy, precision=16, src_footprints_tgt=footprints, + raster_nodata_list=rnodata_list, same_crs_list=same_crs_list, ) diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 893b2150..2c128069 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -718,6 +718,129 @@ def test_merge_dask(self): assert computed.shape[0] > 0 +class TestMergeMixedNodata: + """merge() must honor each raster's own nodata sentinel.""" + + def test_merge_mixed_nodata_sentinels(self): + """Raster A NaN sentinel, raster B -9999 sentinel. + + B's -9999 pixels must be recognized as nodata, not leaked as + real data into the merged output. + """ + from xrspatial.reproject import merge + + # Raster A: all valid, value 10 + a_data = np.full((16, 16), 10.0, dtype=np.float64) + a = _make_raster( + a_data, x_range=(-10, 0), y_range=(-5, 5), nodata=np.nan + ) + + # Raster B: half valid (=20), half -9999 sentinel + b_data = np.full((16, 16), 20.0, dtype=np.float64) + b_data[:, :8] = -9999.0 # left half is nodata + b = _make_raster( + b_data, x_range=(0, 10), y_range=(-5, 5), nodata=-9999.0 + ) + + result = merge([a, b], strategy='mean', resolution=1.0) + vals = result.values + + # The output should never contain -9999 as a data value. + # B's -9999 pixels were correctly recognized as nodata. + assert not np.any(vals == -9999.0), ( + "B's -9999 nodata pixels leaked into the merged output" + ) + + # B's right half (x > ~5) should still surface as 20. + x = result.coords['x'].values + right_mask = x > 6 + if right_mask.any(): + right = vals[:, right_mask] + valid = ~np.isnan(right) + if valid.any(): + np.testing.assert_allclose( + right[valid], 20.0, atol=1.0 + ) + + def test_merge_nan_then_int_sentinel(self): + """Mean strategy must not fold sentinel zeros into the average.""" + from xrspatial.reproject import merge + + a_data = np.full((8, 8), 10.0, dtype=np.float64) + a = _make_raster( + a_data, x_range=(-5, 5), y_range=(-5, 5), nodata=np.nan + ) + + # Raster B uses 0.0 as nodata sentinel + b_data = np.full((8, 8), 0.0, dtype=np.float64) + b = _make_raster( + b_data, x_range=(-5, 5), y_range=(-5, 5), nodata=0.0 + ) + + result = merge([a, b], strategy='mean', resolution=1.0) + vals = result.values + interior = vals[1:-1, 1:-1] + valid = ~np.isnan(interior) + if valid.any(): + # If B's zeros were treated as data, mean would be ~5. + # Treated as nodata, mean is just 10. + np.testing.assert_allclose( + interior[valid], 10.0, atol=1.0 + ) + + def test_merge_explicit_user_nodata_with_mixed_inputs(self): + """User-specified output nodata is independent of input sentinels.""" + from xrspatial.reproject import merge + + # Raster A: NaN nodata, all valid + a_data = np.full((16, 16), 10.0, dtype=np.float64) + a = _make_raster( + a_data, x_range=(-10, 0), y_range=(-5, 5), nodata=np.nan + ) + + # Raster B: -9999 nodata, half valid (=20) + b_data = np.full((16, 16), 20.0, dtype=np.float64) + b_data[:, :8] = -9999.0 + b = _make_raster( + b_data, x_range=(0, 10), y_range=(-5, 5), nodata=-9999.0 + ) + + result = merge( + [a, b], strategy='mean', resolution=1.0, nodata=-9999.0 + ) + vals = result.values + + # Output uses -9999 as the nodata sentinel, but data pixels must + # never be 0 from B's zero-sentinel test (different test). Here + # the only -9999 in the output should be true nodata regions + # (no overlap with any input). We verify B's right half surfaces + # as 20 (not -9999) and A's region surfaces as 10. + x = result.coords['x'].values + + right_mask = x > 6 + if right_mask.any(): + right = vals[:, right_mask] + data_mask = right != -9999.0 + if data_mask.any(): + np.testing.assert_allclose( + right[data_mask], 20.0, atol=1.0 + ) + + left_mask = x < -6 + if left_mask.any(): + left = vals[:, left_mask] + data_mask = left != -9999.0 + if data_mask.any(): + np.testing.assert_allclose( + left[data_mask], 10.0, atol=1.0 + ) + + # No NaN in the output -- user requested -9999 as the sentinel. + assert not np.any(np.isnan(vals)), ( + "user requested -9999 nodata but output contains NaN" + ) + + # --------------------------------------------------------------------------- # Accessor integration # ---------------------------------------------------------------------------