Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 52 additions & 16 deletions xrspatial/reproject/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1401,7 +1401,7 @@ def merge(
"Could not detect target CRS. Pass target_crs explicitly."
)

# Detect nodata
# Detect output nodata (the sentinel the user asked for)
nd = nodata if nodata is not None else _detect_nodata(rasters[0], nodata)

# Gather source info for each raster
Expand All @@ -1416,13 +1416,18 @@ def merge(
sb = _source_bounds(r)
ss = (r.sizes[r.dims[-2]], r.sizes[r.dims[-1]])
yd = _is_y_descending(r)
# Per-raster input nodata sentinel. Detected independently of the
# user-supplied output nodata so that mixed-sentinel inputs are
# canonicalized correctly during merge.
r_nd = _detect_nodata(r, None)
raster_infos.append({
'raster': r,
'src_crs': src_crs,
'src_bounds': sb,
'src_shape': ss,
'y_desc': yd,
'src_wkt': src_crs.to_wkt(),
'raster_nodata': r_nd,
})

# Compute unified output grid
Expand Down Expand Up @@ -1569,32 +1574,48 @@ def _merge_inmemory(

Detects same-CRS tiles and uses fast direct placement instead
of reprojection.

Each raster is reprojected using its own nodata sentinel and then
canonicalized to NaN before the strategy merge so that mixed-sentinel
inputs do not leak invalid pixels into the mosaic. After merging, the
NaN canonical sentinel is converted back to the user-requested
output ``nodata``.
"""
from ._crs_utils import _crs_from_wkt
tgt_crs = _crs_from_wkt(tgt_wkt)

arrays = []
for info in raster_infos:
r_nd = info.get('raster_nodata', float('nan'))
# Check if source CRS matches target (no reprojection needed)
placed = None
if info['src_crs'] == tgt_crs:
placed = _place_same_crs(
info['raster'].values,
info['src_bounds'], info['src_shape'], info['y_desc'],
out_bounds, out_shape, nodata,
out_bounds, out_shape, r_nd,
)
if placed is not None:
arrays.append(placed)
arr = placed
else:
reprojected = _reproject_chunk_numpy(
arr = _reproject_chunk_numpy(
info['raster'].values,
info['src_bounds'], info['src_shape'], info['y_desc'],
info['src_wkt'], tgt_wkt,
out_bounds, out_shape,
resampling, nodata, 16,
resampling, r_nd, 16,
)
arrays.append(reprojected)
return _merge_arrays_numpy(arrays, nodata, strategy)
# Canonicalize this raster's sentinel to NaN before the merge so
# rasters with different sentinels merge correctly.
if not np.isnan(r_nd):
arr = np.asarray(arr, dtype=np.float64)
arr = np.where(arr == r_nd, np.nan, arr)
arrays.append(arr)

merged = _merge_arrays_numpy(arrays, float('nan'), strategy)
if not np.isnan(nodata):
merged = np.where(np.isnan(merged), nodata, merged)
return merged


def _merge_block_adapter(
Expand All @@ -1603,9 +1624,14 @@ def _merge_block_adapter(
src_wkt_list, tgt_wkt,
out_bounds, out_shape,
resampling, nodata, strategy, precision,
src_footprints_tgt, same_crs_list,
src_footprints_tgt, raster_nodata_list, same_crs_list,
):
"""``map_blocks`` adapter for merge."""
"""``map_blocks`` adapter for merge.

Each raster is reprojected with its own input nodata sentinel and
canonicalized to NaN before the strategy merge. The final result is
converted back to the user-requested output ``nodata``.
"""
info = block_info[0]
(row_start, row_end), (col_start, col_end) = info['array-location']
chunk_shape = (row_end - row_start, col_end - col_start)
Expand All @@ -1618,7 +1644,7 @@ def _merge_block_adapter(
if (src_footprints_tgt[i] is not None
and not _bounds_overlap(cb, src_footprints_tgt[i])):
continue

r_nd = raster_nodata_list[i]
placed = None
if same_crs_list[i]:
# Same-CRS path: direct pixel placement (no resampling).
Expand All @@ -1629,23 +1655,29 @@ def _merge_block_adapter(
placed = _place_same_crs(
np.asarray(src_data),
src_bounds_list[i], src_shape_list[i], y_desc_list[i],
cb, chunk_shape, nodata,
cb, chunk_shape, r_nd,
)
if placed is not None:
arrays.append(placed)
arr = placed
else:
reprojected = _reproject_chunk_numpy(
arr = _reproject_chunk_numpy(
raster_data_list[i],
src_bounds_list[i], src_shape_list[i], y_desc_list[i],
src_wkt_list[i], tgt_wkt,
cb, chunk_shape,
resampling, nodata, precision,
resampling, r_nd, precision,
)
arrays.append(reprojected)
if not np.isnan(r_nd):
arr = np.asarray(arr, dtype=np.float64)
arr = np.where(arr == r_nd, np.nan, arr)
arrays.append(arr)

if not arrays:
return np.full(chunk_shape, nodata, dtype=np.float64)
return _merge_arrays_numpy(arrays, nodata, strategy)
merged = _merge_arrays_numpy(arrays, float('nan'), strategy)
if not np.isnan(nodata):
merged = np.where(np.isnan(merged), nodata, merged)
return merged


def _merge_dask(
Expand All @@ -1665,6 +1697,9 @@ def _merge_dask(
shape_list = [info['src_shape'] for info in raster_infos]
ydesc_list = [info['y_desc'] for info in raster_infos]
wkt_list = [info['src_wkt'] for info in raster_infos]
rnodata_list = [
info.get('raster_nodata', float('nan')) for info in raster_infos
]

# Precompute source footprints in target CRS
footprints = [
Expand Down Expand Up @@ -1698,6 +1733,7 @@ def _merge_dask(
strategy=strategy,
precision=16,
src_footprints_tgt=footprints,
raster_nodata_list=rnodata_list,
same_crs_list=same_crs_list,
)

Expand Down
123 changes: 123 additions & 0 deletions xrspatial/tests/test_reproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,129 @@ def test_merge_dask(self):
assert computed.shape[0] > 0


class TestMergeMixedNodata:
"""merge() must honor each raster's own nodata sentinel."""

def test_merge_mixed_nodata_sentinels(self):
"""Raster A NaN sentinel, raster B -9999 sentinel.

B's -9999 pixels must be recognized as nodata, not leaked as
real data into the merged output.
"""
from xrspatial.reproject import merge

# Raster A: all valid, value 10
a_data = np.full((16, 16), 10.0, dtype=np.float64)
a = _make_raster(
a_data, x_range=(-10, 0), y_range=(-5, 5), nodata=np.nan
)

# Raster B: half valid (=20), half -9999 sentinel
b_data = np.full((16, 16), 20.0, dtype=np.float64)
b_data[:, :8] = -9999.0 # left half is nodata
b = _make_raster(
b_data, x_range=(0, 10), y_range=(-5, 5), nodata=-9999.0
)

result = merge([a, b], strategy='mean', resolution=1.0)
vals = result.values

# The output should never contain -9999 as a data value.
# B's -9999 pixels were correctly recognized as nodata.
assert not np.any(vals == -9999.0), (
"B's -9999 nodata pixels leaked into the merged output"
)

# B's right half (x > ~5) should still surface as 20.
x = result.coords['x'].values
right_mask = x > 6
if right_mask.any():
right = vals[:, right_mask]
valid = ~np.isnan(right)
if valid.any():
np.testing.assert_allclose(
right[valid], 20.0, atol=1.0
)

def test_merge_nan_then_int_sentinel(self):
"""Mean strategy must not fold sentinel zeros into the average."""
from xrspatial.reproject import merge

a_data = np.full((8, 8), 10.0, dtype=np.float64)
a = _make_raster(
a_data, x_range=(-5, 5), y_range=(-5, 5), nodata=np.nan
)

# Raster B uses 0.0 as nodata sentinel
b_data = np.full((8, 8), 0.0, dtype=np.float64)
b = _make_raster(
b_data, x_range=(-5, 5), y_range=(-5, 5), nodata=0.0
)

result = merge([a, b], strategy='mean', resolution=1.0)
vals = result.values
interior = vals[1:-1, 1:-1]
valid = ~np.isnan(interior)
if valid.any():
# If B's zeros were treated as data, mean would be ~5.
# Treated as nodata, mean is just 10.
np.testing.assert_allclose(
interior[valid], 10.0, atol=1.0
)

def test_merge_explicit_user_nodata_with_mixed_inputs(self):
"""User-specified output nodata is independent of input sentinels."""
from xrspatial.reproject import merge

# Raster A: NaN nodata, all valid
a_data = np.full((16, 16), 10.0, dtype=np.float64)
a = _make_raster(
a_data, x_range=(-10, 0), y_range=(-5, 5), nodata=np.nan
)

# Raster B: -9999 nodata, half valid (=20)
b_data = np.full((16, 16), 20.0, dtype=np.float64)
b_data[:, :8] = -9999.0
b = _make_raster(
b_data, x_range=(0, 10), y_range=(-5, 5), nodata=-9999.0
)

result = merge(
[a, b], strategy='mean', resolution=1.0, nodata=-9999.0
)
vals = result.values

# Output uses -9999 as the nodata sentinel, but data pixels must
# never be 0 from B's zero-sentinel test (different test). Here
# the only -9999 in the output should be true nodata regions
# (no overlap with any input). We verify B's right half surfaces
# as 20 (not -9999) and A's region surfaces as 10.
x = result.coords['x'].values

right_mask = x > 6
if right_mask.any():
right = vals[:, right_mask]
data_mask = right != -9999.0
if data_mask.any():
np.testing.assert_allclose(
right[data_mask], 20.0, atol=1.0
)

left_mask = x < -6
if left_mask.any():
left = vals[:, left_mask]
data_mask = left != -9999.0
if data_mask.any():
np.testing.assert_allclose(
left[data_mask], 10.0, atol=1.0
)

# No NaN in the output -- user requested -9999 as the sentinel.
assert not np.any(np.isnan(vals)), (
"user requested -9999 nodata but output contains NaN"
)


# ---------------------------------------------------------------------------
# Accessor integration
# ---------------------------------------------------------------------------
Expand Down