diff --git a/xrspatial/proximity.py b/xrspatial/proximity.py
index 76750c52..6f79f039 100644
--- a/xrspatial/proximity.py
+++ b/xrspatial/proximity.py
@@ -1221,9 +1221,28 @@ def _process_numpy(img, x_coords, y_coords):
     def _process_dask(raster, xs, ys):
 
         if max_distance >= max_possible_distance:
-            # consider all targets in the whole raster
-            # the data array is computed at once,
-            # make sure your data fit your memory
+            # The line-sweep needs the full raster in one chunk.
+            # Guard against OOM before rechunking.
+            # nbytes is computed with Python ints, so the estimate cannot
+            # wrap around on platforms where NumPy's default int is 32-bit.
+            estimated_bytes = raster.data.nbytes
+            # ~35 bytes/pixel working memory (distance, output, pan_near,
+            # scan_line, nearest arrays, etc.)
+            # NOTE(review): this scales the raster's byte size (pixels *
+            # itemsize) by 35, not the pixel count -- confirm which is meant.
+            estimated_working = estimated_bytes * 35
+            try:
+                from xrspatial.zonal import _available_memory_bytes
+                avail = _available_memory_bytes()
+            except ImportError:
+                avail = 2 * 1024**3
+            if estimated_working > 0.8 * avail:
+                raise ValueError(
+                    f"proximity() with max_distance >= raster diagonal "
+                    f"needs ~{estimated_working / 1e9:.1f} GB but only "
+                    f"~{avail / 1e9:.1f} GB available. Set a finite "
+                    f"max_distance for out-of-core dask processing."
+                )
             height, width = raster.shape
             raster.data = raster.data.rechunk({0: height, 1: width})
             xs = xs.rechunk({0: height, 1: width})
diff --git a/xrspatial/tests/test_proximity.py b/xrspatial/tests/test_proximity.py
index 41aa69af..5469fd06 100644
--- a/xrspatial/tests/test_proximity.py
+++ b/xrspatial/tests/test_proximity.py
@@ -374,6 +374,31 @@ def _make_kdtree_raster(height=20, width=30, chunks=(10, 15)):
     return raster
 
 
+@pytest.mark.skipif(da is None, reason="dask is not installed")
+def test_proximity_dask_inf_distance_memory_guard():
+    """Line-sweep path with inf max_distance should raise when memory is tight."""
+    from unittest.mock import patch
+
+    data = np.zeros((100, 100), dtype=np.float64)
+    data[50, 50] = 1.0
+    raster = xr.DataArray(
+        da.from_array(data, chunks=(50, 50)),
+        dims=['y', 'x'],
+        coords={
+            'x': np.linspace(-10, 10, 100),
+            'y': np.linspace(-5, 5, 100),
+        },
+    )
+
+    # Force the non-KDTree path by using GREAT_CIRCLE metric
+    # (KDTree only supports EUCLIDEAN/MANHATTAN), and mock tight memory.
+    # The guard imports the helper from xrspatial.zonal at call time, so
+    # the patch must target that module for the mock to be picked up.
+    with patch('xrspatial.zonal._available_memory_bytes', return_value=1024):
+        with pytest.raises(ValueError, match="Set a finite max_distance"):
+            proximity(raster, target_values=[1], distance_metric="GREAT_CIRCLE")
+
+
 @pytest.mark.skipif(da is None, reason="dask is not installed")
 @pytest.mark.parametrize("metric", ["EUCLIDEAN", "MANHATTAN"])
 def test_proximity_dask_kdtree_matches_numpy(metric):