diff --git a/xrspatial/interpolate/_kriging.py b/xrspatial/interpolate/_kriging.py
index cb5cb5c8..12c2d08f 100644
--- a/xrspatial/interpolate/_kriging.py
+++ b/xrspatial/interpolate/_kriging.py
@@ -336,6 +336,68 @@ def _chunk_var(block, block_info=None):
 # Public API
 # ---------------------------------------------------------------------------
 
+def _check_kriging_memory(n_points, grid_pixels):
+    """Raise MemoryError if kriging() would exceed available memory.
+
+    Three allocations dominate kriging memory use:
+
+    * Experimental variogram pair arrays. ``np.triu_indices`` produces
+      two int64 index arrays of length ``N*(N-1)/2``, plus float64
+      ``dists`` and ``semivar`` of the same length. Roughly
+      ``4 * N*(N-1)/2 * 8`` bytes.
+    * Kriging matrix. ``K`` and ``K_inv`` are both ``(N+1, N+1)``
+      float64, plus an N x N intermediate distance matrix. About
+      ``3 * (N+1)**2 * 8`` bytes.
+    * Prediction ``k0`` matrix of shape ``(grid_pixels, N+1)`` float64,
+      plus matching ``dists`` and ``w`` of similar size. About
+      ``3 * grid_pixels * (N+1) * 8`` bytes.
+
+    Worst case is the maximum of these three. The variogram and matrix
+    builds run sequentially, and ``k0`` is built later, so peak usage
+    is bounded by the largest single allocation.
+    """
+    n = int(n_points)
+    g = int(grid_pixels)
+
+    pair_bytes = 4 * (n * (n - 1) // 2) * 8 if n > 1 else 0
+    matrix_bytes = 3 * (n + 1) * (n + 1) * 8
+    k0_bytes = 3 * g * (n + 1) * 8
+
+    estimate = max(pair_bytes, matrix_bytes, k0_bytes)
+
+    try:
+        from xrspatial.zonal import _available_memory_bytes
+        avail = _available_memory_bytes()
+    except ImportError:
+        avail = 2 * 1024 ** 3
+
+    if estimate > 0.8 * avail:
+        if estimate == k0_bytes:
+            culprit = (
+                f"prediction matrix of shape ({g}, {n + 1}) "
+                f"(grid_pixels x N+1)"
+            )
+            advice = (
+                "Reduce the template grid size or the number of input "
+                "points, or use a chunked dask-backed template."
+            )
+        elif estimate == matrix_bytes:
+            culprit = f"kriging matrix of shape ({n + 1}, {n + 1})"
+            advice = "Reduce the number of input points."
+        else:
+            culprit = (
+                f"variogram pair arrays of length {n * (n - 1) // 2} "
+                f"(N*(N-1)/2 for N={n})"
+            )
+            advice = "Reduce the number of input points."
+
+        raise MemoryError(
+            f"kriging() needs ~{estimate / 1e9:.1f} GB to allocate the "
+            f"{culprit}, but only ~{avail / 1e9:.1f} GB is available. "
+            f"{advice}"
+        )
+
+
 def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
             return_variance=False, name='kriging'):
     """Ordinary Kriging interpolation.
@@ -361,6 +423,13 @@ def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
     xr.DataArray or tuple of xr.DataArray
         Prediction raster, or ``(prediction, variance)`` if
         *return_variance* is True.
+
+    Raises
+    ------
+    MemoryError
+        If the worst-case allocation (variogram pair arrays, kriging
+        matrix, or prediction matrix) would exceed 80% of available
+        memory.
     """
     _validate_raster(template, func_name='kriging', name='template')
     x_arr, y_arr, z_arr = validate_points(x, y, z, func_name='kriging')
@@ -374,6 +443,11 @@ def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
     _validate_scalar(nlags, func_name='kriging', name='nlags', dtype=int,
                      min_val=1)
 
+    # Memory guard. Runs after input validation so we know N and the
+    # template grid size, but before any large allocation.
+    grid_pixels = int(np.prod(template.shape))
+    _check_kriging_memory(len(x_arr), grid_pixels)
+
     # Experimental variogram
     lag_h, lag_sv = _experimental_variogram(x_arr, y_arr, z_arr, nlags)
 
diff --git a/xrspatial/tests/test_interpolation.py b/xrspatial/tests/test_interpolation.py
index 6a4011f3..459930a6 100644
--- a/xrspatial/tests/test_interpolation.py
+++ b/xrspatial/tests/test_interpolation.py
@@ -397,3 +397,92 @@ def test_nan_points_are_dropped(self):
         template = _make_template([0.0], [0.5])
         result = idw(x, y, z, template)
         assert np.isfinite(result.values[0, 0])
+
+
+# ===================================================================
+# Memory guard tests (issue #1307)
+# ===================================================================
+
+class TestKrigingMemoryGuard:
+    """Verify kriging() refuses to allocate more than ~80% of RAM.
+
+    Allocations scale with point count N (variogram + matrix) and with
+    grid_pixels * N (prediction). We monkeypatch the available-memory
+    helper to a small number so tests can simulate "too big" without
+    actually allocating gigabytes.
+    """
+
+    def test_predict_matrix_exceeds_memory(self, monkeypatch):
+        """Large grid x N k0 matrix triggers the guard."""
+        from xrspatial.interpolate import _kriging as _kr
+
+        # Pretend we only have 64 MB available.
+        monkeypatch.setattr(
+            'xrspatial.zonal._available_memory_bytes',
+            lambda: 64 * 1024 ** 2,
+        )
+
+        x = np.array([0.0, 1.0, 2.0, 0.5])
+        y = np.array([0.0, 0.0, 1.0, 1.5])
+        z = np.array([1.0, 2.0, 3.0, 4.0])
+        # 2000x2000 grid * 5 cols * 8 bytes ~= 160 MB > 64 MB * 0.8
+        template = _make_template(
+            np.arange(2000, dtype=np.float64),
+            np.arange(2000, dtype=np.float64),
+        )
+
+        with pytest.raises(MemoryError, match='prediction matrix'):
+            kriging(x, y, z, template)
+
+    def test_variogram_pairs_exceed_memory(self, monkeypatch):
+        """Large N trips the guard via the kriging-matrix term."""
+        from xrspatial.interpolate import _kriging as _kr
+
+        # 32 MB available.  N=3000 -> matrix_bytes = 24*(N+1)^2 ~= 216 MB.
+        # Note matrix_bytes = 24*(N+1)^2 always exceeds pair_bytes =
+        # 16*N*(N-1)/2, so the matrix -- not the variogram pair arrays --
+        # is what trips the guard for any large N; the pair-array branch
+        # in _check_kriging_memory is effectively unreachable. Assert the
+        # 'kriging matrix' message accordingly.
+        monkeypatch.setattr(
+            'xrspatial.zonal._available_memory_bytes',
+            lambda: 32 * 1024 ** 2,
+        )
+
+        n = 3000
+        rng = np.random.RandomState(0)
+        x = rng.uniform(0, 10, n)
+        y = rng.uniform(0, 10, n)
+        z = rng.uniform(0, 10, n)
+        template = _make_template([0.0, 1.0], [0.0, 1.0])
+
+        # n=3000 -> matrix_bytes = 3 * 3001^2 * 8 ~= 216 MB > 32*0.8 MB
+        with pytest.raises(MemoryError, match='kriging matrix'):
+            kriging(x, y, z, template)
+
+    def test_small_input_allowed(self, monkeypatch):
+        """Tiny inputs pass the guard even with very low available memory."""
+        # 16 MB available is plenty for a 4-point, 3x3 grid problem.
+        monkeypatch.setattr(
+            'xrspatial.zonal._available_memory_bytes',
+            lambda: 16 * 1024 ** 2,
+        )
+
+        x, y, z = _grid_points()
+        template = _make_template([0.0, 1.0, 2.0], [0.0, 1.0, 2.0])
+        # Should not raise.
+        result = kriging(x, y, z, template)
+        assert result.shape == template.shape
+
+    def test_check_helper_estimate_message(self, monkeypatch):
+        """_check_kriging_memory reports GB and identifies culprit."""
+        from xrspatial.interpolate._kriging import _check_kriging_memory
+
+        monkeypatch.setattr(
+            'xrspatial.zonal._available_memory_bytes',
+            lambda: 1 * 1024 ** 2,  # 1 MB
+        )
+
+        # n=10, grid_pixels=100000 -> k0 ~ 26 MB > 1 MB * 0.8.
+        with pytest.raises(MemoryError, match='prediction matrix'):
+            _check_kriging_memory(n_points=10, grid_pixels=100_000)