Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions xrspatial/interpolate/_kriging.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,68 @@ def _chunk_var(block, block_info=None):
# Public API
# ---------------------------------------------------------------------------

def _check_kriging_memory(n_points, grid_pixels):
"""Raise MemoryError if kriging() would exceed available memory.

Three allocations dominate kriging memory use:

* Experimental variogram pair arrays. ``np.triu_indices`` produces
two int64 index arrays of length ``N*(N-1)/2``, plus float64
``dists`` and ``semivar`` of the same length. Roughly
``4 * N*(N-1)/2 * 8`` bytes.
* Kriging matrix. ``K`` and ``K_inv`` are both ``(N+1, N+1)``
float64, plus an N x N intermediate distance matrix. About
``3 * (N+1)**2 * 8`` bytes.
* Prediction ``k0`` matrix of shape ``(grid_pixels, N+1)`` float64,
plus matching ``dists`` and ``w`` of similar size. About
``3 * grid_pixels * (N+1) * 8`` bytes.

Worst case is the maximum of these three. The variogram and matrix
builds run sequentially, and ``k0`` is built later, so peak usage
is bounded by the largest single allocation.
"""
n = int(n_points)
g = int(grid_pixels)

pair_bytes = 4 * (n * (n - 1) // 2) * 8 if n > 1 else 0
matrix_bytes = 3 * (n + 1) * (n + 1) * 8
k0_bytes = 3 * g * (n + 1) * 8

estimate = max(pair_bytes, matrix_bytes, k0_bytes)

try:
from xrspatial.zonal import _available_memory_bytes
avail = _available_memory_bytes()
except ImportError:
avail = 2 * 1024 ** 3

if estimate > 0.8 * avail:
if estimate == k0_bytes:
culprit = (
f"prediction matrix of shape ({g}, {n + 1}) "
f"(grid_pixels x N+1)"
)
advice = (
"Reduce the template grid size or the number of input "
"points, or use a chunked dask-backed template."
)
elif estimate == matrix_bytes:
culprit = f"kriging matrix of shape ({n + 1}, {n + 1})"
advice = "Reduce the number of input points."
else:
culprit = (
f"variogram pair arrays of length {n * (n - 1) // 2} "
f"(N*(N-1)/2 for N={n})"
)
advice = "Reduce the number of input points."

raise MemoryError(
f"kriging() needs ~{estimate / 1e9:.1f} GB to allocate the "
f"{culprit}, but only ~{avail / 1e9:.1f} GB is available. "
f"{advice}"
)


def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
return_variance=False, name='kriging'):
"""Ordinary Kriging interpolation.
Expand All @@ -361,6 +423,13 @@ def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
xr.DataArray or tuple of xr.DataArray
Prediction raster, or ``(prediction, variance)`` if
*return_variance* is True.

Raises
------
MemoryError
If the worst-case allocation (variogram pair arrays, kriging
matrix, or prediction matrix) would exceed 80% of available
memory.
"""
_validate_raster(template, func_name='kriging', name='template')
x_arr, y_arr, z_arr = validate_points(x, y, z, func_name='kriging')
Expand All @@ -374,6 +443,11 @@ def kriging(x, y, z, template, variogram_model='spherical', nlags=15,
_validate_scalar(nlags, func_name='kriging', name='nlags',
dtype=int, min_val=1)

# Memory guard. Runs after input validation so we know N and the
# template grid size, but before any large allocation.
grid_pixels = int(np.prod(template.shape))
_check_kriging_memory(len(x_arr), grid_pixels)

# Experimental variogram
lag_h, lag_sv = _experimental_variogram(x_arr, y_arr, z_arr, nlags)

Expand Down
89 changes: 89 additions & 0 deletions xrspatial/tests/test_interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,3 +397,92 @@ def test_nan_points_are_dropped(self):
template = _make_template([0.0], [0.5])
result = idw(x, y, z, template)
assert np.isfinite(result.values[0, 0])


# ===================================================================
# Memory guard tests (issue #1307)
# ===================================================================

class TestKrigingMemoryGuard:
    """Verify kriging() refuses to allocate more than ~80% of RAM.

    Allocations scale with point count N (variogram + matrix) and with
    grid_pixels * N (prediction). We monkeypatch the available-memory
    helper to a small number so tests can simulate "too big" without
    actually allocating gigabytes.
    """

    def test_predict_matrix_exceeds_memory(self, monkeypatch):
        """Large grid x N k0 matrix triggers the guard."""
        # Pretend we only have 64 MB available.
        monkeypatch.setattr(
            'xrspatial.zonal._available_memory_bytes',
            lambda: 64 * 1024 ** 2,
        )

        x = np.array([0.0, 1.0, 2.0, 0.5])
        y = np.array([0.0, 0.0, 1.0, 1.5])
        z = np.array([1.0, 2.0, 3.0, 4.0])
        # k0 estimate: 3 * (2000*2000) * 5 * 8 bytes ~= 480 MB,
        # far above the 64 MB * 0.8 threshold.
        template = _make_template(
            np.arange(2000, dtype=np.float64),
            np.arange(2000, dtype=np.float64),
        )

        with pytest.raises(MemoryError, match='prediction matrix'):
            kriging(x, y, z, template)

    def test_kriging_matrix_exceeds_memory(self, monkeypatch):
        """Large N triggers the kriging-matrix guard.

        The matrix estimate, 3 * (N+1)**2 * 8 bytes, dominates the
        variogram-pair estimate, 4 * N*(N-1)/2 * 8 bytes, for every N,
        so large point counts always surface the 'kriging matrix'
        message rather than the variogram-pair one.
        """
        # Pretend we only have 32 MB available.
        monkeypatch.setattr(
            'xrspatial.zonal._available_memory_bytes',
            lambda: 32 * 1024 ** 2,
        )

        n = 3000
        rng = np.random.RandomState(0)
        x = rng.uniform(0, 10, n)
        y = rng.uniform(0, 10, n)
        z = rng.uniform(0, 10, n)
        template = _make_template([0.0, 1.0], [0.0, 1.0])

        # matrix_bytes = 3 * 3001**2 * 8 ~= 216 MB > 0.8 * 32 MB.
        with pytest.raises(MemoryError, match='kriging matrix'):
            kriging(x, y, z, template)

    def test_small_input_allowed(self, monkeypatch):
        """Tiny inputs pass the guard even with very low available memory."""
        # 16 MB available is plenty for a 4-point, 3x3 grid problem.
        monkeypatch.setattr(
            'xrspatial.zonal._available_memory_bytes',
            lambda: 16 * 1024 ** 2,
        )

        x, y, z = _grid_points()
        template = _make_template([0.0, 1.0, 2.0], [0.0, 1.0, 2.0])
        # Should not raise.
        result = kriging(x, y, z, template)
        assert result.shape == template.shape

    def test_check_helper_estimate_message(self, monkeypatch):
        """_check_kriging_memory reports GB and identifies culprit."""
        from xrspatial.interpolate._kriging import _check_kriging_memory

        monkeypatch.setattr(
            'xrspatial.zonal._available_memory_bytes',
            lambda: 1 * 1024 ** 2,  # 1 MB
        )

        # n=10, grid_pixels=100000 -> k0 ~ 26 MB > 1 MB * 0.8.
        with pytest.raises(MemoryError, match='prediction matrix'):
            _check_kriging_memory(n_points=10, grid_pixels=100_000)
Loading