# Test added to xarray_sql/df_test.py by the patch "Add lazy evaluation test to
# df_test.py". It relies on that module's top-level names: `xr`
# (`import xarray as xr`), `XarrayContext` (`from . import XarrayContext`,
# added by the same patch) and `np` (presumably `import numpy as np`; that
# import lies outside the visible patch context -- confirm).
def test_lazy_evaluation():
    """Check that data is not indexed until a query is actually executed.

    Both variables of a small dataset are wrapped in a spying ndarray
    subclass that logs every ``__getitem__`` key. Registering the dataset
    and building a SQL query must leave the log empty; executing the query
    must add to it and return the expected rows.
    """
    reads = []

    class SpyArray(np.ndarray):
        """ndarray subclass that logs every indexing key into ``reads``.

        The log is reached through the enclosing scope rather than through
        an attribute assigned in ``__new__``: NumPy builds slices and views
        of a subclass without calling ``__new__``, so such an attribute
        would be missing on derived arrays and their next ``__getitem__``
        would raise ``AttributeError`` instead of recording the read.
        """

        def __new__(cls, arr):
            return np.asarray(arr).view(cls)

        def __getitem__(self, key):
            reads.append(key)
            return super().__getitem__(key)

    spy_ds = xr.Dataset(
        {
            "a": ("x", SpyArray([1, 2, 3])),
            "b": ("x", SpyArray([4, 5, 6])),
        }
    )

    ctx = XarrayContext()

    # Registering the dataset should NOT trigger reads.
    ctx.from_dataset("test", spy_ds, chunks={"x": 2})
    assert not reads, "Data was accessed too early on registration"

    # Building a query still should NOT read data.
    tbl = ctx.sql("SELECT a FROM test WHERE b > 4")
    assert not reads, "Data was accessed before execution"

    # Executing the query is the first point at which data may be read.
    result = tbl.to_pandas()
    assert reads, "Data should be accessed only on execution"

    # b > 4 keeps the rows where b is 5 or 6, i.e. a is 2 or 3. The query has
    # no ORDER BY, so compare order-independently.
    assert sorted(result["a"].tolist()) == [2, 3], "Query returned wrong rows"