Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 56 additions & 29 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,44 @@ def _extract_rich_tags(attrs: dict) -> dict:
}


def _validate_tile_size(tile_size) -> None:
"""Validate ``tile_size`` for the tiled GeoTIFF writers.

Shared by ``to_geotiff`` (when ``tiled=True``) and
``write_geotiff_gpu`` (always tiled) so the accepted types, the
non-positive rejection, and the multiple-of-16 hint stay in lockstep.
The tiled writer computes the tile grid as
``math.ceil(width / tile_size)``; ``tile_size=0`` hits
``ZeroDivisionError`` deep inside the writer, and negative values
produce a nonsensical tile grid. The TIFF 6 spec also requires
``TileWidth`` and ``TileLength`` to be positive multiples of 16
for broad interoperability with libtiff / GDAL strict readers; a
value like 17 would otherwise round-trip through the in-repo
reader but be rejected elsewhere.
"""
if not isinstance(tile_size, (int, np.integer)) or isinstance(
tile_size, bool):
raise ValueError(
f"tile_size must be a positive int, got "
f"{tile_size!r} (type {type(tile_size).__name__}).")
if tile_size <= 0:
raise ValueError(
f"tile_size must be a positive int, got tile_size={tile_size}.")
if tile_size % 16 != 0:
lower = (int(tile_size) // 16) * 16
upper = lower + 16
# ``lower`` is 0 for tile_size < 16; suppress it from the hint
# because 0 is not a valid tile size on its own.
if lower <= 0:
hint = f"try tile_size={upper}"
else:
hint = f"try tile_size={lower} or tile_size={upper}"
raise ValueError(
f"tile_size must be a positive multiple of 16 (TIFF 6 "
f"spec requirement for TileWidth/TileLength), got "
f"tile_size={tile_size}; {hint}.")


def to_geotiff(data: xr.DataArray | np.ndarray,
path: str | BinaryIO, *,
crs: int | str | None = None,
Expand Down Expand Up @@ -1272,9 +1310,11 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
tiled : bool
Use tiled layout (default True).
tile_size : int
Tile size in pixels (default 256). Ignored when ``tiled=False``;
a warning is emitted if a non-default value is passed alongside
strip mode.
Tile size in pixels (default 256). Must be a positive multiple
of 16 when ``tiled=True``; this is a TIFF 6 spec requirement
on TileWidth and TileLength for broad reader compatibility.
Ignored when ``tiled=False``; a warning is emitted if a
non-default value is passed alongside strip mode.
predictor : bool or int
TIFF predictor. Accepted values:

Expand Down Expand Up @@ -1328,13 +1368,9 @@ def to_geotiff(data: xr.DataArray | np.ndarray,

path = _coerce_path(path)

# Reject non-positive tile_size up front. The tiled writer computes
# the tile grid as ``math.ceil(width / tile_size)``; tile_size=0 hits
# ZeroDivisionError deep inside the writer, and negative values
# produce a nonsensical tile grid. tiled=False ignores tile_size, so
# only validate when tiled output is actually requested. Shared with
# ``write_geotiff_gpu`` via ``_validate_tile_size_arg`` so both
# writers emit the same error format (#1752 / #1776).
# tiled=False ignores tile_size, so only validate when tiled output
# is requested. Shared with write_geotiff_gpu via
# _validate_tile_size_arg so both writers keep identical validation.
if tiled:
_validate_tile_size_arg(tile_size)

Expand Down Expand Up @@ -2014,20 +2050,11 @@ def _validate_chunks_arg(chunks, *, allow_none=False):
def _validate_tile_size_arg(tile_size):
    """Validate the ``tile_size`` kwarg for the tiled writer entry points.

    Thin wrapper retained for internal callers (``to_geotiff`` when
    ``tiled=True`` and ``write_geotiff_gpu``). All checks live in
    ``_validate_tile_size`` so both writers share one validation path
    and one error format.

    Parameters
    ----------
    tile_size : int
        Requested tile edge length in pixels.

    Raises
    ------
    ValueError
        Propagated from ``_validate_tile_size`` when ``tile_size`` is not
        an ``int`` / ``np.integer`` (``bool`` is rejected, since
        ``True == 1`` would otherwise pass), is not positive, or is not a
        multiple of 16.
    """
    # Delegate instead of re-checking: the shared helper already performs
    # the type and positivity checks with the same messages, so repeating
    # them here would only create a second copy to keep in sync.
    _validate_tile_size(tile_size)


def read_geotiff_dask(source: str, *,
Expand Down Expand Up @@ -3386,7 +3413,10 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
producing a tiled file. Accepted for API parity with
``to_geotiff``.
tile_size : int
Tile size in pixels (default 256).
Tile size in pixels (default 256). Must be a positive multiple
of 16; this is a TIFF 6 spec requirement on TileWidth and
TileLength for broad reader compatibility. ``write_geotiff_gpu``
is always tiled, so the check fires for every call.
predictor : bool or int
TIFF predictor. ``False``/``0``/``1`` -> none, ``True``/``2`` ->
horizontal differencing, ``3`` -> floating-point predictor
Expand Down Expand Up @@ -3428,11 +3458,8 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
"compression is tile-based; the strip layout is not "
"implemented on the GPU path. Use to_geotiff(..., gpu=False, "
"tiled=False) for strip output on CPU.")
# Reject non-positive tile_size up front so the GPU writer surfaces
# the same error as ``to_geotiff`` (#1776). Previously ``tile_size=0``
# raised ``ZeroDivisionError`` from gpu_compress_tiles, ``tile_size=-1``
# surfaced as ``struct.error`` from the SHORT-tag encoder, and
# ``tile_size=256.0`` raised ``TypeError`` deep in the kernel.
# write_geotiff_gpu is always tiled, so validate tile_size here and
# keep parity with the public to_geotiff entry point.
_validate_tile_size_arg(tile_size)
if max_z_error < 0:
raise ValueError(
Expand Down
6 changes: 3 additions & 3 deletions xrspatial/geotiff/tests/test_backend_kwarg_parity_1561.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def small_tiff_path(tmp_path):
attrs={'crs': 4326},
)
p = tmp_path / 'parity_1561_small.tif'
to_geotiff(da, str(p), tile_size=4)
to_geotiff(da, str(p), tile_size=16)
return str(p), arr


Expand All @@ -72,7 +72,7 @@ def small_multiband_tiff_path(tmp_path):
attrs={'crs': 4326},
)
p = tmp_path / 'parity_1561_mb.tif'
to_geotiff(da, str(p), tile_size=4)
to_geotiff(da, str(p), tile_size=16)
return str(p), arr


Expand Down Expand Up @@ -197,7 +197,7 @@ def test_write_geotiff_gpu_accepts_streaming_buffer_bytes_as_noop(tmp_path):
p = tmp_path / 'parity_1561_streaming.tif'
# Argument is accepted; result must round-trip identically to a
# call without it.
write_geotiff_gpu(da, str(p), streaming_buffer_bytes=4096, tile_size=4)
write_geotiff_gpu(da, str(p), streaming_buffer_bytes=4096, tile_size=16)
rd = open_geotiff(str(p))
np.testing.assert_array_equal(rd.values, arr.get())

Expand Down
2 changes: 1 addition & 1 deletion xrspatial/geotiff/tests/test_band_validation_1673.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def multiband_tiff_path(tmp_path):
attrs={'crs': 4326},
)
p = tmp_path / 'mb_1673.tif'
to_geotiff(da, str(p), tile_size=4)
to_geotiff(da, str(p), tile_size=16)
return str(p), arr


Expand Down
10 changes: 5 additions & 5 deletions xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def test_to_geotiff_cog_cubic_nodata_round_trip(tmp_path):
da = xr.DataArray(arr, dims=['y', 'x'])
p = str(tmp_path / 'cog_cubic_nodata.tif')
to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate',
tiled=True, tile_size=8, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='cubic')

ov = open_geotiff(p, overview_level=1)
Expand All @@ -165,7 +165,7 @@ def test_to_geotiff_cog_cubic_no_nodata_round_trip(tmp_path):
da = xr.DataArray(arr, dims=['y', 'x'])
p = str(tmp_path / 'cog_cubic_no_nodata.tif')
to_geotiff(da, p, cog=True, compression='deflate',
tiled=True, tile_size=8, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='cubic')

ov = open_geotiff(p, overview_level=1)
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_to_geotiff_cog_cubic_nodata_gpu_round_trip(tmp_path):
da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x'])
p = str(tmp_path / 'cog_cubic_nodata_gpu.tif')
to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate',
tiled=True, tile_size=8, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='cubic')

ov = open_geotiff(p, overview_level=1)
Expand Down Expand Up @@ -267,10 +267,10 @@ def test_gpu_cpu_cubic_overview_bytes_match(tmp_path):
cpu_path = str(tmp_path / 'cpu_cubic.tif')
gpu_path = str(tmp_path / 'gpu_cubic.tif')
to_geotiff(cpu_da, cpu_path, nodata=-9999.0, cog=True,
compression='deflate', tiled=True, tile_size=8,
compression='deflate', tiled=True, tile_size=16,
overview_levels=[2], overview_resampling='cubic')
to_geotiff(gpu_da, gpu_path, nodata=-9999.0, cog=True,
compression='deflate', tiled=True, tile_size=8,
compression='deflate', tiled=True, tile_size=16,
overview_levels=[2], overview_resampling='cubic')

cpu_ov = np.asarray(open_geotiff(cpu_path, overview_level=1).data)
Expand Down
14 changes: 7 additions & 7 deletions xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_cpu_cog_overview_mean_ignores_sentinel(tmp_path):
da = xr.DataArray(arr, dims=['y', 'x'])
p = str(tmp_path / 'cog_mean_nodata.tif')
to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate',
tiled=True, tile_size=2, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='mean')

ov = open_geotiff(p, overview_level=1)
Expand All @@ -81,7 +81,7 @@ def test_cpu_cog_overview_mean_partial_block(tmp_path):
da = xr.DataArray(arr, dims=['y', 'x'])
p = str(tmp_path / 'cog_mean_nodata_full_block.tif')
to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate',
tiled=True, tile_size=2, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='mean')

ov = open_geotiff(p, overview_level=1)
Expand Down Expand Up @@ -114,7 +114,7 @@ def test_cpu_cog_overview_aggregations_ignore_sentinel(
da = xr.DataArray(arr, dims=['y', 'x'])
p = str(tmp_path / f'cog_{method}_nodata.tif')
to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate',
tiled=True, tile_size=2, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling=method)

ov = open_geotiff(p, overview_level=1)
Expand All @@ -129,7 +129,7 @@ def test_cpu_cog_overview_mean_no_nodata_passes(tmp_path):
da = xr.DataArray(arr, dims=['y', 'x'])
p = str(tmp_path / 'cog_mean_no_nodata.tif')
to_geotiff(da, p, cog=True, compression='deflate',
tiled=True, tile_size=2, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='mean')

ov = open_geotiff(p, overview_level=1)
Expand Down Expand Up @@ -220,7 +220,7 @@ def test_gpu_cog_overview_mean_ignores_sentinel(tmp_path):

p = str(tmp_path / 'gpu_cog_mean_nodata.tif')
to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate',
tiled=True, tile_size=2, overview_levels=[2],
tiled=True, tile_size=16, overview_levels=[2],
overview_resampling='mean', gpu=True)

ov = open_geotiff(p, overview_level=1)
Expand Down Expand Up @@ -277,15 +277,15 @@ def test_gpu_cog_overview_matches_cpu(tmp_path):
da_cpu = xr.DataArray(arr, dims=['y', 'x'])
p_cpu = str(tmp_path / 'cpu_pyramid.tif')
to_geotiff(da_cpu, p_cpu, nodata=-9999.0, cog=True,
compression='deflate', tiled=True, tile_size=2,
compression='deflate', tiled=True, tile_size=16,
overview_levels=[2], overview_resampling='mean')
cpu_ov = np.asarray(open_geotiff(p_cpu, overview_level=1).data)

# GPU
da_gpu = xr.DataArray(cupy.asarray(arr), dims=['y', 'x'])
p_gpu = str(tmp_path / 'gpu_pyramid.tif')
to_geotiff(da_gpu, p_gpu, nodata=-9999.0, cog=True,
compression='deflate', tiled=True, tile_size=2,
compression='deflate', tiled=True, tile_size=16,
overview_levels=[2], overview_resampling='mean', gpu=True)
gpu_ov = np.asarray(open_geotiff(p_gpu, overview_level=1).data)

Expand Down
4 changes: 2 additions & 2 deletions xrspatial/geotiff/tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -1870,7 +1870,7 @@ def test_bigtiff_eager_tile_offsets_are_long8_1247(self, tmp_path):
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
path = str(tmp_path / 'bigtiff_long8_eager_1247.tif')
to_geotiff(arr, path, compression='none',
tiled=True, tile_size=4, bigtiff=True)
tiled=True, tile_size=16, bigtiff=True)
self._assert_offset_tags_are_long8(path)
# Data must still round-trip.
np.testing.assert_array_equal(open_geotiff(path).values, arr)
Expand Down Expand Up @@ -1902,7 +1902,7 @@ def test_bigtiff_streaming_tile_offsets_are_long8_1247(self, tmp_path):
)
path = str(tmp_path / 'bigtiff_long8_stream_1247.tif')
to_geotiff(dask_da, path, compression='none',
tiled=True, tile_size=4, bigtiff=True)
tiled=True, tile_size=16, bigtiff=True)
self._assert_offset_tags_are_long8(path)
np.testing.assert_array_equal(open_geotiff(path).values, arr)

Expand Down
4 changes: 2 additions & 2 deletions xrspatial/geotiff/tests/test_gpu_window_band_1605.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def single_band_tiff(tmp_path):
attrs={'crs': 4326},
)
p = tmp_path / 'window_band_1605_single.tif'
to_geotiff(da, str(p), tile_size=8)
to_geotiff(da, str(p), tile_size=16)
return str(p), arr


Expand All @@ -80,7 +80,7 @@ def multi_band_tiff(tmp_path):
attrs={'crs': 4326},
)
p = tmp_path / 'window_band_1605_multi.tif'
to_geotiff(da, str(p), tile_size=8)
to_geotiff(da, str(p), tile_size=16)
return str(p), arr


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,7 @@ def _mode_4x4_uint8() -> np.ndarray:


def _mode_8x8_uint8() -> np.ndarray:
"""8x8 uint8 raster -- big enough for two tiles at tile_size=4 with
a deterministic mode per 2x2 block on the level-1 overview."""
"""8x8 uint8 raster with deterministic mode per 2x2 level-1 block."""
rng = np.random.default_rng(seed=1740)
# Use a small categorical range so ties are common; the GPU mode
# branch falls back to the CPU implementation, so the result must
Expand Down Expand Up @@ -191,7 +190,7 @@ def test_write_geotiff_gpu_cog_overview_resampling_mode(tmp_path):
p = str(tmp_path / 'cog_mode_gpu_1740.tif')
write_geotiff_gpu(
da, p, cog=True, compression='deflate', tiled=True,
tile_size=4, overview_levels=[2],
tile_size=16, overview_levels=[2],
overview_resampling='mode',
)

Expand Down Expand Up @@ -220,7 +219,7 @@ def test_to_geotiff_gpu_cog_overview_resampling_mode(tmp_path):
p = str(tmp_path / 'cog_mode_to_geotiff_gpu_1740.tif')
to_geotiff(
da, p, gpu=True, cog=True, compression='deflate', tiled=True,
tile_size=4, overview_levels=[2],
tile_size=16, overview_levels=[2],
overview_resampling='mode',
)

Expand Down Expand Up @@ -250,7 +249,7 @@ def test_gpu_vs_cpu_mode_overview_pixel_parity(tmp_path):
p_cpu = str(tmp_path / 'cog_mode_cpu_1740.tif')
to_geotiff(
da_cpu, p_cpu, cog=True, compression='deflate', tiled=True,
tile_size=4, overview_levels=[2],
tile_size=16, overview_levels=[2],
overview_resampling='mode',
)

Expand All @@ -261,7 +260,7 @@ def test_gpu_vs_cpu_mode_overview_pixel_parity(tmp_path):
p_gpu = str(tmp_path / 'cog_mode_gpu_via_to_geotiff_1740.tif')
to_geotiff(
da_gpu, p_gpu, gpu=True, cog=True, compression='deflate', tiled=True,
tile_size=4, overview_levels=[2],
tile_size=16, overview_levels=[2],
overview_resampling='mode',
)

Expand Down
8 changes: 4 additions & 4 deletions xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def test_force_bigtiff_true_writes_bigtiff(self, tmp_path):
'x': np.arange(8, dtype=np.float64)},
)
path = str(tmp_path / 'gpu_bigtiff_true.tif')
write_geotiff_gpu(da, path, bigtiff=True, tile_size=4)
write_geotiff_gpu(da, path, bigtiff=True, tile_size=16)
assert self._read_header_is_bigtiff(path), (
"write_geotiff_gpu(bigtiff=True) should emit BigTIFF header "
"(magic byte 43)."
Expand All @@ -474,7 +474,7 @@ def test_force_bigtiff_false_writes_classic(self, tmp_path):
'x': np.arange(8, dtype=np.float64)},
)
path = str(tmp_path / 'gpu_bigtiff_false.tif')
write_geotiff_gpu(da, path, bigtiff=False, tile_size=4)
write_geotiff_gpu(da, path, bigtiff=False, tile_size=16)
assert not self._read_header_is_bigtiff(path), (
"write_geotiff_gpu(bigtiff=False) should emit classic TIFF."
)
Expand All @@ -492,7 +492,7 @@ def test_bigtiff_none_stays_classic_small_file(self, tmp_path):
'x': np.arange(8, dtype=np.float64)},
)
path = str(tmp_path / 'gpu_bigtiff_default.tif')
write_geotiff_gpu(da, path, tile_size=4)
write_geotiff_gpu(da, path, tile_size=16)
assert not self._read_header_is_bigtiff(path), (
"write_geotiff_gpu default should auto-pick classic TIFF for "
"tiny outputs; a default switch to BigTIFF would break "
Expand All @@ -512,7 +512,7 @@ def test_to_geotiff_gpu_bigtiff_threads_through(self, tmp_path):
'x': np.arange(8, dtype=np.float64)},
)
path = str(tmp_path / 'to_gpu_bigtiff_true.tif')
to_geotiff(da, path, gpu=True, bigtiff=True, tile_size=4)
to_geotiff(da, path, gpu=True, bigtiff=True, tile_size=16)
assert self._read_header_is_bigtiff(path), (
"to_geotiff(gpu=True, bigtiff=True) should reach the GPU "
"writer with force_bigtiff=True propagated through."
Expand Down
Loading