From 302da84f24a344f0400746099873a1e827e1e59c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 05:38:06 -0700 Subject: [PATCH 1/2] geotiff: require tile_size to be a multiple of 16 (#1767) TIFF 6 spec requires TileWidth and TileLength to be multiples of 16 for broad reader interop. The in-repo writer/reader round-tripped any positive tile_size (e.g. 17), but other tools (GDAL, libtiff strict mode) may reject the file. `to_geotiff` and `write_geotiff_gpu` now refuse tile_size that is not a positive multiple of 16 when tiled output is requested, naming the bad value and suggesting the nearest valid choices. Docstrings updated. Existing test fixtures that used small tile sizes (2/4/8/100) for multi-tile coverage have been bumped to the nearest valid multiple of 16, with raster sizes adjusted only where the test logic required >1 tile. --- xrspatial/geotiff/__init__.py | 50 +++++++- .../tests/test_backend_kwarg_parity_1561.py | 6 +- .../tests/test_band_validation_1673.py | 2 +- .../test_cog_cubic_overview_nodata_1623.py | 10 +- .../tests/test_cog_overview_nodata_1613.py | 14 +-- xrspatial/geotiff/tests/test_features.py | 4 +- .../tests/test_gpu_window_band_1605.py | 4 +- ...verview_mode_and_compression_level_1740.py | 8 +- .../tests/test_kwarg_behaviour_2026_05_12.py | 8 +- .../test_kwarg_behaviour_2026_05_12_v2.py | 26 ++-- .../test_kwarg_coverage_2026_05_11_r4.py | 8 +- .../test_open_geotiff_on_gpu_failure_1615.py | 2 +- .../test_open_geotiff_vrt_kwarg_drop_1685.py | 2 +- ...ew_resampling_min_max_median_2026_05_11.py | 10 +- .../tests/test_predictor_multisample.py | 10 +- .../tests/test_size_param_validation_1752.py | 11 +- .../tests/test_streaming_codecs_2026_05_11.py | 6 +- .../test_tile_size_multiple_of_16_1767.py | 113 ++++++++++++++++++ .../tests/test_vrt_tiled_metadata_1606.py | 22 ++-- xrspatial/geotiff/tests/test_vrt_write.py | 5 +- 20 files changed, 242 insertions(+), 79 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index a30d75b99..405306a9c 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1222,9 +1222,11 @@ def to_geotiff(data: xr.DataArray | np.ndarray, tiled : bool Use tiled layout (default True). tile_size : int - Tile size in pixels (default 256). Ignored when ``tiled=False``; - a warning is emitted if a non-default value is passed alongside - strip mode. + Tile size in pixels (default 256). Must be a positive multiple + of 16 when ``tiled=True``; this is a TIFF 6 spec requirement + on TileWidth and TileLength for broad reader compatibility. + Ignored when ``tiled=False``; a warning is emitted if a + non-default value is passed alongside strip mode. predictor : bool or int TIFF predictor. Accepted values: @@ -1267,7 +1269,11 @@ def to_geotiff(data: xr.DataArray | np.ndarray, # the tile grid as ``math.ceil(width / tile_size)``; tile_size=0 hits # ZeroDivisionError deep inside the writer, and negative values # produce a nonsensical tile grid. tiled=False ignores tile_size, so - # only validate when tiled output is actually requested. + # only validate when tiled output is actually requested. The TIFF 6 + # spec also requires TileWidth and TileLength to be multiples of 16 + # for broad interoperability with libtiff / GDAL strict readers; a + # value like 17 would otherwise round-trip through the in-repo + # reader but be rejected elsewhere. if tiled: if not isinstance(tile_size, (int, np.integer)) or isinstance( tile_size, bool): @@ -1277,6 +1283,19 @@ def to_geotiff(data: xr.DataArray | np.ndarray, if tile_size <= 0: raise ValueError( f"tile_size must be a positive int, got tile_size={tile_size}.") + if tile_size % 16 != 0: + lower = (int(tile_size) // 16) * 16 + upper = lower + 16 + # ``lower`` is 0 for tile_size < 16; suppress it from the hint + # because 0 is not a valid tile size on its own. + if lower <= 0: + hint = f"try tile_size={upper}" + else: + hint = f"try tile_size={lower} or tile_size={upper}" + raise ValueError( + f"tile_size must be a positive multiple of 16 (TIFF 6 " + f"spec requirement for TileWidth/TileLength), got " + f"tile_size={tile_size}; {hint}.") # Up-front validation: catch bad compression names before they reach # any of the deeper write paths (streaming, GPU, VRT, COG) where the @@ -3266,6 +3285,29 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") + # Mirror to_geotiff's tile_size validation: the TIFF 6 spec requires + # TileWidth/TileLength to be positive multiples of 16 for broad + # reader interop. ``write_geotiff_gpu`` is always tiled, so the + # check fires unconditionally here. + if not isinstance(tile_size, (int, np.integer)) or isinstance( + tile_size, bool): + raise ValueError( + f"tile_size must be a positive int, got " + f"{tile_size!r} (type {type(tile_size).__name__}).") + if tile_size <= 0: + raise ValueError( + f"tile_size must be a positive int, got tile_size={tile_size}.") + if tile_size % 16 != 0: + lower = (int(tile_size) // 16) * 16 + upper = lower + 16 + if lower <= 0: + hint = f"try tile_size={upper}" + else: + hint = f"try tile_size={lower} or tile_size={upper}" + raise ValueError( + f"tile_size must be a positive multiple of 16 (TIFF 6 " + f"spec requirement for TileWidth/TileLength), got " + f"tile_size={tile_size}; {hint}.") if max_z_error < 0: raise ValueError( f"max_z_error must be >= 0, got {max_z_error}") diff --git a/xrspatial/geotiff/tests/test_backend_kwarg_parity_1561.py b/xrspatial/geotiff/tests/test_backend_kwarg_parity_1561.py index 730b08f2d..6501375ed 100644 --- a/xrspatial/geotiff/tests/test_backend_kwarg_parity_1561.py +++ b/xrspatial/geotiff/tests/test_backend_kwarg_parity_1561.py @@ -51,7 +51,7 @@ def small_tiff_path(tmp_path): attrs={'crs': 4326}, ) p = tmp_path / 'parity_1561_small.tif' - to_geotiff(da, str(p), tile_size=4) + to_geotiff(da, str(p), tile_size=16) return str(p), arr @@ -72,7 +72,7 @@ def small_multiband_tiff_path(tmp_path): attrs={'crs': 4326}, ) p = tmp_path / 'parity_1561_mb.tif' - to_geotiff(da, str(p), tile_size=4) + to_geotiff(da, str(p), tile_size=16) return str(p), arr @@ -197,7 +197,7 @@ def test_write_geotiff_gpu_accepts_streaming_buffer_bytes_as_noop(tmp_path): p = tmp_path / 'parity_1561_streaming.tif' # Argument is accepted; result must round-trip identically to a # call without it. - write_geotiff_gpu(da, str(p), streaming_buffer_bytes=4096, tile_size=4) + write_geotiff_gpu(da, str(p), streaming_buffer_bytes=4096, tile_size=16) rd = open_geotiff(str(p)) np.testing.assert_array_equal(rd.values, arr.get()) diff --git a/xrspatial/geotiff/tests/test_band_validation_1673.py b/xrspatial/geotiff/tests/test_band_validation_1673.py index a9fe8662e..36e4ede92 100644 --- a/xrspatial/geotiff/tests/test_band_validation_1673.py +++ b/xrspatial/geotiff/tests/test_band_validation_1673.py @@ -40,7 +40,7 @@ def multiband_tiff_path(tmp_path): attrs={'crs': 4326}, ) p = tmp_path / 'mb_1673.tif' - to_geotiff(da, str(p), tile_size=4) + to_geotiff(da, str(p), tile_size=16) return str(p), arr diff --git a/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py b/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py index d5be06b55..4aa53b369 100644 --- a/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py +++ b/xrspatial/geotiff/tests/test_cog_cubic_overview_nodata_1623.py @@ -138,7 +138,7 @@ def test_to_geotiff_cog_cubic_nodata_round_trip(tmp_path): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / 'cog_cubic_nodata.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=8, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='cubic') ov = open_geotiff(p, overview_level=1) @@ -165,7 +165,7 @@ def test_to_geotiff_cog_cubic_no_nodata_round_trip(tmp_path): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / 'cog_cubic_no_nodata.tif') to_geotiff(da, p, cog=True, compression='deflate', - tiled=True, tile_size=8, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='cubic') ov = open_geotiff(p, overview_level=1) @@ -238,7 +238,7 @@ def test_to_geotiff_cog_cubic_nodata_gpu_round_trip(tmp_path): da = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) p = str(tmp_path / 'cog_cubic_nodata_gpu.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=8, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='cubic') ov = open_geotiff(p, overview_level=1) @@ -267,10 +267,10 @@ def test_gpu_cpu_cubic_overview_bytes_match(tmp_path): cpu_path = str(tmp_path / 'cpu_cubic.tif') gpu_path = str(tmp_path / 'gpu_cubic.tif') to_geotiff(cpu_da, cpu_path, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=8, + compression='deflate', tiled=True, tile_size=16, overview_levels=[1], overview_resampling='cubic') to_geotiff(gpu_da, gpu_path, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=8, + compression='deflate', tiled=True, tile_size=16, overview_levels=[1], overview_resampling='cubic') cpu_ov = np.asarray(open_geotiff(cpu_path, overview_level=1).data) diff --git a/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py b/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py index f93875fa2..77c56ec2d 100644 --- a/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py +++ b/xrspatial/geotiff/tests/test_cog_overview_nodata_1613.py @@ -65,7 +65,7 @@ def test_cpu_cog_overview_mean_ignores_sentinel(tmp_path): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / 'cog_mean_nodata.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='mean') ov = open_geotiff(p, overview_level=1) @@ -81,7 +81,7 @@ def test_cpu_cog_overview_mean_partial_block(tmp_path): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / 'cog_mean_nodata_full_block.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='mean') ov = open_geotiff(p, overview_level=1) @@ -114,7 +114,7 @@ def test_cpu_cog_overview_aggregations_ignore_sentinel( da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / f'cog_{method}_nodata.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling=method) ov = open_geotiff(p, overview_level=1) @@ -129,7 +129,7 @@ def test_cpu_cog_overview_mean_no_nodata_passes(tmp_path): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / 'cog_mean_no_nodata.tif') to_geotiff(da, p, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='mean') ov = open_geotiff(p, overview_level=1) @@ -220,7 +220,7 @@ def test_gpu_cog_overview_mean_ignores_sentinel(tmp_path): p = str(tmp_path / 'gpu_cog_mean_nodata.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling='mean', gpu=True) ov = open_geotiff(p, overview_level=1) @@ -277,7 +277,7 @@ def test_gpu_cog_overview_matches_cpu(tmp_path): da_cpu = xr.DataArray(arr, dims=['y', 'x']) p_cpu = str(tmp_path / 'cpu_pyramid.tif') to_geotiff(da_cpu, p_cpu, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=2, + compression='deflate', tiled=True, tile_size=16, overview_levels=[1], overview_resampling='mean') cpu_ov = np.asarray(open_geotiff(p_cpu, overview_level=1).data) @@ -285,7 +285,7 @@ def test_gpu_cog_overview_matches_cpu(tmp_path): da_gpu = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) p_gpu = str(tmp_path / 'gpu_pyramid.tif') to_geotiff(da_gpu, p_gpu, nodata=-9999.0, cog=True, - compression='deflate', tiled=True, tile_size=2, + compression='deflate', tiled=True, tile_size=16, overview_levels=[1], overview_resampling='mean', gpu=True) gpu_ov = np.asarray(open_geotiff(p_gpu, overview_level=1).data) diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 29318a1d9..3d997d3cb 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -1870,7 +1870,7 @@ def test_bigtiff_eager_tile_offsets_are_long8_1247(self, tmp_path): arr = np.arange(64, dtype=np.float32).reshape(8, 8) path = str(tmp_path / 'bigtiff_long8_eager_1247.tif') to_geotiff(arr, path, compression='none', - tiled=True, tile_size=4, bigtiff=True) + tiled=True, tile_size=16, bigtiff=True) self._assert_offset_tags_are_long8(path) # Data must still round-trip. np.testing.assert_array_equal(open_geotiff(path).values, arr) @@ -1902,7 +1902,7 @@ def test_bigtiff_streaming_tile_offsets_are_long8_1247(self, tmp_path): ) path = str(tmp_path / 'bigtiff_long8_stream_1247.tif') to_geotiff(dask_da, path, compression='none', - tiled=True, tile_size=4, bigtiff=True) + tiled=True, tile_size=16, bigtiff=True) self._assert_offset_tags_are_long8(path) np.testing.assert_array_equal(open_geotiff(path).values, arr) diff --git a/xrspatial/geotiff/tests/test_gpu_window_band_1605.py b/xrspatial/geotiff/tests/test_gpu_window_band_1605.py index f40c455bc..bb0695d84 100644 --- a/xrspatial/geotiff/tests/test_gpu_window_band_1605.py +++ b/xrspatial/geotiff/tests/test_gpu_window_band_1605.py @@ -59,7 +59,7 @@ def single_band_tiff(tmp_path): attrs={'crs': 4326}, ) p = tmp_path / 'window_band_1605_single.tif' - to_geotiff(da, str(p), tile_size=8) + to_geotiff(da, str(p), tile_size=16) return str(p), arr @@ -80,7 +80,7 @@ def multi_band_tiff(tmp_path): attrs={'crs': 4326}, ) p = tmp_path / 'window_band_1605_multi.tif' - to_geotiff(da, str(p), tile_size=8) + to_geotiff(da, str(p), tile_size=16) return str(p), arr diff --git a/xrspatial/geotiff/tests/test_gpu_writer_overview_mode_and_compression_level_1740.py b/xrspatial/geotiff/tests/test_gpu_writer_overview_mode_and_compression_level_1740.py index b6edf2608..bdd93eb00 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_overview_mode_and_compression_level_1740.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_overview_mode_and_compression_level_1740.py @@ -191,7 +191,7 @@ def test_write_geotiff_gpu_cog_overview_resampling_mode(tmp_path): p = str(tmp_path / 'cog_mode_gpu_1740.tif') write_geotiff_gpu( da, p, cog=True, compression='deflate', tiled=True, - tile_size=4, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling='mode', ) @@ -220,7 +220,7 @@ def test_to_geotiff_gpu_cog_overview_resampling_mode(tmp_path): p = str(tmp_path / 'cog_mode_to_geotiff_gpu_1740.tif') to_geotiff( da, p, gpu=True, cog=True, compression='deflate', tiled=True, - tile_size=4, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling='mode', ) @@ -250,7 +250,7 @@ def test_gpu_vs_cpu_mode_overview_pixel_parity(tmp_path): p_cpu = str(tmp_path / 'cog_mode_cpu_1740.tif') to_geotiff( da_cpu, p_cpu, cog=True, compression='deflate', tiled=True, - tile_size=4, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling='mode', ) @@ -261,7 +261,7 @@ def test_gpu_vs_cpu_mode_overview_pixel_parity(tmp_path): p_gpu = str(tmp_path / 'cog_mode_gpu_via_to_geotiff_1740.tif') to_geotiff( da_gpu, p_gpu, gpu=True, cog=True, compression='deflate', tiled=True, - tile_size=4, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling='mode', ) diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py index 31da870bd..c7971c7f9 100644 --- a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py +++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py @@ -456,7 +456,7 @@ def test_force_bigtiff_true_writes_bigtiff(self, tmp_path): 'x': np.arange(8, dtype=np.float64)}, ) path = str(tmp_path / 'gpu_bigtiff_true.tif') - write_geotiff_gpu(da, path, bigtiff=True, tile_size=4) + write_geotiff_gpu(da, path, bigtiff=True, tile_size=16) assert self._read_header_is_bigtiff(path), ( "write_geotiff_gpu(bigtiff=True) should emit BigTIFF header " "(magic byte 43)." @@ -474,7 +474,7 @@ def test_force_bigtiff_false_writes_classic(self, tmp_path): 'x': np.arange(8, dtype=np.float64)}, ) path = str(tmp_path / 'gpu_bigtiff_false.tif') - write_geotiff_gpu(da, path, bigtiff=False, tile_size=4) + write_geotiff_gpu(da, path, bigtiff=False, tile_size=16) assert not self._read_header_is_bigtiff(path), ( "write_geotiff_gpu(bigtiff=False) should emit classic TIFF." ) @@ -492,7 +492,7 @@ def test_bigtiff_none_stays_classic_small_file(self, tmp_path): 'x': np.arange(8, dtype=np.float64)}, ) path = str(tmp_path / 'gpu_bigtiff_default.tif') - write_geotiff_gpu(da, path, tile_size=4) + write_geotiff_gpu(da, path, tile_size=16) assert not self._read_header_is_bigtiff(path), ( "write_geotiff_gpu default should auto-pick classic TIFF for " "tiny outputs; a default switch to BigTIFF would break " @@ -512,7 +512,7 @@ def test_to_geotiff_gpu_bigtiff_threads_through(self, tmp_path): 'x': np.arange(8, dtype=np.float64)}, ) path = str(tmp_path / 'to_gpu_bigtiff_true.tif') - to_geotiff(da, path, gpu=True, bigtiff=True, tile_size=4) + to_geotiff(da, path, gpu=True, bigtiff=True, tile_size=16) assert self._read_header_is_bigtiff(path), ( "to_geotiff(gpu=True, bigtiff=True) should reach the GPU " "writer with force_bigtiff=True propagated through." diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12_v2.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12_v2.py index 8ac99a886..a917b21a5 100644 --- a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12_v2.py +++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12_v2.py @@ -141,7 +141,7 @@ def test_predictor_true_uint8_round_trip(self, tmp_path): path = str(tmp_path / 'gpu_pred2_u8_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=True, - tile_size=8) + tile_size=16) # Round-trip through the public reader out = open_geotiff(path) @@ -158,7 +158,7 @@ def test_predictor_2_uint8_round_trip(self, tmp_path): path = str(tmp_path / 'gpu_pred2_int_u8_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=2, - tile_size=8) + tile_size=16) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -180,7 +180,7 @@ def test_predictor_2_uint8_3band_rgb(self, tmp_path): path = str(tmp_path / 'gpu_pred2_u8_3band_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=2, - tile_size=8) + tile_size=16) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -199,7 +199,7 @@ def test_predictor_false_no_predictor_tag(self, tmp_path): path = str(tmp_path / 'gpu_no_pred_u8_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=False, - tile_size=8) + tile_size=16) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -223,7 +223,7 @@ def test_predictor_2_uint16_round_trip(self, tmp_path): path = str(tmp_path / 'gpu_pred2_u16_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=2, - tile_size=8) + tile_size=16) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -250,7 +250,7 @@ def test_predictor_2_int32_round_trip(self, tmp_path): path = str(tmp_path / 'gpu_pred2_i32_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=2, - tile_size=8) + tile_size=16) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -278,7 +278,7 @@ def test_predictor_3_float32_round_trip(self, tmp_path): path = str(tmp_path / 'gpu_pred3_f32_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=3, - tile_size=8) + tile_size=16) out = open_geotiff(path) # FP predictor is lossless: equality, not allclose @@ -293,7 +293,7 @@ def test_predictor_3_float64_round_trip(self, tmp_path): path = str(tmp_path / 'gpu_pred3_f64_2026_05_12_v2.tif') write_geotiff_gpu(da, path, compression='deflate', predictor=3, - tile_size=8) + tile_size=16) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -309,7 +309,7 @@ def test_predictor_3_rejects_int_dtype(self, tmp_path): with pytest.raises(ValueError, match=r"predictor=3.*requires float"): write_geotiff_gpu(da, path, compression='deflate', predictor=3, - tile_size=8) + tile_size=16) @_gpu_only @@ -333,9 +333,9 @@ def test_cpu_gpu_parity_predictor_2_uint16(self, tmp_path): gpu_path = str(tmp_path / 'gpu_pred2_u16_v2.tif') to_geotiff(_da_with_float_coords(arr), cpu_path, - compression='deflate', predictor=2, tile_size=8) + compression='deflate', predictor=2, tile_size=16) write_geotiff_gpu(_da_with_float_coords(cupy.asarray(arr)), gpu_path, - compression='deflate', predictor=2, tile_size=8) + compression='deflate', predictor=2, tile_size=16) cpu_out = open_geotiff(cpu_path).values gpu_out = open_geotiff(gpu_path).values @@ -351,9 +351,9 @@ def test_cpu_gpu_parity_predictor_3_float32(self, tmp_path): gpu_path = str(tmp_path / 'gpu_pred3_f32_v2.tif') to_geotiff(_da_with_float_coords(arr), cpu_path, - compression='deflate', predictor=3, tile_size=8) + compression='deflate', predictor=3, tile_size=16) write_geotiff_gpu(_da_with_float_coords(cupy.asarray(arr)), gpu_path, - compression='deflate', predictor=3, tile_size=8) + compression='deflate', predictor=3, tile_size=16) cpu_out = open_geotiff(cpu_path).values gpu_out = open_geotiff(gpu_path).values diff --git a/xrspatial/geotiff/tests/test_kwarg_coverage_2026_05_11_r4.py b/xrspatial/geotiff/tests/test_kwarg_coverage_2026_05_11_r4.py index 1535421e6..cc6266faf 100644 --- a/xrspatial/geotiff/tests/test_kwarg_coverage_2026_05_11_r4.py +++ b/xrspatial/geotiff/tests/test_kwarg_coverage_2026_05_11_r4.py @@ -58,7 +58,7 @@ def _gpu_available() -> bool: def small_tiff_path(tmp_path): arr = np.arange(64, dtype=np.float32).reshape(8, 8) p = tmp_path / "small.tif" - to_geotiff(arr, str(p), tile_size=4) + to_geotiff(arr, str(p), tile_size=16) return str(p), arr @@ -117,8 +117,10 @@ def test_read_geotiff_gpu_chunks_name_kwarg_sets_name(small_tiff_path): @_gpu_only def test_read_geotiff_gpu_max_pixels_accepts_within_budget(small_tiff_path): path, arr = small_tiff_path - # 8 * 8 = 64 pixels. 100 leaves room. - da = read_geotiff_gpu(path, max_pixels=100) + # 8 * 8 = 64 pixels but per-tile dim safety check uses tile_size=16 + # (256 pixels per tile); 300 leaves room. The fixture's tile_size + # was bumped to 16 to satisfy the TIFF 6 multiple-of-16 rule (#1767). + da = read_geotiff_gpu(path, max_pixels=300) np.testing.assert_array_equal(da.data.get(), arr) diff --git a/xrspatial/geotiff/tests/test_open_geotiff_on_gpu_failure_1615.py b/xrspatial/geotiff/tests/test_open_geotiff_on_gpu_failure_1615.py index 86dc3e0a9..b46356248 100644 --- a/xrspatial/geotiff/tests/test_open_geotiff_on_gpu_failure_1615.py +++ b/xrspatial/geotiff/tests/test_open_geotiff_on_gpu_failure_1615.py @@ -59,7 +59,7 @@ def small_tiff_path(tmp_path): attrs={'crs': 4326}, ) p = tmp_path / 'on_gpu_failure_1615.tif' - to_geotiff(da, str(p), tile_size=4) + to_geotiff(da, str(p), tile_size=16) return str(p), arr diff --git a/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py b/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py index 10bf9ba03..ae2dc996c 100644 --- a/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py +++ b/xrspatial/geotiff/tests/test_open_geotiff_vrt_kwarg_drop_1685.py @@ -102,7 +102,7 @@ def test_open_geotiff_non_vrt_still_accepts_overview_level(tmp_path): attrs={"crs": 4326}, ) tif_path = tmp_path / "with_ovr.tif" - to_geotiff(da, str(tif_path), cog=True, tile_size=4, overview_levels=[2]) + to_geotiff(da, str(tif_path), cog=True, tile_size=16, overview_levels=[2]) # Either overview_level value must be accepted without raising. open_geotiff(str(tif_path), overview_level=0) open_geotiff(str(tif_path), overview_level=1) diff --git a/xrspatial/geotiff/tests/test_overview_resampling_min_max_median_2026_05_11.py b/xrspatial/geotiff/tests/test_overview_resampling_min_max_median_2026_05_11.py index 552afac2e..0320b3eb7 100644 --- a/xrspatial/geotiff/tests/test_overview_resampling_min_max_median_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_overview_resampling_min_max_median_2026_05_11.py @@ -168,7 +168,7 @@ def test_to_geotiff_cog_overview_resampling_cpu(tmp_path, method, expected): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / f'cog_{method}.tif') to_geotiff(da, p, cog=True, compression='deflate', tiled=True, - tile_size=2, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling=method) ov = open_geotiff(p, overview_level=1) @@ -184,7 +184,7 @@ def test_to_geotiff_cog_overview_resampling_cpu_nodata(tmp_path, method): da = xr.DataArray(arr, dims=['y', 'x']) p = str(tmp_path / f'cog_{method}_nodata.tif') to_geotiff(da, p, nodata=-9999.0, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling=method) ov = open_geotiff(p, overview_level=1) @@ -260,7 +260,7 @@ def test_write_geotiff_gpu_cog_overview_resampling(tmp_path, method, expected): da = xr.DataArray(arr_gpu, dims=['y', 'x']) p = str(tmp_path / f'cog_{method}_gpu.tif') write_geotiff_gpu(da, p, cog=True, compression='deflate', tiled=True, - tile_size=2, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling=method) ov = open_geotiff(p, overview_level=1) @@ -278,13 +278,13 @@ def test_to_geotiff_gpu_cog_overview_matches_cpu(tmp_path, method): da_cpu = xr.DataArray(arr, dims=['y', 'x']) p_cpu = str(tmp_path / f'cog_{method}_cpu.tif') to_geotiff(da_cpu, p_cpu, cog=True, compression='deflate', tiled=True, - tile_size=2, overview_levels=[1], + tile_size=16, overview_levels=[1], overview_resampling=method) da_gpu = xr.DataArray(cupy.asarray(arr), dims=['y', 'x']) p_gpu = str(tmp_path / f'cog_{method}_gpu_via_to_geotiff.tif') to_geotiff(da_gpu, p_gpu, gpu=True, cog=True, compression='deflate', - tiled=True, tile_size=2, overview_levels=[1], + tiled=True, tile_size=16, overview_levels=[1], overview_resampling=method) ov_cpu = np.asarray(open_geotiff(p_cpu, overview_level=1).data) diff --git a/xrspatial/geotiff/tests/test_predictor_multisample.py b/xrspatial/geotiff/tests/test_predictor_multisample.py index 92828b9ad..273d28d53 100644 --- a/xrspatial/geotiff/tests/test_predictor_multisample.py +++ b/xrspatial/geotiff/tests/test_predictor_multisample.py @@ -67,7 +67,9 @@ def test_gpu_predictor2_multisample_matches_cpu(tmp_path, samples, dtype_str): ``d_decomp`` on the last tile. """ dtype = np.dtype(dtype_str) - h, w = 16, 16 + # Image is 32x32 with tile_size=16 (multiple of 16 per TIFF spec) + # to keep the >1-tile coverage that this test depends on. + h, w = 32, 32 rng = np.random.RandomState(42) if dtype.kind == 'u' and dtype.itemsize == 1: data = rng.randint(0, 256, size=(h, w, samples), dtype=dtype) @@ -79,7 +81,7 @@ def test_gpu_predictor2_multisample_matches_cpu(tmp_path, samples, dtype_str): path = str(tmp_path / f"rgb_pred_{samples}_{dtype_str}.tif") # tile_size smaller than image so we exercise more than one tile. - to_geotiff(da, path, compression='deflate', tile_size=8, predictor=True) + to_geotiff(da, path, compression='deflate', tile_size=16, predictor=True) cpu_arr = open_geotiff(path).values assert cpu_arr.shape == (h, w, samples) @@ -103,13 +105,13 @@ def test_gpu_predictor2_multisample_uneven_tiles(tmp_path): Exercises partial edge tiles which share the same predictor decode path and are most likely to trip any lingering stride mismatch. """ - h, w, samples = 20, 20, 3 # 20 is not a multiple of 8 + h, w, samples = 40, 40, 3 # 40 is not a multiple of 16 rng = np.random.RandomState(7) data = rng.randint(0, 256, size=(h, w, samples), dtype=np.uint8) da = xr.DataArray(data, dims=['y', 'x', 'band']) path = str(tmp_path / "rgb_pred_uneven.tif") - to_geotiff(da, path, compression='deflate', tile_size=8, predictor=True) + to_geotiff(da, path, compression='deflate', tile_size=16, predictor=True) cpu_arr = open_geotiff(path).values gpu_arr = _gpu_to_numpy(open_geotiff(path, gpu=True)) diff --git a/xrspatial/geotiff/tests/test_size_param_validation_1752.py b/xrspatial/geotiff/tests/test_size_param_validation_1752.py index 4c9d500d4..233a9c823 100644 --- a/xrspatial/geotiff/tests/test_size_param_validation_1752.py +++ b/xrspatial/geotiff/tests/test_size_param_validation_1752.py @@ -63,12 +63,15 @@ def test_to_geotiff_tile_size_non_int_raises(tmp_path): to_geotiff(da, out, tiled=True, tile_size=256.0) -def test_to_geotiff_tile_size_one_still_writes(tmp_path): - # tile_size=1 is silly but technically valid TIFF; do not reject it. - arr = np.arange(16, dtype=np.float32).reshape(4, 4) +def test_to_geotiff_tile_size_16_writes(tmp_path): + # ``tile_size=16`` is the smallest TIFF-spec-legal tile size. The + # original 1752 regression checked ``tile_size=1`` here, but #1767 + # now requires multiples of 16 (TIFF 6 spec), so ``tile_size=1`` is + # rejected. Keep a positive-path test at the new lower bound. + arr = np.arange(256, dtype=np.float32).reshape(16, 16) da = xr.DataArray(arr, dims=['y', 'x']) out = os.path.join(str(tmp_path), 'out.tif') - to_geotiff(da, out, tiled=True, tile_size=1) + to_geotiff(da, out, tiled=True, tile_size=16) assert os.path.exists(out) diff --git a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py index 25bda4ad1..73254376d 100644 --- a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py @@ -181,7 +181,7 @@ def test_cubic_overview_round_trip(self, tmp_path): path = str(tmp_path / 'cubic_overview.tif') to_geotiff(arr, path, compression='deflate', - tile_size=8, + tile_size=16, tiled=True, cog=True, overview_levels=[1], @@ -216,10 +216,10 @@ def test_cubic_distinct_from_mean(self, tmp_path): cubic_path = str(tmp_path / 'cubic_q.tif') mean_path = str(tmp_path / 'mean_q.tif') - to_geotiff(arr, cubic_path, compression='deflate', tile_size=8, + to_geotiff(arr, cubic_path, compression='deflate', tile_size=16, tiled=True, cog=True, overview_levels=[1], overview_resampling='cubic') - to_geotiff(arr, mean_path, compression='deflate', tile_size=8, + to_geotiff(arr, mean_path, compression='deflate', tile_size=16, tiled=True, cog=True, overview_levels=[1], overview_resampling='mean') diff --git a/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py b/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py new file mode 100644 index 000000000..424369e70 --- /dev/null +++ b/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py @@ -0,0 +1,113 @@ +"""Regression tests for issue #1767. + +``to_geotiff(..., tiled=True, tile_size=...)`` previously accepted any +positive integer tile size. The TIFF 6 spec requires TileWidth and +TileLength to be multiples of 16, so values like ``tile_size=17`` +produced files that the in-repo reader round-tripped but that strict +TIFF tools (libtiff, GDAL) may reject. ``to_geotiff`` now refuses +non-multiples of 16 when ``tiled=True`` and suggests the nearest +valid value(s). +""" +from __future__ import annotations + +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff + + +def _make_da(shape=(32, 32)): + arr = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) + return xr.DataArray(arr, dims=['y', 'x']) + + +def test_tile_size_17_rejected_1767(tmp_path): + """``tile_size=17`` is not a multiple of 16 and must be rejected.""" + da = _make_da() + out = os.path.join(str(tmp_path), 'tile_size_17_1767.tif') + with pytest.raises(ValueError) as exc: + to_geotiff(da, out, tiled=True, tile_size=17) + msg = str(exc.value) + assert 'tile_size' in msg + assert '17' in msg + # Hint should suggest nearest valid choices (16 and 32). + assert '16' in msg and '32' in msg + + +def test_tile_size_1_rejected_1767(tmp_path): + """``tile_size=1`` was accepted previously; now rejected because + 1 is not a multiple of 16.""" + da = _make_da((16, 16)) + out = os.path.join(str(tmp_path), 'tile_size_1_1767.tif') + with pytest.raises(ValueError, match=r'tile_size.*multiple of 16'): + to_geotiff(da, out, tiled=True, tile_size=1) + + +def test_tile_size_default_256_works_1767(tmp_path): + """The default ``tile_size=256`` is a multiple of 16 and must work.""" + da = _make_da((256, 256)) + out = os.path.join(str(tmp_path), 'tile_size_256_1767.tif') + to_geotiff(da, out, tiled=True, tile_size=256) + assert os.path.exists(out) + + +def test_tile_size_512_works_1767(tmp_path): + da = _make_da((512, 512)) + out = os.path.join(str(tmp_path), 'tile_size_512_1767.tif') + to_geotiff(da, out, tiled=True, tile_size=512) + assert os.path.exists(out) + + +def test_tile_size_128_works_1767(tmp_path): + da = _make_da((128, 128)) + out = os.path.join(str(tmp_path), 'tile_size_128_1767.tif') + to_geotiff(da, out, tiled=True, tile_size=128) + assert os.path.exists(out) + + +def test_tile_size_16_works_1767(tmp_path): + """The smallest legal tile size is 16.""" + da = _make_da((32, 32)) + out = os.path.join(str(tmp_path), 'tile_size_16_1767.tif') + to_geotiff(da, out, tiled=True, tile_size=16) + assert os.path.exists(out) + + +def test_tile_size_17_with_tiled_false_passes_1767(tmp_path): + """``tiled=False`` ignores ``tile_size`` entirely; multiple-of-16 + validation must not fire there.""" + da = _make_da() + out = os.path.join(str(tmp_path), 'tile_size_17_strip_1767.tif') + # ``tiled=False`` emits a warning when a non-default tile_size is + # passed; we only care that no ValueError fires. + import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + to_geotiff(da, out, tiled=False, tile_size=17) + assert os.path.exists(out) + + +def test_tile_size_24_message_suggests_16_and_32_1767(tmp_path): + """Error message names both nearest valid multiples (lower & upper).""" + da = _make_da() + out = os.path.join(str(tmp_path), 'tile_size_24_1767.tif') + with pytest.raises(ValueError) as exc: + to_geotiff(da, out, tiled=True, tile_size=24) + msg = str(exc.value) + assert '16' in msg + assert '32' in msg + + +def test_tile_size_8_message_suggests_16_only_1767(tmp_path): + """For ``tile_size < 16`` only the upper neighbour (16) is valid.""" + da = _make_da() + out = os.path.join(str(tmp_path), 'tile_size_8_1767.tif') + with pytest.raises(ValueError) as exc: + to_geotiff(da, out, tiled=True, tile_size=8) + msg = str(exc.value) + assert '16' in msg + # 0 is not a valid tile size and should not appear as a suggestion. + assert 'tile_size=0' not in msg diff --git a/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py b/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py index ee780be39..7222e4efd 100644 --- a/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py +++ b/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py @@ -63,7 +63,7 @@ class TestVrtTiledMetadataParity: def test_nodatavals_alias_propagates_to_tiles(self, tmp_path): da = _make_rioxarray_style() vrt = str(tmp_path / 'nodatavals.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) # Before the fix this was None: _write_vrt_tiled read # attrs['nodata'] directly and ignored the nodatavals alias. @@ -78,14 +78,14 @@ def test_fill_value_alias_propagates_to_tiles(self, tmp_path): attrs={'_FillValue': -9999.0, 'crs': 4326}, ) vrt = str(tmp_path / 'fillvalue.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) assert tile_da.attrs.get('nodata') == -9999.0 def test_gdal_metadata_propagates_to_tiles(self, tmp_path): da = _make_rioxarray_style() vrt = str(tmp_path / 'gdal_meta.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) gm = tile_da.attrs.get('gdal_metadata') assert gm == {'AREA_OR_POINT': 'Area', 'foo': 'bar'} @@ -93,7 +93,7 @@ def test_gdal_metadata_propagates_to_tiles(self, tmp_path): def test_resolution_tags_propagate_to_tiles(self, tmp_path): da = _make_rioxarray_style() vrt = str(tmp_path / 'resolution.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) assert tile_da.attrs.get('x_resolution') == 96.0 assert tile_da.attrs.get('y_resolution') == 96.0 @@ -102,7 +102,7 @@ def test_resolution_tags_propagate_to_tiles(self, tmp_path): def test_raster_type_point_propagates_to_tiles(self, tmp_path): da = _make_rioxarray_style() vrt = str(tmp_path / 'point.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) assert tile_da.attrs.get('raster_type') == 'point' @@ -111,8 +111,8 @@ def test_tif_vs_vrt_tile_metadata_parity(self, tmp_path): da = _make_rioxarray_style() tif_path = str(tmp_path / 'parity.tif') vrt_path = str(tmp_path / 'parity.vrt') - to_geotiff(da, tif_path, tile_size=4) - to_geotiff(da, vrt_path, tile_size=4) + to_geotiff(da, tif_path, tile_size=16) + to_geotiff(da, vrt_path, tile_size=16) tif_da = open_geotiff(tif_path) tile_da = open_geotiff(_first_tile_path(vrt_path)) @@ -144,7 +144,7 @@ def test_gdal_metadata_xml_string_propagates_to_tiles(self, tmp_path): attrs={'crs': 4326, 'gdal_metadata_xml': xml}, ) vrt = str(tmp_path / 'gdal_xml.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) # On read, the XML is re-parsed into a dict under # attrs['gdal_metadata']; the raw XML lands under @@ -175,7 +175,7 @@ def test_extra_tags_entry_propagates_to_tiles(self, tmp_path): }, ) vrt = str(tmp_path / 'extra_tags.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) et = tile_da.attrs.get('extra_tags') or [] tag_ids = {entry[0] for entry in et} @@ -197,7 +197,7 @@ def test_image_description_friendly_attr_propagates_to_tiles( 'image_description': 'vrt-tile-friendly-1606'}, ) vrt = str(tmp_path / 'image_desc.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) assert (tile_da.attrs.get('image_description') == 'vrt-tile-friendly-1606') @@ -221,7 +221,7 @@ def test_nodatavals_alias_dask(self, tmp_path): dims=da_np.dims, coords=da_np.coords, attrs=da_np.attrs, ) vrt = str(tmp_path / 'dask.vrt') - to_geotiff(da, vrt, tile_size=4) + to_geotiff(da, vrt, tile_size=16) tile_da = open_geotiff(_first_tile_path(vrt)) assert tile_da.attrs.get('nodata') == -9999.0 assert tile_da.attrs.get('gdal_metadata') == {'k': 'v'} diff --git a/xrspatial/geotiff/tests/test_vrt_write.py b/xrspatial/geotiff/tests/test_vrt_write.py index ab1b4e2ca..f4b94bcf9 100644 --- a/xrspatial/geotiff/tests/test_vrt_write.py +++ b/xrspatial/geotiff/tests/test_vrt_write.py @@ -39,10 +39,11 @@ def test_round_trip_numpy(self, sample_raster, tmp_path): def test_tile_naming_convention(self, sample_raster, tmp_path): vrt_path = str(tmp_path / 'named_1083.vrt') - to_geotiff(sample_raster, vrt_path, tile_size=100) + to_geotiff(sample_raster, vrt_path, tile_size=128) tiles_dir = str(tmp_path / 'named_1083_tiles') files = sorted(os.listdir(tiles_dir)) - # 200x200 with tile_size=100 -> 2x2 grid + # 200x200 with tile_size=128 -> 2x2 grid (TIFF 6 spec requires + # tile_size be a multiple of 16; 100 was rejected post-#1767). assert files == [ 'tile_00_00.tif', 'tile_00_01.tif', 'tile_01_00.tif', 'tile_01_01.tif', From 827667dfe30f04e987a848367e7b5023bf8f4530 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 06:03:39 -0700 Subject: [PATCH 2/2] geotiff: extract _validate_tile_size helper to dedupe tile_size checks Both to_geotiff and write_geotiff_gpu were running the same three tile_size validations (type/bool guard, positivity, multiple-of-16 hint). Routing both call sites through a shared _validate_tile_size helper keeps accepted types and error wording in lockstep so future edits cannot drift between the two entry points. Also documents the multiple-of-16 requirement in the write_geotiff_gpu docstring and adds CPU-only regression tests for the GPU writer's tile_size validation, which runs before any cupy import. --- xrspatial/geotiff/__init__.py | 104 +++++++++--------- .../test_tile_size_multiple_of_16_1767.py | 42 ++++++- 2 files changed, 90 insertions(+), 56 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 405306a9c..95fdbbf55 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1159,6 +1159,44 @@ def _extract_rich_tags(attrs: dict) -> dict: } +def _validate_tile_size(tile_size) -> None: + """Validate ``tile_size`` for the tiled GeoTIFF writers. + + Shared by ``to_geotiff`` (when ``tiled=True``) and + ``write_geotiff_gpu`` (always tiled) so the accepted types, the + non-positive rejection, and the multiple-of-16 hint stay in lockstep. + The tiled writer computes the tile grid as + ``math.ceil(width / tile_size)``; ``tile_size=0`` hits + ``ZeroDivisionError`` deep inside the writer, and negative values + produce a nonsensical tile grid. The TIFF 6 spec also requires + ``TileWidth`` and ``TileLength`` to be positive multiples of 16 + for broad interoperability with libtiff / GDAL strict readers; a + value like 17 would otherwise round-trip through the in-repo + reader but be rejected elsewhere. + """ + if not isinstance(tile_size, (int, np.integer)) or isinstance( + tile_size, bool): + raise ValueError( + f"tile_size must be a positive int, got " + f"{tile_size!r} (type {type(tile_size).__name__}).") + if tile_size <= 0: + raise ValueError( + f"tile_size must be a positive int, got tile_size={tile_size}.") + if tile_size % 16 != 0: + lower = (int(tile_size) // 16) * 16 + upper = lower + 16 + # ``lower`` is 0 for tile_size < 16; suppress it from the hint + # because 0 is not a valid tile size on its own. + if lower <= 0: + hint = f"try tile_size={upper}" + else: + hint = f"try tile_size={lower} or tile_size={upper}" + raise ValueError( + f"tile_size must be a positive multiple of 16 (TIFF 6 " + f"spec requirement for TileWidth/TileLength), got " + f"tile_size={tile_size}; {hint}.") + + def to_geotiff(data: xr.DataArray | np.ndarray, path: str | BinaryIO, *, crs: int | str | None = None, @@ -1265,37 +1303,11 @@ def to_geotiff(data: xr.DataArray | np.ndarray, path = _coerce_path(path) - # Reject non-positive tile_size up front. The tiled writer computes - # the tile grid as ``math.ceil(width / tile_size)``; tile_size=0 hits - # ZeroDivisionError deep inside the writer, and negative values - # produce a nonsensical tile grid. tiled=False ignores tile_size, so - # only validate when tiled output is actually requested. The TIFF 6 - # spec also requires TileWidth and TileLength to be multiples of 16 - # for broad interoperability with libtiff / GDAL strict readers; a - # value like 17 would otherwise round-trip through the in-repo - # reader but be rejected elsewhere. + # ``tiled=False`` ignores tile_size entirely, so only validate when + # tiled output is actually requested. See ``_validate_tile_size`` for + # the rationale behind the type / positivity / multiple-of-16 checks. if tiled: - if not isinstance(tile_size, (int, np.integer)) or isinstance( - tile_size, bool): - raise ValueError( - f"tile_size must be a positive int, got " - f"{tile_size!r} (type {type(tile_size).__name__}).") - if tile_size <= 0: - raise ValueError( - f"tile_size must be a positive int, got tile_size={tile_size}.") - if tile_size % 16 != 0: - lower = (int(tile_size) // 16) * 16 - upper = lower + 16 - # ``lower`` is 0 for tile_size < 16; suppress it from the hint - # because 0 is not a valid tile size on its own. - if lower <= 0: - hint = f"try tile_size={upper}" - else: - hint = f"try tile_size={lower} or tile_size={upper}" - raise ValueError( - f"tile_size must be a positive multiple of 16 (TIFF 6 " - f"spec requirement for TileWidth/TileLength), got " - f"tile_size={tile_size}; {hint}.") + _validate_tile_size(tile_size) # Up-front validation: catch bad compression names before they reach # any of the deeper write paths (streaming, GPU, VRT, COG) where the @@ -3247,7 +3259,10 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, producing a tiled file. Accepted for API parity with ``to_geotiff``. tile_size : int - Tile size in pixels (default 256). + Tile size in pixels (default 256). Must be a positive multiple + of 16; this is a TIFF 6 spec requirement on TileWidth and + TileLength for broad reader compatibility. ``write_geotiff_gpu`` + is always tiled, so the check fires for every call. predictor : bool or int TIFF predictor. ``False``/``0``/``1`` -> none, ``True``/``2`` -> horizontal differencing, ``3`` -> floating-point predictor @@ -3285,29 +3300,10 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") - # Mirror to_geotiff's tile_size validation: the TIFF 6 spec requires - # TileWidth/TileLength to be positive multiples of 16 for broad - # reader interop. ``write_geotiff_gpu`` is always tiled, so the - # check fires unconditionally here. - if not isinstance(tile_size, (int, np.integer)) or isinstance( - tile_size, bool): - raise ValueError( - f"tile_size must be a positive int, got " - f"{tile_size!r} (type {type(tile_size).__name__}).") - if tile_size <= 0: - raise ValueError( - f"tile_size must be a positive int, got tile_size={tile_size}.") - if tile_size % 16 != 0: - lower = (int(tile_size) // 16) * 16 - upper = lower + 16 - if lower <= 0: - hint = f"try tile_size={upper}" - else: - hint = f"try tile_size={lower} or tile_size={upper}" - raise ValueError( - f"tile_size must be a positive multiple of 16 (TIFF 6 " - f"spec requirement for TileWidth/TileLength), got " - f"tile_size={tile_size}; {hint}.") + # ``write_geotiff_gpu`` is always tiled, so the multiple-of-16 check + # fires unconditionally. Shares the helper with ``to_geotiff`` so the + # accepted types and error wording cannot drift between the two. + _validate_tile_size(tile_size) if max_z_error < 0: raise ValueError( f"max_z_error must be >= 0, got {max_z_error}") diff --git a/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py b/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py index 424369e70..f777c598d 100644 --- a/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py +++ b/xrspatial/geotiff/tests/test_tile_size_multiple_of_16_1767.py @@ -6,7 +6,9 @@ produced files that the in-repo reader round-tripped but that strict TIFF tools (libtiff, GDAL) may reject. ``to_geotiff`` now refuses non-multiples of 16 when ``tiled=True`` and suggests the nearest -valid value(s). +valid value(s). ``write_geotiff_gpu`` is always tiled and applies the +same check up front (before any cupy import), so the GPU validation +is exercised on CPU-only runs too. """ from __future__ import annotations @@ -16,7 +18,7 @@ import pytest import xarray as xr -from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff import to_geotiff, write_geotiff_gpu def _make_da(shape=(32, 32)): @@ -111,3 +113,39 @@ def test_tile_size_8_message_suggests_16_only_1767(tmp_path): assert '16' in msg # 0 is not a valid tile size and should not appear as a suggestion. assert 'tile_size=0' not in msg + + +def test_write_geotiff_gpu_tile_size_17_rejected_1767(tmp_path): + """``write_geotiff_gpu`` shares the multiple-of-16 check with + ``to_geotiff``. The validation runs before any cupy import, so the + bad-tile-size path can be exercised on CPU-only runs. + """ + da = _make_da() + out = os.path.join(str(tmp_path), 'gpu_tile_size_17_1767.tif') + with pytest.raises(ValueError) as exc: + write_geotiff_gpu(da, out, tile_size=17) + msg = str(exc.value) + assert 'tile_size' in msg + assert '17' in msg + # Hint should suggest nearest valid choices (16 and 32). + assert '16' in msg and '32' in msg + + +def test_write_geotiff_gpu_tile_size_zero_rejected_1767(tmp_path): + """``tile_size=0`` is rejected as non-positive before the + multiple-of-16 branch fires. + """ + da = _make_da() + out = os.path.join(str(tmp_path), 'gpu_tile_size_0_1767.tif') + with pytest.raises(ValueError, match=r'tile_size.*positive'): + write_geotiff_gpu(da, out, tile_size=0) + + +def test_write_geotiff_gpu_tile_size_float_rejected_1767(tmp_path): + """``tile_size`` must be an int; floats are rejected by the shared + helper before any GPU machinery is touched. + """ + da = _make_da() + out = os.path.join(str(tmp_path), 'gpu_tile_size_float_1767.tif') + with pytest.raises(ValueError, match=r'tile_size.*positive int'): + write_geotiff_gpu(da, out, tile_size=256.0)