diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 2e2122df3..89c825d4d 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -254,6 +254,7 @@ def open_geotiff(source: str | BinaryIO, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + mask_nodata: bool = True, ) -> xr.DataArray: """Read a GeoTIFF, COG, or VRT file into an xarray.DataArray. @@ -322,6 +323,17 @@ def open_geotiff(source: str | BinaryIO, *, return a partial mosaic. Passing this kwarg with a non-VRT source raises ``ValueError`` because the policy only applies to the VRT pipeline. See ``read_vrt`` for the full description. + mask_nodata : bool, default True + If True (the default), replace the nodata sentinel with ``NaN``; + integer rasters get promoted to ``float64`` first so NaN can be + represented. If False, skip the sentinel-to-NaN step and keep + the source dtype. ``attrs['nodata']`` still carries the raw + sentinel either way, so downstream code can mask explicitly. + Pass ``mask_nodata=False`` when you want to preserve an integer + source dtype via ``dtype=``: the default ``mask_nodata=True`` + promotes to ``float64`` whenever the sentinel matches an actual + pixel, and ``dtype=`` then raises ``ValueError`` on the + float-to-int cast. Returns ------- @@ -346,10 +358,14 @@ def open_geotiff(source: str | BinaryIO, *, Integer rasters with a nodata sentinel are silently promoted to ``float64`` with NaN replacing the sentinel so downstream NaN-aware - code works uniformly. Pass ``dtype=...`` to keep the source dtype - (the cast will fail with ``ValueError`` for float-to-int because that - is lossy in a way users rarely intend; cast explicitly after read if - you need it). + code works uniformly. To keep the source dtype on a file whose + sentinel matches actual pixels, pass ``mask_nodata=False``; the raw + sentinel stays in the data and ``attrs['nodata']`` still carries it. 
+ Passing ``dtype=`` on its own is not enough: the + sentinel-to-NaN promotion runs first and the subsequent integer cast + then raises ``ValueError`` (float-to-int is lossy in a way users + rarely intend). When the file has no in-range sentinel match, the + promotion is skipped and ``dtype=`` works either way. """ from ._reader import _coerce_path @@ -447,6 +463,7 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + mask_nodata=mask_nodata, **vrt_kwargs) # File-like buffers don't support the GPU or dask code paths because @@ -474,6 +491,7 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + mask_nodata=mask_nodata, **gpu_kwargs) # Dask path (CPU) @@ -483,7 +501,8 @@ def open_geotiff(source: str | BinaryIO, *, window=window, band=band, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, - allow_unparseable_crs=allow_unparseable_crs) + allow_unparseable_crs=allow_unparseable_crs, + mask_nodata=mask_nodata) kwargs = {} if max_pixels is not None: @@ -543,16 +562,18 @@ def open_geotiff(source: str | BinaryIO, *, # write is safe; an extra ``arr.copy()`` would just double peak # memory for a multi-MB raster. nodata = geo_info.nodata - if nodata is not None: + if nodata is not None and mask_nodata: # When the reader applied MinIsWhite, the sentinel-equality mask # must compare against the inverted sentinel value (issue #1809). # ``read_to_array`` / ``_read_cog_http`` stash that value on # ``geo_info._mask_nodata``; fall back to the original sentinel - # on non-MinIsWhite files. - mask_nodata = getattr(geo_info, '_mask_nodata', nodata) + # on non-MinIsWhite files. Renamed from ``mask_nodata`` to + # ``nodata_sentinel`` so the local does not shadow the + # ``mask_nodata`` opt-out kwarg (#2052). 
+ nodata_sentinel = getattr(geo_info, '_mask_nodata', nodata) if arr.dtype.kind == 'f': - if mask_nodata is not None and not np.isnan(mask_nodata): - arr[arr == arr.dtype.type(mask_nodata)] = np.nan + if nodata_sentinel is not None and not np.isnan(nodata_sentinel): + arr[arr == arr.dtype.type(nodata_sentinel)] = np.nan elif arr.dtype.kind in ('u', 'i'): # Integer arrays: convert to float to represent NaN. # An out-of-range sentinel (e.g. uint16 file with @@ -571,10 +592,10 @@ def open_geotiff(source: str | BinaryIO, *, # ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616). # attrs['nodata'] still carries the raw sentinel so a write # round-trip preserves the tag. - if (mask_nodata is not None - and np.isfinite(mask_nodata) - and float(mask_nodata).is_integer()): - nodata_int = int(mask_nodata) + if (nodata_sentinel is not None + and np.isfinite(nodata_sentinel) + and float(nodata_sentinel).is_integer()): + nodata_int = int(nodata_sentinel) info = np.iinfo(arr.dtype) if info.min <= nodata_int <= info.max: mask = arr == arr.dtype.type(nodata_int) diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index d36a6b224..bfc7851c0 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -40,7 +40,8 @@ def read_geotiff_dask(source: str, *, chunks: int | tuple = 512, max_pixels: int | None = None, allow_rotated: bool = False, - allow_unparseable_crs: bool = False) -> xr.DataArray: + allow_unparseable_crs: bool = False, + mask_nodata: bool = True) -> xr.DataArray: """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing. Each chunk is loaded lazily via windowed reads. @@ -73,6 +74,14 @@ def read_geotiff_dask(source: str, *, directly. name : str or None Name for the DataArray. + mask_nodata : bool, default True + If True, replace the nodata sentinel with NaN per chunk (integer + rasters get promoted to ``float64``). 
If False, skip the + sentinel-to-NaN step so the source dtype survives. The raw + sentinel is still carried on ``attrs['nodata']`` either way. + Pass ``mask_nodata=False`` together with ``dtype=`` to + keep an integer source dtype; the default promotes to + ``float64`` and the cast then raises. See issue #2052. Returns ------- @@ -105,6 +114,7 @@ def read_geotiff_dask(source: str, *, return read_vrt( source, dtype=dtype, window=window, band=band, name=name, chunks=chunks, max_pixels=max_pixels, + mask_nodata=mask_nodata, ) # P5: HTTP COG sources used to fire one IFD/header GET per chunk @@ -208,7 +218,8 @@ def read_geotiff_dask(source: str, *, # pass an exotic ``nodata`` type (e.g. complex) on the no-op path # rather than surfacing an opaque error here. effective_dtype = file_dtype - if (nodata is not None + if (mask_nodata + and nodata is not None and file_dtype.kind in ('u', 'i') and np.isfinite(nodata) and float(nodata).is_integer()): @@ -379,11 +390,18 @@ def read_geotiff_dask(source: str, *, # actual dtype (uint16), silently casting later float64 # chunks back to int and converting their NaNs to 0. See # issue #1597. + # Per-chunk nodata mask is skipped when ``mask_nodata=False``; + # passing ``nodata=None`` short-circuits both the float-NaN and + # int-promotion branches in ``_delayed_read_window``. The + # original sentinel is still carried in ``attrs['nodata']`` via + # ``nodata_attr`` so write round-trips preserve the tag. See + # issue #2052. 
+ chunk_nodata = nodata if mask_nodata else None block = da.from_delayed( _delayed_read_window(source, r0 + win_r0, c0 + win_c0, r1 + win_r0, c1 + win_c0, - overview_level, nodata, + overview_level, chunk_nodata, band_arg, target_dtype=target_dtype, http_meta_key=http_meta_key, diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 175e6f792..4333a00e6 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -86,6 +86,7 @@ def read_geotiff_gpu(source: str, *, on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + mask_nodata: bool = True, gpu: str = _GPU_DEPRECATED_SENTINEL, ) -> xr.DataArray: """Read a GeoTIFF with GPU-accelerated decompression via Numba CUDA. @@ -171,6 +172,13 @@ def read_geotiff_gpu(source: str, *, ``TypeError``. The old name shipped with values ``'auto'`` / ``'strict'`` and was easy to confuse with the boolean ``gpu=`` kwarg on ``open_geotiff`` / ``to_geotiff`` / ``read_vrt``. + mask_nodata : bool, default True + If True, replace the nodata sentinel with NaN (integer rasters + get promoted to ``float64`` first). If False, keep the source + dtype and leave the raw sentinel in the data. ``attrs['nodata']`` + carries the sentinel either way. Pass ``mask_nodata=False`` + together with ``dtype=`` to preserve an integer source + dtype on a file with a matching sentinel. See issue #2052. Returns ------- @@ -241,6 +249,7 @@ def read_geotiff_gpu(source: str, *, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + mask_nodata=mask_nodata, ) from .._reader import ( @@ -405,7 +414,7 @@ def read_geotiff_gpu(source: str, *, # ``_mask_nodata`` when applicable; fall back to the original # sentinel otherwise (#1809). 
nodata = geo_info.nodata - if nodata is not None: + if nodata is not None and mask_nodata: mask_value = getattr(_stripped_geo, '_mask_nodata', nodata) arr_gpu = _apply_nodata_mask_gpu(arr_gpu, mask_value) if dtype is not None: @@ -697,7 +706,7 @@ def _read_once(): # dtype cast so the float promotion for masked integer rasters doesn't # surprise a user-supplied dtype. nodata = geo_info.nodata - if nodata is not None: + if nodata is not None and mask_nodata: # When MinIsWhite was applied, the mask must use the inverted # sentinel; otherwise the original sentinel. The pure GPU path # records the inverted sentinel in ``_mw_mask_nodata`` above; the @@ -902,7 +911,8 @@ def _decode_window_gpu_direct(file_path, all_offsets, all_byte_counts, def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, window, band, name, max_pixels, allow_rotated: bool = False, - allow_unparseable_crs: bool = False): + allow_unparseable_crs: bool = False, + mask_nodata: bool = True): """Lazy Dask+CuPy backend for ``read_geotiff_gpu(chunks=...)``. Two paths produce the same shape of dask graph: @@ -964,6 +974,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + mask_nodata=mask_nodata, ) except Exception: # GDS qualification failed; fall back to the CPU path. The @@ -977,6 +988,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + mask_nodata=mask_nodata, ) cpu_dask_arr = cpu_da.data @@ -1000,7 +1012,8 @@ def _read_geotiff_gpu_chunked_gds(source, ifd, geo_info, header, *, dtype, chunks, window, band, name, max_pixels, allow_rotated: bool = False, - allow_unparseable_crs: bool = False): + allow_unparseable_crs: bool = False, + mask_nodata: bool = True): """Build a Dask+CuPy graph that decodes each chunk disk->GPU. 
Caller must have verified that the source qualifies via @@ -1108,8 +1121,10 @@ def _read_geotiff_gpu_chunked_gds(source, ifd, geo_info, header, *, # Determine declared dtype for the dask graph. Nodata masking # promotes integer rasters to float64; mirror the CPU dask path. + # When ``mask_nodata=False`` the masking is skipped, so no promotion. declared_dtype = file_dtype - if nodata is not None and file_dtype.kind in ('u', 'i'): + if (mask_nodata and nodata is not None + and file_dtype.kind in ('u', 'i')): if np.isfinite(nodata) and float(nodata).is_integer(): info = np.iinfo(file_dtype) if info.min <= int(nodata) <= info.max: @@ -1127,7 +1142,7 @@ def _chunk_task(meta, r0, c0, r1, c1): r0, c0, r1, c1, masked_fill=masked_fill, ) - if nodata is not None: + if nodata is not None and mask_nodata: arr = _apply_nodata_mask_gpu(arr, nodata) if dtype is not None: target = np.dtype(dtype) diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 5fe1cbbac..9d57b5f81 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -40,7 +40,8 @@ def read_vrt(source: str, *, missing_sources: str = 'raise', allow_rotated: bool = False, allow_unparseable_crs: bool = False, - band_nodata: str | None = None) -> xr.DataArray: + band_nodata: str | None = None, + mask_nodata: bool = True) -> xr.DataArray: """Read a GDAL Virtual Raster Table (.vrt) into an xarray.DataArray. The VRT's source GeoTIFFs are read via windowed reads and assembled @@ -84,6 +85,16 @@ def read_vrt(source: str, *, sentinel (or zero on integer bands without a sentinel). ``XRSPATIAL_GEOTIFF_STRICT=1`` forces a raise across the whole module regardless of this kwarg. + mask_nodata : bool, default True + If True, run the integer-sentinel-to-NaN promotion on the + assembled mosaic. If False, skip it and keep the source dtype + with the raw sentinel still in the data. ``attrs['nodata']`` + carries the sentinel either way. 
Pass ``mask_nodata=False`` + together with ``dtype=`` when you need to preserve an + integer source dtype on a VRT whose declared sentinel matches + actual pixels. See issue #2052. This kwarg only gates the + integer-sentinel step; float source bands are left to the internal + reader, so it matters mainly for integer-dtype mosaics. Returns ------- @@ -164,6 +175,7 @@ def read_vrt(source: str, *, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, band_nodata=band_nodata, + mask_nodata=mask_nodata, ) # Issue #1987 ambiguous-metadata checks for the eager VRT path. Parse @@ -276,7 +288,10 @@ def read_vrt(source: str, *, # promoting ``arr`` to float64 on the first sentinel hit and writing # NaNs in place on the promoted view. Shared with the chunked path # (issue #1825) so behaviour stays in lockstep. See issue #1611. - arr = _vrt_apply_integer_sentinel_mask(arr, vrt, band) + # ``mask_nodata=False`` skips this so callers can preserve an + # integer source dtype via ``dtype=...`` (issue #2052). + if mask_nodata: + arr = _vrt_apply_integer_sentinel_mask(arr, vrt, band) # Surface the source GeoTransform in the same rasterio ordering used # by open_geotiff: (pixel_width, 0, origin_x, 0, pixel_height, origin_y). @@ -323,7 +338,8 @@ def read_vrt(source: str, *, def _vrt_chunk_read(source, r0, c0, r1, c1, *, band, max_pixels, missing_sources, - declared_dtype, gpu, parsed_vrt): + declared_dtype, gpu, parsed_vrt, + mask_nodata: bool = True): """Decode a single chunk window from a VRT. Called by ``dask.delayed`` from :func:`_read_vrt_chunked`. The @@ -360,8 +376,10 @@ def _vrt_chunk_read(source, r0, c0, r1, c1, *, # promotes to float64 when sentinels hit. The surrounding dask graph # already declared float64 when any band has a representable integer # sentinel, so any chunk that actually fires the mask returns a - # buffer whose dtype matches the declared one. - arr = _apply_integer_sentinel_mask(arr, vrt, band) + # buffer whose dtype matches the declared one. 
Skip the helper when + # ``mask_nodata=False`` so the source integer dtype survives (#2052). + if mask_nodata: + arr = _apply_integer_sentinel_mask(arr, vrt, band) if declared_dtype is not None and arr.dtype != declared_dtype: arr = arr.astype(declared_dtype) @@ -377,7 +395,8 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, max_pixels, missing_sources, allow_rotated: bool = False, allow_unparseable_crs: bool = False, - band_nodata: str | None = None): + band_nodata: str | None = None, + mask_nodata: bool = True): """Lazy ``read_vrt`` dispatch when ``chunks=`` is set (issue #1814). Parses the VRT XML once to recover the extent, CRS, GeoTransform, @@ -539,7 +558,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # See also Copilot review on PR #1822. declared_dtype = _effective_dtype_for_bands(selected_bands) - if declared_dtype.kind in ('u', 'i'): + if mask_nodata and declared_dtype.kind in ('u', 'i'): promotes = False for vrt_band in selected_bands: if _sentinel_for_dtype(vrt_band.nodata, @@ -602,6 +621,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, declared_dtype=declared_dtype, gpu=gpu, parsed_vrt=parsed_vrt_key, + mask_nodata=mask_nodata, ) block = da.from_delayed(d, shape=block_shape, dtype=declared_dtype, meta=meta) diff --git a/xrspatial/geotiff/tests/test_mask_nodata_kwarg_2052.py b/xrspatial/geotiff/tests/test_mask_nodata_kwarg_2052.py new file mode 100644 index 000000000..72033fbd9 --- /dev/null +++ b/xrspatial/geotiff/tests/test_mask_nodata_kwarg_2052.py @@ -0,0 +1,177 @@ +"""Regression tests for issue #2052. + +``open_geotiff(path, dtype="uint16")`` on a uint16 file whose nodata +sentinel matches actual pixels used to raise ``ValueError`` from the +``_validate_dtype_cast`` float64-to-uint16 guard: the masking block +ran first and promoted the array to float64, then the dtype= cast +rejected the float-to-int conversion. 
The docstring at +``xrspatial/geotiff/__init__.py`` promised "Pass ``dtype=...`` to keep +the source dtype", but for integer rasters with a matching sentinel +that contract was unreachable. + +The fix adds ``mask_nodata: bool = True`` to the public reader entry +points. Passing ``mask_nodata=False`` skips the sentinel-to-NaN step so +the source dtype survives; ``attrs['nodata']`` still carries the raw +sentinel either way. +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._writer import write + + +@pytest.fixture +def uint16_with_matching_sentinel(tmp_path): + """uint16 TIFF where nodata=0 and the array has zeros in it.""" + arr = np.array([[0, 100, 200, 300], + [400, 500, 0, 600], + [700, 800, 900, 0], + [0, 1100, 1200, 1300]], dtype=np.uint16) + path = str(tmp_path / 'uint16_match_2052.tif') + write(arr, path, nodata=0, compression='none', tiled=False) + return path, arr + + +@pytest.fixture +def uint16_no_match(tmp_path): + """uint16 TIFF whose nodata sentinel is not present in any pixel.""" + arr = np.array([[1, 2, 3, 4], + [5, 6, 7, 8]], dtype=np.uint16) + path = str(tmp_path / 'uint16_nomatch_2052.tif') + write(arr, path, nodata=65535, compression='none', tiled=False) + return path, arr + + +@pytest.fixture +def float32_tiff(tmp_path): + """float32 TIFF with NaN nodata.""" + arr = np.array([[1.0, 2.0, np.nan], + [4.0, np.nan, 6.0]], dtype=np.float32) + path = str(tmp_path / 'float32_2052.tif') + write(arr, path, nodata=float('nan'), compression='none', tiled=False) + return path, arr + + +def test_regression_dtype_uint16_was_unreachable( + uint16_with_matching_sentinel): + """Without the kwarg, ``dtype="uint16"`` raises on a matching sentinel. + + Baseline that documents the broken contract: this is the original + failure mode reported in the issue. The ``mask_nodata=False`` + branch below is the fix. 
+ """ + path, _ = uint16_with_matching_sentinel + with pytest.raises(ValueError): + open_geotiff(path, dtype='uint16') + + +def test_mask_nodata_false_preserves_uint16(uint16_with_matching_sentinel): + """``mask_nodata=False`` keeps the uint16 source dtype.""" + path, arr = uint16_with_matching_sentinel + da = open_geotiff(path, dtype='uint16', mask_nodata=False) + assert da.dtype == np.uint16 + # Raw sentinels survive in the data. + np.testing.assert_array_equal(da.values, arr) + # The declared sentinel is still surfaced for downstream maskers. + assert da.attrs['nodata'] == 0 + + +def test_mask_nodata_false_no_dtype_kwarg(uint16_with_matching_sentinel): + """Without ``dtype=``, the source dtype is preserved as-is.""" + path, arr = uint16_with_matching_sentinel + da = open_geotiff(path, mask_nodata=False) + assert da.dtype == np.uint16 + np.testing.assert_array_equal(da.values, arr) + assert da.attrs['nodata'] == 0 + + +def test_default_mask_nodata_true_still_promotes( + uint16_with_matching_sentinel): + """Default ``mask_nodata=True`` keeps the existing behaviour.""" + path, _ = uint16_with_matching_sentinel + da = open_geotiff(path) + assert da.dtype == np.float64 + assert np.isnan(da.values).sum() == 4 + # Sentinel positions should be NaN. + assert np.isnan(da.values[0, 0]) + assert np.isnan(da.values[1, 2]) + + +def test_no_match_both_modes_agree(uint16_no_match): + """When the sentinel does not match any pixel, both modes return the + same uint16 array (no promotion needed in either case). 
+ """ + path, arr = uint16_no_match + masked = open_geotiff(path) + unmasked = open_geotiff(path, mask_nodata=False) + assert masked.dtype == np.uint16 + assert unmasked.dtype == np.uint16 + np.testing.assert_array_equal(masked.values, arr) + np.testing.assert_array_equal(unmasked.values, arr) + + +def test_float_file_mask_nodata_false_keeps_data(float32_tiff): + """For a float32 file with NaN nodata, ``mask_nodata=False`` is a + no-op: the sentinel is NaN so the inline mask would do nothing + anyway, and the float dtype is preserved either way. + """ + path, arr = float32_tiff + masked = open_geotiff(path) + unmasked = open_geotiff(path, mask_nodata=False) + assert masked.dtype == np.float32 + assert unmasked.dtype == np.float32 + np.testing.assert_array_equal(np.isnan(masked.values), + np.isnan(arr)) + np.testing.assert_array_equal(np.isnan(unmasked.values), + np.isnan(arr)) + + +def test_dtype_cast_preservation_uint8(tmp_path): + """Casting to a different integer dtype also works with the opt-out. + + The reader keeps the source dtype (uint16) via ``mask_nodata=False``, + then ``dtype="uint32"`` casts integer-to-integer, which is allowed. + """ + arr = np.array([[0, 100, 200], + [300, 0, 500]], dtype=np.uint16) + path = str(tmp_path / 'uint16_to_uint32_2052.tif') + write(arr, path, nodata=0, compression='none', tiled=False) + + da = open_geotiff(path, dtype='uint32', mask_nodata=False) + assert da.dtype == np.uint32 + np.testing.assert_array_equal(da.values, arr.astype(np.uint32)) + + +def test_dask_path_mask_nodata_false(uint16_with_matching_sentinel): + """The dask path honours the kwarg too: integer source dtype survives. + + Without this, ``read_geotiff_dask`` would still promote the dask + graph dtype to float64 and force the per-chunk cast. 
+ """ + path, arr = uint16_with_matching_sentinel + da = open_geotiff(path, chunks=2, mask_nodata=False) + assert da.dtype == np.uint16 + computed = da.compute() + assert computed.dtype == np.uint16 + np.testing.assert_array_equal(computed.values, arr) + assert computed.attrs['nodata'] == 0 + + +def test_dask_path_default_still_promotes(uint16_with_matching_sentinel): + """The dask default (``mask_nodata=True``) still promotes to float64.""" + path, _ = uint16_with_matching_sentinel + da = open_geotiff(path, chunks=2) + assert da.dtype == np.float64 + computed = da.compute() + assert np.isnan(computed.values).sum() == 4 + + +def test_dask_dtype_cast_with_opt_out(uint16_with_matching_sentinel): + """``dtype="uint16"`` + ``mask_nodata=False`` works on the dask path.""" + path, arr = uint16_with_matching_sentinel + da = open_geotiff(path, chunks=2, dtype='uint16', mask_nodata=False) + assert da.dtype == np.uint16 + np.testing.assert_array_equal(da.compute().values, arr) diff --git a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py index f17c2a540..ceb97be9b 100644 --- a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py +++ b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py @@ -44,6 +44,7 @@ "missing_sources", "allow_rotated", "allow_unparseable_crs", + "mask_nodata", )