xarray-contrib · brendancol · May 15, 2026 · May 15, 2026
diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py
@@ -344,7 +344,14 @@ def read_geotiff_gpu(source: str, *,
                     f"window={window} is outside the source extent "
                     f"({ifd_h}x{ifd_w}).")
 
-        if not ifd.is_tiled:
+        # float16 on disk (bps=16 + SampleFormat=3) is exposed as float32
+        # by the reader (#1941). The CPU decode path views the raw 2-byte
+        # samples as numpy float16 and upcasts; the GPU tile-assembly
+        # kernels assume bps == file_dtype.itemsize * 8 and would
+        # mis-stride the buffer. Route the rare half-precision read to
+        # the CPU path, matching the stripped-layout fallback below.
+        bps_mismatch = (file_dtype.itemsize * 8 != bps)
+        if not ifd.is_tiled or bps_mismatch:
             # Fall back to CPU for stripped files. read_to_array remaps
             # the array but only updates geo_info.transform for orientations
             # 5-8 today (the 2/3/4 fix in #1539 is in a sibling PR). Discard
@@ -767,6 +774,17 @@ def _gds_chunk_path_available(source, ifd, has_sparse_tile, orientation):
         return False
     if ifd.photometric == 0:
         return False
+    # float16 on disk (bps=16 + SampleFormat=3) is auto-promoted to
+    # float32 by the CPU decoder (#1941). The GDS path uses the raw
+    # on-disk bps for byte striding and would mis-decode the
+    # half-precision samples; route those rare reads to the CPU
+    # decode path.
+    from .._dtypes import SAMPLE_FORMAT_FLOAT
+    bps_first = ifd.bits_per_sample
+    if isinstance(bps_first, tuple):
+        bps_first = bps_first[0] if bps_first else 0
+    if bps_first == 16 and ifd.sample_format == SAMPLE_FORMAT_FLOAT:
+        return False
     return True
 
 

diff --git a/xrspatial/geotiff/_dtypes.py b/xrspatial/geotiff/_dtypes.py
@@ -83,6 +83,13 @@ def tiff_dtype_to_numpy(bits_per_sample: int, sample_format: int = 1) -> np.dtyp
         (8, SAMPLE_FORMAT_INT): np.dtype('int8'),
         (16, SAMPLE_FORMAT_UINT): np.dtype('uint16'),
         (16, SAMPLE_FORMAT_INT): np.dtype('int16'),
+        # IEEE half-precision floats stored on disk. The reader
+        # auto-promotes to float32 because xrspatial's CPU JIT and
+        # CUDA kernels don't carry float16 dispatch paths. The
+        # promotion is symmetric with the writer's float16 path,
+        # which auto-promotes float16 inputs to float32 before
+        # encoding (#1941).
+        (16, SAMPLE_FORMAT_FLOAT): np.dtype('float32'),
         (32, SAMPLE_FORMAT_UINT): np.dtype('uint32'),
         (32, SAMPLE_FORMAT_INT): np.dtype('int32'),
         (32, SAMPLE_FORMAT_FLOAT): np.dtype('float32'),
@@ -118,6 +125,24 @@ def tiff_dtype_to_numpy(bits_per_sample: int, sample_format: int = 1) -> np.dtyp
 SUB_BYTE_BPS = {1, 2, 4, 12}
 
 
+def tiff_storage_dtype(bits_per_sample: int, sample_format: int = 1) -> np.dtype:
+    """Return the on-disk numpy dtype for ``BitsPerSample + SampleFormat``.
+
+    Differs from :func:`tiff_dtype_to_numpy` only for the auto-promoted
+    case ``(16, FLOAT)`` where the on-disk samples are IEEE half-precision
+    but the reader exposes them as float32. Callers that need to view
+    the raw decompressed bytes (e.g. ``ndarray.view(dtype)``) must use
+    this function; callers that only need the user-visible dtype use
+    :func:`tiff_dtype_to_numpy`.
+
+    Falls back to :func:`tiff_dtype_to_numpy` for every other key, so
+    the two helpers agree on the standard cases.
+    """
+    if bits_per_sample == 16 and sample_format == SAMPLE_FORMAT_FLOAT:
+        return np.dtype('float16')
+    return tiff_dtype_to_numpy(bits_per_sample, sample_format)
+
+
 _GDAL_OT_FOR_BPS = {
     8: 'Byte',
     16: 'UInt16',

diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
@@ -1541,11 +1541,21 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
     if is_sub_byte:
         pixels = unpack_bits(chunk, bps, pixel_count)
     else:
-        # Use the file's byte order for the view, then convert to native
-        file_dtype = dtype.newbyteorder(byte_order)
-        pixels = chunk.view(file_dtype)
-        if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER):
-            pixels = pixels.astype(dtype)
+        # Use the file's byte order for the view, then convert to native.
+        # The view dtype must match the on-disk sample width: float16
+        # files (bps=16 + SampleFormat=3) are auto-promoted to float32
+        # for the user-visible array, but the raw bytes have to be
+        # viewed as float16 first then cast (#1941). Detect the
+        # promotion via the bps-vs-dtype.itemsize mismatch so the
+        # surrounding pipeline stays unchanged for byte-equal cases.
+        if dtype.itemsize * 8 != bps and bps == 16 and dtype.kind == 'f':
+            storage_dtype = np.dtype('float16').newbyteorder(byte_order)
+            pixels = chunk.view(storage_dtype).astype(dtype)
+        else:
+            file_dtype = dtype.newbyteorder(byte_order)
+            pixels = chunk.view(file_dtype)
+            if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER):
+                pixels = pixels.astype(dtype)
 
     if samples > 1:
         out = pixels.reshape(height, width, samples)

diff --git a/xrspatial/geotiff/tests/test_edge_cases.py b/xrspatial/geotiff/tests/test_edge_cases.py
@@ -399,10 +399,16 @@ def test_complex_dtype_write(self):
         with pytest.raises(ValueError, match="Unsupported numpy dtype"):
             numpy_to_tiff_dtype(np.dtype('complex128'))
 
-    def test_float16_not_supported(self):
-        """float16 has no TIFF equivalent."""
-        with pytest.raises(ValueError, match="Unsupported BitsPerSample"):
-            tiff_dtype_to_numpy(16, 3)  # 16-bit float
+    def test_float16_auto_promoted_to_float32(self):
+        """float16 on disk (bps=16 + SampleFormat=3) is exposed as float32.
+
+        The writer auto-promotes float16 inputs to float32 before
+        encoding, and the reader matches that contract: a file with
+        bps=16 + SampleFormat=3 (an externally produced half-precision
+        TIFF) returns float32 to the user. See issue #1941 for the
+        original read-side asymmetry.
+        """
+        assert tiff_dtype_to_numpy(16, 3) == np.float32
 
 
 # -----------------------------------------------------------------------

diff --git a/xrspatial/geotiff/tests/test_float16_read_1941.py b/xrspatial/geotiff/tests/test_float16_read_1941.py
@@ -0,0 +1,143 @@
+"""Regression tests for issue #1941.
+
+External GeoTIFFs that store IEEE half-precision floats (``BitsPerSample
+=16`` + ``SampleFormat=3``) used to raise ``ValueError("Unsupported
+BitsPerSample=16, SampleFormat=3")`` from ``tiff_dtype_to_numpy``. The
+writer auto-promotes float16 inputs to float32 before encoding, so the
+write side could not produce such a file, but reads from rasterio /
+GDAL / tifffile-produced files broke read-parity.
+
+The fix:
+
+* ``tiff_dtype_to_numpy(16, 3)`` returns ``np.float32`` (symmetric with
+  the writer's auto-promotion).
+* A new ``tiff_storage_dtype`` returns ``np.float16`` for the same key
+  so the byte-view in ``_decode_strip_or_tile`` reads the raw 2-byte
+  samples correctly before casting to float32.
+* The GPU paths fall back to CPU decode when bps != dtype.itemsize * 8,
+  matching the existing stripped-layout fallback.
+"""
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from xrspatial.geotiff import open_geotiff, read_geotiff_dask
+from xrspatial.geotiff._dtypes import (
+    SAMPLE_FORMAT_FLOAT,
+    SAMPLE_FORMAT_INT,
+    SAMPLE_FORMAT_UINT,
+    tiff_dtype_to_numpy,
+    tiff_storage_dtype,
+)
+
+
+class TestDtypeMap:
+    """The dtype map auto-promotes float16 on read."""
+
+    def test_tiff_dtype_to_numpy_float16(self):
+        assert tiff_dtype_to_numpy(16, SAMPLE_FORMAT_FLOAT) == np.float32
+
+    def test_tiff_storage_dtype_float16(self):
+        assert tiff_storage_dtype(16, SAMPLE_FORMAT_FLOAT) == np.float16
+
+    def test_tiff_storage_dtype_delegates_for_non_promoted(self):
+        # Non-promoted keys behave identically.
+        for bps, sf in [
+            (8, SAMPLE_FORMAT_UINT),
+            (16, SAMPLE_FORMAT_UINT),
+            (16, SAMPLE_FORMAT_INT),
+            (32, SAMPLE_FORMAT_FLOAT),
+            (64, SAMPLE_FORMAT_FLOAT),
+        ]:
+            assert tiff_storage_dtype(bps, sf) == tiff_dtype_to_numpy(bps, sf)
+
+
+@pytest.fixture
+def float16_tif(tmp_path):
+    """Write a small float16 GeoTIFF using tifffile.
+
+    tifffile encodes numpy float16 with ``BitsPerSample=16`` and
+    ``SampleFormat=3``, which is what an external rasterio / GDAL caller
+    would produce.
+    """
+    tifffile = pytest.importorskip("tifffile")
+    arr = np.array(
+        [[0.0, 1.0, 2.0, 3.0],
+         [-1.0, -2.0, -3.0, -4.0],
+         [0.5, 1.5, 2.5, 3.5],
+         [100.0, 200.0, 300.0, 400.0]],
+        dtype=np.float16,
+    )
+    path = tmp_path / "f16.tif"
+    tifffile.imwrite(str(path), arr, compression=None)
+    return path, arr
+
+
+class TestEagerFloat16Read:
+    """``open_geotiff`` decodes an external float16 file to float32."""
+
+    def test_open_geotiff_returns_float32(self, float16_tif):
+        path, arr = float16_tif
+        result = open_geotiff(str(path))
+        assert result.dtype == np.float32
+        # Float16 values fit exactly in float32, so equality is well-defined.
+        np.testing.assert_array_equal(result.values, arr.astype(np.float32))
+
+    def test_open_geotiff_dask_returns_float32(self, float16_tif):
+        path, arr = float16_tif
+        result = read_geotiff_dask(str(path), chunks=2)
+        assert result.dtype == np.float32
+        np.testing.assert_array_equal(
+            result.compute().values, arr.astype(np.float32))
+
+
+class TestPredictor3Float16:
+    """Predictor=3 + float16 on disk also decodes correctly."""
+
+    def test_predictor3_float16_round_trip(self, tmp_path):
+        tifffile = pytest.importorskip("tifffile")
+        pytest.importorskip("imagecodecs")  # required for predictor=3
+        arr = np.linspace(-1.0, 1.0, 16).astype(np.float16).reshape(4, 4)
+        path = tmp_path / "pred3_f16.tif"
+        tifffile.imwrite(
+            str(path), arr, predictor=3, compression="deflate")
+
+        result = open_geotiff(str(path))
+        assert result.dtype == np.float32
+        np.testing.assert_array_equal(
+            result.values, arr.astype(np.float32))
+
+
+class TestRegressionGuards:
+    """The promotion did not change non-float16 behaviour."""
+
+    def test_float32_still_float32(self, tmp_path):
+        tifffile = pytest.importorskip("tifffile")
+        arr = np.arange(16, dtype=np.float32).reshape(4, 4)
+        path = tmp_path / "f32.tif"
+        tifffile.imwrite(str(path), arr)
+
+        result = open_geotiff(str(path))
+        assert result.dtype == np.float32
+        np.testing.assert_array_equal(result.values, arr)
+
+    def test_float64_still_float64(self, tmp_path):
+        tifffile = pytest.importorskip("tifffile")
+        arr = np.arange(16, dtype=np.float64).reshape(4, 4)
+        path = tmp_path / "f64.tif"
+        tifffile.imwrite(str(path), arr)
+
+        result = open_geotiff(str(path))
+        assert result.dtype == np.float64
+        np.testing.assert_array_equal(result.values, arr)
+
+    def test_uint16_still_uint16(self, tmp_path):
+        tifffile = pytest.importorskip("tifffile")
+        arr = np.arange(16, dtype=np.uint16).reshape(4, 4)
+        path = tmp_path / "u16.tif"
+        tifffile.imwrite(str(path), arr)
+
+        result = open_geotiff(str(path))
+        assert result.dtype == np.uint16
+        np.testing.assert_array_equal(result.values, arr)