Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion xrspatial/geotiff/_backends/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,14 @@ def read_geotiff_gpu(source: str, *,
f"window={window} is outside the source extent "
f"({ifd_h}x{ifd_w}).")

if not ifd.is_tiled:
# float16 on disk (bps=16 + SampleFormat=3) is exposed as float32
# by the reader (#1941). The CPU decode path views the raw 2-byte
# samples as numpy float16 and upcasts; the GPU tile-assembly
# kernels assume bps == file_dtype.itemsize * 8 and would
# mis-stride the buffer. Route the rare half-precision read to
# the CPU path, matching the stripped-layout fallback below.
bps_mismatch = (file_dtype.itemsize * 8 != bps)
if not ifd.is_tiled or bps_mismatch:
# Fall back to CPU for stripped files. read_to_array remaps
# the array but only updates geo_info.transform for orientations
# 5-8 today (the 2/3/4 fix in #1539 is in a sibling PR). Discard
Expand Down Expand Up @@ -767,6 +774,17 @@ def _gds_chunk_path_available(source, ifd, has_sparse_tile, orientation):
return False
if ifd.photometric == 0:
return False
# float16 on disk (bps=16 + SampleFormat=3) is auto-promoted to
# float32 by the CPU decoder (#1941). The GDS path uses the raw
# on-disk bps for byte striding and would mis-decode the
# half-precision samples; route those rare reads to the CPU
# decode path.
from .._dtypes import SAMPLE_FORMAT_FLOAT
bps_first = ifd.bits_per_sample
if isinstance(bps_first, tuple):
bps_first = bps_first[0] if bps_first else 0
if bps_first == 16 and ifd.sample_format == SAMPLE_FORMAT_FLOAT:
return False
return True


Expand Down
25 changes: 25 additions & 0 deletions xrspatial/geotiff/_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ def tiff_dtype_to_numpy(bits_per_sample: int, sample_format: int = 1) -> np.dtyp
(8, SAMPLE_FORMAT_INT): np.dtype('int8'),
(16, SAMPLE_FORMAT_UINT): np.dtype('uint16'),
(16, SAMPLE_FORMAT_INT): np.dtype('int16'),
# IEEE half-precision floats stored on disk. The reader
# auto-promotes to float32 because xrspatial's CPU JIT and
# CUDA kernels don't carry float16 dispatch paths. The
# promotion is symmetric with the writer's float16 path,
# which auto-promotes float16 inputs to float32 before
# encoding (#1941).
(16, SAMPLE_FORMAT_FLOAT): np.dtype('float32'),
(32, SAMPLE_FORMAT_UINT): np.dtype('uint32'),
(32, SAMPLE_FORMAT_INT): np.dtype('int32'),
(32, SAMPLE_FORMAT_FLOAT): np.dtype('float32'),
Expand Down Expand Up @@ -118,6 +125,24 @@ def tiff_dtype_to_numpy(bits_per_sample: int, sample_format: int = 1) -> np.dtyp
SUB_BYTE_BPS = {1, 2, 4, 12}


def tiff_storage_dtype(bits_per_sample: int, sample_format: int = 1) -> np.dtype:
"""Return the on-disk numpy dtype for ``BitsPerSample + SampleFormat``.

Differs from :func:`tiff_dtype_to_numpy` only for the auto-promoted
case ``(16, FLOAT)`` where the on-disk samples are IEEE half-precision
but the reader exposes them as float32. Callers that need to view
the raw decompressed bytes (e.g. ``ndarray.view(dtype)``) must use
this function; callers that only need the user-visible dtype use
:func:`tiff_dtype_to_numpy`.

Falls back to :func:`tiff_dtype_to_numpy` for every other key, so
the two helpers agree on the standard cases.
"""
if bits_per_sample == 16 and sample_format == SAMPLE_FORMAT_FLOAT:
return np.dtype('float16')
return tiff_dtype_to_numpy(bits_per_sample, sample_format)


_GDAL_OT_FOR_BPS = {
8: 'Byte',
16: 'UInt16',
Expand Down
20 changes: 15 additions & 5 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1541,11 +1541,21 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
if is_sub_byte:
pixels = unpack_bits(chunk, bps, pixel_count)
else:
# Use the file's byte order for the view, then convert to native
file_dtype = dtype.newbyteorder(byte_order)
pixels = chunk.view(file_dtype)
if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER):
pixels = pixels.astype(dtype)
# Use the file's byte order for the view, then convert to native.
# The view dtype must match the on-disk sample width: float16
# files (bps=16 + SampleFormat=3) are auto-promoted to float32
# for the user-visible array, but the raw bytes have to be
# viewed as float16 first then cast (#1941). Detect the
# promotion via the bps-vs-dtype.itemsize mismatch so the
# surrounding pipeline stays unchanged for byte-equal cases.
if dtype.itemsize * 8 != bps and bps == 16 and dtype.kind == 'f':
storage_dtype = np.dtype('float16').newbyteorder(byte_order)
pixels = chunk.view(storage_dtype).astype(dtype)
else:
file_dtype = dtype.newbyteorder(byte_order)
pixels = chunk.view(file_dtype)
if file_dtype.byteorder not in ('=', '|', _NATIVE_ORDER):
pixels = pixels.astype(dtype)

if samples > 1:
out = pixels.reshape(height, width, samples)
Expand Down
14 changes: 10 additions & 4 deletions xrspatial/geotiff/tests/test_edge_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,10 +399,16 @@ def test_complex_dtype_write(self):
with pytest.raises(ValueError, match="Unsupported numpy dtype"):
numpy_to_tiff_dtype(np.dtype('complex128'))

def test_float16_not_supported(self):
"""float16 has no TIFF equivalent."""
with pytest.raises(ValueError, match="Unsupported BitsPerSample"):
tiff_dtype_to_numpy(16, 3) # 16-bit float
def test_float16_auto_promoted_to_float32(self):
"""float16 on disk (bps=16 + SampleFormat=3) is exposed as float32.

The writer auto-promotes float16 inputs to float32 before
encoding, and the reader matches that contract: a file with
bps=16 + SampleFormat=3 (an externally produced half-precision
TIFF) returns float32 to the user. See issue #1941 for the
original read-side asymmetry.
"""
assert tiff_dtype_to_numpy(16, 3) == np.float32


# -----------------------------------------------------------------------
Expand Down
143 changes: 143 additions & 0 deletions xrspatial/geotiff/tests/test_float16_read_1941.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Regression tests for issue #1941.

External GeoTIFFs that store IEEE half-precision floats (``BitsPerSample
=16`` + ``SampleFormat=3``) used to raise ``ValueError("Unsupported
BitsPerSample=16, SampleFormat=3")`` from ``tiff_dtype_to_numpy``. The
writer auto-promotes float16 inputs to float32 before encoding, so the
write side could not produce such a file, but reads from rasterio /
GDAL / tifffile-produced files broke read-parity.

The fix:

* ``tiff_dtype_to_numpy(16, 3)`` returns ``np.float32`` (symmetric with
the writer's auto-promotion).
* A new ``tiff_storage_dtype`` returns ``np.float16`` for the same key
so the byte-view in ``_decode_strip_or_tile`` reads the raw 2-byte
samples correctly before casting to float32.
* The GPU paths fall back to CPU decode when bps != dtype.itemsize * 8,
matching the existing stripped-layout fallback.
"""
from __future__ import annotations

import numpy as np
import pytest

from xrspatial.geotiff import open_geotiff, read_geotiff_dask
from xrspatial.geotiff._dtypes import (
SAMPLE_FORMAT_FLOAT,
SAMPLE_FORMAT_INT,
SAMPLE_FORMAT_UINT,
tiff_dtype_to_numpy,
tiff_storage_dtype,
)


class TestDtypeMap:
"""The dtype map auto-promotes float16 on read."""

def test_tiff_dtype_to_numpy_float16(self):
assert tiff_dtype_to_numpy(16, SAMPLE_FORMAT_FLOAT) == np.float32

def test_tiff_storage_dtype_float16(self):
assert tiff_storage_dtype(16, SAMPLE_FORMAT_FLOAT) == np.float16

def test_tiff_storage_dtype_delegates_for_non_promoted(self):
# Non-promoted keys behave identically.
for bps, sf in [
(8, SAMPLE_FORMAT_UINT),
(16, SAMPLE_FORMAT_UINT),
(16, SAMPLE_FORMAT_INT),
(32, SAMPLE_FORMAT_FLOAT),
(64, SAMPLE_FORMAT_FLOAT),
]:
assert tiff_storage_dtype(bps, sf) == tiff_dtype_to_numpy(bps, sf)


@pytest.fixture
def float16_tif(tmp_path):
"""Write a small float16 GeoTIFF using tifffile.

tifffile encodes numpy float16 with ``BitsPerSample=16`` and
``SampleFormat=3``, which is what an external rasterio / GDAL caller
would produce.
"""
tifffile = pytest.importorskip("tifffile")
arr = np.array(
[[0.0, 1.0, 2.0, 3.0],
[-1.0, -2.0, -3.0, -4.0],
[0.5, 1.5, 2.5, 3.5],
[100.0, 200.0, 300.0, 400.0]],
dtype=np.float16,
)
path = tmp_path / "f16.tif"
tifffile.imwrite(str(path), arr, compression=None)
return path, arr


class TestEagerFloat16Read:
"""``open_geotiff`` decodes an external float16 file to float32."""

def test_open_geotiff_returns_float32(self, float16_tif):
path, arr = float16_tif
result = open_geotiff(str(path))
assert result.dtype == np.float32
# Float16 values fit exactly in float32, so equality is well-defined.
np.testing.assert_array_equal(result.values, arr.astype(np.float32))

def test_open_geotiff_dask_returns_float32(self, float16_tif):
path, arr = float16_tif
result = read_geotiff_dask(str(path), chunks=2)
assert result.dtype == np.float32
np.testing.assert_array_equal(
result.compute().values, arr.astype(np.float32))


class TestPredictor3Float16:
"""Predictor=3 + float16 on disk also decodes correctly."""

def test_predictor3_float16_round_trip(self, tmp_path):
tifffile = pytest.importorskip("tifffile")
pytest.importorskip("imagecodecs") # required for predictor=3
arr = np.linspace(-1.0, 1.0, 16).astype(np.float16).reshape(4, 4)
path = tmp_path / "pred3_f16.tif"
tifffile.imwrite(
str(path), arr, predictor=3, compression="deflate")

result = open_geotiff(str(path))
assert result.dtype == np.float32
np.testing.assert_array_equal(
result.values, arr.astype(np.float32))


class TestRegressionGuards:
"""The promotion did not change non-float16 behaviour."""

def test_float32_still_float32(self, tmp_path):
tifffile = pytest.importorskip("tifffile")
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
path = tmp_path / "f32.tif"
tifffile.imwrite(str(path), arr)

result = open_geotiff(str(path))
assert result.dtype == np.float32
np.testing.assert_array_equal(result.values, arr)

def test_float64_still_float64(self, tmp_path):
tifffile = pytest.importorskip("tifffile")
arr = np.arange(16, dtype=np.float64).reshape(4, 4)
path = tmp_path / "f64.tif"
tifffile.imwrite(str(path), arr)

result = open_geotiff(str(path))
assert result.dtype == np.float64
np.testing.assert_array_equal(result.values, arr)

def test_uint16_still_uint16(self, tmp_path):
tifffile = pytest.importorskip("tifffile")
arr = np.arange(16, dtype=np.uint16).reshape(4, 4)
path = tmp_path / "u16.tif"
tifffile.imwrite(str(path), arr)

result = open_geotiff(str(path))
assert result.dtype == np.uint16
np.testing.assert_array_equal(result.values, arr)
Loading