From 8df4085249efd9b70be1263812bc091f8866d824 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 21:25:15 -0700 Subject: [PATCH 1/2] geotiff: annotate open_geotiff(max_cloud_bytes=...) (#2106) The docstring already declared ``int or None``, but the function signature carried a bare parameter -- the only kwarg on the public read/write surface without a type annotation. ``inspect.signature``, IDE autocomplete, Sphinx output, and ``mypy --strict`` all saw the gap. Adds ``int | None`` to the annotation; default stays the module-internal ``_MAX_CLOUD_BYTES_SENTINEL`` so behaviour is unchanged. Regression test pins the immediate fix and parametrises over every public reader/writer entry point so a future kwarg addition without an annotation surfaces in CI. Also updates ``.claude/sweep-api-consistency-state.csv`` with the 2026-05-18 sweep results. --- .claude/sweep-api-consistency-state.csv | 2 +- xrspatial/geotiff/__init__.py | 2 +- ...open_geotiff_max_cloud_bytes_annot_2106.py | 81 +++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index 85ee7521e..f922f9be7 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-15,1922,MEDIUM,1,"Sweep 2026-05-15 (deep-sweep-api-consistency-geotiff-2026-05-15-1778854324). 1 MEDIUM Cat 1 finding fixed in this branch: write_geotiff_gpu and to_geotiff disagreed on order of max_z_error / streaming_buffer_bytes kwargs. Both kwargs are keyword-only so no functional break; drift surfaced in inspect.signature, IDE autocomplete, and Sphinx docs against the writers' explicit-parity promise. Fix reorders write_geotiff_gpu to match to_geotiff (streaming_buffer_bytes before max_z_error) and updates the docstring; gpu is the only kwarg to_geotiff has that write_geotiff_gpu does not, so the gap stays. Regression test in test_writer_kwarg_order_1922.py pins kwarg order parity and default-value parity. Prior findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775 #1810 #1845-followup) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). Cross-cutting cross-module drift (chunk_size in reproject vs chunks in geotiff; target_crs vs crs) documented but not filed per sweep template (cross-cutting). cuda-validated." +geotiff,2026-05-18,2106,MEDIUM,3,"Sweep 2026-05-18 (deep-sweep-api-consistency-geotiff-2026-05-18-1779164255). 1 MEDIUM Cat 3 finding fixed in this branch: open_geotiff(max_cloud_bytes=...) was the only kwarg on the public reader/writer surface without a Python type annotation. Docstring already declared ``int or None``; the surface and the docs disagreed. Fix adds ``int | None`` to the annotation; default stays the module-internal _MAX_CLOUD_BYTES_SENTINEL. Regression test in test_open_geotiff_max_cloud_bytes_annot_2106.py pins the immediate gap and parametrises over every public reader/writer to catch future ungenerated annotations. Prior sweep findings (#1922/#1935 kwarg ordering, #2052 mask_nodata parity, #2097 GPU MinIsWhite, #2095 zero-band 3D writes, #1946 write_vrt path/vrt_path shim) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return path which is str | BinaryIO -- inspected and still LOW (callers do not substitute writers; the return-type drift is documented in each writer's docstring). Cross-cutting cross-module drift (chunk_size in reproject vs chunks in geotiff; target_crs vs crs) documented but not filed per sweep template (cross-cutting). cuda-validated." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 215fa354d..f46dd4dd6 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -249,7 +249,7 @@ def open_geotiff(source: str | BinaryIO, *, chunks: int | tuple | None = None, gpu: bool = False, max_pixels: int | None = None, - max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL, + max_cloud_bytes: int | None = _MAX_CLOUD_BYTES_SENTINEL, on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, diff --git a/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py b/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py new file mode 100644 index 000000000..6c8698032 --- /dev/null +++ b/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py @@ -0,0 +1,81 @@ +"""Regression test for #2106: every kwarg on the public read/write +entry points carries a type annotation. + +The original gap: ``open_geotiff(max_cloud_bytes=...)`` had no annotation +on its kwarg, while every other kwarg on the function -- and every kwarg +on every other public reader and writer in ``xrspatial.geotiff`` -- did. +``inspect.signature``, IDE autocomplete, Sphinx, and ``mypy --strict`` all +saw a bare parameter for the only fsspec-related kwarg on the public +read entry point, despite the docstring declaring ``int or None``. + +This test fixes the immediate gap (``max_cloud_bytes``) and pins every +other public reader/writer kwarg to a non-empty annotation so a future +addition cannot reopen the surface without surfacing in CI. +""" +from __future__ import annotations + +import inspect + +import pytest + +from xrspatial.geotiff import ( + open_geotiff, + read_geotiff_dask, + read_geotiff_gpu, + read_vrt, + to_geotiff, + write_geotiff_gpu, + write_vrt, +) + + +PUBLIC_ENTRY_POINTS = ( + open_geotiff, + read_geotiff_gpu, + read_geotiff_dask, + read_vrt, + to_geotiff, + write_geotiff_gpu, + write_vrt, +) + + +def test_open_geotiff_max_cloud_bytes_has_type_annotation(): + """Pin the #2106 fix: the kwarg the bug named carries ``int | None``.""" + sig = inspect.signature(open_geotiff) + param = sig.parameters["max_cloud_bytes"] + assert param.annotation is not inspect.Parameter.empty, ( + "open_geotiff(max_cloud_bytes=...) is missing a type annotation; " + "the docstring declares ``int or None`` so the surface should match." + ) + # Use ``str(...)`` rather than identity so the assertion survives the + # ``from __future__ import annotations`` lazy-eval form ``open_geotiff`` + # itself uses (its annotations come back as strings). + annotation_repr = str(param.annotation) + assert "int" in annotation_repr and "None" in annotation_repr, ( + f"open_geotiff(max_cloud_bytes=...) annotation should mention " + f"int and None; got {annotation_repr!r}" + ) + + +@pytest.mark.parametrize("fn", PUBLIC_ENTRY_POINTS, ids=lambda f: f.__name__) +def test_public_entry_point_kwargs_have_type_annotations(fn): + """Every kwarg on the public read/write surface carries an annotation. + + Catches future regressions of the same class as #2106: a kwarg added + to one entry point without an annotation while the rest of the + signature has them. + """ + sig = inspect.signature(fn) + missing = [ + name + for name, param in sig.parameters.items() + if param.annotation is inspect.Parameter.empty + ] + assert missing == [], ( + f"{fn.__name__} has kwargs without type annotations: {missing}. " + f"Add ``annotation`` to each so inspect.signature, IDE " + f"autocomplete, Sphinx, and mypy --strict all see the declared " + f"type. The docstring already declares the type for the kwargs " + f"in question (#2106 raised this for max_cloud_bytes)." + ) From ac20de48a3b7f0095ee19504c70f4f1cc1fc55b5 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 06:17:32 -0700 Subject: [PATCH 2/2] geotiff: address review nits on #2106 Two follow-ups from review on #2108: 1. ``open_geotiff(max_cloud_bytes=...)`` annotation: ``int | None`` is the caller-facing contract, but the default ``_MAX_CLOUD_BYTES_SENTINEL`` is an ``object()``. ``mypy --strict`` flagged ``Incompatible default for argument`` ([assignment]). Add ``# type: ignore[assignment]`` so the line is clean under strict mode while keeping the user-facing annotation untouched. The pre-existing ``on_gpu_failure`` and ``missing_sources`` sentinels follow the same pattern and remain out of scope. 2. ``test_open_geotiff_max_cloud_bytes_has_type_annotation``: the substring check ``"int" in s and "None" in s`` would also pass on ``Mapping[int, None]``. Pin the exact ``int | None`` form. The parametrized test below still acts as a presence-only check for the rest of the public reader/writer surface. --- xrspatial/geotiff/__init__.py | 2 +- ...test_open_geotiff_max_cloud_bytes_annot_2106.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index f46dd4dd6..6fe74d171 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -249,7 +249,7 @@ def open_geotiff(source: str | BinaryIO, *, chunks: int | tuple | None = None, gpu: bool = False, max_pixels: int | None = None, - max_cloud_bytes: int | None = _MAX_CLOUD_BYTES_SENTINEL, + max_cloud_bytes: int | None = _MAX_CLOUD_BYTES_SENTINEL, # type: ignore[assignment] on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, diff --git a/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py b/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py index 6c8698032..3bc613441 100644 --- a/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py +++ b/xrspatial/geotiff/tests/test_open_geotiff_max_cloud_bytes_annot_2106.py @@ -48,13 +48,15 @@ def test_open_geotiff_max_cloud_bytes_has_type_annotation(): "open_geotiff(max_cloud_bytes=...) is missing a type annotation; " "the docstring declares ``int or None`` so the surface should match." ) - # Use ``str(...)`` rather than identity so the assertion survives the - # ``from __future__ import annotations`` lazy-eval form ``open_geotiff`` - # itself uses (its annotations come back as strings). + # ``xrspatial.geotiff.__init__`` uses ``from __future__ import + # annotations``, so ``param.annotation`` comes back as the source + # string. Pin the exact PEP 604 form rather than ``"int" in s and + # "None" in s`` -- the looser check would also pass on something + # like ``Mapping[int, None]``. annotation_repr = str(param.annotation) - assert "int" in annotation_repr and "None" in annotation_repr, ( - f"open_geotiff(max_cloud_bytes=...) annotation should mention " - f"int and None; got {annotation_repr!r}" + assert annotation_repr == "int | None", ( + f"open_geotiff(max_cloud_bytes=...) annotation should be exactly " + f"``int | None`` to match the docstring; got {annotation_repr!r}" )