From 96d3c42b1610dedf3679b46150c59b9b3e5031d8 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Mon, 18 May 2026 11:25:44 -0700
Subject: [PATCH 1/3] geotiff: bound HTTP full-image strip reads to declared
 byte size (#2051)

`_HTTPSource.read_all()` was unbounded: no Content-Length validation,
no streaming cap. The full-image stripped HTTP path in
`_fetch_decode_cog_http_strips` only ran `_check_dimensions` against
the TIFF header's declared pixel count, then handed the full body to
`_read_strips`. A tiny declared raster (100x100, well within
`_check_dimensions`) could still be served as a multi-gigabyte body
and the whole body was allocated before TIFF parsing rejected anything.

Fix (option b from the security report):

- `_HTTPSource.read_all()` gains a `max_bytes: int | None = None`
  parameter. When set, it rejects an oversized `Content-Length` up
  front via `OSError`, then streams the body (urllib3
  `preload_content=False` / stdlib `resp.read` in 64 KiB chunks) and
  aborts past `max_bytes + 1`. The `+ 1` is the over-shoot probe: a
  body that exactly matches the cap passes, but a server that omits
  or lies about Content-Length is caught the moment the first extra
  byte arrives.
- `_request()` gains `preload_content=False` plumbing and releases
  the connection on the redirect path so streaming responses do not
  leak pool slots.
- `_fetch_decode_cog_http_strips` computes the budget from the strip
  table (`max(offset + byte_count) + 4 MiB slack` for TIFF header /
  trailing metadata) and passes it through.
- New `_compute_full_image_byte_budget` helper handles the
  degenerate cases (missing or all-sparse strip table) by falling
  back to the per-strip safety cap.

`max_bytes=None` preserves the legacy unbounded behaviour for the
non-HTTP callers (cloud reads are already gated by
`max_cloud_bytes`; file reads are bounded by file size).

Tests cover:
- Content-Length advertised over budget -> OSError before the read.
- Server omits Content-Length entirely (chunked) -> streaming cap
  catches the over-shoot.
- Server lies low (advertises small, sends big) -> urllib3 truncates
  at the advertised length, locked in as the expected behaviour.
- Legitimate full-image COG reads still work end-to-end.
- Attacker pads a legitimate COG body with 64 MiB of zeros -> read
  rejected by either the Content-Length pre-flight or the streaming
  cap.
- Budget helper returns the strip-table maximum plus slack, and
  falls back to the per-strip cap for empty / degenerate tables.

Closes #2051.
---
 xrspatial/geotiff/_reader.py                  | 165 ++++++++-
 .../tests/test_http_read_all_bounded_2051.py  | 315 ++++++++++++++++++
 2 files changed, 476 insertions(+), 4 deletions(-)
 create mode 100644 xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py

diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index b8d289b60..d1defeb8f 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -935,7 +935,8 @@ def _get_pinned_pool(self, scheme: str, host: str, port: int | None,
             self._pinned_pools[key] = pool
         return pool
 
-    def _request(self, headers: dict | None = None):
+    def _request(self, headers: dict | None = None,
+                 preload_content: bool = True):
         """Issue a GET with manual, validated redirect following.
 
         urllib3's built-in redirect follower has no validation hook, so
@@ -949,6 +950,13 @@ def _request(self, headers: dict | None = None):
         that exists between ``getaddrinfo`` in the validator and the
         second ``getaddrinfo`` urllib3 would otherwise do at connect
         time. Issue #1846.
+
+        ``preload_content=False`` returns a streaming response: the body
+        is not buffered into ``resp.data`` and the caller must drain it
+        via ``resp.stream(...)``. Used by :meth:`read_all` when a
+        ``max_bytes`` budget is in play, so the body is bounded
+        on-the-wire instead of being fully allocated before the cap is
+        checked. Issue #2051.
         """
         from urllib.parse import urljoin
         timeout = self._urllib3_timeout()
@@ -961,11 +969,24 @@ def _request(self, headers: dict | None = None):
                 headers=headers,
                 timeout=timeout,
                 redirect=False,
+                preload_content=preload_content,
             )
             if 300 <= resp.status < 400 and resp.status != 304:
                 location = resp.headers.get('Location')
                 if not location:
                     return resp
+                # Release the redirect response's connection back to
+                # the pool. ``preload_content=True`` (the default) drains
+                # the body for us, but the streaming path
+                # (``preload_content=False``, used by ``read_all`` with a
+                # byte budget) leaves the connection borrowed -- if we
+                # do not release it here, subsequent hops will allocate
+                # fresh connections every time.
+                if not preload_content:
+                    try:
+                        resp.release_conn()
+                    except Exception:  # noqa: BLE001
+                        pass
                 # Resolve relative ``Location`` against the URL we just
                 # requested, not against ``self._url``: chained
                 # redirects can land us on a different origin.
@@ -1184,8 +1205,93 @@ def read_ranges_coalesced(
         merged_bytes = self.read_ranges(merged, max_workers=max_workers)
         return split_coalesced_bytes(merged_bytes, mapping)
 
-    def read_all(self) -> bytes:
-        return self._request().data
+    def read_all(self, max_bytes: int | None = None) -> bytes:
+        """Fetch the full body, optionally bounded by ``max_bytes``.
+
+        ``max_bytes`` caps both the advertised ``Content-Length`` (rejected
+        up front before any bytes are read into memory) and the actual
+        body size (streamed and aborted once ``max_bytes + 1`` bytes have
+        arrived). The ``+ 1`` is the over-shoot detector: a body that
+        exactly matches the cap passes, but a server that ignores or
+        lies about ``Content-Length`` and streams more bytes is caught
+        as soon as the first extra byte lands.
+
+        Without a cap, a tiny TIFF header (e.g. 100x100) that survives
+        :func:`_check_dimensions` can still be served as a multi-gigabyte
+        HTTP body and the whole body is allocated before TIFF parsing
+        gets a chance to reject it. Issue #2051.
+
+        ``max_bytes=None`` preserves the legacy unbounded behaviour for
+        callers that already gate the read upstream (e.g. cloud reads
+        gated by :data:`max_cloud_bytes`).
+        """
+        if max_bytes is None:
+            return self._request().data
+        # Stream the body so the cap is enforced before the bytes land
+        # in memory. ``preload_content=False`` makes urllib3 hand us
+        # the response without buffering ``resp.data``.
+        resp = self._request(preload_content=False)
+        try:
+            self._check_content_length(resp.headers, max_bytes)
+            return self._read_capped(resp, max_bytes)
+        finally:
+            try:
+                resp.release_conn()
+            except Exception:  # noqa: BLE001
+                pass
+
+    @staticmethod
+    def _check_content_length(headers, max_bytes: int) -> None:
+        """Reject a response whose advertised ``Content-Length`` exceeds the cap.
+
+        This is the cheap pre-flight check; we still cap the actual read
+        below in case the server omits the header or lies about it.
+        """
+        raw = None
+        try:
+            raw = headers.get('Content-Length')
+        except AttributeError:
+            return
+        if raw is None:
+            return
+        try:
+            declared = int(raw)
+        except (TypeError, ValueError):
+            return
+        if declared > max_bytes:
+            raise OSError(
+                f"HTTP response declares Content-Length={declared:,} "
+                f"bytes, which exceeds the byte budget of "
+                f"{max_bytes:,} bytes computed from the TIFF strip "
+                f"table. The file is malformed or attempting "
+                f"denial-of-service. Issue #2051."
+            )
+
+    @staticmethod
+    def _read_capped(resp, max_bytes: int) -> bytes:
+        """Stream-read a urllib3 response, aborting past ``max_bytes``.
+
+        Read at most ``max_bytes + 1`` bytes. The extra byte is the
+        over-shoot probe: if it arrives the server lied or omitted
+        ``Content-Length`` and tried to send a larger body. Raise
+        :class:`OSError` so callers that already handle network failures
+        also handle this.
+        """
+        chunks: list[bytes] = []
+        received = 0
+        for chunk in resp.stream(amt=65536, decode_content=True):
+            if not chunk:
+                continue
+            chunks.append(chunk)
+            received += len(chunk)
+            if received > max_bytes:
+                raise OSError(
+                    f"HTTP response body exceeded the byte budget of "
+                    f"{max_bytes:,} bytes (received {received:,} bytes "
+                    f"before abort). The server likely ignored or lied "
+                    f"about Content-Length. Issue #2051."
+                )
+        return b''.join(chunks)
 
     @property
     def size(self) -> int | None:
@@ -1640,6 +1746,49 @@ def _has_sparse(byte_counts) -> bool:
     return False
 
 
+#: Slack added to the strip-table byte budget for the TIFF header,
+#: trailing IFD chain, ExifIFD, GeoKey directory, GDAL_METADATA, and any
+#: ICC profile or XMP packet. 4 MiB is comfortable for real-world COGs
+#: (the prefetch path already tolerates up to ``MAX_HTTP_HEADER_BYTES``
+#: of header bytes) while still bounding the body away from gigabyte
+#: scale. Issue #2051.
+_FULL_IMAGE_BUDGET_HEADER_SLACK = 4 * 1024 * 1024
+
+
+def _compute_full_image_byte_budget(offsets, byte_counts) -> int:
+    """Compute an upper bound on the legitimate HTTP body size for a stripped TIFF.
+
+    A stripped TIFF body is laid out as: [TIFF header + IFDs + tag value
+    arrays] followed by strip payloads at the offsets listed in
+    ``StripOffsets``. The largest byte index any strip references is
+    ``max(offset + byte_count)`` across the strip table; the body cannot
+    legitimately extend past that point plus a small tail for trailing
+    metadata. We add :data:`_FULL_IMAGE_BUDGET_HEADER_SLACK` to cover the
+    header prologue (which lives at offset 0) and any tags that follow
+    the last strip. The cap is loose by design -- it exists to reject
+    bodies that are orders of magnitude larger than the file claims to
+    be, not to second-guess legitimate layouts.
+
+    If the strip table is missing or empty (sparse-only, malformed),
+    fall back to the per-strip safety cap so the read is still bounded.
+    Issue #2051.
+    """
+    fallback = _max_tile_bytes_from_env() + _FULL_IMAGE_BUDGET_HEADER_SLACK
+    if not offsets or not byte_counts:
+        return fallback
+    max_end = 0
+    for off, bc in zip(offsets, byte_counts):
+        try:
+            end = int(off) + int(bc)
+        except (TypeError, ValueError):
+            continue
+        if end > max_end:
+            max_end = end
+    if max_end <= 0:
+        return fallback
+    return max_end + _FULL_IMAGE_BUDGET_HEADER_SLACK
+
+
 # ---------------------------------------------------------------------------
 # Strip reader
 # ---------------------------------------------------------------------------
@@ -2391,7 +2540,15 @@ def _fetch_decode_cog_http_strips(
     # so the dim check uses their cap instead of the default 1B.
     if window is None:
         _check_dimensions(width, height, samples, max_pixels)
-        all_data = source.read_all()
+        # Bound the HTTP body to the byte size implied by the TIFF strip
+        # table. Without this cap, a tiny declared raster (which sails
+        # past ``_check_dimensions``) can still pull a multi-gigabyte
+        # body off the wire and into memory before ``_read_strips``
+        # gets a chance to reject anything. The strip table tells us
+        # the maximum legitimate byte offset; anything beyond that is
+        # either a malformed file or a hostile server. Issue #2051.
+        max_bytes = _compute_full_image_byte_budget(offsets, byte_counts)
+        all_data = source.read_all(max_bytes=max_bytes)
         return _read_strips(all_data, ifd, header, dtype,
                             max_pixels=max_pixels)
 
diff --git a/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py b/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
new file mode 100644
index 000000000..ccc313f9a
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
@@ -0,0 +1,315 @@
+"""Regression tests for issue #2051.
+
+``_HTTPSource.read_all()`` used to pull the full HTTP body unconditionally:
+no ``Content-Length`` check, no streaming cap. A TIFF whose header
+declares a tiny raster (which sails past ``_check_dimensions``) could
+still be served as a multi-gigabyte body and the whole thing landed in
+memory before TIFF parsing got a chance to reject anything.
+
+These tests stand up tiny loopback HTTP servers that misbehave in three
+ways:
+
+- declared ``Content-Length`` exceeds the byte budget,
+- ``Content-Length`` lies (says small, sends big),
+- ``Content-Length`` is omitted entirely (chunked transfer encoding).
+
+Plus a positive test that legitimate full-image reads still work, and
+unit tests for the ``_compute_full_image_byte_budget`` helper.
+"""
+from __future__ import annotations
+
+import http.server
+import socketserver
+import threading
+
+import numpy as np
+import pytest
+
+from xrspatial.geotiff._reader import (
+    _HTTPSource,
+    _compute_full_image_byte_budget,
+    _FULL_IMAGE_BUDGET_HEADER_SLACK,
+    _read_cog_http,
+)
+from xrspatial.geotiff._writer import write
+
+
+# ---------------------------------------------------------------------------
+# Server helpers
+# ---------------------------------------------------------------------------
+
+class _BaseHandler(http.server.BaseHTTPRequestHandler):
+    payload: bytes = b''
+    # Subclasses override these to fake misbehaviour.
+    lie_content_length: int | None = None
+    drop_content_length: bool = False
+    truncated_payload: bytes | None = None
+
+    def log_message(self, *_args, **_kwargs):
+        pass
+
+
+def _serve(handler_cls):
+    httpd = socketserver.TCPServer(('127.0.0.1', 0), handler_cls)
+    port = httpd.server_address[1]
+    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
+    thread.start()
+    return f'http://127.0.0.1:{port}/cog.tif', httpd, thread
+
+
+def _stop(httpd):
+    httpd.shutdown()
+    httpd.server_close()
+
+
+@pytest.fixture(autouse=True)
+def _allow_loopback(monkeypatch):
+    # Loopback addresses are blocked by the SSRF allow-list; the escape
+    # hatch lets the test reach 127.0.0.1.
+    monkeypatch.setenv('XRSPATIAL_GEOTIFF_ALLOW_PRIVATE_HOSTS', '1')
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for the budget helper
+# ---------------------------------------------------------------------------
+
+def test_budget_uses_max_strip_end_plus_slack():
+    """Budget is ``max(offset + byte_count) + slack`` over the strip table."""
+    offsets = [1024, 5000, 100_000]
+    byte_counts = [512, 1024, 4096]
+    budget = _compute_full_image_byte_budget(offsets, byte_counts)
+    # Largest end is 100_000 + 4096 = 104_096
+    assert budget == 104_096 + _FULL_IMAGE_BUDGET_HEADER_SLACK
+
+
+def test_budget_empty_strip_table_falls_back_to_per_strip_cap():
+    """Empty / missing strip table falls back to the per-strip safety cap."""
+    budget = _compute_full_image_byte_budget(None, None)
+    assert budget > 0
+    budget_empty = _compute_full_image_byte_budget([], [])
+    assert budget_empty > 0
+
+
+def test_budget_all_sparse_falls_back_to_per_strip_cap():
+    """A strip table where every strip is sparse (byte_count=0 and
+    offset=0) is degenerate; the helper falls back rather than picking
+    a useless cap of zero."""
+    offsets = [0, 0, 0]
+    byte_counts = [0, 0, 0]
+    budget = _compute_full_image_byte_budget(offsets, byte_counts)
+    # Falls back to per-strip cap + slack, not 0.
+    assert budget > _FULL_IMAGE_BUDGET_HEADER_SLACK
+
+
+# ---------------------------------------------------------------------------
+# read_all with a byte budget
+# ---------------------------------------------------------------------------
+
+def test_read_all_no_budget_returns_full_body():
+    """Without ``max_bytes`` the legacy unbounded behaviour is preserved."""
+
+    class _Handler(_BaseHandler):
+        payload = b'A' * 1024
+
+        def do_GET(self):  # noqa: N802
+            self.send_response(200)
+            self.send_header('Content-Length', str(len(self.payload)))
+            self.end_headers()
+            self.wfile.write(self.payload)
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        data = src.read_all()
+        assert data == b'A' * 1024
+    finally:
+        _stop(httpd)
+
+
+def test_read_all_rejects_oversized_content_length():
+    """Server advertises a Content-Length larger than the budget --
+    rejected up front via OSError before any body is read."""
+
+    class _Handler(_BaseHandler):
+        payload = b'B' * 2048
+
+        def do_GET(self):  # noqa: N802
+            self.send_response(200)
+            self.send_header('Content-Length', str(len(self.payload)))
+            self.end_headers()
+            self.wfile.write(self.payload)
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        with pytest.raises(OSError, match="Content-Length"):
+            src.read_all(max_bytes=1024)
+    finally:
+        _stop(httpd)
+
+
+def test_read_all_truncates_when_server_lies_about_content_length_small():
+    """Server lies low: advertises a small Content-Length but sends a
+    much larger body. urllib3 trusts the advertised length and truncates
+    at the byte count the server declared, so the client is already
+    protected -- the extra bytes never reach Python memory. The cap is
+    irrelevant on this path because the body the caller sees never
+    exceeds the (truthful or lying) Content-Length. Lock in the
+    truncation behaviour so a future urllib3 / stdlib change does not
+    quietly turn this back into a vector."""
+
+    class _Handler(_BaseHandler):
+        # 100 KiB body, but advertised as 100 bytes.
+        big_body = b'L' * (100 * 1024)
+        lied_length = 100
+
+        def do_GET(self):  # noqa: N802
+            self.send_response(200)
+            self.send_header('Content-Length', str(self.lied_length))
+            self.end_headers()
+            self.wfile.write(self.big_body)
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        # Budget is 1024 bytes, server says 100 -> pre-flight passes.
+        # The body returned is the 100 bytes the server claimed, not the
+        # 100 KiB it tried to send.
+        data = src.read_all(max_bytes=1024)
+        assert len(data) <= 100, (
+            f"Got {len(data)} bytes from a server that advertised 100; "
+            f"the HTTP client failed to truncate at Content-Length and "
+            f"the byte budget did not catch the over-shoot."
+        )
+    finally:
+        _stop(httpd)
+
+
+def test_read_all_catches_missing_content_length():
+    """Server omits Content-Length and uses chunked transfer encoding.
+    The pre-flight check has nothing to look at; the streaming cap must
+    still catch the over-sized body."""
+
+    class _Handler(_BaseHandler):
+        def do_GET(self):  # noqa: N802
+            body = b'C' * (100 * 1024)
+            self.send_response(200)
+            # No Content-Length header at all.
+            self.send_header('Transfer-Encoding', 'chunked')
+            self.end_headers()
+            # Send as a single chunk.
+            self.wfile.write(f'{len(body):x}\r\n'.encode('ascii'))
+            self.wfile.write(body)
+            self.wfile.write(b'\r\n0\r\n\r\n')
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        with pytest.raises(OSError, match="exceeded the byte budget"):
+            src.read_all(max_bytes=1024)
+    finally:
+        _stop(httpd)
+
+
+def test_read_all_passes_when_body_fits_budget():
+    """Legitimate path: body equals the budget exactly, returns cleanly."""
+
+    class _Handler(_BaseHandler):
+        payload = b'D' * 1024
+
+        def do_GET(self):  # noqa: N802
+            self.send_response(200)
+            self.send_header('Content-Length', str(len(self.payload)))
+            self.end_headers()
+            self.wfile.write(self.payload)
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        data = src.read_all(max_bytes=2048)
+        assert data == b'D' * 1024
+    finally:
+        _stop(httpd)
+
+
+# ---------------------------------------------------------------------------
+# End-to-end COG read
+# ---------------------------------------------------------------------------
+
+class _RangeHandler(_BaseHandler):
+    """Honours Range requests; serves the full body on a no-Range GET."""
+
+    def do_GET(self):  # noqa: N802
+        rng = self.headers.get('Range')
+        if rng and rng.startswith('bytes='):
+            spec = rng[len('bytes='):]
+            start_s, _, end_s = spec.partition('-')
+            start = int(start_s)
+            end = int(end_s) if end_s else len(self.payload) - 1
+            chunk = self.payload[start:end + 1]
+            self.send_response(206)
+            self.send_header(
+                'Content-Range',
+                f'bytes {start}-{start + len(chunk) - 1}/{len(self.payload)}',
+            )
+            self.send_header('Content-Length', str(len(chunk)))
+            self.end_headers()
+            self.wfile.write(chunk)
+            return
+        self.send_response(200)
+        self.send_header('Content-Length', str(len(self.payload)))
+        self.end_headers()
+        self.wfile.write(self.payload)
+
+
+def _serve_payload(payload: bytes):
+    handler_cls = type(
+        'BoundRangeHandler', (_RangeHandler,), {'payload': payload}
+    )
+    return _serve(handler_cls)
+
+
+def test_full_image_http_read_still_works_for_legitimate_cog(tmp_path):
+    """Sanity: with the cap in place, a normal stripped COG still reads
+    cleanly end-to-end. The strip-table-derived budget is loose enough
+    to cover the real on-wire body."""
+    arr = np.arange(64 * 64, dtype=np.float32).reshape(64, 64)
+    path = str(tmp_path / 'legit_2051.tif')
+    # Stripped (not tiled) to exercise the strips path. ``cog=True``
+    # writes COG-friendly tag ordering but stripped layout is the
+    # default for non-tiled writes.
+    write(arr, path, compression='deflate', tiled=False)
+
+    with open(path, 'rb') as f:
+        payload = f.read()
+
+    url, httpd, _ = _serve_payload(payload)
+    try:
+        result, _geo = _read_cog_http(url)
+        np.testing.assert_array_equal(result, arr)
+    finally:
+        _stop(httpd)
+
+
+def test_full_image_http_read_rejects_padded_body(tmp_path):
+    """Attack scenario: a legitimate TIFF header is followed by gigabytes
+    of garbage. The strip-table-derived budget rejects the body before
+    it can be allocated."""
+    arr = np.arange(32 * 32, dtype=np.float32).reshape(32, 32)
+    path = str(tmp_path / 'padded_2051.tif')
+    write(arr, path, compression='deflate', tiled=False)
+
+    with open(path, 'rb') as f:
+        legit_payload = f.read()
+
+    # Append 64 MiB of zeros to the body. The strip table only covers
+    # the first len(legit_payload) bytes; anything past max(offset +
+    # byte_count) + slack is over-budget.
+    bloated = legit_payload + (b'\x00' * (64 * 1024 * 1024))
+
+    url, httpd, _ = _serve_payload(bloated)
+    try:
+        with pytest.raises(OSError, match="Content-Length|byte budget"):
+            _read_cog_http(url)
+    finally:
+        _stop(httpd)

From a034fac7e9886f9a5c1889a76083e42ba7b505e1 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Mon, 18 May 2026 11:29:09 -0700
Subject: [PATCH 2/3] geotiff: address review feedback on read_all byte-budget
 (#2051)

- Stdlib fallback: add three tests that force ``self._pool = None`` so
  the urllib.request branch in ``read_all`` is actually exercised. One
  for the Content-Length pre-flight check, one for the streaming cap
  on chunked bodies, and one for the legitimate-body pass-through.
- Redirect on the streaming path: drain the redirect response body
  before ``release_conn`` so urllib3 can return the connection to the
  pool instead of closing it. 3xx bodies are tiny so the drain is
  cheap.
- Doc tweak in ``_check_content_length``: spell out that returning
  silently on missing / unparseable headers is intentional because
  the streaming cap is the real defence.
- Test docstring fix: the padded-body test rejects before the body
  is fully buffered, not before it is allocated at all.
---
 xrspatial/geotiff/_reader.py                  | 15 +++-
 .../tests/test_http_read_all_bounded_2051.py  | 82 ++++++++++++++++++-
 2 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index d1defeb8f..ca05daef7 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -981,8 +981,15 @@ def _request(self, headers: dict | None = None,
                 # (``preload_content=False``, used by ``read_all`` with a
                 # byte budget) leaves the connection borrowed -- if we
                 # do not release it here, subsequent hops will allocate
-                # fresh connections every time.
+                # fresh connections every time. Drain first so urllib3
+                # can return the connection to the pool instead of
+                # closing it; a 3xx body is bounded by Content-Length so
+                # the drain is cheap.
                 if not preload_content:
+                    try:
+                        resp.drain_conn()
+                    except Exception:  # noqa: BLE001
+                        pass
                     try:
                         resp.release_conn()
                     except Exception:  # noqa: BLE001
@@ -1246,6 +1253,12 @@ def _check_content_length(headers, max_bytes: int) -> None:
 
         This is the cheap pre-flight check; we still cap the actual read
         below in case the server omits the header or lies about it.
+
+        Missing or unparseable ``Content-Length`` returns silently --
+        the streaming cap in :meth:`_read_capped_urllib3` /
+        :meth:`_read_capped_stdlib` is the real defence and will catch
+        an over-sized body whether the header was honest, dishonest, or
+        absent.
         """
         raw = None
         try:
diff --git a/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py b/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
index ccc313f9a..42c23b522 100644
--- a/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
+++ b/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
@@ -232,6 +232,81 @@ def do_GET(self):  # noqa: N802
         _stop(httpd)
 
 
+# ---------------------------------------------------------------------------
+# Stdlib fallback (urllib3 unavailable)
+# ---------------------------------------------------------------------------
+
+def test_read_all_stdlib_fallback_rejects_oversized_content_length():
+    """When urllib3 is not in play, ``read_all`` drops to stdlib
+    ``urllib.request``. The Content-Length pre-flight check must still
+    fire on that path."""
+
+    class _Handler(_BaseHandler):
+        payload = b'E' * 2048
+
+        def do_GET(self):  # noqa: N802
+            self.send_response(200)
+            self.send_header('Content-Length', str(len(self.payload)))
+            self.end_headers()
+            self.wfile.write(self.payload)
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        # Force the stdlib branch by dropping the pool reference.
+        src._pool = None
+        with pytest.raises(OSError, match="Content-Length"):
+            src.read_all(max_bytes=1024)
+    finally:
+        _stop(httpd)
+
+
+def test_read_all_stdlib_fallback_catches_missing_content_length():
+    """Stdlib path with chunked-encoded body must also cap on the
+    streamed bytes."""
+
+    class _Handler(_BaseHandler):
+        def do_GET(self):  # noqa: N802
+            body = b'F' * (100 * 1024)
+            self.send_response(200)
+            self.send_header('Transfer-Encoding', 'chunked')
+            self.end_headers()
+            self.wfile.write(f'{len(body):x}\r\n'.encode('ascii'))
+            self.wfile.write(body)
+            self.wfile.write(b'\r\n0\r\n\r\n')
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        src._pool = None
+        with pytest.raises(OSError, match="exceeded the byte budget"):
+            src.read_all(max_bytes=1024)
+    finally:
+        _stop(httpd)
+
+
+def test_read_all_stdlib_fallback_passes_when_body_fits():
+    """Stdlib path returns the body cleanly when it fits the budget."""
+
+    class _Handler(_BaseHandler):
+        payload = b'G' * 1024
+
+        def do_GET(self):  # noqa: N802
+            self.send_response(200)
+            self.send_header('Content-Length', str(len(self.payload)))
+            self.end_headers()
+            self.wfile.write(self.payload)
+
+    url, httpd, _ = _serve(_Handler)
+    try:
+        src = _HTTPSource(url)
+        src._pool = None
+        data = src.read_all(max_bytes=2048)
+        assert data == b'G' * 1024
+    finally:
+        _stop(httpd)
+
+
 # ---------------------------------------------------------------------------
 # End-to-end COG read
 # ---------------------------------------------------------------------------
@@ -292,9 +367,10 @@ def test_full_image_http_read_still_works_for_legitimate_cog(tmp_path):
 
 
 def test_full_image_http_read_rejects_padded_body(tmp_path):
-    """Attack scenario: a legitimate TIFF header is followed by gigabytes
-    of garbage. The strip-table-derived budget rejects the body before
-    it can be allocated."""
+    """Attack scenario: a legitimate TIFF header is followed by extra
+    garbage past what the strip table accounts for. The
+    strip-table-derived budget rejects the body before it is fully
+    buffered into memory."""
     arr = np.arange(32 * 32, dtype=np.float32).reshape(32, 32)
     path = str(tmp_path / 'padded_2051.tif')
     write(arr, path, compression='deflate', tiled=False)

From 8cd5172c5b4153ddb993bbcf88cfc766844021b4 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Mon, 18 May 2026 13:34:52 -0700
Subject: [PATCH 3/3] geotiff: drop stdlib-fallback tests now that urllib3 is
 required (#2051)

#2050 / #2055 dropped the stdlib ``urllib.request`` HTTP fallback
and made urllib3 a hard dependency. The three
``test_read_all_stdlib_fallback_*`` cells that exercised the
fallback's byte-budget enforcement no longer have a code path to
hit; the urllib3-only equivalents above keep the contract covered.
---
 .../tests/test_http_read_all_bounded_2051.py  | 77 ++-----------------
 1 file changed, 8 insertions(+), 69 deletions(-)

diff --git a/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py b/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
index 42c23b522..379bc8d82 100644
--- a/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
+++ b/xrspatial/geotiff/tests/test_http_read_all_bounded_2051.py
@@ -236,75 +236,14 @@ def do_GET(self):  # noqa: N802
 # Stdlib fallback (urllib3 unavailable)
 # ---------------------------------------------------------------------------
 
-def test_read_all_stdlib_fallback_rejects_oversized_content_length():
-    """When urllib3 is not in play, ``read_all`` drops to stdlib
-    ``urllib.request``. The Content-Length pre-flight check must still
-    fire on that path."""
-
-    class _Handler(_BaseHandler):
-        payload = b'E' * 2048
-
-        def do_GET(self):  # noqa: N802
-            self.send_response(200)
-            self.send_header('Content-Length', str(len(self.payload)))
-            self.end_headers()
-            self.wfile.write(self.payload)
-
-    url, httpd, _ = _serve(_Handler)
-    try:
-        src = _HTTPSource(url)
-        # Force the stdlib branch by dropping the pool reference.
-        src._pool = None
-        with pytest.raises(OSError, match="Content-Length"):
-            src.read_all(max_bytes=1024)
-    finally:
-        _stop(httpd)
-
-
-def test_read_all_stdlib_fallback_catches_missing_content_length():
-    """Stdlib path with chunked-encoded body must also cap on the
-    streamed bytes."""
-
-    class _Handler(_BaseHandler):
-        def do_GET(self):  # noqa: N802
-            body = b'F' * (100 * 1024)
-            self.send_response(200)
-            self.send_header('Transfer-Encoding', 'chunked')
-            self.end_headers()
-            self.wfile.write(f'{len(body):x}\r\n'.encode('ascii'))
-            self.wfile.write(body)
-            self.wfile.write(b'\r\n0\r\n\r\n')
-
-    url, httpd, _ = _serve(_Handler)
-    try:
-        src = _HTTPSource(url)
-        src._pool = None
-        with pytest.raises(OSError, match="exceeded the byte budget"):
-            src.read_all(max_bytes=1024)
-    finally:
-        _stop(httpd)
-
-
-def test_read_all_stdlib_fallback_passes_when_body_fits():
-    """Stdlib path returns the body cleanly when it fits the budget."""
-
-    class _Handler(_BaseHandler):
-        payload = b'G' * 1024
-
-        def do_GET(self):  # noqa: N802
-            self.send_response(200)
-            self.send_header('Content-Length', str(len(self.payload)))
-            self.end_headers()
-            self.wfile.write(self.payload)
-
-    url, httpd, _ = _serve(_Handler)
-    try:
-        src = _HTTPSource(url)
-        src._pool = None
-        data = src.read_all(max_bytes=2048)
-        assert data == b'G' * 1024
-    finally:
-        _stop(httpd)
+# The stdlib ``urllib.request`` fallback path was removed in #2050 /
+# #2055 (urllib3 is now a hard dependency). The three tests that
+# previously covered the fallback's byte-budget enforcement no longer
+# have a code path to exercise; the urllib3-only equivalents above
+# (test_read_all_rejects_oversized_content_length,
+# test_read_all_catches_missing_content_length,
+# test_read_all_passes_when_body_fits_budget) keep the contract
+# covered.
 
 
 # ---------------------------------------------------------------------------