From ebf3a0295d331aaf29765ad02380d02426171921 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:27:00 -0700 Subject: [PATCH 1/2] geotiff: jit packbits_compress (#2049) PackBits encode was the last writer-side codec helper still running in pure Python. The kernel matches the shape of the existing lzw_compress path: an @ngjit function operates on preallocated uint8 buffers, and a thin bytes-in / bytes-out wrapper takes care of the conversion. Output sizing uses the TIFF spec bound (2 * src_len + 1). On 1 MB strips the JIT version runs at 0.6-1.7 ms/call versus 30-49 ms/call for the previous Python loop, a 30-50x speedup. --- xrspatial/geotiff/_compression.py | 78 +++++++++--- .../geotiff/tests/test_packbits_jit_2049.py | 113 ++++++++++++++++++ 2 files changed, 173 insertions(+), 18 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_packbits_jit_2049.py diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index 718189fac..ddc14b302 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -1018,37 +1018,79 @@ def packbits_decompress(data: bytes, expected_size: int = 0) -> bytes: return bytes(out) -def packbits_compress(data: bytes) -> bytes: - """Compress data using PackBits.""" - src = data if isinstance(data, (bytes, bytearray)) else bytes(data) - out = bytearray() +@ngjit +def _packbits_encode_kernel(src, src_len, dst, dst_cap): + """Encode src bytes as PackBits into dst, return number of bytes written. + + Numba cannot grow a ``bytearray``; the caller pre-allocates ``dst`` with + the worst-case size (``2 * src_len + 1``) and slices to the returned + length. The worst case is one literal-header byte for every input byte + (a degenerate stream where no byte equals its neighbour fills a fresh + 1-byte literal header every two payload bytes). + """ i = 0 - length = len(src) - while i < length: - # Check for a run of identical bytes + out_pos = 0 + while i < src_len: + # Scan for a run of identical bytes, capped at 128 j = i + 1 - while j < length and j - i < 128 and src[j] == src[i]: + while j < src_len and j - i < 128 and src[j] == src[i]: j += 1 run_len = j - i if run_len >= 3: - # Encode as run - out.append((256 - (run_len - 1)) & 0xFF) - out.append(src[i]) + # Encode as run: header byte is the signed value 1 - run_len + # stored as unsigned. (256 - (run_len - 1)) gives the same bit + # pattern as a signed int8 of value 1 - run_len. + if out_pos < dst_cap: + dst[out_pos] = np.uint8((256 - (run_len - 1)) & 0xFF) + out_pos += 1 + if out_pos < dst_cap: + dst[out_pos] = src[i] + out_pos += 1 i = j else: - # Literal run: accumulate non-repeating bytes + # Literal run: accumulate bytes that are not the start of a + # 3-byte run, capped at 128. lit_start = i i = j - while i < length and i - lit_start < 128: - # Check if a run starts here - if i + 2 < length and src[i] == src[i + 1] == src[i + 2]: + while i < src_len and i - lit_start < 128: + if i + 2 < src_len and src[i] == src[i + 1] and src[i + 1] == src[i + 2]: break i += 1 lit_len = i - lit_start - out.append(lit_len - 1) - out.extend(src[lit_start:lit_start + lit_len]) - return bytes(out) + if out_pos < dst_cap: + dst[out_pos] = np.uint8(lit_len - 1) + out_pos += 1 + k = 0 + while k < lit_len and out_pos < dst_cap: + dst[out_pos] = src[lit_start + k] + out_pos += 1 + k += 1 + return out_pos + + +def packbits_compress(data: bytes) -> bytes: + """Compress data using PackBits. + + Parameters + ---------- + data : bytes + Raw data to compress. + + Returns + ------- + bytes + """ + src = np.frombuffer(data, dtype=np.uint8) + src_len = len(src) + if src_len == 0: + return b'' + # Worst case: every byte becomes a (header, byte) pair; +1 covers the + # trailing header byte if encoding ends on a fresh literal of length 1. + dst_cap = 2 * src_len + 1 + dst = np.empty(dst_cap, dtype=np.uint8) + n = _packbits_encode_kernel(src, src_len, dst, dst_cap) + return dst[:n].tobytes() # -- JPEG codec (via Pillow) -------------------------------------------------- diff --git a/xrspatial/geotiff/tests/test_packbits_jit_2049.py b/xrspatial/geotiff/tests/test_packbits_jit_2049.py new file mode 100644 index 000000000..61cb41692 --- /dev/null +++ b/xrspatial/geotiff/tests/test_packbits_jit_2049.py @@ -0,0 +1,113 @@ +"""JIT'd PackBits encoder: round-trip and edge-case coverage. + +See issue #2049. ``packbits_compress`` was pure-Python; this file pins the +contract of the ``@ngjit`` rewrite against the decoder. +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff._compression import ( + packbits_compress, + packbits_decompress, +) +from xrspatial.geotiff._compression import _packbits_encode_kernel + + +def _roundtrip(data: bytes) -> None: + assert packbits_decompress(packbits_compress(data)) == data + + +class TestPackBitsJITRoundTrip: + """Encode-decode parity across the regime boundaries of PackBits.""" + + def test_empty(self): + _roundtrip(b'') + + def test_length_one(self): + _roundtrip(b'\x42') + + def test_length_two_same(self): + _roundtrip(b'\xAA\xAA') + + def test_length_two_different(self): + _roundtrip(b'\x00\xFF') + + def test_length_128_all_same(self): + # Hits the inner run cap exactly. + _roundtrip(b'\x55' * 128) + + def test_length_129_all_same(self): + # Forces a second header byte after the 128-byte run cap. + _roundtrip(b'\x55' * 129) + + def test_length_128_alternating(self): + # Hits the literal cap exactly; no run ever forms. + _roundtrip(bytes([i & 1 for i in range(128)])) + + def test_length_129_alternating(self): + # Forces a second literal header after the 128-byte literal cap. + _roundtrip(bytes([i & 1 for i in range(129)])) + + def test_alternating_short(self): + _roundtrip(b'\x00\xFF\x00\xFF\x00\xFF') + + def test_run_of_three_at_end(self): + # Boundary between literal scan and run detection. + _roundtrip(b'\x01\x02\x03\xAA\xAA\xAA') + + def test_run_of_three_at_start(self): + _roundtrip(b'\xAA\xAA\xAA\x01\x02\x03') + + def test_runs_and_literals_interleaved(self): + data = b'\x00' * 100 + b'\xFF' * 50 + bytes(range(200)) + _roundtrip(data) + + @pytest.mark.parametrize("seed", [0, 1, 42, 12345]) + def test_random_bytes(self, seed): + rng = np.random.default_rng(seed) + data = rng.integers(0, 256, size=1024, dtype=np.uint8).tobytes() + _roundtrip(data) + + def test_random_with_runs(self): + # Mix runs of varying length with random literals. + rng = np.random.default_rng(7) + chunks = [] + for _ in range(20): + if rng.random() < 0.5: + val = int(rng.integers(0, 256)) + length = int(rng.integers(1, 200)) + chunks.append(bytes([val]) * length) + else: + length = int(rng.integers(1, 200)) + chunks.append(rng.integers(0, 256, size=length, dtype=np.uint8).tobytes()) + _roundtrip(b''.join(chunks)) + + def test_all_zeros_large(self): + data = b'\x00' * 10_000 + compressed = packbits_compress(data) + # 10_000 bytes at run-cap 128 -> ceil(10_000 / 128) = 79 runs, + # each 2 bytes => 158 bytes total. + assert len(compressed) < len(data) // 50 + assert packbits_decompress(compressed) == data + + +class TestPackBitsJITKernel: + """The kernel itself is callable; sanity-check buffer mechanics.""" + + def test_kernel_returns_length(self): + src = np.array([1, 1, 1, 1, 1], dtype=np.uint8) + dst = np.empty(2 * len(src) + 1, dtype=np.uint8) + n = _packbits_encode_kernel(src, len(src), dst, len(dst)) + # 5 identical bytes -> one run: 2 bytes (header + value) + assert n == 2 + # Header is the signed int8 (1 - 5) = -4, stored as 256 - 4 = 252 + assert dst[0] == 252 + assert dst[1] == 1 + + def test_kernel_empty_input(self): + src = np.empty(0, dtype=np.uint8) + dst = np.empty(1, dtype=np.uint8) + n = _packbits_encode_kernel(src, 0, dst, 1) + assert n == 0 From 56d508afe52f8f12b5a1d443b2523878ae69c9de Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 19:47:21 -0700 Subject: [PATCH 2/2] geotiff: tighten packbits encoder bound comment, add cap tests (#2049) Self-review follow-up to #2081. The original kernel docstring described the worst case as "one literal header per input byte", which is not actually achievable: the encoder packs up to 128 bytes under a single literal header, so the tight upper bound is src_len + ceil(src_len / 128) + 1. The 2 * src_len + 1 allocation in the wrapper is still a safe overestimate; the comments now say so explicitly. Adds three tests: a golden literal-encoding check on the raw kernel, a parametrized buffer-cap invariant over the regime-boundary inputs, and a random sweep that re-checks the cap on 0..4096-byte streams. --- xrspatial/geotiff/_compression.py | 15 ++++--- .../geotiff/tests/test_packbits_jit_2049.py | 40 +++++++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index ddc14b302..3677327ad 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -1023,10 +1023,12 @@ def _packbits_encode_kernel(src, src_len, dst, dst_cap): """Encode src bytes as PackBits into dst, return number of bytes written. Numba cannot grow a ``bytearray``; the caller pre-allocates ``dst`` with - the worst-case size (``2 * src_len + 1``) and slices to the returned - length. The worst case is one literal-header byte for every input byte - (a degenerate stream where no byte equals its neighbour fills a fresh - 1-byte literal header every two payload bytes). + a worst-case size and slices to the returned length. PackBits encodes + each block as ``1 + L`` bytes for a literal of length L in [1, 128] and + ``2`` bytes for a run of length L in [3, 128], so the tightest upper + bound on output is ``src_len + ceil(src_len / 128) + 1`` (pure literal + input, plus one byte of slack for the final block). The caller uses + the looser, simpler bound ``2 * src_len + 1``, which is always safe. """ i = 0 out_pos = 0 @@ -1085,8 +1087,9 @@ def packbits_compress(data: bytes) -> bytes: src_len = len(src) if src_len == 0: return b'' - # Worst case: every byte becomes a (header, byte) pair; +1 covers the - # trailing header byte if encoding ends on a fresh literal of length 1. + # ``2 * src_len + 1`` is a safe overestimate of the tight PackBits bound + # ``src_len + ceil(src_len / 128) + 1``. The overhead is negligible at + # strip sizes and keeps the arithmetic obvious. dst_cap = 2 * src_len + 1 dst = np.empty(dst_cap, dtype=np.uint8) n = _packbits_encode_kernel(src, src_len, dst, dst_cap) diff --git a/xrspatial/geotiff/tests/test_packbits_jit_2049.py b/xrspatial/geotiff/tests/test_packbits_jit_2049.py index 61cb41692..396a200fb 100644 --- a/xrspatial/geotiff/tests/test_packbits_jit_2049.py +++ b/xrspatial/geotiff/tests/test_packbits_jit_2049.py @@ -111,3 +111,43 @@ def test_kernel_empty_input(self): dst = np.empty(1, dtype=np.uint8) n = _packbits_encode_kernel(src, 0, dst, 1) assert n == 0 + + def test_kernel_literal_golden(self): + # Three distinct bytes encode as a single literal header (lit_len-1=2) + # followed by the payload. + src = np.array([0x10, 0x20, 0x30], dtype=np.uint8) + dst = np.empty(2 * len(src) + 1, dtype=np.uint8) + n = _packbits_encode_kernel(src, len(src), dst, len(dst)) + assert n == 4 + assert dst[0] == 2 + assert list(dst[1:4]) == [0x10, 0x20, 0x30] + + +class TestPackBitsJITBufferCap: + """Output must always fit inside the worst-case allocation.""" + + @pytest.mark.parametrize( + "data", + [ + b'', + b'\x00', + b'\x00\xFF', + b'\x55' * 128, + b'\x55' * 129, + bytes([i & 1 for i in range(256)]), + bytes(range(256)), + ], + ) + def test_output_within_cap(self, data): + compressed = packbits_compress(data) + # The wrapper allocates 2 * src_len + 1 bytes for the encode buffer; + # the actual output must never exceed that bound. + assert len(compressed) <= 2 * len(data) + 1 + + def test_random_output_within_cap(self): + rng = np.random.default_rng(2049) + for _ in range(8): + length = int(rng.integers(0, 4096)) + data = rng.integers(0, 256, size=length, dtype=np.uint8).tobytes() + compressed = packbits_compress(data) + assert len(compressed) <= 2 * length + 1