From 5a6a527081277759acad358713c35bbd336b0dc3 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 30 Apr 2024 15:59:03 +0200 Subject: [PATCH] Bug: Take signed bit into account ```python >>> (32768).bit_length() 16 ``` From https://docs.python.org/3/library/stdtypes.html#int.bit_length > Return the number of bits necessary to represent an integer in binary, excluding the sign and leading zeros Fixes #669 --- pyiceberg/utils/decimal.py | 4 ++-- tests/utils/test_decimal.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pyiceberg/utils/decimal.py b/pyiceberg/utils/decimal.py index c2125d6734..4432564dd1 100644 --- a/pyiceberg/utils/decimal.py +++ b/pyiceberg/utils/decimal.py @@ -59,9 +59,9 @@ def bytes_required(value: Union[int, Decimal]) -> int: int: the minimum number of bytes needed to serialize the value. """ if isinstance(value, int): - return (value.bit_length() + 7) // 8 + return (value.bit_length() + 8) // 8 elif isinstance(value, Decimal): - return (decimal_to_unscaled(value).bit_length() + 7) // 8 + return (decimal_to_unscaled(value).bit_length() + 8) // 8 raise ValueError(f"Unsupported value: {value}") diff --git a/tests/utils/test_decimal.py b/tests/utils/test_decimal.py index 683eab93fd..419cf05916 100644 --- a/tests/utils/test_decimal.py +++ b/tests/utils/test_decimal.py @@ -14,9 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from decimal import Decimal + import pytest -from pyiceberg.utils.decimal import decimal_required_bytes +from pyiceberg.utils.decimal import decimal_required_bytes, decimal_to_bytes def test_decimal_required_bytes() -> None: @@ -38,3 +40,10 @@ def test_decimal_required_bytes() -> None: with pytest.raises(ValueError) as exc_info: decimal_required_bytes(precision=-1) assert "(0, 40]" in str(exc_info.value) + + +def test_decimal_to_bytes() -> None: + # Check the boundary between 2 and 3 bytes. + # 2 bytes has a minimum of -32,768 and a maximum value of 32,767 (inclusive). + assert decimal_to_bytes(Decimal('32767.')) == b'\x7f\xff' + assert decimal_to_bytes(Decimal('32768.')) == b'\x00\x80\x00'