diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 97e7e08364e0bd..c539345e598777 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -3173,6 +3173,30 @@ objects. .. versionadded:: 3.14 + .. method:: take_bytes(n=None, /) + + Remove the first *n* bytes from the bytearray and return them as an immutable + :class:`bytes`. + By default (if *n* is ``None``), return all bytes and clear the bytearray. + + If *n* is negative, index from the end and take the first :func:`len` + plus *n* bytes. If *n* is out of bounds, raise :exc:`IndexError`. + + Taking less than the full length will leave remaining bytes in the + :class:`bytearray`, which requires a copy. If the remaining bytes should be + discarded, use :func:`~bytearray.resize` or :keyword:`del` to truncate + then :func:`~bytearray.take_bytes` without a size. + + .. impl-detail:: + + Taking all bytes is a zero-copy operation. + + .. versionadded:: next + + See the :ref:`What's New <whatsnew315-bytearray-take-bytes>` entry for + common code patterns which can be optimized with + :func:`bytearray.take_bytes`. + Since bytearray objects are sequences of integers (akin to a list), for a bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytearray object of length 1. (This contrasts with text strings, where diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 3cb766978a7217..d7c9a41eeb2759 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -307,6 +307,86 @@ Other language changes not only integers or floats, although this does not improve precision. (Contributed by Serhiy Storchaka in :gh:`67795`.) +.. _whatsnew315-bytearray-take-bytes: + +* Added :meth:`bytearray.take_bytes(n=None, /) <bytearray.take_bytes>` to take + bytes out of a :class:`bytearray` without copying. This enables optimizing code + which must return :class:`bytes` after working with a mutable buffer of bytes + such as data buffering, network protocol parsing, encoding, decoding, + and compression. 
Common code patterns which can be optimized with + :func:`~bytearray.take_bytes` are listed below. + + (Contributed by Cody Maloney in :gh:`139871`.) + + .. list-table:: Suggested Optimizing Refactors + :header-rows: 1 + + * - Description + - Old + - New + + * - Return :class:`bytes` after working with :class:`bytearray` + - .. code:: python + + def read() -> bytes: + buffer = bytearray(1024) + ... + return bytes(buffer) + + - .. code:: python + + def read() -> bytes: + buffer = bytearray(1024) + ... + return buffer.take_bytes() + + * - Empty a buffer getting the bytes + - .. code:: python + + buffer = bytearray(1024) + ... + data = bytes(buffer) + buffer.clear() + + - .. code:: python + + buffer = bytearray(1024) + ... + data = buffer.take_bytes() + + * - Split a buffer at a specific separator + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = bytes(buffer[:n + 1]) + del buffer[:n + 1] + assert data == b'abc\n' + assert buffer == bytearray(b'def') + + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = buffer.take_bytes(n + 1) + + * - Split a buffer at a specific separator; discard after the separator + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = bytes(buffer[:n]) + buffer.clear() + assert data == b'abc' + assert len(buffer) == 0 + + - .. 
code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + buffer.resize(n) + data = buffer.take_bytes() + * Many functions related to compiling or parsing Python code, such as :func:`compile`, :func:`ast.parse`, :func:`symtable.symtable`, and :func:`importlib.abc.InspectLoader.source_to_code`, now allow to pass diff --git a/Include/cpython/bytearrayobject.h b/Include/cpython/bytearrayobject.h index 4dddef713ce097..1edd082074206c 100644 --- a/Include/cpython/bytearrayobject.h +++ b/Include/cpython/bytearrayobject.h @@ -5,25 +5,25 @@ /* Object layout */ typedef struct { PyObject_VAR_HEAD - Py_ssize_t ob_alloc; /* How many bytes allocated in ob_bytes */ + /* How many bytes allocated in ob_bytes + + In the current implementation this is equivalent to Py_SIZE(ob_bytes_object). + The value is always loaded and stored atomically for thread safety. + There are API compatibility concerns with removing it, so it is kept for now. */ + Py_ssize_t ob_alloc; char *ob_bytes; /* Physical backing buffer */ char *ob_start; /* Logical start inside ob_bytes */ Py_ssize_t ob_exports; /* How many buffer exports */ + PyObject *ob_bytes_object; /* PyBytes for zero-copy bytes conversion */ } PyByteArrayObject; -PyAPI_DATA(char) _PyByteArray_empty_string[]; - /* Macros and static inline functions, trading safety for speed */ #define _PyByteArray_CAST(op) \ (assert(PyByteArray_Check(op)), _Py_CAST(PyByteArrayObject*, op)) static inline char* PyByteArray_AS_STRING(PyObject *op) { - PyByteArrayObject *self = _PyByteArray_CAST(op); - if (Py_SIZE(self)) { - return self->ob_start; - } - return _PyByteArray_empty_string; + return _PyByteArray_CAST(op)->ob_start; } #define PyByteArray_AS_STRING(self) PyByteArray_AS_STRING(_PyObject_CAST(self)) diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index c7bc53b6073770..8e8fa696ee0350 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -60,6 +60,14 @@ 
PyAPI_FUNC(void) _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, const char* src, Py_ssize_t len_src); +/* _PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation + for a bytes object of length n should request PyBytesObject_SIZE + n bytes. + + Using _PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves + 3 or 7 bytes per bytes object allocation on a typical system. +*/ +#define _PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) + /* --- PyBytesWriter ------------------------------------------------------ */ struct PyBytesWriter { diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index e012042159d223..86898bfcab9135 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -1397,6 +1397,16 @@ def test_clear(self): b.append(ord('p')) self.assertEqual(b, b'p') + # Cleared object should be empty. + b = bytearray(b'abc') + b.clear() + self.assertEqual(b.__alloc__(), 0) + base_size = sys.getsizeof(bytearray()) + self.assertEqual(sys.getsizeof(b), base_size) + c = b.copy() + self.assertEqual(c.__alloc__(), 0) + self.assertEqual(sys.getsizeof(c), base_size) + def test_copy(self): b = bytearray(b'abc') bb = b.copy() @@ -1458,6 +1468,61 @@ def test_resize(self): self.assertRaises(MemoryError, bytearray().resize, sys.maxsize) self.assertRaises(MemoryError, bytearray(1000).resize, sys.maxsize) + def test_take_bytes(self): + ba = bytearray(b'ab') + self.assertEqual(ba.take_bytes(), b'ab') + self.assertEqual(len(ba), 0) + self.assertEqual(ba, bytearray(b'')) + self.assertEqual(ba.__alloc__(), 0) + base_size = sys.getsizeof(bytearray()) + self.assertEqual(sys.getsizeof(ba), base_size) + + # Positive and negative slicing. 
+ ba = bytearray(b'abcdef') + self.assertEqual(ba.take_bytes(1), b'a') + self.assertEqual(ba, bytearray(b'bcdef')) + self.assertEqual(len(ba), 5) + self.assertEqual(ba.take_bytes(-5), b'') + self.assertEqual(ba, bytearray(b'bcdef')) + self.assertEqual(len(ba), 5) + self.assertEqual(ba.take_bytes(-3), b'bc') + self.assertEqual(ba, bytearray(b'def')) + self.assertEqual(len(ba), 3) + self.assertEqual(ba.take_bytes(3), b'def') + self.assertEqual(ba, bytearray(b'')) + self.assertEqual(len(ba), 0) + + # Take nothing from emptiness. + self.assertEqual(ba.take_bytes(0), b'') + self.assertEqual(ba.take_bytes(), b'') + self.assertEqual(ba.take_bytes(None), b'') + + # Out of bounds, bad take value. + self.assertRaises(IndexError, ba.take_bytes, -1) + self.assertRaises(TypeError, ba.take_bytes, 3.14) + ba = bytearray(b'abcdef') + self.assertRaises(IndexError, ba.take_bytes, 7) + + # Offset between physical and logical start (ob_bytes != ob_start). + ba = bytearray(b'abcde') + del ba[:2] + self.assertEqual(ba, bytearray(b'cde')) + self.assertEqual(ba.take_bytes(), b'cde') + + # Overallocation at end. + ba = bytearray(b'abcde') + del ba[-2:] + self.assertEqual(ba, bytearray(b'abc')) + self.assertEqual(ba.take_bytes(), b'abc') + ba = bytearray(b'abcde') + ba.resize(4) + self.assertEqual(ba.take_bytes(), b'abcd') + + # Take of a bytearray with references should fail. + ba = bytearray(b'abc') + with memoryview(ba) as mv: + self.assertRaises(BufferError, ba.take_bytes) + self.assertEqual(ba.take_bytes(), b'abc') def test_setitem(self): def setitem_as_mapping(b, i, val): @@ -2564,6 +2629,18 @@ def zfill(b, a): c = a.zfill(0x400000) assert not c or c[-1] not in (0xdd, 0xcd) + def take_bytes(b, a): # MODIFIES! + b.wait() + c = a.take_bytes() + assert not c or c[0] == 48 # '0' + + def take_bytes_n(b, a): # MODIFIES! 
+ b.wait() + try: + c = a.take_bytes(10) + assert c == b'0123456789' + except IndexError: pass + def check(funcs, a=None, *args): if a is None: a = bytearray(b'0' * 0x400000) @@ -2625,6 +2702,10 @@ def check(funcs, a=None, *args): check([clear] + [startswith] * 10) check([clear] + [strip] * 10) + check([clear] + [take_bytes] * 10) + check([take_bytes_n] * 10, bytearray(b'0123456789' * 0x400)) + check([take_bytes_n] * 10, bytearray(b'0123456789' * 5)) + check([clear] + [contains] * 10) check([clear] + [subscript] * 10) check([clear2] + [ass_subscript2] * 10, None, bytearray(b'0' * 0x400000)) diff --git a/Lib/test/test_capi/test_bytearray.py b/Lib/test/test_capi/test_bytearray.py index 52565ea34c61b8..cb7ad8b22252d9 100644 --- a/Lib/test/test_capi/test_bytearray.py +++ b/Lib/test/test_capi/test_bytearray.py @@ -1,3 +1,4 @@ +import sys import unittest from test.support import import_helper @@ -55,7 +56,9 @@ def test_fromstringandsize(self): self.assertEqual(fromstringandsize(b'', 0), bytearray()) self.assertEqual(fromstringandsize(NULL, 0), bytearray()) self.assertEqual(len(fromstringandsize(NULL, 3)), 3) - self.assertRaises(MemoryError, fromstringandsize, NULL, PY_SSIZE_T_MAX) + self.assertRaises(OverflowError, fromstringandsize, NULL, PY_SSIZE_T_MAX) + self.assertRaises(OverflowError, fromstringandsize, NULL, + PY_SSIZE_T_MAX-sys.getsizeof(b'') + 1) self.assertRaises(SystemError, fromstringandsize, b'abc', -1) self.assertRaises(SystemError, fromstringandsize, b'abc', PY_SSIZE_T_MIN) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 3ceed019ac43cf..9d3248d972e8d1 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1583,7 +1583,7 @@ def test_objecttypes(self): samples = [b'', b'u'*100000] for sample in samples: x = bytearray(sample) - check(x, vsize('n2Pi') + x.__alloc__()) + check(x, vsize('n2PiP') + x.__alloc__()) # bytearray_iterator check(iter(bytearray()), size('nP')) # bytes diff --git 
a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst new file mode 100644 index 00000000000000..d4b8578afe3afc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-14-18-24-16.gh-issue-139871.SWtuUz.rst @@ -0,0 +1,2 @@ +Update :class:`bytearray` to use a :class:`bytes` under the hood as its buffer +and add :func:`bytearray.take_bytes` to take it out. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index a73bfff340ce48..99bfdec89f6c3a 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -17,8 +17,8 @@ class bytearray "PyByteArrayObject *" "&PyByteArray_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=5535b77c37a119e0]*/ -/* For PyByteArray_AS_STRING(). */ -char _PyByteArray_empty_string[] = ""; +/* Max number of bytes a bytearray can contain */ +#define PyByteArray_SIZE_MAX ((Py_ssize_t)(PY_SSIZE_T_MAX - _PyBytesObject_SIZE)) /* Helpers */ @@ -43,6 +43,14 @@ _getbytevalue(PyObject* arg, int *value) return 1; } +static void +bytearray_reinit_from_bytes(PyByteArrayObject *self, Py_ssize_t size, + Py_ssize_t alloc) { + self->ob_bytes = self->ob_start = PyBytes_AS_STRING(self->ob_bytes_object); + Py_SET_SIZE(self, size); + FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, alloc); +} + static int bytearray_getbuffer_lock_held(PyObject *self, Py_buffer *view, int flags) { @@ -127,7 +135,6 @@ PyObject * PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) { PyByteArrayObject *new; - Py_ssize_t alloc; if (size < 0) { PyErr_SetString(PyExc_SystemError, @@ -135,34 +142,31 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) return NULL; } - /* Prevent buffer overflow when setting alloc to size+1. 
*/ - if (size == PY_SSIZE_T_MAX) { - return PyErr_NoMemory(); - } - new = PyObject_New(PyByteArrayObject, &PyByteArray_Type); - if (new == NULL) + if (new == NULL) { return NULL; + } + + /* Fill values used in bytearray_dealloc. + + In an optimized build the memory isn't zeroed and ob_exports would be + uninitialized when PyBytes_FromStringAndSize errored, leading to + intermittent test failures. */ + new->ob_exports = 0; + + /* Optimization: size=0 bytearray should not allocate space - if (size == 0) { - new->ob_bytes = NULL; - alloc = 0; + PyBytes_FromStringAndSize returns the empty bytes global when size=0 so + no allocation occurs. */ + new->ob_bytes_object = PyBytes_FromStringAndSize(NULL, size); + if (new->ob_bytes_object == NULL) { + Py_DECREF(new); + return NULL; } - else { - alloc = size + 1; - new->ob_bytes = PyMem_Malloc(alloc); - if (new->ob_bytes == NULL) { - Py_DECREF(new); - return PyErr_NoMemory(); - } - if (bytes != NULL && size > 0) - memcpy(new->ob_bytes, bytes, size); - new->ob_bytes[size] = '\0'; /* Trailing null byte */ + bytearray_reinit_from_bytes(new, size, size); + if (bytes != NULL && size > 0) { + memcpy(new->ob_bytes, bytes, size); } - Py_SET_SIZE(new, size); - new->ob_alloc = alloc; - new->ob_start = new->ob_bytes; - new->ob_exports = 0; return (PyObject *)new; } @@ -189,7 +193,6 @@ static int bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); - void *sval; PyByteArrayObject *obj = ((PyByteArrayObject *)self); /* All computations are done unsigned to avoid integer overflows (see issue #22335). */ @@ -214,16 +217,17 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) return -1; } - if (size + logical_offset + 1 <= alloc) { + if (size + logical_offset <= alloc) { /* Current buffer is large enough to host the requested size, decide on a strategy. 
*/ if (size < alloc / 2) { /* Major downsize; resize down to exact size */ - alloc = size + 1; + alloc = size; } else { /* Minor downsize; quick exit */ Py_SET_SIZE(self, size); + /* Add mid-buffer null; end provided by bytes. */ PyByteArray_AS_STRING(self)[size] = '\0'; /* Trailing null */ return 0; } @@ -236,38 +240,36 @@ bytearray_resize_lock_held(PyObject *self, Py_ssize_t requested_size) } else { /* Major upsize; resize up to exact size */ - alloc = size + 1; + alloc = size; } } - if (alloc > PY_SSIZE_T_MAX) { + if (alloc > PyByteArray_SIZE_MAX) { PyErr_NoMemory(); return -1; } + /* Re-align data to the start of the allocation. */ if (logical_offset > 0) { - sval = PyMem_Malloc(alloc); - if (sval == NULL) { - PyErr_NoMemory(); - return -1; - } - memcpy(sval, PyByteArray_AS_STRING(self), - Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self))); - PyMem_Free(obj->ob_bytes); - } - else { - sval = PyMem_Realloc(obj->ob_bytes, alloc); - if (sval == NULL) { - PyErr_NoMemory(); - return -1; - } + /* optimization tradeoff: This is faster than a new allocation when + the number of bytes being removed in a resize is small; for large + size changes it may be better to just make a new bytes object as + _PyBytes_Resize will do a malloc + memcpy internally. */ + memmove(obj->ob_bytes, obj->ob_start, + Py_MIN(requested_size, Py_SIZE(self))); } - obj->ob_bytes = obj->ob_start = sval; - Py_SET_SIZE(self, size); - FT_ATOMIC_STORE_SSIZE_RELAXED(obj->ob_alloc, alloc); - obj->ob_bytes[size] = '\0'; /* Trailing null byte */ + int ret = _PyBytes_Resize(&obj->ob_bytes_object, alloc); + if (ret == -1) { + obj->ob_bytes_object = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + size = alloc = 0; + } + bytearray_reinit_from_bytes(obj, size, alloc); + if (alloc != size) { + /* Add mid-buffer null; end provided by bytes. 
*/ + obj->ob_bytes[size] = '\0'; + } - return 0; + return ret; } int @@ -295,7 +297,7 @@ PyByteArray_Concat(PyObject *a, PyObject *b) goto done; } - if (va.len > PY_SSIZE_T_MAX - vb.len) { + if (va.len > PyByteArray_SIZE_MAX - vb.len) { PyErr_NoMemory(); goto done; } @@ -339,7 +341,7 @@ bytearray_iconcat_lock_held(PyObject *op, PyObject *other) } Py_ssize_t size = Py_SIZE(self); - if (size > PY_SSIZE_T_MAX - vo.len) { + if (size > PyByteArray_SIZE_MAX - vo.len) { PyBuffer_Release(&vo); return PyErr_NoMemory(); } @@ -373,7 +375,7 @@ bytearray_repeat_lock_held(PyObject *op, Py_ssize_t count) count = 0; } const Py_ssize_t mysize = Py_SIZE(self); - if (count > 0 && mysize > PY_SSIZE_T_MAX / count) { + if (count > 0 && mysize > PyByteArray_SIZE_MAX / count) { return PyErr_NoMemory(); } Py_ssize_t size = mysize * count; @@ -409,7 +411,7 @@ bytearray_irepeat_lock_held(PyObject *op, Py_ssize_t count) } const Py_ssize_t mysize = Py_SIZE(self); - if (count > 0 && mysize > PY_SSIZE_T_MAX / count) { + if (count > 0 && mysize > PyByteArray_SIZE_MAX / count) { return PyErr_NoMemory(); } const Py_ssize_t size = mysize * count; @@ -585,7 +587,7 @@ bytearray_setslice_linear(PyByteArrayObject *self, buf = PyByteArray_AS_STRING(self); } else if (growth > 0) { - if (Py_SIZE(self) > (Py_ssize_t)PY_SSIZE_T_MAX - growth) { + if (Py_SIZE(self) > PyByteArray_SIZE_MAX - growth) { PyErr_NoMemory(); return -1; } @@ -899,6 +901,13 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg, PyObject *it; PyObject *(*iternext)(PyObject *); + /* First __init__; set ob_bytes_object so ob_bytes is always non-null. */ + if (self->ob_bytes_object == NULL) { + self->ob_bytes_object = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + bytearray_reinit_from_bytes(self, 0, 0); + self->ob_exports = 0; + } + if (Py_SIZE(self) != 0) { /* Empty previous contents (yes, do this first of all!) 
*/ if (PyByteArray_Resize((PyObject *)self, 0) < 0) @@ -1169,9 +1178,7 @@ bytearray_dealloc(PyObject *op) "deallocated bytearray object has exported buffers"); PyErr_Print(); } - if (self->ob_bytes != 0) { - PyMem_Free(self->ob_bytes); - } + Py_XDECREF(self->ob_bytes_object); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1491,6 +1498,82 @@ bytearray_resize_impl(PyByteArrayObject *self, Py_ssize_t size) } +/*[clinic input] +@critical_section +bytearray.take_bytes + n: object = None + Bytes to take, negative indexes from end. None indicates all bytes. + / +Take *n* bytes from the bytearray and return them as a bytes object. +[clinic start generated code]*/ + +static PyObject * +bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n) +/*[clinic end generated code: output=3147fbc0bbbe8d94 input=b15b5172cdc6deda]*/ +{ + Py_ssize_t to_take; + Py_ssize_t size = Py_SIZE(self); + if (Py_IsNone(n)) { + to_take = size; + } + // Integer index, from start (zero, positive) or end (negative). + else if (_PyIndex_Check(n)) { + to_take = PyNumber_AsSsize_t(n, PyExc_IndexError); + if (to_take == -1 && PyErr_Occurred()) { + return NULL; + } + if (to_take < 0) { + to_take += size; + } + } + else { + PyErr_SetString(PyExc_TypeError, "n must be an integer or None"); + return NULL; + } + + if (to_take < 0 || to_take > size) { + PyErr_Format(PyExc_IndexError, + "can't take %zd bytes outside size %zd", + to_take, size); + return NULL; + } + + // Exports may change the contents. No mutable bytes allowed. + if (!_canresize(self)) { + return NULL; + } + + if (to_take == 0 || size == 0) { + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + + // Copy remaining bytes to a new bytes. + Py_ssize_t remaining_length = size - to_take; + PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take, + remaining_length); + if (remaining == NULL) { + return NULL; + } + + // If the bytes are offset inside the buffer must first align. 
+ if (self->ob_start != self->ob_bytes) { + memmove(self->ob_bytes, self->ob_start, to_take); + self->ob_start = self->ob_bytes; + } + + if (_PyBytes_Resize(&self->ob_bytes_object, to_take) == -1) { + Py_DECREF(remaining); + return NULL; + } + + // Point the bytearray towards the buffer with the remaining data. + PyObject *result = self->ob_bytes_object; + self->ob_bytes_object = remaining; + bytearray_reinit_from_bytes(self, remaining_length, remaining_length); + return result; +} + + /*[clinic input] @critical_section bytearray.translate @@ -1868,11 +1951,6 @@ bytearray_insert_impl(PyByteArrayObject *self, Py_ssize_t index, int item) Py_ssize_t n = Py_SIZE(self); char *buf; - if (n == PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "cannot add more objects to bytearray"); - return NULL; - } if (bytearray_resize_lock_held((PyObject *)self, n + 1) < 0) return NULL; buf = PyByteArray_AS_STRING(self); @@ -1987,11 +2065,6 @@ bytearray_append_impl(PyByteArrayObject *self, int item) { Py_ssize_t n = Py_SIZE(self); - if (n == PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "cannot add more objects to bytearray"); - return NULL; - } if (bytearray_resize_lock_held((PyObject *)self, n + 1) < 0) return NULL; @@ -2099,16 +2172,16 @@ bytearray_extend_impl(PyByteArrayObject *self, PyObject *iterable_of_ints) if (len >= buf_size) { Py_ssize_t addition; - if (len == PY_SSIZE_T_MAX) { + if (len == PyByteArray_SIZE_MAX) { Py_DECREF(it); Py_DECREF(bytearray_obj); return PyErr_NoMemory(); } addition = len >> 1; - if (addition > PY_SSIZE_T_MAX - len - 1) - buf_size = PY_SSIZE_T_MAX; + if (addition > PyByteArray_SIZE_MAX - len) + buf_size = PyByteArray_SIZE_MAX; else - buf_size = len + addition + 1; + buf_size = len + addition; if (bytearray_resize_lock_held((PyObject *)bytearray_obj, buf_size) < 0) { Py_DECREF(it); Py_DECREF(bytearray_obj); @@ -2405,7 +2478,11 @@ static PyObject * bytearray_alloc(PyObject *op, PyObject *Py_UNUSED(ignored)) { PyByteArrayObject 
*self = _PyByteArray_CAST(op); - return PyLong_FromSsize_t(FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc)); + Py_ssize_t alloc = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc); + if (alloc > 0) { + alloc += _PyBytesObject_SIZE; + } + return PyLong_FromSsize_t(alloc); } /*[clinic input] @@ -2601,9 +2678,13 @@ static PyObject * bytearray_sizeof_impl(PyByteArrayObject *self) /*[clinic end generated code: output=738abdd17951c427 input=e27320fd98a4bc5a]*/ { - size_t res = _PyObject_SIZE(Py_TYPE(self)); - res += (size_t)FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc) * sizeof(char); - return PyLong_FromSize_t(res); + Py_ssize_t res = _PyObject_SIZE(Py_TYPE(self)); + Py_ssize_t alloc = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->ob_alloc); + if (alloc > 0) { + res += _PyBytesObject_SIZE + alloc; + } + + return PyLong_FromSsize_t(res); } static PySequenceMethods bytearray_as_sequence = { @@ -2686,6 +2767,7 @@ static PyMethodDef bytearray_methods[] = { BYTEARRAY_STARTSWITH_METHODDEF BYTEARRAY_STRIP_METHODDEF {"swapcase", bytearray_swapcase, METH_NOARGS, _Py_swapcase__doc__}, + BYTEARRAY_TAKE_BYTES_METHODDEF {"title", bytearray_title, METH_NOARGS, _Py_title__doc__}, BYTEARRAY_TRANSLATE_METHODDEF {"upper", bytearray_upper, METH_NOARGS, _Py_upper__doc__}, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2b9513abe91956..2b0925017f29e4 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -25,13 +25,7 @@ class bytes "PyBytesObject *" "&PyBytes_Type" #include "clinic/bytesobject.c.h" -/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation - for a bytes object of length n should request PyBytesObject_SIZE + n bytes. - - Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves - 3 or 7 bytes per bytes object allocation on a typical system. 
-*/ -#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) +#define PyBytesObject_SIZE _PyBytesObject_SIZE /* Forward declaration */ static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer, diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 6f13865177dde5..be704ccf68f669 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -631,6 +631,43 @@ bytearray_resize(PyObject *self, PyObject *arg) return return_value; } +PyDoc_STRVAR(bytearray_take_bytes__doc__, +"take_bytes($self, n=None, /)\n" +"--\n" +"\n" +"Take *n* bytes from the bytearray and return them as a bytes object.\n" +"\n" +" n\n" +" Bytes to take, negative indexes from end. None indicates all bytes."); + +#define BYTEARRAY_TAKE_BYTES_METHODDEF \ + {"take_bytes", _PyCFunction_CAST(bytearray_take_bytes), METH_FASTCALL, bytearray_take_bytes__doc__}, + +static PyObject * +bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n); + +static PyObject * +bytearray_take_bytes(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *n = Py_None; + + if (!_PyArg_CheckPositional("take_bytes", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + n = args[0]; +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = bytearray_take_bytes_impl((PyByteArrayObject *)self, n); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(bytearray_translate__doc__, "translate($self, table, /, delete=b\'\')\n" "--\n" @@ -1796,4 +1833,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl((PyByteArrayObject *)self); } -/*[clinic end generated code: output=fdfe41139c91e409 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5eddefde2a001ceb input=a9049054013a1b77]*/