From 18f6d4404e6283aa0857c36fa9bca4d9a8cefd72 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 9 Dec 2021 11:33:49 -0700 Subject: [PATCH 1/7] _Py_bytes_state -> _Py_global_objects. --- Include/internal/pycore_bytesobject.h | 8 ---- Include/internal/pycore_global_objects.h | 3 ++ Include/internal/pycore_interp.h | 2 - Objects/bytesobject.c | 53 +++++++++++------------- 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index b00ed9784ef34d..656e11e101ea03 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -16,14 +16,6 @@ extern PyStatus _PyBytes_InitTypes(PyInterpreterState *); extern void _PyBytes_Fini(PyInterpreterState *); -/* other API */ - -struct _Py_bytes_state { - PyObject *empty_string; - PyBytesObject *characters[256]; -}; - - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 6cae3bca6be45a..2c67c5c7775601 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -54,6 +54,9 @@ struct _Py_global_objects { * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive). 
*/ PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; + + PyBytesObject *bytes_empty; + PyBytesObject *bytes_characters[256]; } singletons; }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index e4d7b1b8752eab..d48ea87fd67fe3 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -10,7 +10,6 @@ extern "C" { #include "pycore_atomic.h" // _Py_atomic_address #include "pycore_ast_state.h" // struct ast_state -#include "pycore_bytesobject.h" // struct _Py_bytes_state #include "pycore_context.h" // struct _Py_context_state #include "pycore_dict.h" // struct _Py_dict_state #include "pycore_exceptions.h" // struct _Py_exc_state @@ -152,7 +151,6 @@ struct _is { PyObject *audit_hooks; - struct _Py_bytes_state bytes; struct _Py_unicode_state unicode; struct _Py_float_state float_state; /* Using a cache is very effective since typically only a single slice is diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 2f7e0a6dde6fe0..70f5c2e51268f2 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -5,9 +5,9 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" // _Py_bytes_startswith() -#include "pycore_bytesobject.h" // struct _Py_bytes_state #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_format.h" // F_LJUST +#include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_DigitValue #include "pycore_object.h" // _PyObject_GC_TRACK @@ -38,36 +38,30 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str); -static struct _Py_bytes_state* -get_bytes_state(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->bytes; -} +#define CHARACTERS _Py_SINGLETON(bytes_characters) +#define EMPTY _Py_SINGLETON(bytes_empty) // Return a borrowed reference to the empty bytes 
string singleton. static inline PyObject* bytes_get_empty(void) { - struct _Py_bytes_state *state = get_bytes_state(); // bytes_get_empty() must not be called before _PyBytes_Init() // or after _PyBytes_Fini() - assert(state->empty_string != NULL); - return state->empty_string; + assert(EMPTY != NULL); + return &EMPTY->ob_base.ob_base; } // Return a strong reference to the empty bytes string singleton. static inline PyObject* bytes_new_empty(void) { - PyObject *empty = bytes_get_empty(); - Py_INCREF(empty); - return (PyObject *)empty; + Py_INCREF(EMPTY); + return (PyObject *)EMPTY; } static int -bytes_create_empty_string_singleton(struct _Py_bytes_state *state) +bytes_create_empty_string_singleton(void) { // Create the empty bytes string singleton PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE); @@ -78,8 +72,7 @@ bytes_create_empty_string_singleton(struct _Py_bytes_state *state) op->ob_shash = -1; op->ob_sval[0] = '\0'; - assert(state->empty_string == NULL); - state->empty_string = (PyObject *)op; + EMPTY = op; return 0; } @@ -148,8 +141,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return NULL; } if (size == 1 && str != NULL) { - struct _Py_bytes_state *state = get_bytes_state(); - op = state->characters[*str & UCHAR_MAX]; + op = CHARACTERS[*str & UCHAR_MAX]; if (op != NULL) { Py_INCREF(op); return (PyObject *)op; @@ -168,9 +160,8 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) memcpy(op->ob_sval, str, size); /* share short strings */ if (size == 1) { - struct _Py_bytes_state *state = get_bytes_state(); Py_INCREF(op); - state->characters[*str & UCHAR_MAX] = op; + CHARACTERS[*str & UCHAR_MAX] = op; } return (PyObject *) op; } @@ -189,12 +180,11 @@ PyBytes_FromString(const char *str) return NULL; } - struct _Py_bytes_state *state = get_bytes_state(); if (size == 0) { return bytes_new_empty(); } else if (size == 1) { - op = state->characters[*str & UCHAR_MAX]; + op = CHARACTERS[*str & UCHAR_MAX]; if (op != 
NULL) { Py_INCREF(op); return (PyObject *)op; @@ -211,9 +201,9 @@ PyBytes_FromString(const char *str) memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 1) { - assert(state->characters[*str & UCHAR_MAX] == NULL); + assert(CHARACTERS[*str & UCHAR_MAX] == NULL); Py_INCREF(op); - state->characters[*str & UCHAR_MAX] = op; + CHARACTERS[*str & UCHAR_MAX] = op; } return (PyObject *) op; } @@ -3089,8 +3079,11 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) PyStatus _PyBytes_InitGlobalObjects(PyInterpreterState *interp) { - struct _Py_bytes_state *state = &interp->bytes; - if (bytes_create_empty_string_singleton(state) < 0) { + if (!_Py_IsMainInterpreter(interp)) { + return _PyStatus_OK(); + } + + if (bytes_create_empty_string_singleton() < 0) { return _PyStatus_NO_MEMORY(); } return _PyStatus_OK(); @@ -3119,11 +3112,13 @@ _PyBytes_InitTypes(PyInterpreterState *interp) void _PyBytes_Fini(PyInterpreterState *interp) { - struct _Py_bytes_state* state = &interp->bytes; + if (!_Py_IsMainInterpreter(interp)) { + return; + } for (int i = 0; i < UCHAR_MAX + 1; i++) { - Py_CLEAR(state->characters[i]); + Py_CLEAR(CHARACTERS[i]); } - Py_CLEAR(state->empty_string); + Py_CLEAR(EMPTY); } /*********************** Bytes Iterator ****************************/ From 8b96ac9a81d242da9d118db185c3eb24f07276db Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 13 Dec 2021 14:09:49 -0700 Subject: [PATCH 2/7] Statically initialize the empty bytes object. 
--- Include/internal/pycore_bytesobject.h | 1 - Include/internal/pycore_global_objects.h | 14 ++++++++- Objects/bytesobject.c | 37 +----------------------- Python/pylifecycle.c | 5 ---- 4 files changed, 14 insertions(+), 43 deletions(-) diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 656e11e101ea03..a10413232a9994 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -11,7 +11,6 @@ extern "C" { /* runtime lifecycle */ -extern PyStatus _PyBytes_InitGlobalObjects(PyInterpreterState *); extern PyStatus _PyBytes_InitTypes(PyInterpreterState *); extern void _PyBytes_Fini(PyInterpreterState *); diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 2c67c5c7775601..c3a8239aca7bf8 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -34,6 +34,16 @@ extern "C" { } +/* bytes objects */ + +#define _PyBytes_EMPTY_INIT \ + { \ + _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, 0), \ + .ob_shash = -1, \ + .ob_sval[0] = '\0', \ + } + + /********************** * the global objects * **********************/ @@ -55,7 +65,7 @@ struct _Py_global_objects { */ PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; - PyBytesObject *bytes_empty; + PyBytesObject bytes_empty; PyBytesObject *bytes_characters[256]; } singletons; }; @@ -326,6 +336,8 @@ struct _Py_global_objects { _PyLong_DIGIT_INIT(255), \ _PyLong_DIGIT_INIT(256), \ }, \ + \ + .bytes_empty = _PyBytes_EMPTY_INIT, \ }, \ } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 70f5c2e51268f2..65d69a730d7e8c 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -39,15 +39,12 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, #define CHARACTERS _Py_SINGLETON(bytes_characters) -#define EMPTY _Py_SINGLETON(bytes_empty) +#define EMPTY (&_Py_SINGLETON(bytes_empty)) // Return a borrowed reference 
to the empty bytes string singleton. static inline PyObject* bytes_get_empty(void) { - // bytes_get_empty() must not be called before _PyBytes_Init() - // or after _PyBytes_Fini() - assert(EMPTY != NULL); return &EMPTY->ob_base.ob_base; } @@ -60,23 +57,6 @@ static inline PyObject* bytes_new_empty(void) } -static int -bytes_create_empty_string_singleton(void) -{ - // Create the empty bytes string singleton - PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE); - if (op == NULL) { - return -1; - } - _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0); - op->ob_shash = -1; - op->ob_sval[0] = '\0'; - - EMPTY = op; - return 0; -} - - /* For PyBytes_FromString(), the parameter `str' points to a null-terminated string containing exactly `size' bytes. @@ -3076,20 +3056,6 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) } -PyStatus -_PyBytes_InitGlobalObjects(PyInterpreterState *interp) -{ - if (!_Py_IsMainInterpreter(interp)) { - return _PyStatus_OK(); - } - - if (bytes_create_empty_string_singleton() < 0) { - return _PyStatus_NO_MEMORY(); - } - return _PyStatus_OK(); -} - - PyStatus _PyBytes_InitTypes(PyInterpreterState *interp) { @@ -3118,7 +3084,6 @@ _PyBytes_Fini(PyInterpreterState *interp) for (int i = 0; i < UCHAR_MAX + 1; i++) { Py_CLEAR(CHARACTERS[i]); } - Py_CLEAR(EMPTY); } /*********************** Bytes Iterator ****************************/ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 22281a311918e7..94f6f382d4b99d 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -678,11 +678,6 @@ pycore_init_global_objects(PyInterpreterState *interp) _PyFloat_InitState(interp); - status = _PyBytes_InitGlobalObjects(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - status = _PyUnicode_InitGlobalObjects(interp); if (_PyStatus_EXCEPTION(status)) { return status; From dcac2efe0be0e0ed45f6ec44c71bc61a14d5edfa Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 13 Dec 2021 14:15:13 -0700 Subject: [PATCH 
3/7] Statically initialize the single character bytes objects. --- Include/internal/pycore_bytesobject.h | 1 - Include/internal/pycore_global_objects.h | 271 ++++++++++++++++++++++- Objects/bytesobject.c | 40 +--- Python/pylifecycle.c | 1 - 4 files changed, 278 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index a10413232a9994..18d9530aaf41ee 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -12,7 +12,6 @@ extern "C" { /* runtime lifecycle */ extern PyStatus _PyBytes_InitTypes(PyInterpreterState *); -extern void _PyBytes_Fini(PyInterpreterState *); #ifdef __cplusplus diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index c3a8239aca7bf8..d08121b3902787 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -42,6 +42,14 @@ extern "C" { .ob_shash = -1, \ .ob_sval[0] = '\0', \ } +#define _PyBytes_CHAR_INIT(CH) \ + { \ + { \ + _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, 1), \ + .ob_shash = -1, \ + .ob_sval = { CH }, \ + }, \ + } /********************** @@ -66,7 +74,10 @@ struct _Py_global_objects { PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; PyBytesObject bytes_empty; - PyBytesObject *bytes_characters[256]; + struct { + PyBytesObject ob; + char eos; + } bytes_characters[256]; } singletons; }; @@ -338,6 +349,264 @@ struct _Py_global_objects { }, \ \ .bytes_empty = _PyBytes_EMPTY_INIT, \ + .bytes_characters = { \ + _PyBytes_CHAR_INIT(0), \ + _PyBytes_CHAR_INIT(1), \ + _PyBytes_CHAR_INIT(2), \ + _PyBytes_CHAR_INIT(3), \ + _PyBytes_CHAR_INIT(4), \ + _PyBytes_CHAR_INIT(5), \ + _PyBytes_CHAR_INIT(6), \ + _PyBytes_CHAR_INIT(7), \ + _PyBytes_CHAR_INIT(8), \ + _PyBytes_CHAR_INIT(9), \ + _PyBytes_CHAR_INIT(10), \ + _PyBytes_CHAR_INIT(11), \ + _PyBytes_CHAR_INIT(12), \ + _PyBytes_CHAR_INIT(13), \ + _PyBytes_CHAR_INIT(14), \ + 
_PyBytes_CHAR_INIT(15), \ + _PyBytes_CHAR_INIT(16), \ + _PyBytes_CHAR_INIT(17), \ + _PyBytes_CHAR_INIT(18), \ + _PyBytes_CHAR_INIT(19), \ + _PyBytes_CHAR_INIT(20), \ + _PyBytes_CHAR_INIT(21), \ + _PyBytes_CHAR_INIT(22), \ + _PyBytes_CHAR_INIT(23), \ + _PyBytes_CHAR_INIT(24), \ + _PyBytes_CHAR_INIT(25), \ + _PyBytes_CHAR_INIT(26), \ + _PyBytes_CHAR_INIT(27), \ + _PyBytes_CHAR_INIT(28), \ + _PyBytes_CHAR_INIT(29), \ + _PyBytes_CHAR_INIT(30), \ + _PyBytes_CHAR_INIT(31), \ + _PyBytes_CHAR_INIT(32), \ + _PyBytes_CHAR_INIT(33), \ + _PyBytes_CHAR_INIT(34), \ + _PyBytes_CHAR_INIT(35), \ + _PyBytes_CHAR_INIT(36), \ + _PyBytes_CHAR_INIT(37), \ + _PyBytes_CHAR_INIT(38), \ + _PyBytes_CHAR_INIT(39), \ + _PyBytes_CHAR_INIT(40), \ + _PyBytes_CHAR_INIT(41), \ + _PyBytes_CHAR_INIT(42), \ + _PyBytes_CHAR_INIT(43), \ + _PyBytes_CHAR_INIT(44), \ + _PyBytes_CHAR_INIT(45), \ + _PyBytes_CHAR_INIT(46), \ + _PyBytes_CHAR_INIT(47), \ + _PyBytes_CHAR_INIT(48), \ + _PyBytes_CHAR_INIT(49), \ + _PyBytes_CHAR_INIT(50), \ + _PyBytes_CHAR_INIT(51), \ + _PyBytes_CHAR_INIT(52), \ + _PyBytes_CHAR_INIT(53), \ + _PyBytes_CHAR_INIT(54), \ + _PyBytes_CHAR_INIT(55), \ + _PyBytes_CHAR_INIT(56), \ + _PyBytes_CHAR_INIT(57), \ + _PyBytes_CHAR_INIT(58), \ + _PyBytes_CHAR_INIT(59), \ + _PyBytes_CHAR_INIT(60), \ + _PyBytes_CHAR_INIT(61), \ + _PyBytes_CHAR_INIT(62), \ + _PyBytes_CHAR_INIT(63), \ + _PyBytes_CHAR_INIT(64), \ + _PyBytes_CHAR_INIT(65), \ + _PyBytes_CHAR_INIT(66), \ + _PyBytes_CHAR_INIT(67), \ + _PyBytes_CHAR_INIT(68), \ + _PyBytes_CHAR_INIT(69), \ + _PyBytes_CHAR_INIT(70), \ + _PyBytes_CHAR_INIT(71), \ + _PyBytes_CHAR_INIT(72), \ + _PyBytes_CHAR_INIT(73), \ + _PyBytes_CHAR_INIT(74), \ + _PyBytes_CHAR_INIT(75), \ + _PyBytes_CHAR_INIT(76), \ + _PyBytes_CHAR_INIT(77), \ + _PyBytes_CHAR_INIT(78), \ + _PyBytes_CHAR_INIT(79), \ + _PyBytes_CHAR_INIT(80), \ + _PyBytes_CHAR_INIT(81), \ + _PyBytes_CHAR_INIT(82), \ + _PyBytes_CHAR_INIT(83), \ + _PyBytes_CHAR_INIT(84), \ + _PyBytes_CHAR_INIT(85), \ + 
_PyBytes_CHAR_INIT(86), \ + _PyBytes_CHAR_INIT(87), \ + _PyBytes_CHAR_INIT(88), \ + _PyBytes_CHAR_INIT(89), \ + _PyBytes_CHAR_INIT(90), \ + _PyBytes_CHAR_INIT(91), \ + _PyBytes_CHAR_INIT(92), \ + _PyBytes_CHAR_INIT(93), \ + _PyBytes_CHAR_INIT(94), \ + _PyBytes_CHAR_INIT(95), \ + _PyBytes_CHAR_INIT(96), \ + _PyBytes_CHAR_INIT(97), \ + _PyBytes_CHAR_INIT(98), \ + _PyBytes_CHAR_INIT(99), \ + _PyBytes_CHAR_INIT(100), \ + _PyBytes_CHAR_INIT(101), \ + _PyBytes_CHAR_INIT(102), \ + _PyBytes_CHAR_INIT(103), \ + _PyBytes_CHAR_INIT(104), \ + _PyBytes_CHAR_INIT(105), \ + _PyBytes_CHAR_INIT(106), \ + _PyBytes_CHAR_INIT(107), \ + _PyBytes_CHAR_INIT(108), \ + _PyBytes_CHAR_INIT(109), \ + _PyBytes_CHAR_INIT(110), \ + _PyBytes_CHAR_INIT(111), \ + _PyBytes_CHAR_INIT(112), \ + _PyBytes_CHAR_INIT(113), \ + _PyBytes_CHAR_INIT(114), \ + _PyBytes_CHAR_INIT(115), \ + _PyBytes_CHAR_INIT(116), \ + _PyBytes_CHAR_INIT(117), \ + _PyBytes_CHAR_INIT(118), \ + _PyBytes_CHAR_INIT(119), \ + _PyBytes_CHAR_INIT(120), \ + _PyBytes_CHAR_INIT(121), \ + _PyBytes_CHAR_INIT(122), \ + _PyBytes_CHAR_INIT(123), \ + _PyBytes_CHAR_INIT(124), \ + _PyBytes_CHAR_INIT(125), \ + _PyBytes_CHAR_INIT(126), \ + _PyBytes_CHAR_INIT(127), \ + _PyBytes_CHAR_INIT(128), \ + _PyBytes_CHAR_INIT(129), \ + _PyBytes_CHAR_INIT(130), \ + _PyBytes_CHAR_INIT(131), \ + _PyBytes_CHAR_INIT(132), \ + _PyBytes_CHAR_INIT(133), \ + _PyBytes_CHAR_INIT(134), \ + _PyBytes_CHAR_INIT(135), \ + _PyBytes_CHAR_INIT(136), \ + _PyBytes_CHAR_INIT(137), \ + _PyBytes_CHAR_INIT(138), \ + _PyBytes_CHAR_INIT(139), \ + _PyBytes_CHAR_INIT(140), \ + _PyBytes_CHAR_INIT(141), \ + _PyBytes_CHAR_INIT(142), \ + _PyBytes_CHAR_INIT(143), \ + _PyBytes_CHAR_INIT(144), \ + _PyBytes_CHAR_INIT(145), \ + _PyBytes_CHAR_INIT(146), \ + _PyBytes_CHAR_INIT(147), \ + _PyBytes_CHAR_INIT(148), \ + _PyBytes_CHAR_INIT(149), \ + _PyBytes_CHAR_INIT(150), \ + _PyBytes_CHAR_INIT(151), \ + _PyBytes_CHAR_INIT(152), \ + _PyBytes_CHAR_INIT(153), \ + _PyBytes_CHAR_INIT(154), \ + 
_PyBytes_CHAR_INIT(155), \ + _PyBytes_CHAR_INIT(156), \ + _PyBytes_CHAR_INIT(157), \ + _PyBytes_CHAR_INIT(158), \ + _PyBytes_CHAR_INIT(159), \ + _PyBytes_CHAR_INIT(160), \ + _PyBytes_CHAR_INIT(161), \ + _PyBytes_CHAR_INIT(162), \ + _PyBytes_CHAR_INIT(163), \ + _PyBytes_CHAR_INIT(164), \ + _PyBytes_CHAR_INIT(165), \ + _PyBytes_CHAR_INIT(166), \ + _PyBytes_CHAR_INIT(167), \ + _PyBytes_CHAR_INIT(168), \ + _PyBytes_CHAR_INIT(169), \ + _PyBytes_CHAR_INIT(170), \ + _PyBytes_CHAR_INIT(171), \ + _PyBytes_CHAR_INIT(172), \ + _PyBytes_CHAR_INIT(173), \ + _PyBytes_CHAR_INIT(174), \ + _PyBytes_CHAR_INIT(175), \ + _PyBytes_CHAR_INIT(176), \ + _PyBytes_CHAR_INIT(177), \ + _PyBytes_CHAR_INIT(178), \ + _PyBytes_CHAR_INIT(179), \ + _PyBytes_CHAR_INIT(180), \ + _PyBytes_CHAR_INIT(181), \ + _PyBytes_CHAR_INIT(182), \ + _PyBytes_CHAR_INIT(183), \ + _PyBytes_CHAR_INIT(184), \ + _PyBytes_CHAR_INIT(185), \ + _PyBytes_CHAR_INIT(186), \ + _PyBytes_CHAR_INIT(187), \ + _PyBytes_CHAR_INIT(188), \ + _PyBytes_CHAR_INIT(189), \ + _PyBytes_CHAR_INIT(190), \ + _PyBytes_CHAR_INIT(191), \ + _PyBytes_CHAR_INIT(192), \ + _PyBytes_CHAR_INIT(193), \ + _PyBytes_CHAR_INIT(194), \ + _PyBytes_CHAR_INIT(195), \ + _PyBytes_CHAR_INIT(196), \ + _PyBytes_CHAR_INIT(197), \ + _PyBytes_CHAR_INIT(198), \ + _PyBytes_CHAR_INIT(199), \ + _PyBytes_CHAR_INIT(200), \ + _PyBytes_CHAR_INIT(201), \ + _PyBytes_CHAR_INIT(202), \ + _PyBytes_CHAR_INIT(203), \ + _PyBytes_CHAR_INIT(204), \ + _PyBytes_CHAR_INIT(205), \ + _PyBytes_CHAR_INIT(206), \ + _PyBytes_CHAR_INIT(207), \ + _PyBytes_CHAR_INIT(208), \ + _PyBytes_CHAR_INIT(209), \ + _PyBytes_CHAR_INIT(210), \ + _PyBytes_CHAR_INIT(211), \ + _PyBytes_CHAR_INIT(212), \ + _PyBytes_CHAR_INIT(213), \ + _PyBytes_CHAR_INIT(214), \ + _PyBytes_CHAR_INIT(215), \ + _PyBytes_CHAR_INIT(216), \ + _PyBytes_CHAR_INIT(217), \ + _PyBytes_CHAR_INIT(218), \ + _PyBytes_CHAR_INIT(219), \ + _PyBytes_CHAR_INIT(220), \ + _PyBytes_CHAR_INIT(221), \ + _PyBytes_CHAR_INIT(222), \ + _PyBytes_CHAR_INIT(223), \ 
+ _PyBytes_CHAR_INIT(224), \ + _PyBytes_CHAR_INIT(225), \ + _PyBytes_CHAR_INIT(226), \ + _PyBytes_CHAR_INIT(227), \ + _PyBytes_CHAR_INIT(228), \ + _PyBytes_CHAR_INIT(229), \ + _PyBytes_CHAR_INIT(230), \ + _PyBytes_CHAR_INIT(231), \ + _PyBytes_CHAR_INIT(232), \ + _PyBytes_CHAR_INIT(233), \ + _PyBytes_CHAR_INIT(234), \ + _PyBytes_CHAR_INIT(235), \ + _PyBytes_CHAR_INIT(236), \ + _PyBytes_CHAR_INIT(237), \ + _PyBytes_CHAR_INIT(238), \ + _PyBytes_CHAR_INIT(239), \ + _PyBytes_CHAR_INIT(240), \ + _PyBytes_CHAR_INIT(241), \ + _PyBytes_CHAR_INIT(242), \ + _PyBytes_CHAR_INIT(243), \ + _PyBytes_CHAR_INIT(244), \ + _PyBytes_CHAR_INIT(245), \ + _PyBytes_CHAR_INIT(246), \ + _PyBytes_CHAR_INIT(247), \ + _PyBytes_CHAR_INIT(248), \ + _PyBytes_CHAR_INIT(249), \ + _PyBytes_CHAR_INIT(250), \ + _PyBytes_CHAR_INIT(251), \ + _PyBytes_CHAR_INIT(252), \ + _PyBytes_CHAR_INIT(253), \ + _PyBytes_CHAR_INIT(254), \ + _PyBytes_CHAR_INIT(255), \ + }, \ }, \ } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 65d69a730d7e8c..a3591f2b30a72a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -39,6 +39,8 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, #define CHARACTERS _Py_SINGLETON(bytes_characters) +#define CHARACTER(ch) \ + ((PyBytesObject *)&(CHARACTERS[ch])) #define EMPTY (&_Py_SINGLETON(bytes_empty)) @@ -121,11 +123,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return NULL; } if (size == 1 && str != NULL) { - op = CHARACTERS[*str & UCHAR_MAX]; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + op = CHARACTER(*str & UCHAR_MAX); + Py_INCREF(op); + return (PyObject *)op; } if (size == 0) { return bytes_new_empty(); @@ -138,11 +138,6 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return (PyObject *) op; memcpy(op->ob_sval, str, size); - /* share short strings */ - if (size == 1) { - Py_INCREF(op); - CHARACTERS[*str & UCHAR_MAX] = op; - } return (PyObject *) op; } @@ -164,11 +159,9 @@
PyBytes_FromString(const char *str) return bytes_new_empty(); } else if (size == 1) { - op = CHARACTERS[*str & UCHAR_MAX]; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + op = CHARACTER(*str & UCHAR_MAX); + Py_INCREF(op); + return (PyObject *)op; } /* Inline PyObject_NewVar */ @@ -179,12 +172,6 @@ PyBytes_FromString(const char *str) _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size); op->ob_shash = -1; memcpy(op->ob_sval, str, size+1); - /* share short strings */ - if (size == 1) { - assert(CHARACTERS[*str & UCHAR_MAX] == NULL); - Py_INCREF(op); - CHARACTERS[*str & UCHAR_MAX] = op; - } return (PyObject *) op; } @@ -3075,17 +3062,6 @@ _PyBytes_InitTypes(PyInterpreterState *interp) } -void -_PyBytes_Fini(PyInterpreterState *interp) -{ - if (!_Py_IsMainInterpreter(interp)) { - return; - } - for (int i = 0; i < UCHAR_MAX + 1; i++) { - Py_CLEAR(CHARACTERS[i]); - } -} - /*********************** Bytes Iterator ****************************/ typedef struct { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 94f6f382d4b99d..d7d1d5374b3103 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1680,7 +1680,6 @@ finalize_interp_types(PyInterpreterState *interp) _PySlice_Fini(interp); - _PyBytes_Fini(interp); _PyUnicode_Fini(interp); _PyFloat_Fini(interp); } From bdae989215cfea6d4bc7090b393959603c941151 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 13 Dec 2021 15:17:51 -0700 Subject: [PATCH 4/7] Reset the hash when re-initializing. 
--- Include/internal/pycore_global_objects.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index d08121b3902787..6816a8f8e32a19 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -51,6 +51,13 @@ extern "C" { }, \ } +static inline void +_PyBytes_reset(PyBytesObject *op) +{ + // Force a new hash to be generated since the hash seed may have changed. + op->ob_shash = -1; +} + /********************** * the global objects * @@ -613,6 +620,10 @@ struct _Py_global_objects { static inline void _Py_global_objects_reset(struct _Py_global_objects *objects) { + _PyBytes_reset(&objects->singletons.bytes_empty); + for (int i = 0; i < UCHAR_MAX + 1; i++) { + _PyBytes_reset((PyBytesObject *)&objects->singletons.bytes_characters[i]); + } } #ifdef __cplusplus From 9c3dc1ce0f6da3e6a0f36bfa70bc9d1519ae2d6a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 14 Dec 2021 15:05:01 -0700 Subject: [PATCH 5/7] Drop _PyBytes_reset(). --- Include/internal/pycore_global_objects.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 6816a8f8e32a19..d08121b3902787 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -51,13 +51,6 @@ extern "C" { }, \ } -static inline void -_PyBytes_reset(PyBytesObject *op) -{ - // Force a new hash to be generated since the hash seed may have changed. 
- op->ob_shash = -1; -} - /********************** * the global objects * @@ -620,10 +613,6 @@ struct _Py_global_objects { static inline void _Py_global_objects_reset(struct _Py_global_objects *objects) { - _PyBytes_reset(&objects->singletons.bytes_empty); - for (int i = 0; i < UCHAR_MAX + 1; i++) { - _PyBytes_reset((PyBytesObject *)&objects->singletons.bytes_characters[i]); - } } #ifdef __cplusplus From 86bee1c4333ed4a9b40f10087e645b4ec730535e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 10 Jan 2022 16:29:49 -0700 Subject: [PATCH 6/7] Use 255 instead of UCHAR_MAX. --- Objects/bytesobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a3591f2b30a72a..85d6912ca751fc 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -123,7 +123,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return NULL; } if (size == 1 && str != NULL) { - op = CHARACTER(*str & UCHAR_MAX); + op = CHARACTER(*str & 255); Py_INCREF(op); return (PyObject *)op; } @@ -159,7 +159,7 @@ PyBytes_FromString(const char *str) return bytes_new_empty(); } else if (size == 1) { - op = CHARACTER(*str & UCHAR_MAX); + op = CHARACTER(*str & 255); Py_INCREF(op); return (PyObject *)op; } From dc88ef16f5f7eb4207d9ad64188dff0bfa464bb4 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 10 Jan 2022 16:57:07 -0700 Subject: [PATCH 7/7] Factor out _PyBytes_SIMPLE_INIT(). 
--- Include/internal/pycore_global_objects.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index d08121b3902787..d2dc907c53d6de 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -36,19 +36,15 @@ extern "C" { /* bytes objects */ -#define _PyBytes_EMPTY_INIT \ +#define _PyBytes_SIMPLE_INIT(CH, LEN) \ { \ - _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, 0), \ + _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, LEN), \ .ob_shash = -1, \ - .ob_sval[0] = '\0', \ + .ob_sval = { CH }, \ } #define _PyBytes_CHAR_INIT(CH) \ { \ - { \ - _PyVarObject_IMMORTAL_INIT(&PyBytes_Type, 1), \ - .ob_shash = -1, \ - .ob_sval = { CH }, \ - }, \ + _PyBytes_SIMPLE_INIT(CH, 1) \ } @@ -348,7 +344,7 @@ struct _Py_global_objects { _PyLong_DIGIT_INIT(256), \ }, \ \ - .bytes_empty = _PyBytes_EMPTY_INIT, \ + .bytes_empty = _PyBytes_SIMPLE_INIT(0, 0), \ .bytes_characters = { \ _PyBytes_CHAR_INIT(0), \ _PyBytes_CHAR_INIT(1), \