From c85d6614b8ea477d836af40002f8f65494522add Mon Sep 17 00:00:00 2001 From: Hans Johnson Date: Thu, 9 Apr 2026 06:20:07 -0500 Subject: [PATCH] ENH: Add PEP 688 buffer protocol and fix np.asarray() lifetime safety Add zero-copy data export to all wrapped itk.Image types via three protocols, ensuring the exported array remains valid even after the source image is deleted: image = itk.imread("brain.nii.gz") arr = np.asarray(image) del image print(arr[1,1,1]) # safe -- no crash Protocol dispatch by Python version: 3.12+: np.asarray -> __buffer__ (PEP 688, zero-copy, memoryview pins self via NDArrayITKBase intermediary) 3.10-11: np.asarray -> __array_interface__ (zero-copy, NumPy sets arr.base = self, preventing image GC) All versions: image.__array__() returns NDArrayITKBase with .itk_base = image for explicit __array__ calls. Changes to pyBase.i: - Add __buffer__() implementing PEP 688 buffer export with shaped memoryview. Uses NDArrayITKBase as intermediary to hold a Python reference to the image, preventing GC while any derived memoryview/array exists. flags parameter accepted for PEP 688 compliance but not inspected (ITK buffers always writable). - Add __array_interface__ property returning NumPy v3 array interface dict. NumPy sets arr.base = self when creating arrays from this interface, providing correct lifetime on 3.10-3.11. - Simplify __array__() to always return zero-copy view via array_view_from_image(). Supports NumPy 2.0 copy= parameter. copy=True returns a plain ndarray so image can be GCd. Changes to PyBuffer.i.init: - Add _BUFFER_FORMAT_MAP constant (before function definition) - Add _get_buffer_formatstring() with descriptive KeyError - Add _NUMPY_PIXELID_MAP and _get_numpy_pixelid() - Remove LD (long double) mapping (silent corruption) Tested: Python 3.10-3.14, all pass (0 failures). Supersedes #6020, #6018, #5673, #5665. Addresses @thewtex (del image crash), @blowekamp (ref pinning). Co-Authored-By: Hans J. Johnson (cherry picked from commit 45a50960da6653d3ba2cfc13d8c78e0219ca709f) --- Modules/Bridge/NumPy/wrapping/PyBuffer.i.init | 112 ++++++++++--- Wrapping/Generators/Python/PyBase/pyBase.i | 150 +++++++++++++++++- 2 files changed, 233 insertions(+), 29 deletions(-) diff --git a/Modules/Bridge/NumPy/wrapping/PyBuffer.i.init b/Modules/Bridge/NumPy/wrapping/PyBuffer.i.init index 8f502c6fd66..62a4c3730c5 100644 --- a/Modules/Bridge/NumPy/wrapping/PyBuffer.i.init +++ b/Modules/Bridge/NumPy/wrapping/PyBuffer.i.init @@ -29,31 +29,95 @@ else: loads = dask_deserialize.dispatch(np.ndarray) return NDArrayITKBase(loads(header, frames)) +# Module-level constant — built once at import time. +# Composite pixel types (RGB, Vector, etc.) are decomposed to their +# scalar component type before reaching _get_buffer_formatstring(), +# so only scalar codes appear here. +# Platform aliases (ST/IT/OT) resolve to UL/SL at the wrapping level. +import os as _os +_BUFFER_FORMAT_MAP = { + # --- integer types --- + "B": "?", # bool + "UC": "B", # unsigned char -> uint8 + "US": "H", # unsigned short -> uint16 + "UI": "I", # unsigned int -> uint32 + "UL": "L", # unsigned long -> platform (8 bytes Linux/macOS, 4 bytes Windows) + "ULL": "Q", # unsigned long long -> uint64 + "SC": "b", # signed char -> int8 + "SS": "h", # signed short -> int16 + "SI": "i", # signed int -> int32 + "SL": "l", # signed long -> platform + "SLL": "q", # signed long long -> int64 + # --- floating point types --- + "F": "f", # float -> float32 + "D": "d", # double -> float64 + # "LD" (long double) intentionally omitted: sizeof(long double) + # is 16 bytes on Linux/macOS x86-64 but struct "d" is 8 bytes. + # Casting would silently corrupt the buffer. +} + +def _get_buffer_formatstring(itk_pixel_code: str) -> str: + """Return the struct format character for an ITK pixel type code. + + Used by the PEP 688 ``__buffer__`` protocol implementation on + ``itk.Image`` to describe the element type of the exported + memoryview. Format characters follow Python's ``struct`` module + specification. + + Parameters + ---------- + itk_pixel_code : str + Short name of the ITK component type, e.g. ``"UC"``, ``"F"``. + + Returns + ------- + str + Single-character ``struct`` format string. + + Raises + ------ + KeyError + If ``itk_pixel_code`` is not a supported scalar type. + """ + try: + return _BUFFER_FORMAT_MAP[itk_pixel_code] + except KeyError: + raise KeyError( + f"Unsupported ITK pixel type code {itk_pixel_code!r} for " + f"buffer export. Supported codes: " + f"{sorted(_BUFFER_FORMAT_MAP)}" + ) from None +if _os.name == 'nt': + # On Windows, C ``long`` is 32-bit + _BUFFER_FORMAT_MAP['UL'] = 'I' + _BUFFER_FORMAT_MAP['SL'] = 'i' + + def _get_numpy_pixelid(itk_Image_type) -> np.dtype: """Returns a ITK PixelID given a numpy array.""" - # This is a Mapping from numpy array types to itk pixel types. - _np_itk = {"UC":np.dtype(np.uint8), - "US":np.dtype(np.uint16), - "UI":np.dtype(np.uint32), - "UL":np.dtype(np.uint64), - "ULL":np.dtype(np.uint64), - "SC":np.dtype(np.int8), - "SS":np.dtype(np.int16), - "SI":np.dtype(np.int32), - "SL":np.dtype(np.int64), - "SLL":np.dtype(np.int64), - "F":np.dtype(np.float32), - "D":np.dtype(np.float64), - "PF2":np.dtype(np.float32), - "PF3":np.dtype(np.float32), - } - import os - if os.name == 'nt': - _np_itk['UL'] = np.dtype(np.uint32) - _np_itk['SL'] = np.dtype(np.int32) - try: - return _np_itk[itk_Image_type] - except KeyError as e: - raise e + return _NUMPY_PIXELID_MAP[itk_Image_type] + + +# Module-level constant — built once at import time. +_NUMPY_PIXELID_MAP = { + "B": np.dtype(np.bool_), + "UC": np.dtype(np.uint8), + "US": np.dtype(np.uint16), + "UI": np.dtype(np.uint32), + "UL": np.dtype(np.uint64), + "ULL": np.dtype(np.uint64), + "SC": np.dtype(np.int8), + "SS": np.dtype(np.int16), + "SI": np.dtype(np.int32), + "SL": np.dtype(np.int64), + "SLL": np.dtype(np.int64), + "F": np.dtype(np.float32), + "D": np.dtype(np.float64), + "PF2": np.dtype(np.float32), + "PF3": np.dtype(np.float32), +} +if _os.name == 'nt': + _NUMPY_PIXELID_MAP['UL'] = np.dtype(np.uint32) + _NUMPY_PIXELID_MAP['SL'] = np.dtype(np.int32) %} diff --git a/Wrapping/Generators/Python/PyBase/pyBase.i b/Wrapping/Generators/Python/PyBase/pyBase.i index a2c437f5961..057f437b9ae 100644 --- a/Wrapping/Generators/Python/PyBase/pyBase.i +++ b/Wrapping/Generators/Python/PyBase/pyBase.i @@ -688,13 +688,153 @@ str = str %define DECL_PYTHON_IMAGE_CLASS(swig_name) %extend swig_name { - %pythoncode { - def __array__(self, dtype=None): + %pythoncode %{ + def __buffer__(self, flags=0, /): + """PEP 688 buffer protocol -- export image data as a memoryview. + + On Python 3.12+ this is called automatically by + ``memoryview(image)`` and ``numpy.asarray(image)``. + On Python 3.10-3.11 it can be called explicitly. + + The returned memoryview shares memory with the image + (zero-copy). A reference to the image is stored on the + returned object to prevent garbage collection while the + buffer is in use. + + ``flags`` is accepted for PEP 688 compliance but not + inspected: ITK image buffers are always writable, so all + flag combinations (including PyBUF_WRITABLE) are satisfied. + """ import itk import numpy as np - array = itk.array_from_image(self) - return np.asarray(array, dtype=dtype) - } + from itk.itkPyBufferPython import _get_buffer_formatstring + + # Get 1-D raw memoryview from the C++ buffer + ImageType = type(self) + PyBufferType = itk.PyBuffer[ImageType] + raw_memview = PyBufferType._GetArrayViewFromImage(self) + + # Build shape in C-order (NumPy convention: [z, y, x, ...]) + itksize = self.GetBufferedRegion().GetSize() + shape = [int(itksize[d]) for d in range(len(itksize))] + + n_components = self.GetNumberOfComponentsPerPixel() + if n_components > 1: + shape.insert(0, n_components) + + shape.reverse() + + # Determine the struct format character for the component type + tpl = itk.template(self) + pixel_type = tpl[1][0] + from itk.support.types import itkCType + if isinstance(pixel_type, itkCType): + # Scalar pixel (UC, F, SS, etc.) + component_code = pixel_type.short_name + else: + # Composite pixel (RGB, Vector, etc.) -- use component type + pixel_tpl = itk.template(pixel_type) + component_code = pixel_tpl[1][0].short_name + + fmt = _get_buffer_formatstring(component_code) + + # Build a NumPy array view that holds a reference to self + # via NDArrayITKBase.itk_base, preventing the image from + # being garbage collected while any memoryview or array + # derived from this buffer exists. + from itk.itkPyBufferPython import NDArrayITKBase + flat = np.frombuffer(raw_memview, dtype=fmt) + shaped = NDArrayITKBase(flat.reshape(shape), self) + return memoryview(shaped) + + def _get_array_interface(self): + """NumPy array interface (v3) -- zero-copy on all versions. + + When NumPy creates an array from ``__array_interface__``, + it sets ``arr.base = self`` (the image), which prevents + the image from being garbage collected while the array + exists. This is the correct lifetime behavior for + ``np.asarray(image)`` on all Python versions. + + On Python 3.12+, NumPy prefers ``__buffer__`` (PEP 688) + over this interface, which also provides correct lifetime + via the NDArrayITKBase intermediary. + """ + import itk + import numpy as np + from itk.itkPyBufferPython import _get_numpy_pixelid + + ImageType = type(self) + PyBufferType = itk.PyBuffer[ImageType] + raw_memview = PyBufferType._GetArrayViewFromImage(self) + + # Shape in C-order (NumPy convention: [z, y, x]) + itksize = self.GetBufferedRegion().GetSize() + shape = tuple(int(itksize[d]) for d in reversed(range(len(itksize)))) + + n_components = self.GetNumberOfComponentsPerPixel() + if n_components > 1: + shape = shape + (n_components,) + + # Resolve component type code + tpl = itk.template(self) + pixel_type = tpl[1][0] + from itk.support.types import itkCType + if isinstance(pixel_type, itkCType): + component_code = pixel_type.short_name + else: + pixel_tpl = itk.template(pixel_type) + component_code = pixel_tpl[1][0].short_name + + dtype = _get_numpy_pixelid(component_code) + np_arr = np.asarray(raw_memview) + data_ptr = np_arr.__array_interface__['data'][0] + + return { + 'version': 3, + 'shape': shape, + 'typestr': dtype.str, + 'data': (data_ptr, False), + 'strides': None, + } + __array_interface__ = property(_get_array_interface) + + def __array__(self, dtype=None, copy=None): + """NumPy array protocol -- zero-copy view of image data. + + On Python 3.12+, NumPy prefers ``__buffer__`` (PEP 688) + over this method. On Python 3.10-3.11, NumPy uses + ``__array_interface__`` (which sets arr.base = self) for + ``np.asarray()``, so this method is only called for + explicit ``image.__array__()`` or ``np.array(image)``. + + Parameters + ---------- + dtype : numpy dtype, optional + If specified and different from the image dtype, + a copy is made with the requested dtype. + copy : bool or None, optional (NumPy 2.0+) + ``None``/``False``: return zero-copy view. + ``True``: return an independent copy. + """ + import itk + import numpy as np + + # Zero-copy view with reference to self via NDArrayITKBase + array = itk.array_view_from_image(self) + + if dtype is not None: + if copy is False and np.dtype(dtype) != array.dtype: + raise ValueError( + "Unable to avoid copy: dtype conversion from " + f"{array.dtype} to {np.dtype(dtype)} requires " + "a copy." + ) + array = np.asarray(array, dtype=dtype) + if copy: + array = np.array(array, copy=True) + return array + %} } %enddef