From c7592a911a46b4ea6f39453368c27910ccb3557d Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 20 Jan 2023 11:35:22 +0200 Subject: [PATCH 01/13] Fix exposing C++ ExtensionArrays in pyarrow --- python/pyarrow/tests/extensions.pyx | 83 +++++++++++++++++++++++++++++ python/pyarrow/tests/test_cython.py | 34 ++++++++++++ python/pyarrow/types.pxi | 42 +++++++-------- 3 files changed, 138 insertions(+), 21 deletions(-) create mode 100644 python/pyarrow/tests/extensions.pyx diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx new file mode 100644 index 00000000000..501269b6560 --- /dev/null +++ b/python/pyarrow/tests/extensions.pyx @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language=c++ +# cython: language_level = 3 + +import pyarrow as pa +from pyarrow.lib cimport * + +cdef extern from * namespace "arrow::py" nogil: + """ + #include "arrow/status.h" + #include "arrow/extension_type.h" + #include "arrow/ipc/json_simple.h" + + namespace arrow { + namespace py { + + class UuidArray : public ExtensionArray { + public: + using ExtensionArray::ExtensionArray; + }; + + class UuidType : public ExtensionType { + public: + UuidType() : ExtensionType(fixed_size_binary(16)) {} + std::string extension_name() const override { return "uuid"; } + + bool ExtensionEquals(const ExtensionType& other) const override { + return other.extension_name() == this->extension_name(); + } + + std::shared_ptr MakeArray(std::shared_ptr data) const override { + return std::make_shared(data); + } + + Result> Deserialize( + std::shared_ptr storage_type, + const std::string& serialized) const override { + return std::make_shared(); + } + + std::string Serialize() const override { return ""; } + }; + + std::shared_ptr MakeUuidType() { + return std::make_shared(); + } + + std::shared_ptr MakeUuidArray() { + auto uuid_type = MakeUuidType(); + auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]"; + auto result = ipc::internal::json::ArrayFromJSON(fixed_size_binary(16), json); + return ExtensionType::WrapArray(uuid_type, result.ValueOrDie()); + } + + } // namespace py + } // namespace arrow + """ + + cdef shared_ptr[CDataType] CMakeUuidType" arrow::py::MakeUuidType"() + cdef shared_ptr[CArray] CMakeUuidArray" arrow::py::MakeUuidArray"() + +def _make_uuid_type(): + return pyarrow_wrap_data_type(CMakeUuidType()) + + +def _make_uuid_array(): + return pyarrow_wrap_array(CMakeUuidArray()) diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 59875e7b011..33140cd1fa7 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -163,6 +163,40 @@ def test_cython_api(tmpdir): env=subprocess_env) +@pytest.mark.cython +def test_extension_type(tmpdir): + with tmpdir.as_cwd(): + # Set up temporary workspace + pyx_file = 'extensions.pyx' + shutil.copyfile(os.path.join(here, pyx_file), + os.path.join(str(tmpdir), pyx_file)) + # Create setup.py file + setup_code = setup_template.format(pyx_file=pyx_file, + compiler_opts=compiler_opts, + test_ld_path=test_ld_path) + with open('setup.py', 'w') as f: + f.write(setup_code) + + subprocess_env = test_util.get_modified_env_with_pythonpath() + + # Compile extension module + subprocess.check_call([sys.executable, 'setup.py', + 'build_ext', '--inplace'], + env=subprocess_env) + + sys.path.insert(0, str(tmpdir)) + mod = __import__('extensions') + + uuid_type = mod._make_uuid_type() + assert uuid_type.extension_name == "uuid" + assert uuid_type.storage_type == pa.binary(16) + + array = mod._make_uuid_array() + assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] + assert array[0].as_py() == b'abcdefghijklmno0' + assert array[1].as_py() == b'0onmlkjihgfedcba' + + @pytest.mark.cython def test_visit_strings(tmpdir): with tmpdir.as_cwd(): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index dc74c121e6f..26f74a37bff 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -836,6 +836,27 @@ cdef class BaseExtensionType(DataType): DataType.init(self, type) self.ext_type = type.get() + def __arrow_ext_class__(self): + """Return an extension array class to be used for building or + deserializing arrays with this extension type. + + This method should return a subclass of the ExtensionArray class. By + default, if not specialized in the extension implementation, an + extension type array will be a built-in ExtensionArray instance. + """ + return ExtensionArray + + def __arrow_ext_scalar_class__(self): + """Return an extension scalar class for building scalars with this + extension type. + + This method should return subclass of the ExtensionScalar class. By + default, if not specialized in the extension implementation, an + extension type scalar will be a built-in ExtensionScalar instance. + """ + return ExtensionScalar + + @property def extension_name(self): """ @@ -968,27 +989,6 @@ cdef class ExtensionType(BaseExtensionType): """ return NotImplementedError - def __arrow_ext_class__(self): - """Return an extension array class to be used for building or - deserializing arrays with this extension type. - - This method should return a subclass of the ExtensionArray class. By - default, if not specialized in the extension implementation, an - extension type array will be a built-in ExtensionArray instance. - """ - return ExtensionArray - - def __arrow_ext_scalar_class__(self): - """Return an extension scalar class for building scalars with this - extension type. - - This method should return subclass of the ExtensionScalar class. By - default, if not specialized in the extension implementation, an - extension type scalar will be a built-in ExtensionScalar instance. - """ - return ExtensionScalar - - cdef class PyExtensionType(ExtensionType): """ Concrete base class for Python-defined extension types based on pickle From 6be252d3750505cca748c67572aac3cb62b29b4c Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 20 Jan 2023 12:05:50 +0200 Subject: [PATCH 02/13] fix linting errors --- python/pyarrow/tests/extensions.pyx | 1 + python/pyarrow/types.pxi | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index 501269b6560..f2dd6109538 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -75,6 +75,7 @@ cdef extern from * namespace "arrow::py" nogil: cdef shared_ptr[CDataType] CMakeUuidType" arrow::py::MakeUuidType"() cdef shared_ptr[CArray] CMakeUuidArray" arrow::py::MakeUuidArray"() + def _make_uuid_type(): return pyarrow_wrap_data_type(CMakeUuidType()) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 26f74a37bff..741f52edd7a 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -856,7 +856,6 @@ cdef class BaseExtensionType(DataType): """ return ExtensionScalar - @property def extension_name(self): """ From d82ecfcff48389f18ffe545d76cb169d560c5122 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:24:40 +0200 Subject: [PATCH 03/13] Return __arrow_ext_class__ and __arrow_ext_scalar_class__ methods to ExtensionType as overrides --- python/pyarrow/types.pxi | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 741f52edd7a..133177937f0 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -837,22 +837,14 @@ cdef class BaseExtensionType(DataType): self.ext_type = type.get() def __arrow_ext_class__(self): - """Return an extension array class to be used for building or - deserializing arrays with this extension type. - - This method should return a subclass of the ExtensionArray class. By - default, if not specialized in the extension implementation, an - extension type array will be a built-in ExtensionArray instance. + """ + The associated array extension class """ return ExtensionArray def __arrow_ext_scalar_class__(self): - """Return an extension scalar class for building scalars with this - extension type. - - This method should return subclass of the ExtensionScalar class. By - default, if not specialized in the extension implementation, an - extension type scalar will be a built-in ExtensionScalar instance. + """ + The associated scalar class """ return ExtensionScalar @@ -964,6 +956,26 @@ cdef class ExtensionType(BaseExtensionType): fmt = '{0.__class__.__name__}({1})' return fmt.format(self, repr(self.storage_type)) + def __arrow_ext_class__(self): + """Return an extension array class to be used for building or + deserializing arrays with this extension type. + + This method should return a subclass of the ExtensionArray class. By + default, if not specialized in the extension implementation, an + extension type array will be a built-in ExtensionArray instance. + """ + return ExtensionArray + + def __arrow_ext_scalar_class__(self): + """Return an extension scalar class for building scalars with this + extension type. + + This method should return subclass of the ExtensionScalar class. By + default, if not specialized in the extension implementation, an + extension type scalar will be a built-in ExtensionScalar instance. + """ + return ExtensionScalar + def __arrow_ext_serialize__(self): """ Serialized representation of metadata to reconstruct the type object. @@ -1450,8 +1462,8 @@ cdef class Schema(_Weakrefable): """ A named collection of types a.k.a schema. A schema defines the column names and types in a record batch or table data structure. - They also contain metadata about the columns. For example, schemas - converted from Pandas contain metadata about their original Pandas + They also contain metadata about the columns. For example, schemas + converted from Pandas contain metadata about their original Pandas types so they can be converted back to the same types. Warnings From f21c427814bfbdd68c651c361bedd0c4a30dcb13 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:25:20 +0200 Subject: [PATCH 04/13] Move extension type test to test_extension_type.py, and rename to test_cpp_extension_in_python --- python/pyarrow/tests/test_cython.py | 32 ----------------- python/pyarrow/tests/test_extension_type.py | 39 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 33140cd1fa7..624d8f38802 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -163,38 +163,6 @@ def test_cython_api(tmpdir): env=subprocess_env) -@pytest.mark.cython -def test_extension_type(tmpdir): - with tmpdir.as_cwd(): - # Set up temporary workspace - pyx_file = 'extensions.pyx' - shutil.copyfile(os.path.join(here, pyx_file), - os.path.join(str(tmpdir), pyx_file)) - # Create setup.py file - setup_code = setup_template.format(pyx_file=pyx_file, - compiler_opts=compiler_opts, - test_ld_path=test_ld_path) - with open('setup.py', 'w') as f: - f.write(setup_code) - - subprocess_env = test_util.get_modified_env_with_pythonpath() - - # Compile extension module - subprocess.check_call([sys.executable, 'setup.py', - 'build_ext', '--inplace'], - env=subprocess_env) - - sys.path.insert(0, str(tmpdir)) - mod = __import__('extensions') - - uuid_type = mod._make_uuid_type() - assert uuid_type.extension_name == "uuid" - assert uuid_type.storage_type == pa.binary(16) - - array = mod._make_uuid_array() - assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] - assert array[0].as_py() == b'abcdefghijklmno0' - assert array[1].as_py() == b'0onmlkjihgfedcba' @pytest.mark.cython diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index f5723491cb4..8f380d5144e 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -15,9 +15,13 @@ # specific language governing permissions and limitations # under the License. +import os import pickle +import shutil +import subprocess import weakref from uuid import uuid4, UUID +import sys import numpy as np import pyarrow as pa @@ -1079,3 +1083,38 @@ def test_array_constructor_from_pandas(): pd.Series([1, 2, 3], dtype="category"), type=IntegerType() ) assert result.equals(expected) + + +@pytest.mark.cython +def test_cpp_extension_in_python(tmpdir): + from .test_cython import setup_template, compiler_opts, test_ld_path, test_util, here + with tmpdir.as_cwd(): + # Set up temporary workspace + pyx_file = 'extensions.pyx' + shutil.copyfile(os.path.join(here, pyx_file), + os.path.join(str(tmpdir), pyx_file)) + # Create setup.py file + setup_code = setup_template.format(pyx_file=pyx_file, + compiler_opts=compiler_opts, + test_ld_path=test_ld_path) + with open('setup.py', 'w') as f: + f.write(setup_code) + + subprocess_env = test_util.get_modified_env_with_pythonpath() + + # Compile extension module + subprocess.check_call([sys.executable, 'setup.py', + 'build_ext', '--inplace'], + env=subprocess_env) + + sys.path.insert(0, str(tmpdir)) + mod = __import__('extensions') + + uuid_type = mod._make_uuid_type() + assert uuid_type.extension_name == "uuid" + assert uuid_type.storage_type == pa.binary(16) + + array = mod._make_uuid_array() + assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] + assert array[0].as_py() == b'abcdefghijklmno0' + assert array[1].as_py() == b'0onmlkjihgfedcba' From 98bbb4abb8cc66de369930af4c2b67564bae44bf Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:29:15 +0200 Subject: [PATCH 05/13] assert array.type == uuid_type --- python/pyarrow/tests/test_extension_type.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 8f380d5144e..ec8d3d9a83e 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1115,6 +1115,7 @@ def test_cpp_extension_in_python(tmpdir): assert uuid_type.storage_type == pa.binary(16) array = mod._make_uuid_array() + assert array.type == uuid_type assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] assert array[0].as_py() == b'abcdefghijklmno0' assert array[1].as_py() == b'0onmlkjihgfedcba' From 16c5e5ae338db4718447aec52c7d65f839859f01 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:41:03 +0200 Subject: [PATCH 06/13] Lint --- python/pyarrow/tests/test_cython.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 624d8f38802..59875e7b011 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -163,8 +163,6 @@ def test_cython_api(tmpdir): env=subprocess_env) - - @pytest.mark.cython def test_visit_strings(tmpdir): with tmpdir.as_cwd(): From e6664875f4de3768ac4e904153bfa6c94c038df1 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:41:37 +0200 Subject: [PATCH 07/13] lint --- python/pyarrow/tests/test_extension_type.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index ec8d3d9a83e..43bf15ec885 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1087,7 +1087,8 @@ def test_array_constructor_from_pandas(): @pytest.mark.cython def test_cpp_extension_in_python(tmpdir): - from .test_cython import setup_template, compiler_opts, test_ld_path, test_util, here + from .test_cython import ( + setup_template, compiler_opts, test_ld_path, test_util, here) with tmpdir.as_cwd(): # Set up temporary workspace pyx_file = 'extensions.pyx' From 5e9db7773966d83f173c339d4dff4e3e74ef1183 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:42:05 +0200 Subject: [PATCH 08/13] Add IPC serialisation testing --- python/pyarrow/tests/test_extension_type.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 43bf15ec885..2d1dc0c469b 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1120,3 +1120,10 @@ def test_cpp_extension_in_python(tmpdir): assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] assert array[0].as_py() == b'abcdefghijklmno0' assert array[1].as_py() == b'0onmlkjihgfedcba' + + buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"])) + del array + + batch = ipc_read_batch(buf) + array = batch.column(0) + assert array.type == uuid_type From dd87d5e7433887067a4911c7d0193600032758d1 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 13:52:35 +0200 Subject: [PATCH 09/13] Return dunder methods to original location in file --- python/pyarrow/types.pxi | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 133177937f0..29b397c0425 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -956,26 +956,6 @@ cdef class ExtensionType(BaseExtensionType): fmt = '{0.__class__.__name__}({1})' return fmt.format(self, repr(self.storage_type)) - def __arrow_ext_class__(self): - """Return an extension array class to be used for building or - deserializing arrays with this extension type. - - This method should return a subclass of the ExtensionArray class. By - default, if not specialized in the extension implementation, an - extension type array will be a built-in ExtensionArray instance. - """ - return ExtensionArray - - def __arrow_ext_scalar_class__(self): - """Return an extension scalar class for building scalars with this - extension type. - - This method should return subclass of the ExtensionScalar class. By - default, if not specialized in the extension implementation, an - extension type scalar will be a built-in ExtensionScalar instance. - """ - return ExtensionScalar - def __arrow_ext_serialize__(self): """ Serialized representation of metadata to reconstruct the type object. @@ -1000,6 +980,26 @@ cdef class ExtensionType(BaseExtensionType): """ return NotImplementedError + def __arrow_ext_class__(self): + """Return an extension array class to be used for building or + deserializing arrays with this extension type. + + This method should return a subclass of the ExtensionArray class. By + default, if not specialized in the extension implementation, an + extension type array will be a built-in ExtensionArray instance. + """ + return ExtensionArray + + def __arrow_ext_scalar_class__(self): + """Return an extension scalar class for building scalars with this + extension type. + + This method should return subclass of the ExtensionScalar class. By + default, if not specialized in the extension implementation, an + extension type scalar will be a built-in ExtensionScalar instance. + """ + return ExtensionScalar + cdef class PyExtensionType(ExtensionType): """ Concrete base class for Python-defined extension types based on pickle From cc72ec17ed5bb84b86b38ba914c67763de9db336 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 15:59:58 +0200 Subject: [PATCH 10/13] Register the extension type --- python/pyarrow/tests/test_extension_type.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 2d1dc0c469b..40244eeeda3 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1121,6 +1121,8 @@ def test_cpp_extension_in_python(tmpdir): assert array[0].as_py() == b'abcdefghijklmno0' assert array[1].as_py() == b'0onmlkjihgfedcba' + pa.register_extension_type(uuid_type) + buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"])) del array From a17e926162c9ed6f3abb5a0b5b42e35f1fa79bc0 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 16:12:24 +0200 Subject: [PATCH 11/13] Register UuidType at the C++ layer --- python/pyarrow/tests/extensions.pyx | 10 ++++++++++ python/pyarrow/tests/test_extension_type.py | 3 +-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index f2dd6109538..4fd98650adb 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -57,10 +57,15 @@ cdef extern from * namespace "arrow::py" nogil: std::string Serialize() const override { return ""; } }; + std::shared_ptr MakeUuidType() { return std::make_shared(); } + void RegisterUuidType() { + RegisterExtensionType(std::make_shared()); + } + std::shared_ptr MakeUuidArray() { auto uuid_type = MakeUuidType(); auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]"; @@ -74,6 +79,11 @@ cdef extern from * namespace "arrow::py" nogil: cdef shared_ptr[CDataType] CMakeUuidType" arrow::py::MakeUuidType"() cdef shared_ptr[CArray] CMakeUuidArray" arrow::py::MakeUuidArray"() + cdef void CRegisterUuidType" arrow::py::RegisterUuidType"() + + +def _register_uuid_type(): + CRegisterUuidType() def _make_uuid_type(): diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 40244eeeda3..50953ed63ae 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1111,6 +1111,7 @@ def test_cpp_extension_in_python(tmpdir): sys.path.insert(0, str(tmpdir)) mod = __import__('extensions') + mod._register_uuid_type() uuid_type = mod._make_uuid_type() assert uuid_type.extension_name == "uuid" assert uuid_type.storage_type == pa.binary(16) @@ -1121,8 +1122,6 @@ def test_cpp_extension_in_python(tmpdir): assert array[0].as_py() == b'abcdefghijklmno0' assert array[1].as_py() == b'0onmlkjihgfedcba' - pa.register_extension_type(uuid_type) - buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"])) del array From 43eb94ea1d9b741275668691fca868194e5ee8a0 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 16:18:36 +0200 Subject: [PATCH 12/13] Strengthen IPC test case --- python/pyarrow/tests/test_extension_type.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 50953ed63ae..04ef88acd58 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1123,8 +1123,8 @@ def test_cpp_extension_in_python(tmpdir): assert array[1].as_py() == b'0onmlkjihgfedcba' buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"])) - del array batch = ipc_read_batch(buf) - array = batch.column(0) - assert array.type == uuid_type + reconstructed_array = batch.column(0) + assert reconstructed_array.type == uuid_type + assert reconstructed_array == array From dbe302a9b354a893335512af1cb5c757d1ab7b00 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Fri, 10 Feb 2023 16:30:16 +0200 Subject: [PATCH 13/13] Register UuidExtensionType statically --- python/pyarrow/tests/extensions.pyx | 19 ++++++++++--------- python/pyarrow/tests/test_extension_type.py | 1 - 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index 4fd98650adb..30598ce0bfa 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -62,10 +62,6 @@ cdef extern from * namespace "arrow::py" nogil: return std::make_shared(); } - void RegisterUuidType() { - RegisterExtensionType(std::make_shared()); - } - std::shared_ptr MakeUuidArray() { auto uuid_type = MakeUuidType(); auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]"; @@ -73,17 +69,22 @@ cdef extern from * namespace "arrow::py" nogil: return ExtensionType::WrapArray(uuid_type, result.ValueOrDie()); } + std::once_flag uuid_registered; + + static bool RegisterUuidType() { + std::call_once(uuid_registered, RegisterExtensionType, + std::make_shared()); + return true; + } + + static auto uuid_type_registered = RegisterUuidType(); + } // namespace py } // namespace arrow """ cdef shared_ptr[CDataType] CMakeUuidType" arrow::py::MakeUuidType"() cdef shared_ptr[CArray] CMakeUuidArray" arrow::py::MakeUuidArray"() - cdef void CRegisterUuidType" arrow::py::RegisterUuidType"() - - -def _register_uuid_type(): - CRegisterUuidType() def _make_uuid_type(): diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 04ef88acd58..fa7ece5bc24 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1111,7 +1111,6 @@ def test_cpp_extension_in_python(tmpdir): sys.path.insert(0, str(tmpdir)) mod = __import__('extensions') - mod._register_uuid_type() uuid_type = mod._make_uuid_type() assert uuid_type.extension_name == "uuid" assert uuid_type.storage_type == pa.binary(16)