95 changes: 95 additions & 0 deletions python/pyarrow/tests/extensions.pyx
@@ -0,0 +1,95 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language=c++
# cython: language_level = 3

import pyarrow as pa
from pyarrow.lib cimport *

cdef extern from * namespace "arrow::py" nogil:
    """
    #include "arrow/status.h"
    #include "arrow/extension_type.h"
    #include "arrow/ipc/json_simple.h"

    namespace arrow {
    namespace py {

    class UuidArray : public ExtensionArray {
     public:
      using ExtensionArray::ExtensionArray;
    };

    // A minimal C++ extension type whose storage is fixed_size_binary(16).
    class UuidType : public ExtensionType {
     public:
      UuidType() : ExtensionType(fixed_size_binary(16)) {}
      std::string extension_name() const override { return "uuid"; }

      bool ExtensionEquals(const ExtensionType& other) const override {
        return other.extension_name() == this->extension_name();
      }

      std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
        return std::make_shared<ExtensionArray>(data);
      }

      Result<std::shared_ptr<DataType>> Deserialize(
          std::shared_ptr<DataType> storage_type,
          const std::string& serialized) const override {
        return std::make_shared<UuidType>();
      }

      std::string Serialize() const override { return ""; }
    };


    std::shared_ptr<DataType> MakeUuidType() {
      return std::make_shared<UuidType>();
    }

    std::shared_ptr<Array> MakeUuidArray() {
      auto uuid_type = MakeUuidType();
      auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]";
      auto result = ipc::internal::json::ArrayFromJSON(fixed_size_binary(16), json);
      return ExtensionType::WrapArray(uuid_type, result.ValueOrDie());
    }

    std::once_flag uuid_registered;

    // Register the type exactly once when this module is loaded, so IPC
    // readers can reconstruct "uuid" arrays from schema metadata.
    static bool RegisterUuidType() {
      std::call_once(uuid_registered, RegisterExtensionType,
                     std::make_shared<UuidType>());
      return true;
    }

    static auto uuid_type_registered = RegisterUuidType();

    }  // namespace py
    }  // namespace arrow
    """

    cdef shared_ptr[CDataType] CMakeUuidType" arrow::py::MakeUuidType"()
    cdef shared_ptr[CArray] CMakeUuidArray" arrow::py::MakeUuidArray"()


def _make_uuid_type():
    return pyarrow_wrap_data_type(CMakeUuidType())


def _make_uuid_array():
    return pyarrow_wrap_array(CMakeUuidArray())
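
For comparison, a type with the same storage layout can be declared from Python alone with pyarrow's documented ExtensionType API. The sketch below is illustrative only; the PyUuidType name and the registration call are not part of this patch:

import pyarrow as pa


class PyUuidType(pa.ExtensionType):
    """A uuid extension type defined in Python over fixed_size_binary(16) storage."""

    def __init__(self):
        super().__init__(pa.binary(16), "uuid")

    def __arrow_ext_serialize__(self):
        # The type carries no parameters; the name alone identifies it.
        return b""

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        return PyUuidType()


# Registration lets IPC readers rebuild the type from schema metadata.
pa.register_extension_type(PyUuidType())

storage = pa.array([b"abcdefghijklmno0", b"0onmlkjihgfedcba"], pa.binary(16))
uuid_array = pa.ExtensionArray.from_storage(PyUuidType(), storage)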
48 changes: 48 additions & 0 deletions python/pyarrow/tests/test_extension_type.py
@@ -15,9 +15,13 @@
# specific language governing permissions and limitations
# under the License.

import os
import pickle
import shutil
import subprocess
import weakref
from uuid import uuid4, UUID
import sys

import numpy as np
import pyarrow as pa
@@ -1079,3 +1083,47 @@ def test_array_constructor_from_pandas():
        pd.Series([1, 2, 3], dtype="category"), type=IntegerType()
    )
    assert result.equals(expected)


@pytest.mark.cython
def test_cpp_extension_in_python(tmpdir):
    from .test_cython import (
        setup_template, compiler_opts, test_ld_path, test_util, here)
    with tmpdir.as_cwd():
        # Set up temporary workspace
        pyx_file = 'extensions.pyx'
        shutil.copyfile(os.path.join(here, pyx_file),
                        os.path.join(str(tmpdir), pyx_file))
        # Create setup.py file
        setup_code = setup_template.format(pyx_file=pyx_file,
                                           compiler_opts=compiler_opts,
                                           test_ld_path=test_ld_path)
        with open('setup.py', 'w') as f:
            f.write(setup_code)

        subprocess_env = test_util.get_modified_env_with_pythonpath()

        # Compile extension module
        subprocess.check_call([sys.executable, 'setup.py',
                               'build_ext', '--inplace'],
                              env=subprocess_env)

        sys.path.insert(0, str(tmpdir))
        mod = __import__('extensions')

        uuid_type = mod._make_uuid_type()
        assert uuid_type.extension_name == "uuid"
        assert uuid_type.storage_type == pa.binary(16)

        array = mod._make_uuid_array()
        assert array.type == uuid_type
        assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba']
        assert array[0].as_py() == b'abcdefghijklmno0'
        assert array[1].as_py() == b'0onmlkjihgfedcba'

        buf = ipc_write_batch(pa.RecordBatch.from_arrays([array], ["uuid"]))

        batch = ipc_read_batch(buf)
        reconstructed_array = batch.column(0)
        assert reconstructed_array.type == uuid_type
        assert reconstructed_array == array
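
ipc_write_batch and ipc_read_batch are small helpers defined earlier in test_extension_type.py and not shown in this diff. A rough equivalent using only the public pyarrow.ipc API (the function names below are illustrative) would be:

import pyarrow as pa


def write_batch_to_buffer(batch):
    # Serialize a single record batch into an in-memory Arrow IPC stream.
    sink = pa.BufferOutputStream()
    with pa.ipc.new_stream(sink, batch.schema) as writer:
        writer.write_batch(batch)
    return sink.getvalue()


def read_batch_from_buffer(buf):
    # Read the batch back; a registered extension type is reconstructed
    # from the schema's extension metadata rather than its raw storage type.
    reader = pa.ipc.open_stream(buf)
    return reader.read_next_batch()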
17 changes: 14 additions & 3 deletions python/pyarrow/types.pxi
@@ -836,6 +836,18 @@ cdef class BaseExtensionType(DataType):
        DataType.init(self, type)
        self.ext_type = <const CExtensionType*> type.get()

    def __arrow_ext_class__(self):
        """
        The associated array extension class
        """
        return ExtensionArray

    def __arrow_ext_scalar_class__(self):
        """
        The associated scalar class
        """
        return ExtensionScalar

    @property
    def extension_name(self):
        """
@@ -988,7 +1000,6 @@ cdef class ExtensionType(BaseExtensionType):
"""
return ExtensionScalar


cdef class PyExtensionType(ExtensionType):
    """
    Concrete base class for Python-defined extension types based on pickle
@@ -1451,8 +1462,8 @@ cdef class Schema(_Weakrefable):
"""
A named collection of types a.k.a schema. A schema defines the
column names and types in a record batch or table data structure.
They also contain metadata about the columns. For example, schemas
converted from Pandas contain metadata about their original Pandas
    types so they can be converted back to the same types.

    Warnings