Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
854e114
Initial sceleton for interchange package
AlenkaF Oct 25, 2022
010d9a8
Add a dataframe (PyArrowTableXchg) class methods
AlenkaF Oct 26, 2022
c0af309
Add a subpackage for testing interchange protocol, add a test for Tab…
AlenkaF Oct 26, 2022
842ba3e
Add column (PyArrowColumn) class methods
AlenkaF Nov 7, 2022
61eb00f
Add buffer (PyArrowBuffer) class methods, some changes and main tests
AlenkaF Nov 8, 2022
027012d
Make changes to buffer, column and dataframe classes
AlenkaF Nov 8, 2022
6f746fb
Make changes to from_dataframe.py skeleton
AlenkaF Nov 8, 2022
1669224
Add extra tests and make minor corrections
AlenkaF Nov 8, 2022
473414e
Run linter
AlenkaF Nov 8, 2022
cba4374
Make changes to the code to make pa.Table -> pd.DataFrame work for in…
AlenkaF Nov 9, 2022
c021451
Correct linter error and add a check for TypedDict import
AlenkaF Nov 9, 2022
7e1e6bd
Use len(...) for the size of the pa.Array/pa.ChunkedArray
AlenkaF Nov 9, 2022
df9b24b
Add missing annotations import and remove TypedDict leftover
AlenkaF Nov 9, 2022
494ffbc
Remove bool bit_width check
AlenkaF Nov 10, 2022
784d178
Change buffer representation of boolean arrays
AlenkaF Nov 14, 2022
33784da
Remove dataframe protocol abstract classes and move the docstrings an…
AlenkaF Nov 15, 2022
2860911
Add missing changes to the class names and references
AlenkaF Nov 15, 2022
92a1765
Add ColumnNullType = non nullable for columns without missing values
AlenkaF Nov 15, 2022
95f7f45
Correct test error after describe_null() change
AlenkaF Nov 15, 2022
964e9da
Change DtypeKind to be imported from column.py
AlenkaF Nov 15, 2022
3658088
Add change for string dtype and bitmask - not sure about it though
AlenkaF Nov 17, 2022
caefeed
Add a change for dictionary arrays
AlenkaF Nov 17, 2022
8871d11
Add corrections for timestamp dtype
AlenkaF Nov 17, 2022
ad9b2e8
Change size() to size
AlenkaF Nov 17, 2022
2b83dd8
Add schema to empty record batch and keep the number of chukes fixed …
AlenkaF Nov 22, 2022
4f150ef
Add offset for sliced array with a test and use datetime instead of p…
AlenkaF Nov 22, 2022
1a456fe
Fix linter errors
AlenkaF Nov 24, 2022
2632c55
Add a skip for the test using from_dataframe() added in pandas versio…
AlenkaF Nov 24, 2022
f177b15
Make changes to the from_dataframe.py skeleton
AlenkaF Nov 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions python/pyarrow/interchange/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
103 changes: 103 additions & 0 deletions python/pyarrow/interchange/buffer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import annotations
import enum

import pyarrow as pa


class DlpackDeviceType(enum.IntEnum):
    """
    Device type codes, numbered to match DLPack.

    Members are plain integers (``IntEnum``), so they compare equal to the
    raw codes and can be handed to consumers that expect an ``int``.
    """

    # Codes are not contiguous: 5 and 6 are intentionally absent here.
    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10


class _PyArrowBuffer:
    """
    Wrapper that exposes a PyArrow buffer through the interchange protocol.

    Data in the buffer is guaranteed to be contiguous in memory.

    Note that there is no dtype attribute present, a buffer can be thought of
    as simply a block of memory. However, if the column that the buffer is
    attached to has a dtype that's supported by DLPack and ``__dlpack__`` is
    implemented, then that dtype information will be contained in the return
    value from ``__dlpack__``.

    This distinction is useful to support both (a) data exchange via DLPack
    on a buffer and (b) dtypes like variable-length strings which do not have
    a fixed number of bytes per element.
    """

    def __init__(self, x: pa.Buffer, allow_copy: bool = True) -> None:
        """
        Wrap a PyArrow buffer.

        Parameters
        ----------
        x : pa.Buffer
            The buffer whose size, address and device are exposed.
        allow_copy : bool, default True
            Accepted as part of the signature; this class does not read it.
        """
        self._x = x

    @property
    def bufsize(self) -> int:
        """
        Size of the wrapped buffer, in bytes.
        """
        return self._x.size

    @property
    def ptr(self) -> int:
        """
        Address of the first byte of the buffer, as an integer.
        """
        return self._x.address

    def __dlpack__(self):
        """
        Produce a DLPack capsule (see array API standard).

        DLPack export is optional for the protocol, because it's not
        completely trivial to implement for a Python-only library; it is
        not implemented here.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("__dlpack__")

    def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
        """
        Device type and device ID for where the data in the buffer resides.

        Uses device type codes matching DLPack. This must be implemented
        even though ``__dlpack__`` is not.

        Returns
        -------
        tuple
            ``(DlpackDeviceType.CPU, None)`` when the buffer is on the CPU.

        Raises
        ------
        NotImplementedError
            If the wrapped buffer does not report ``is_cpu``.
        """
        # Only CPU-resident buffers are handled; reject everything else early.
        if not self._x.is_cpu:
            raise NotImplementedError("__dlpack_device__")
        return (DlpackDeviceType.CPU, None)

    def __repr__(self) -> str:
        """
        Return ``PyArrowBuffer({...})`` listing size, pointer and device name.
        """
        summary = {
            "bufsize": self.bufsize,
            "ptr": self.ptr,
            "device": self.__dlpack_device__()[0].name,
        }
        # Formatting a dict with an empty spec yields the same text as str().
        return f"PyArrowBuffer({summary})"
Loading