Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c80315d
GH-40078: [C++] Import/Export ArrowDeviceArrayStream
zeroshade Mar 26, 2024
ce18e5d
fixing format lint
zeroshade Mar 26, 2024
efff4fd
one more lint fix
zeroshade Mar 26, 2024
49190c9
updates from feedback
zeroshade Mar 27, 2024
858e8d5
linting
zeroshade Mar 27, 2024
5c36bee
rebase updates
zeroshade Mar 27, 2024
e4d1699
missed a spot
zeroshade Mar 27, 2024
469ac85
linting
zeroshade Mar 27, 2024
fda7737
update error string
zeroshade Mar 28, 2024
9586883
Update cpp/src/arrow/record_batch.h
zeroshade Apr 5, 2024
ee531cf
Update cpp/src/arrow/record_batch.h
zeroshade Apr 5, 2024
889193c
additions from feedback
zeroshade Apr 5, 2024
d80464f
lint
zeroshade Apr 5, 2024
6dcc07d
propagating device_type and sync_event
zeroshade Apr 10, 2024
ba58477
linting
zeroshade Apr 10, 2024
79b3600
fix for lint
zeroshade Apr 11, 2024
f8c2b59
more tests
zeroshade Apr 16, 2024
fcd8dc9
added tests
zeroshade Apr 16, 2024
dca4503
fix lint
zeroshade Apr 25, 2024
3211ef3
fix docs
zeroshade Apr 25, 2024
ff521ab
more doxygen fixes
zeroshade Apr 25, 2024
aeff09f
updates from feeback
zeroshade Apr 26, 2024
35ef670
add comment about device types
zeroshade Apr 26, 2024
8443150
lint
zeroshade Apr 26, 2024
406ecf1
Update cpp/src/arrow/c/bridge.h
zeroshade Apr 29, 2024
c22768a
update from feedback
zeroshade Apr 29, 2024
bca54e2
linting
zeroshade Apr 29, 2024
d5a6332
use static const null_sync_event
zeroshade Apr 29, 2024
71e152c
clean up templates
zeroshade Apr 30, 2024
c9e63fd
use overload instead of if constexpr
zeroshade Apr 30, 2024
df40625
linting
zeroshade Apr 30, 2024
9284893
Update cpp/src/arrow/c/bridge.cc
zeroshade May 1, 2024
915b723
Update cpp/src/arrow/c/bridge.cc
zeroshade May 1, 2024
46f436e
Update cpp/src/arrow/c/bridge_test.cc
zeroshade May 1, 2024
0a07bee
Update cpp/src/arrow/c/bridge_test.cc
zeroshade May 1, 2024
7b89fbf
updates from feedback and lint
zeroshade May 1, 2024
4915884
fix the python failure
zeroshade May 1, 2024
3dc2d78
lint
zeroshade May 1, 2024
331ce10
add debug check to confirm device_type in ArrayData constructor
zeroshade May 6, 2024
9b14645
fix lint
zeroshade May 9, 2024
381bc7d
add a null check for children in device_type
zeroshade May 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cpp/src/arrow/array/array_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,14 @@ class ARROW_EXPORT Array {
/// \return Status
Status ValidateFull() const;

/// \brief Return the device_type that this array's data is allocated on
///
/// This just delegates to calling device_type on the underlying ArrayData
/// object which backs this Array.
///
/// \return DeviceAllocationType
DeviceAllocationType device_type() const { return data_->device_type(); }

protected:
Array() = default;
ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ TEST_F(TestArray, TestMakeArrayOfNull) {
ASSERT_EQ(array->type(), type);
ASSERT_OK(array->ValidateFull());
ASSERT_EQ(array->length(), length);
ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);
if (is_union(type->id())) {
ASSERT_EQ(array->null_count(), 0);
ASSERT_EQ(array->ComputeLogicalNullCount(), length);
Expand Down Expand Up @@ -719,6 +720,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_OK(array->ValidateFull());
ASSERT_EQ(array->length(), length);
ASSERT_EQ(array->null_count(), 0);
ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);

// test case for ARROW-13321
for (int64_t i : {int64_t{0}, length / 2, length - 1}) {
Expand All @@ -744,6 +746,7 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) {
auto sliced = array->Slice(1, 4);
ASSERT_EQ(sliced->length(), 4);
ASSERT_EQ(sliced->null_count(), 0);
ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);
ARROW_EXPECT_OK(sliced->ValidateFull());
}
}
Expand All @@ -758,6 +761,7 @@ TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
ASSERT_OK(array->ValidateFull());
ASSERT_EQ(array->length(), 4);
ASSERT_EQ(array->null_count(), 0);
ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU);

for (int i = 0; i < 4; i++) {
ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i));
Expand Down Expand Up @@ -797,6 +801,7 @@ TEST_F(TestArray, TestMakeEmptyArray) {
ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type));
ASSERT_OK(array->ValidateFull());
ASSERT_EQ(array->length(), 0);

CheckSpanRoundTrip(*array);
}
}
Expand Down
36 changes: 36 additions & 0 deletions cpp/src/arrow/array/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,42 @@ int64_t ArrayData::ComputeLogicalNullCount() const {
return ArraySpan(*this).ComputeLogicalNullCount();
}

DeviceAllocationType ArrayData::device_type() const {
// we're using 0 as a sentinel value for NOT YET ASSIGNED
// there is explicitly no constant DeviceAllocationType to represent
// the "UNASSIGNED" case as it is invalid for data to not have an
// assigned device type. If it's still 0 at the end, then we return
// CPU as the allocation device type
int type = 0;
for (const auto& buf : buffers) {
if (!buf) continue;
if (type == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition implies that, conversely, in non-debug mode we could immediately return when we encounter a buffer? Instead of continue looping on all buffers and children...

type = static_cast<int>(buf->device_type());
} else {
DCHECK_EQ(type, static_cast<int>(buf->device_type()));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this only be a debug check? You can create an array with buffers from a different device, or do we disallow that upon creation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't currently disallow it on creation. We don't do any checking of the buffer device types on ArrayData or Array creation currently. It feels wrong to do so in a non-debug context but it might make sense to add debug checks to confirm the device types of all the buffers match in the constructor. Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it might make sense to add debug checks to confirm the device types of all the buffers match in the constructor

I think that would make sense yes, if we don't want to support that (I don't know if there would actually be use cases for allowing it, in which case I would rather expect a normal error here when trying to export it, saying it requires a single device type)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the debug checks in the ArrayData constructors

}
}

for (const auto& child : child_data) {
if (!child) continue;
if (type == 0) {
type = static_cast<int>(child->device_type());
} else {
DCHECK_EQ(type, static_cast<int>(child->device_type()));
}
}

if (dictionary) {
if (type == 0) {
type = static_cast<int>(dictionary->device_type());
} else {
DCHECK_EQ(type, static_cast<int>(dictionary->device_type()));
}
}

return type == 0 ? DeviceAllocationType::kCPU : static_cast<DeviceAllocationType>(type);
}

// ----------------------------------------------------------------------
// Methods for ArraySpan

Expand Down
21 changes: 21 additions & 0 deletions cpp/src/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ struct ARROW_EXPORT ArrayData {
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
#ifndef NDEBUG
// in debug mode, call the `device_type` function to trigger
// the DCHECKs that validate all the buffers are on the same device
ARROW_UNUSED(this->device_type());
#endif
}

ArrayData(std::shared_ptr<DataType> type, int64_t length,
Expand All @@ -110,6 +115,12 @@ struct ARROW_EXPORT ArrayData {
: ArrayData(std::move(type), length, null_count, offset) {
this->buffers = std::move(buffers);
this->child_data = std::move(child_data);
#ifndef NDEBUG
// in debug mode, call the `device_type` function to trigger
// the DCHECKs that validate all the buffers (including children)
// are on the same device
ARROW_UNUSED(this->device_type());
#endif
}

static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
Expand Down Expand Up @@ -358,6 +369,16 @@ struct ARROW_EXPORT ArrayData {
/// \see GetNullCount
int64_t ComputeLogicalNullCount() const;

/// \brief Returns the device_type of the underlying buffers and children
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, but we tend to use infinitives in docstring ("return", not "returns")

///
/// If there are no buffers in this ArrayData object, it just returns
/// DeviceAllocationType::kCPU as a default. We also assume that all buffers
/// should be allocated on the same device type and perform DCHECKs to confirm
/// this in debug mode.
///
/// \return DeviceAllocationType
DeviceAllocationType device_type() const;

std::shared_ptr<DataType> type;
int64_t length = 0;
mutable std::atomic<int64_t> null_count{0};
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ class NullArrayFactory {
}

Status Visit(const StructType& type) {
for (int i = 0; i < type_->num_fields(); ++i) {
for (int i = 0; i < type.num_fields(); ++i) {
ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(type, i, length_));
}
return Status::OK();
Expand Down
Loading