Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
3b995ce
Added failing test case
WillAyd Jan 8, 2022
9ad70ba
incomplete impl but some progress
WillAyd Jan 8, 2022
8d12595
exceptions implemented
WillAyd Jan 9, 2022
88165c5
passed tests for exception messages
WillAyd Jan 9, 2022
bcd4352
clang format fixup
WillAyd Jan 9, 2022
ba7d698
uncomment test, still failing impl
WillAyd Jan 9, 2022
e817c35
no more segfault
WillAyd Jan 9, 2022
0e2d7a9
checkpoint for casting elements
WillAyd Jan 24, 2022
c3011d6
Datum cast hackery checkpoint
WillAyd Jan 24, 2022
f099bd5
replaced unused code with NotImplemented
WillAyd Jan 24, 2022
2d22e08
working scalar impl
WillAyd Jan 24, 2022
4a2fd05
code cleanup
WillAyd Jan 24, 2022
2bf3e22
passing tests
WillAyd Jan 24, 2022
7df2f01
format and cleanup
WillAyd Jan 24, 2022
bc21c88
revert inadvertent license typo change
WillAyd Jan 24, 2022
60dedcd
simplified impl
WillAyd Jan 24, 2022
a9d6f53
clang fixup
WillAyd Jan 24, 2022
daf9ecd
comment cleanup
WillAyd Jan 24, 2022
48e6ac2
remove auto from external loops
WillAyd Jan 24, 2022
32cb09b
Merge remote-tracking branch 'upstream/master' into arrow-1888
WillAyd Jan 25, 2022
a5b6ca4
TODO removal
WillAyd Jan 25, 2022
03a1a51
removed auto i loop initialization
WillAyd Jan 25, 2022
a8f843e
test with slice offset
WillAyd Feb 1, 2022
212fb2d
simplify return type
WillAyd Feb 1, 2022
f8b1b4c
scalar valid checks
WillAyd Feb 1, 2022
dbb746e
Merge remote-tracking branch 'upstream/master' into arrow-1888
WillAyd Feb 1, 2022
23aa3bf
clang-format
WillAyd Feb 1, 2022
6d0b197
initial feedback
WillAyd Feb 3, 2022
3528df6
better error messages
WillAyd Feb 3, 2022
59718ec
clang-format
WillAyd Feb 3, 2022
c6856ca
updated scanner test
WillAyd Feb 5, 2022
f73893e
make longer structs for CheckScalar test
WillAyd Feb 5, 2022
5be79f5
added comments for research
WillAyd Feb 5, 2022
a2c1346
compiling with segfault
WillAyd Feb 5, 2022
b5b96d5
added struct ToString for different sizes test
WillAyd Feb 5, 2022
ae87ae7
better test; revert structtype change
WillAyd Feb 5, 2022
1de6bea
revert some things
WillAyd Feb 5, 2022
4ff815e
getting warmer
WillAyd Feb 5, 2022
b051623
clang-format
WillAyd Feb 5, 2022
e7b3363
Merge remote-tracking branch 'upstream/master' into arrow-1888
WillAyd Feb 6, 2022
20fa5d9
passing test
WillAyd Feb 6, 2022
1ed9984
all tests passing
WillAyd Feb 6, 2022
090c040
clang-format
WillAyd Feb 6, 2022
9a505d6
new test
WillAyd Feb 8, 2022
1e6b660
MSVC compat
WillAyd Feb 8, 2022
cc096b9
semi passing tests
WillAyd Feb 8, 2022
dc9f386
clang-format
WillAyd Feb 8, 2022
c7ee877
passing nullability test
WillAyd Feb 9, 2022
c7150aa
variable cleanup
WillAyd Feb 10, 2022
5ebefd6
simplified tests
WillAyd Feb 10, 2022
6200023
clang-format
WillAyd Feb 10, 2022
8e8e5d6
introduce null data
WillAyd Feb 10, 2022
4214565
added test with nullability buffer
WillAyd Feb 10, 2022
d616271
more efficient slice / buffer handling
WillAyd Feb 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,84 @@ void AddListCast(CastFunction* func) {
DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
}

// Kernel implementation for struct -> struct casts.
//
// The input and output struct types must have the same number of fields, with
// identical names at each position. Each child is cast independently to the
// type of the output field at the same position.
struct CastStruct {
  // Check that `source` can be cast to `target` field by field.
  //
  // Returns a TypeError when the field counts differ, when the names at some
  // position differ, or when a nullable source field maps onto a non-nullable
  // target field. Returns OK otherwise.
  static Status CheckCompatible(const StructType& source, const StructType& target) {
    const auto num_fields = source.num_fields();
    if (num_fields != target.num_fields()) {
      return Status::TypeError("struct field sizes do not match: ", source.ToString(),
                               " ", target.ToString());
    }

    for (int idx = 0; idx < num_fields; ++idx) {
      const auto& src_field = source.field(idx);
      const auto& dst_field = target.field(idx);

      if (src_field->name() != dst_field->name()) {
        return Status::TypeError("struct field names do not match: ", source.ToString(),
                                 " ", target.ToString());
      }

      if (src_field->nullable() && !dst_field->nullable()) {
        return Status::TypeError("cannot cast nullable struct to non-nullable struct: ",
                                 source.ToString(), " ", target.ToString());
      }
    }
    return Status::OK();
  }

  // Cast the struct held in `batch[0]` (scalar or array) into `out`.
  //
  // The cast options are read from the kernel state. Any error from validating
  // the two struct types or from casting an individual child is returned as-is.
  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    const CastOptions& options = CastState::Get(ctx);
    const auto& in_type = checked_cast<const StructType&>(*batch[0].type());
    const auto& out_type = checked_cast<const StructType&>(*out->type());

    Status compatibility = CheckCompatible(in_type, out_type);
    if (!compatibility.ok()) {
      return compatibility;
    }

    const auto num_fields = in_type.num_fields();

    if (out->kind() == Datum::SCALAR) {
      const auto& in_scalar = checked_cast<const StructScalar&>(*batch[0].scalar());
      auto* out_scalar = checked_cast<StructScalar*>(out->scalar().get());

      // The output scalar is expected to start out as null; it is only marked
      // valid once every field has been cast successfully.
      DCHECK(!out_scalar->is_valid);
      if (!in_scalar.is_valid) {
        return Status::OK();
      }

      for (int idx = 0; idx < num_fields; ++idx) {
        const auto& field_value = in_scalar.value[idx];
        ARROW_ASSIGN_OR_RAISE(Datum cast_value,
                              Cast(field_value, out_type.field(idx)->type(), options,
                                   ctx->exec_context()));
        DCHECK_EQ(Datum::SCALAR, cast_value.kind());
        out_scalar->value.push_back(cast_value.scalar());
      }
      out_scalar->is_valid = true;
      return Status::OK();
    }

    const ArrayData& in_array = *batch[0].array();
    ArrayData* out_array = out->mutable_array();

    // Carry over the top-level validity bits for [offset, offset + length), if
    // the input has a validity bitmap at all.
    const auto& in_validity = in_array.buffers[0];
    if (in_validity) {
      ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
                            CopyBitmap(ctx->memory_pool(), in_validity->data(),
                                       in_array.offset, in_array.length));
    }

    for (int idx = 0; idx < num_fields; ++idx) {
      // Restrict each child to the parent's offset/length window before casting.
      auto child_slice = in_array.child_data[idx]->Slice(in_array.offset, in_array.length);

      ARROW_ASSIGN_OR_RAISE(Datum cast_child,
                            Cast(child_slice, out_type.field(idx)->type(), options,
                                 ctx->exec_context()));

      DCHECK_EQ(Datum::ARRAY, cast_child.kind());
      out_array->child_data.push_back(cast_child.array());
    }

    return Status::OK();
  }
};

// Register the struct -> struct cast kernel (CastStruct::Exec) on `func`.
void AddStructToStructCast(CastFunction* func) {
  ScalarKernel struct_kernel;
  // Accept any struct input; the output type is the requested cast target.
  struct_kernel.signature =
      KernelSignature::Make({InputType(StructType::type_id)}, kOutputTargetType);
  // CastStruct::Exec fills in the output validity bitmap itself by copying the
  // input's bitmap.
  struct_kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
  struct_kernel.exec = CastStruct::Exec;
  DCHECK_OK(func->AddKernel(StructType::type_id, std::move(struct_kernel)));
}

} // namespace

std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
Expand All @@ -174,6 +252,7 @@ std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
// So is struct
auto cast_struct = std::make_shared<CastFunction>("cast_struct", Type::STRUCT);
AddCommonCasts(Type::STRUCT, kOutputTargetType, cast_struct.get());
AddStructToStructCast(cast_struct.get());

// So is dictionary
auto cast_dictionary =
Expand Down
111 changes: 111 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2218,6 +2218,117 @@ TEST(Cast, ListToListOptionsPassthru) {
}
}

// For every ordered pair (source type, destination type) drawn from
// `value_types`, build a two-field struct array of the source type and check
// that casting it yields the equivalent struct array of the destination type.
// Each pair is checked both without and with a top-level validity bitmap.
static void CheckStructToStruct(
    const std::vector<std::shared_ptr<DataType>>& value_types) {
  const std::vector<std::string> names = {"a", "b"};
  const char* const kFieldAJson = "[1, 2, 3, 4, null]";
  const char* const kFieldBJson = "[null, 7, 8, 9, 0]";

  for (const auto& from_type : value_types) {
    for (const auto& to_type : value_types) {
      auto src_a = ArrayFromJSON(from_type, kFieldAJson);
      auto src_b = ArrayFromJSON(from_type, kFieldBJson);
      auto dest_a = ArrayFromJSON(to_type, kFieldAJson);
      auto dest_b = ArrayFromJSON(to_type, kFieldBJson);

      // Case 1: no top-level validity bitmap on the struct.
      ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({src_a, src_b}, names));
      ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({dest_a, dest_b}, names));
      CheckCast(src, dest);

      // Case 2: same children, but with struct-level nulls on both sides.
      std::shared_ptr<Buffer> validity;
      BitmapFromVector<int>({0, 1, 0, 1, 0}, &validity);

      ASSERT_OK_AND_ASSIGN(auto src_nulls,
                           StructArray::Make({src_a, src_b}, names, validity));
      ASSERT_OK_AND_ASSIGN(auto dest_nulls,
                           StructArray::Make({dest_a, dest_b}, names, validity));
      CheckCast(src_nulls, dest_nulls);
    }
  }
}

// Structs with matching field counts and names cast between every pairing of
// the listed child types.
TEST(Cast, StructToSameSizedAndNamedStruct) {
  const std::vector<std::shared_ptr<DataType>> child_types = {int32(), float32(),
                                                              int64()};
  CheckStructToStruct(child_types);
}

// Casting to a struct with the same number of fields but different field names
// must fail with a TypeError naming both types.
TEST(Cast, StructToSameSizedButDifferentNamedStruct) {
  // Source: struct<a: int8, b: int8>
  const std::vector<std::string> src_names = {"a", "b"};
  auto col_a = ArrayFromJSON(int8(), "[1, 2]");
  auto col_b = ArrayFromJSON(int8(), "[3, 4]");
  ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({col_a, col_b}, src_names));

  // Target: struct<c: int8, d: int8>
  const std::vector<std::shared_ptr<Field>> dest_fields = {
      std::make_shared<Field>("c", int8()), std::make_shared<Field>("d", int8())};
  const auto dest = arrow::struct_(dest_fields);
  const auto options = CastOptions::Safe(dest);

  EXPECT_RAISES_WITH_MESSAGE_THAT(
      TypeError,
      ::testing::HasSubstr("Type error: struct field names do not match: struct<a: int8, "
                           "b: int8> struct<c: int8, d: int8>"),
      Cast(src, options));
}

// Casting to a struct with a different number of fields must fail with a
// TypeError naming both types.
TEST(Cast, StructToDifferentSizeStruct) {
  // Source: struct<a: int8, b: int8>
  const std::vector<std::string> src_names = {"a", "b"};
  auto col_a = ArrayFromJSON(int8(), "[1, 2]");
  auto col_b = ArrayFromJSON(int8(), "[3, 4]");
  ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({col_a, col_b}, src_names));

  // Target carries one extra field: struct<a: int8, b: int8, c: int8>
  const std::vector<std::shared_ptr<Field>> dest_fields = {
      std::make_shared<Field>("a", int8()), std::make_shared<Field>("b", int8()),
      std::make_shared<Field>("c", int8())};
  const auto dest = arrow::struct_(dest_fields);
  const auto options = CastOptions::Safe(dest);

  EXPECT_RAISES_WITH_MESSAGE_THAT(
      TypeError,
      ::testing::HasSubstr("Type error: struct field sizes do not match: struct<a: int8, "
                           "b: int8> struct<a: int8, b: int8, c: int8>"),
      Cast(src, options));
}

// Field nullability may be widened (non-nullable -> nullable) by a cast, but
// narrowing it (nullable -> non-nullable) must fail with a TypeError.
TEST(Cast, StructToSameSizedButDifferentNullabilityStruct) {
  // Build the fields {a: int8, b: int8} with the requested nullability.
  auto make_fields = [](bool nullable) {
    return std::vector<std::shared_ptr<Field>>{
        std::make_shared<Field>("a", int8(), nullable),
        std::make_shared<Field>("b", int8(), nullable)};
  };

  // Non-nullable -> nullable succeeds.
  {
    const auto non_nullable_fields = make_fields(false);
    auto src_a = ArrayFromJSON(int8(), "[1, 2]");
    auto src_b = ArrayFromJSON(int8(), "[3, 4]");
    ASSERT_OK_AND_ASSIGN(auto src,
                         StructArray::Make({src_a, src_b}, non_nullable_fields));

    const auto nullable_fields = make_fields(true);
    auto dest_a = ArrayFromJSON(int8(), "[1, 2]");
    auto dest_b = ArrayFromJSON(int8(), "[3, 4]");
    ASSERT_OK_AND_ASSIGN(auto dest, StructArray::Make({dest_a, dest_b}, nullable_fields));

    CheckCast(src, dest);
  }

  // Nullable -> non-nullable is rejected.
  {
    const auto nullable_fields = make_fields(true);
    auto src_a = ArrayFromJSON(int8(), "[1, null]");
    auto src_b = ArrayFromJSON(int8(), "[3, 4]");
    ASSERT_OK_AND_ASSIGN(auto src, StructArray::Make({src_a, src_b}, nullable_fields));

    const auto non_nullable_fields = make_fields(false);
    const auto dest_type = arrow::struct_(non_nullable_fields);
    const auto options = CastOptions::Safe(dest_type);

    EXPECT_RAISES_WITH_MESSAGE_THAT(
        TypeError,
        ::testing::HasSubstr(
            "Type error: cannot cast nullable struct to non-nullable "
            "struct: struct<a: int8, b: int8> struct<a: int8 not null, b: int8 not null>"),
        Cast(src, options));
  }
}

TEST(Cast, IdentityCasts) {
// ARROW-4102
auto CheckIdentityCast = [](std::shared_ptr<DataType> type, const std::string& json) {
Expand Down
4 changes: 1 addition & 3 deletions cpp/src/arrow/dataset/scanner_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1626,9 +1626,7 @@ TEST(ScanNode, MaterializationOfNestedVirtualColumn) {

// TODO(ARROW-1888): allow scanner to "patch up" structs with casts
EXPECT_FINISHES_AND_RAISES_WITH_MESSAGE_THAT(
NotImplemented,
::testing::HasSubstr("Unsupported cast from struct<e: int64> to struct"),
plan.Run());
TypeError, ::testing::HasSubstr("struct field sizes do not match"), plan.Run());
}

TEST(ScanNode, MinimalEndToEnd) {
Expand Down