Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
359b29e
Adding validation for Date64Type, Time32Type, and Time64Type
JabariBooker Dec 21, 2021
fd2e08d
Merge branch 'master' of https://github.com/apache/arrow
JabariBooker Dec 21, 2021
b13946b
Corrected type usage for Time64Type check
JabariBooker Dec 21, 2021
bd1209e
Resolving formatting issues and adding constexpr's
JabariBooker Dec 22, 2021
aeedda5
Using kCamelCase for constants
JabariBooker Dec 22, 2021
9400fbb
Renaming time constants
JabariBooker Dec 22, 2021
ec369c4
Merge branch 'master' into ARROW-10924
Jan 4, 2022
68f62a5
Merge branch 'master' into ARROW-10924
JabariBooker Jan 7, 2022
f68c2a8
Moving constexpr's and making small grammatical edits
JabariBooker Jan 7, 2022
827b6a4
More grammar corrections and updating array test constant for Date64S…
JabariBooker Jan 7, 2022
4ace496
Added random generation handling for temporal types
JabariBooker Jan 8, 2022
b7b9d16
Merge branch 'master' into ARROW-10924
JabariBooker Jan 8, 2022
36c3dfc
Formatting code properly
JabariBooker Jan 8, 2022
6d7189b
Changing test constants to reflect new restrictions
JabariBooker Jan 8, 2022
d8be683
Added new test buffers for date64
JabariBooker Jan 10, 2022
997c7a1
Fixed random number generation and constants for testing date64
JabariBooker Jan 11, 2022
e409c73
Changes to the last unit tests for date64 restrictions
JabariBooker Jan 28, 2022
27411f2
Missing changes to constant from last commit
JabariBooker Jan 28, 2022
75febd1
Updated data generation for integration tests; other minor changes
JabariBooker Jan 31, 2022
e1ba73a
Added flag for date64 validation in integration tests, set to false
JabariBooker Jan 31, 2022
cc7192f
Added integration test flag for TIME32/64 types
JabariBooker Feb 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ static ScalarVector GetScalars() {
std::make_shared<UInt64Scalar>(3),
std::make_shared<DoubleScalar>(3.0),
std::make_shared<Date32Scalar>(10),
std::make_shared<Date64Scalar>(11),
std::make_shared<Date64Scalar>(864000000),
std::make_shared<Time32Scalar>(1000, time32(TimeUnit::SECOND)),
std::make_shared<Time64Scalar>(1111, time64(TimeUnit::MICRO)),
std::make_shared<TimestampScalar>(1111, timestamp(TimeUnit::MILLI)),
Expand Down
75 changes: 75 additions & 0 deletions cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,81 @@ struct ValidateArrayImpl {
return Status::OK();
}

// Validates a date64 array.
//
// The fixed-width buffer checks always run. When `full_validation` is set,
// every non-null value must additionally be an exact multiple of the number
// of milliseconds in one day; the first value that is not produces an
// Invalid status. Null slots are accepted without inspection.
Status Visit(const Date64Type& type) {
  RETURN_NOT_OK(ValidateFixedWidthBuffers());
  if (!full_validation) {
    return Status::OK();
  }

  using value_type = typename Date64Type::c_type;
  constexpr value_type kMillisPerDay = 24 * 60 * 60 * 1000;

  // Called once per non-null slot.
  const auto check_value = [&](value_type millis) -> Status {
    if (millis % kMillisPerDay == 0) {
      return Status::OK();
    }
    return Status::Invalid(type, " ", millis,
                           " does not represent a whole number of days");
  };
  // Called once per null slot: nothing to verify.
  const auto accept_null = []() { return Status::OK(); };

  return VisitArrayDataInline<Date64Type>(data, check_value, accept_null);
}

// Validates a time32 array.
//
// The fixed-width buffer checks always run. When `full_validation` is set,
// every non-null value must additionally lie in [0, one full day), where the
// length of a day is expressed in the unit of `type` (seconds or
// milliseconds). The first out-of-range value produces an Invalid status.
// Null slots are accepted without inspection.
Status Visit(const Time32Type& type) {
  RETURN_NOT_OK(ValidateFixedWidthBuffers());
  if (!full_validation) {
    return Status::OK();
  }

  using c_type = typename Time32Type::c_type;
  constexpr c_type kFullDaySeconds = 60 * 60 * 24;
  constexpr c_type kFullDayMillis = kFullDaySeconds * 1000;

  // The unit belongs to the type, not to individual values, so resolve the
  // exclusive upper bound and the message suffix once instead of re-testing
  // the unit for every element.
  c_type upper_bound = 0;
  const char* unit_suffix = "";
  if (type.unit() == TimeUnit::SECOND) {
    upper_bound = kFullDaySeconds;
    unit_suffix = "s";
  } else if (type.unit() == TimeUnit::MILLI) {
    upper_bound = kFullDayMillis;
    unit_suffix = "ms";
  } else {
    // No range is checked for any other unit.
    return Status::OK();
  }

  return VisitArrayDataInline<Time32Type>(
      data,
      [&](c_type time) {
        if (time < 0 || time >= upper_bound) {
          return Status::Invalid(type, " ", time,
                                 " is not within the acceptable range of ", "[0, ",
                                 upper_bound, ") ", unit_suffix);
        }
        return Status::OK();
      },
      []() { return Status::OK(); });
}

// Validates a time64 array.
//
// The fixed-width buffer checks always run. When `full_validation` is set,
// every non-null value must additionally lie in [0, one full day), where the
// length of a day is expressed in the unit of `type` (microseconds or
// nanoseconds). The first out-of-range value produces an Invalid status.
// Null slots are accepted without inspection.
Status Visit(const Time64Type& type) {
  RETURN_NOT_OK(ValidateFixedWidthBuffers());
  if (!full_validation) {
    return Status::OK();
  }

  using c_type = typename Time64Type::c_type;
  // The LL suffix forces 64-bit arithmetic for the day-length products.
  constexpr c_type kFullDayMicro = 1000000LL * 60 * 60 * 24;
  constexpr c_type kFullDayNano = kFullDayMicro * 1000;

  // The unit belongs to the type, not to individual values, so resolve the
  // exclusive upper bound and the message suffix once instead of re-testing
  // the unit for every element.
  c_type upper_bound = 0;
  const char* unit_suffix = "";
  if (type.unit() == TimeUnit::MICRO) {
    upper_bound = kFullDayMicro;
    unit_suffix = "us";
  } else if (type.unit() == TimeUnit::NANO) {
    upper_bound = kFullDayNano;
    unit_suffix = "ns";
  } else {
    // No range is checked for any other unit.
    return Status::OK();
  }

  return VisitArrayDataInline<Time64Type>(
      data,
      [&](c_type time) {
        if (time < 0 || time >= upper_bound) {
          return Status::Invalid(type, " ", time,
                                 " is not within the acceptable range of ", "[0, ",
                                 upper_bound, ") ", unit_suffix);
        }
        return Status::OK();
      },
      []() { return Status::OK(); });
}

Status Visit(const BinaryType& type) { return ValidateBinaryLike(type); }

Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
Expand Down
12 changes: 8 additions & 4 deletions cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1668,6 +1668,7 @@ static const float data_buffer5[] = {0.0f, 1.5f, -2.0f, 3.0f, 4.0f, 5.0f};
static const double data_buffer6[] = {0.0, 1.5, -2.0, 3.0, 4.0, 5.0};
static const int32_t data_buffer7[] = {1234, 5678, 9012, 3456};
static const int64_t data_buffer8[] = {123456789, 987654321, -123456789, -987654321};
static const int64_t date64_data_buffer8[] = {86400000, 172800000, -86400000, -172800000};
#if ARROW_LITTLE_ENDIAN
static const void* primitive_buffers_no_nulls1_8[2] = {nullptr, data_buffer1};
static const void* primitive_buffers_no_nulls1_16[2] = {nullptr, data_buffer1};
Expand Down Expand Up @@ -1699,6 +1700,9 @@ static const void* primitive_buffers_nulls7[2] = {bits_buffer1, data_buffer7};
static const void* primitive_buffers_no_nulls8[2] = {nullptr, data_buffer8};
static const void* primitive_buffers_nulls8[2] = {bits_buffer1, data_buffer8};

static const void* date64_buffers_no_nulls8[2] = {nullptr, date64_data_buffer8};
static const void* date64_buffers_nulls8[2] = {bits_buffer1, date64_data_buffer8};

static const int64_t timestamp_data_buffer1[] = {0, 951782400, -2203977600LL};
static const int64_t timestamp_data_buffer2[] = {0, 951782400000LL, -2203977600000LL};
static const int64_t timestamp_data_buffer3[] = {0, 951782400000000LL,
Expand Down Expand Up @@ -1987,8 +1991,8 @@ TEST_F(TestArrayImport, Primitive) {
TEST_F(TestArrayImport, Temporal) {
FillPrimitive(3, 0, 0, primitive_buffers_no_nulls7);
CheckImport(ArrayFromJSON(date32(), "[1234, 5678, 9012]"));
FillPrimitive(3, 0, 0, primitive_buffers_no_nulls8);
CheckImport(ArrayFromJSON(date64(), "[123456789, 987654321, -123456789]"));
FillPrimitive(3, 0, 0, date64_buffers_no_nulls8);
CheckImport(ArrayFromJSON(date64(), "[86400000, 172800000, -86400000]"));

FillPrimitive(2, 0, 0, primitive_buffers_no_nulls7);
CheckImport(ArrayFromJSON(time32(TimeUnit::SECOND), "[1234, 5678]"));
Expand Down Expand Up @@ -2026,8 +2030,8 @@ TEST_F(TestArrayImport, Temporal) {
// With nulls
FillPrimitive(3, -1, 0, primitive_buffers_nulls7);
CheckImport(ArrayFromJSON(date32(), "[1234, null, 9012]"));
FillPrimitive(3, -1, 0, primitive_buffers_nulls8);
CheckImport(ArrayFromJSON(date64(), "[123456789, null, -123456789]"));
FillPrimitive(3, -1, 0, date64_buffers_nulls8);
CheckImport(ArrayFromJSON(date64(), "[86400000, null, -86400000]"));
FillPrimitive(2, -1, 0, primitive_buffers_nulls8);
CheckImport(ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, null]"));
FillPrimitive(2, -1, 0, primitive_buffers_nulls8);
Expand Down
63 changes: 49 additions & 14 deletions cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1616,14 +1616,12 @@ TEST(GroupBy, MinMaxTypes) {
types.insert(types.end(), NumericTypes().begin(), NumericTypes().end());
types.insert(types.end(), TemporalTypes().begin(), TemporalTypes().end());
types.push_back(month_interval());
for (const auto& ty : types) {
SCOPED_TRACE(ty->ToString());
auto in_schema = schema({field("argument0", ty), field("key", int64())});
auto table = TableFromJSON(in_schema, {R"([

const std::vector<std::string> default_table = {R"([
[1, 1],
[null, 1]
])",
R"([
R"([
[0, 2],
[null, 3],
[3, 4],
Expand All @@ -1632,11 +1630,54 @@ TEST(GroupBy, MinMaxTypes) {
[3, 1],
[0, 2]
])",
R"([
R"([
[0, 2],
[1, null],
[null, 3]
])"});
])"};

const std::vector<std::string> date64_table = {R"([
[86400000, 1],
[null, 1]
])",
R"([
[0, 2],
[null, 3],
[259200000, 4],
[432000000, 4],
[345600000, null],
[259200000, 1],
[0, 2]
])",
R"([
[0, 2],
[86400000, null],
[null, 3]
])"};

const std::string default_expected =
R"([
[{"min": 1, "max": 3}, 1],
[{"min": 0, "max": 0}, 2],
[{"min": null, "max": null}, 3],
[{"min": 3, "max": 5}, 4],
[{"min": 1, "max": 4}, null]
])";

const std::string date64_expected =
R"([
[{"min": 86400000, "max": 259200000}, 1],
[{"min": 0, "max": 0}, 2],
[{"min": null, "max": null}, 3],
[{"min": 259200000, "max": 432000000}, 4],
[{"min": 86400000, "max": 345600000}, null]
])";

for (const auto& ty : types) {
SCOPED_TRACE(ty->ToString());
auto in_schema = schema({field("argument0", ty), field("key", int64())});
auto table =
TableFromJSON(in_schema, (ty->name() == "date64") ? date64_table : default_table);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can check ty->id() == Type::DATE64


ASSERT_OK_AND_ASSIGN(
Datum aggregated_and_grouped,
Expand All @@ -1652,13 +1693,7 @@ TEST(GroupBy, MinMaxTypes) {
field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
field("key_0", int64()),
}),
R"([
[{"min": 1, "max": 3}, 1],
[{"min": 0, "max": 0}, 2],
[{"min": null, "max": null}, 3],
[{"min": 3, "max": 5}, 4],
[{"min": 1, "max": 4}, null]
])"),
(ty->name() == "date64") ? date64_expected : default_expected),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: ditto here

aggregated_and_grouped,
/*verbose=*/true);
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1670,10 +1670,11 @@ TEST(Cast, DateZeroCopy) {
date64(),
int64(), // ARROW-1773: cast to int64
}) {
CheckCastZeroCopy(ArrayFromJSON(date64(), "[0, null, 2000, 1000, 0]"),
CheckCastZeroCopy(ArrayFromJSON(date64(), "[0, null, 172800000, 86400000, 0]"),
zero_copy_to_type);
}
CheckCastZeroCopy(ArrayFromJSON(int64(), "[0, null, 2000, 1000, 0]"), date64());
CheckCastZeroCopy(ArrayFromJSON(int64(), "[0, null, 172800000, 86400000, 0]"),
date64());
}

TEST(Cast, DurationToDuration) {
Expand Down
Loading