Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cpp/cmake_modules/san-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,17 @@ endif()
# - disable 'vptr' because of RTTI issues across shared libraries (?)
# - disable 'alignment' because unaligned access is really OK on Nehalem and we do it
# all over the place.
# - disable 'function' because it appears to give a false positive https://github.com/google/sanitizers/issues/911
# - disable 'function' because it appears to give a false positive
# (https://github.com/google/sanitizers/issues/911)
# - disable 'float-divide-by-zero' on clang, which considers it UB
# (https://bugs.llvm.org/show_bug.cgi?id=17000#c1)
# Note: GCC does not support the 'function' flag.
if(${ARROW_USE_UBSAN})
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(
CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all"
"${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function,float-divide-by-zero -fno-sanitize-recover=all"
)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "5.1")
Expand Down
28 changes: 24 additions & 4 deletions cpp/src/arrow/compare.cc
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,9 @@ class TypeEqualsVisitor {

class ScalarEqualsVisitor {
public:
explicit ScalarEqualsVisitor(const Scalar& right) : right_(right), result_(false) {}
explicit ScalarEqualsVisitor(const Scalar& right,
const EqualOptions& opts = EqualOptions::Defaults())
: right_(right), result_(false), options_(opts) {}

Status Visit(const NullScalar& left) {
result_ = true;
Expand All @@ -875,9 +877,26 @@ class ScalarEqualsVisitor {
return Status::OK();
}

template <typename T>
typename std::enable_if<std::is_base_of<FloatScalar, T>::value ||
std::is_base_of<DoubleScalar, T>::value,
Status>::type
Visit(const T& left_) {
const auto& right = checked_cast<const T&>(right_);
if (options_.nans_equal()) {
result_ = right.value == left_.value ||
(std::isnan(right.value) && std::isnan(left_.value));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just confirmation. Is it fine with (NAN, -NAN) -> true?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-NAN isn't really a thing. As per IEEE, every NaN is different and unequal (even to itself). Here we treat them as all equal, because that's vastly more convenient for testing.

} else {
result_ = right.value == left_.value;
}
return Status::OK();
}

template <typename T>
typename std::enable_if<
std::is_base_of<internal::PrimitiveScalar<typename T::TypeClass>, T>::value ||
(std::is_base_of<internal::PrimitiveScalar<typename T::TypeClass>, T>::value &&
!std::is_base_of<FloatScalar, T>::value &&
!std::is_base_of<DoubleScalar, T>::value) ||
std::is_base_of<TemporalScalar<typename T::TypeClass>, T>::value,
Status>::type
Visit(const T& left_) {
Expand Down Expand Up @@ -968,6 +987,7 @@ class ScalarEqualsVisitor {
protected:
const Scalar& right_;
bool result_;
const EqualOptions options_;
};

Status PrintDiff(const Array& left, const Array& right, std::ostream* os) {
Expand Down Expand Up @@ -1386,7 +1406,7 @@ bool TypeEquals(const DataType& left, const DataType& right, bool check_metadata
}
}

bool ScalarEquals(const Scalar& left, const Scalar& right) {
bool ScalarEquals(const Scalar& left, const Scalar& right, const EqualOptions& options) {
bool are_equal = false;
if (&left == &right) {
are_equal = true;
Expand All @@ -1395,7 +1415,7 @@ bool ScalarEquals(const Scalar& left, const Scalar& right) {
} else if (left.is_valid != right.is_valid) {
are_equal = false;
} else {
ScalarEqualsVisitor visitor(right);
ScalarEqualsVisitor visitor(right, options);
auto error = VisitScalarInline(left, &visitor);
DCHECK_OK(error);
are_equal = visitor.result();
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/compare.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ bool ARROW_EXPORT TypeEquals(const DataType& left, const DataType& right,
/// Returns true if scalars are equal
/// \param[in] left a Scalar
/// \param[in] right a Scalar
bool ARROW_EXPORT ScalarEquals(const Scalar& left, const Scalar& right);
/// \param[in] options comparison options
bool ARROW_EXPORT ScalarEquals(const Scalar& left, const Scalar& right,
const EqualOptions& options = EqualOptions::Defaults());

} // namespace arrow
4 changes: 0 additions & 4 deletions cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,6 @@ struct MultiplyChecked {
struct Divide {
template <typename T, typename Arg0, typename Arg1>
static enable_if_floating_point<T> Call(KernelContext* ctx, Arg0 left, Arg1 right) {
if (ARROW_PREDICT_FALSE(right == 0)) {
ctx->SetStatus(Status::Invalid("divide by zero"));
return 0;
}
return left / right;
}

Expand Down
28 changes: 22 additions & 6 deletions cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ class TestBinaryArithmetic : public TestBase {
const auto expected_scalar = *expected->GetScalar(i);
ASSERT_OK_AND_ASSIGN(
actual, func(*left->GetScalar(i), *right->GetScalar(i), options_, nullptr));
AssertScalarsEqual(*expected_scalar, *actual.scalar(), /*verbose=*/true);
AssertScalarsEqual(*expected_scalar, *actual.scalar(), /*verbose=*/true,
equal_options_);
}
}

Expand All @@ -165,12 +166,17 @@ class TestBinaryArithmetic : public TestBase {
void ValidateAndAssertApproxEqual(const std::shared_ptr<Array>& actual,
const std::shared_ptr<Array>& expected) {
ASSERT_OK(actual->ValidateFull());
AssertArraysApproxEqual(*expected, *actual, /*verbose=*/true);
AssertArraysApproxEqual(*expected, *actual, /*verbose=*/true, equal_options_);
}

void SetOverflowCheck(bool value = true) { options_.check_overflow = value; }

void SetNansEqual(bool value = true) {
this->equal_options_ = equal_options_.nans_equal(value);
}

ArithmeticOptions options_ = ArithmeticOptions();
EqualOptions equal_options_ = EqualOptions::Defaults();
};

template <typename... Elements>
Expand Down Expand Up @@ -510,6 +516,9 @@ TYPED_TEST(TestBinaryArithmeticFloating, Div) {
"[null, 0.1, 0.25, null, 0.2, 0.5]");
// Array with infinity
this->AssertBinop(Divide, "[3.4, Inf, -Inf]", "[1, 2, 3]", "[3.4, Inf, -Inf]");
// Array with NaN
this->SetNansEqual(true);
this->AssertBinop(Divide, "[3.4, NaN, 2.0]", "[1, 2, 2.0]", "[3.4, NaN, 1.0]");
// Scalar divides by scalar
this->AssertBinop(Divide, 21.0F, 3.0F, 7.0F);
}
Expand Down Expand Up @@ -557,10 +566,17 @@ TYPED_TEST(TestBinaryArithmeticIntegral, DivideByZero) {
}

TYPED_TEST(TestBinaryArithmeticFloating, DivideByZero) {
for (auto check_overflow : {false, true}) {
this->SetOverflowCheck(check_overflow);
this->AssertBinopRaises(Divide, "[3.0, 2.0, 6.0]", "[1.0, 1.0, 0]", "divide by zero");
}
this->SetOverflowCheck(true);
this->AssertBinopRaises(Divide, "[3.0, 2.0, 6.0]", "[1.0, 1.0, 0.0]", "divide by zero");
this->AssertBinopRaises(Divide, "[3.0, 2.0, 0.0]", "[1.0, 1.0, 0.0]", "divide by zero");
this->AssertBinopRaises(Divide, "[3.0, 2.0, -6.0]", "[1.0, 1.0, 0.0]",
"divide by zero");

this->SetOverflowCheck(false);
this->SetNansEqual(true);
this->AssertBinop(Divide, "[3.0, 2.0, 6.0]", "[1.0, 1.0, 0.0]", "[3.0, 2.0, Inf]");
this->AssertBinop(Divide, "[3.0, 2.0, 0.0]", "[1.0, 1.0, 0.0]", "[3.0, 2.0, NaN]");
this->AssertBinop(Divide, "[3.0, 2.0, -6.0]", "[1.0, 1.0, 0.0]", "[3.0, 2.0, -Inf]");
}

TYPED_TEST(TestBinaryArithmeticSigned, DivideOverflowRaises) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ namespace arrow {
using internal::checked_cast;
using internal::checked_pointer_cast;

bool Scalar::Equals(const Scalar& other) const { return ScalarEquals(*this, other); }
bool Scalar::Equals(const Scalar& other, const EqualOptions& options) const {
return ScalarEquals(*this, other, options);
}

struct ScalarHashImpl {
static std::hash<std::string> string_hash;
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <vector>

#include "arrow/compare.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
Expand Down Expand Up @@ -61,7 +62,8 @@ struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {

using util::EqualityComparable<Scalar>::operator==;
using util::EqualityComparable<Scalar>::Equals;
bool Equals(const Scalar& other) const;
bool Equals(const Scalar& other,
const EqualOptions& options = EqualOptions::Defaults()) const;

struct ARROW_EXPORT Hash {
size_t operator()(const Scalar& scalar) const { return hash(scalar); }
Expand Down
12 changes: 7 additions & 5 deletions cpp/src/arrow/testing/gtest_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,22 +135,24 @@ void AssertArraysEqual(const Array& expected, const Array& actual, bool verbose)
});
}

void AssertArraysApproxEqual(const Array& expected, const Array& actual, bool verbose) {
void AssertArraysApproxEqual(const Array& expected, const Array& actual, bool verbose,
const EqualOptions& option) {
return AssertArraysEqualWith(
expected, actual, verbose,
[](const Array& expected, const Array& actual, std::stringstream* diff) {
return expected.ApproxEquals(actual, EqualOptions().diff_sink(diff));
[&option](const Array& expected, const Array& actual, std::stringstream* diff) {
return expected.ApproxEquals(actual, option.diff_sink(diff));
});
}

void AssertScalarsEqual(const Scalar& expected, const Scalar& actual, bool verbose) {
void AssertScalarsEqual(const Scalar& expected, const Scalar& actual, bool verbose,
const EqualOptions& options) {
std::stringstream diff;
// ARROW-8956, ScalarEquals returns false when both are null
if (!expected.is_valid && !actual.is_valid) {
// We consider both being null to be equal in this function
return;
}
if (!expected.Equals(actual)) {
if (!expected.Equals(actual, options)) {
if (verbose) {
diff << "Expected:\n" << expected.ToString();
diff << "\nActual:\n" << actual.ToString();
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/arrow/testing/gtest_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,13 @@ std::vector<Type::type> AllTypeIds();
// If verbose is true, then the arrays will be pretty printed
ARROW_TESTING_EXPORT void AssertArraysEqual(const Array& expected, const Array& actual,
bool verbose = false);
ARROW_TESTING_EXPORT void AssertArraysApproxEqual(const Array& expected,
const Array& actual,
bool verbose = false);
ARROW_TESTING_EXPORT void AssertArraysApproxEqual(
const Array& expected, const Array& actual, bool verbose = false,
const EqualOptions& option = EqualOptions::Defaults());
// Returns true when values are both null
ARROW_TESTING_EXPORT void AssertScalarsEqual(const Scalar& expected, const Scalar& actual,
bool verbose = false);
ARROW_TESTING_EXPORT void AssertScalarsEqual(
const Scalar& expected, const Scalar& actual, bool verbose = false,
const EqualOptions& options = EqualOptions::Defaults());
ARROW_TESTING_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
const RecordBatch& actual,
bool check_metadata = false);
Expand Down