Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,15 @@ matrix:
- ARROW_TRAVIS_OPTIONAL_INSTALL=1
- ARROW_CPP_BUILD_TARGETS="gandiva-all"
- ARROW_TRAVIS_USE_TOOLCHAIN=1
# ARROW-3979 temporarily disabled.
- ARROW_TRAVIS_VALGRIND=0
- ARROW_TRAVIS_VALGRIND=1
- ARROW_BUILD_WARNING_LEVEL=CHECKIN
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
before_script:
# Run if something changed in CPP or Java.
- if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
- $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh
- $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing --only-library probably makes building slower. Is it necessary?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will go away with ARROW-3803

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this change, the gandiva tests were not running as part of CI. I guess that in the travis gandiva script, the gandiva tests option is enabled only if "--only-library" is not set. But if ARROW-3803 takes care of it, I am fine with removing it.

script:
- $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh
Expand Down
7 changes: 3 additions & 4 deletions cpp/src/gandiva/bitmap_accumulator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ class TestBitMapAccumulator : public ::testing::Test {
int nrecords);
};

void TestBitMapAccumulator::FillBitMap(uint8_t* bmap, int nrecords) {
int nbytes = nrecords / 8;
unsigned int cur;
void TestBitMapAccumulator::FillBitMap(uint8_t* bmap, int nbytes) {
unsigned int cur = 0;

for (int i = 0; i < nbytes; ++i) {
rand_r(&cur);
Expand Down Expand Up @@ -62,7 +61,7 @@ TEST_F(TestBitMapAccumulator, TestIntersectBitMaps) {
uint8_t expected_bitmap[length];

for (int i = 0; i < 4; i++) {
FillBitMap(src_bitmaps[i], nrecords);
FillBitMap(src_bitmaps[i], length);
}

for (int i = 0; i < 4; i++) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/eval_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class EvalBatch {
/// An array of 'num_buffers_', each containing a buffer. The buffer
/// sizes depends on the data type, but all of them have the same
/// number of slots (equal to num_records_).
std::unique_ptr<uint8_t*> buffers_array_;
std::unique_ptr<uint8_t* []> buffers_array_;

std::unique_ptr<LocalBitMapsHolder> local_bitmaps_holder_;

Expand Down
8 changes: 5 additions & 3 deletions cpp/src/gandiva/exported_funcs_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#ifndef GANDIVA_EXPORTED_FUNCS_REGISTRY_H
#define GANDIVA_EXPORTED_FUNCS_REGISTRY_H

#include <memory>
#include <vector>

#include <gandiva/engine.h>
Expand All @@ -30,12 +31,12 @@ class ExportedFuncsBase;
/// LLVM/IR code.
class ExportedFuncsRegistry {
public:
using list_type = std::vector<ExportedFuncsBase*>;
using list_type = std::vector<std::shared_ptr<ExportedFuncsBase>>;

// Add functions from all the registered classes to the engine.
static void AddMappings(Engine* engine);

static bool Register(ExportedFuncsBase* entry) {
static bool Register(std::shared_ptr<ExportedFuncsBase> entry) {
registered().push_back(entry);
return true;
}
Expand All @@ -48,7 +49,8 @@ class ExportedFuncsRegistry {
};

#define REGISTER_EXPORTED_FUNCS(classname) \
static bool _registered_##classname = ExportedFuncsRegistry::Register(new classname)
static bool _registered_##classname = \
ExportedFuncsRegistry::Register(std::make_shared<classname>())

} // namespace gandiva

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/gandiva/local_bitmaps_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ class LocalBitMapsHolder {
int64_t num_records_;

/// A container of 'local_bitmaps_', each sized to accommodate 'num_records'.
std::vector<std::unique_ptr<uint8_t>> local_bitmaps_vec_;
std::vector<std::unique_ptr<uint8_t[]>> local_bitmaps_vec_;

/// An array of the local bitmaps.
std::unique_ptr<uint8_t*> local_bitmaps_array_;
std::unique_ptr<uint8_t* []> local_bitmaps_array_;

int64_t local_bitmap_size_;
};
Expand All @@ -72,7 +72,7 @@ inline LocalBitMapsHolder::LocalBitMapsHolder(int64_t num_records, int num_local
// Alloc 'num_local_bitmaps_' number of bitmaps, each of capacity 'num_records_'.
for (int i = 0; i < num_local_bitmaps; ++i) {
// TODO : round-up to a slab friendly multiple.
std::unique_ptr<uint8_t> bitmap(new uint8_t[local_bitmap_size_]);
std::unique_ptr<uint8_t[]> bitmap(new uint8_t[local_bitmap_size_]);

// keep pointer to the bitmap in the array.
(local_bitmaps_array_.get())[i] = bitmap.get();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/precompiled/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ function(add_precompiled_unit_test REL_TEST_NAME)
)
target_compile_definitions(${TEST_NAME} PRIVATE GANDIVA_UNIT_TEST=1)
add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
set_property(TEST ${TEST_NAME} PROPERTY LABELS gandiva;unittest ${TEST_NAME})
set_property(TEST ${TEST_NAME} PROPERTY LABELS gandiva-tests {TEST_NAME})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it "gandiva-tests" rather than "gandiva"?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the result of 9fcce64

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sounds bizarre to have the name "tests" in test labels. Is it because Gandiva has microbenchmarks in its tests as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, we removed the microbenchmarks from the tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look more closely at 9fcce64

We now have targets gandiva (libraries) gandiva-tests (tests) and gandiva-benchmarks (benchmarks). The label matches the target name

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok. I guess it doesn't make a difference when running e.g. ctest -L arrow.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

endfunction(add_precompiled_unit_test REL_TEST_NAME)

# testing
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/gandiva/projector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
astatus = arrow::AllocateBuffer(pool, data_len, &data);
ARROW_RETURN_NOT_OK(astatus);

// Valgrind detects uninitialized memory at byte level. Boolean types use bits
// and can leave buffer memory uninitialized in the last byte.
if (type->id() == arrow::Type::BOOL) {
data->mutable_data()[data_len - 1] = 0;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it really being left uninitialized?

FWIW we zero the memory in arrow::BooleanBuilder because valgrind doesn't like in-place modifications of uninitialized bytes

https://github.com/apache/arrow/blob/master/cpp/src/arrow/array/builder_primitive.cc#L167

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gandiva doesn't use the builders - it allocates the buffers directly in cpp code (for the batch), and updates the buffers in IR code. Using builders is tricky since they expect the updates to also happen through the builder APIs (e.g. for tracking length).

Is it really being left uninitialized?

gandiva only updates the relevant bits. E.g., for a projector with expression "a < b" having a batch of 6 elements, gandiva will update 6 bits in the output boolean vector (to either 0 or 1 depending on the values of a and b). The remaining 2 bits are left uninitialized.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't suggesting that you use the builders, just noting that we've also experienced valgrind issues with boolean arrays

Uninitialized bits are not an issue. I was curious why a whole byte is uninitialized

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still curious about the answer to this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what it means is that this avoids Valgrind errors with uninitialized bits.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, valgrind does require us to zero out the entire bitmap. @shyambits2004, can you please check if we do have a unit test that projects more than 8 elements?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pitrou agreed, we zero out all of our bytes in BooleanBuilder for example. I'm trying to understand why the last byte. That is what seems weird to me -- per @pravindra it may be that the testing is not comprehensive enough

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, but Valgrind is able to detect individual uninitialized bits. So only the trailing bits in the last byte would be a problem.

It also depends which exact operation is used for setting or clearing the bits.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't aware that valgrind had bit-level precision (http://valgrind.org/docs/memcheck2005.pdf) so this is probably it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wesm, you are right. valgrind complained when I modified the test to have 12 output elements. gandiva uses arrow::util::SetBitTo() to update bitmaps, and there's a comment in the function that it confuses valgrind.

I've opened ARROW-4115 for this.


*array_data = arrow::ArrayData::Make(type, num_records, {null_bitmap, data});
return Status::OK();
}
Expand Down
51 changes: 24 additions & 27 deletions cpp/src/gandiva/selection_vector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "gandiva/selection_vector.h"

#include <memory>
#include <vector>

#include <gtest/gtest.h>

Expand Down Expand Up @@ -102,15 +103,14 @@ TEST_F(TestSelectionVector, TestInt16PopulateFromBitMap) {
EXPECT_EQ(status.ok(), true) << status.message();

int bitmap_size = RoundUpNumi64(max_slots) * 8;
std::unique_ptr<uint8_t> bitmap(new uint8_t[bitmap_size]);
memset(bitmap.get(), 0, bitmap_size);
std::vector<uint8_t> bitmap(bitmap_size);

arrow::BitUtil::SetBit(bitmap.get(), 0);
arrow::BitUtil::SetBit(bitmap.get(), 5);
arrow::BitUtil::SetBit(bitmap.get(), 121);
arrow::BitUtil::SetBit(bitmap.get(), 220);
arrow::BitUtil::SetBit(&bitmap[0], 0);
arrow::BitUtil::SetBit(&bitmap[0], 5);
arrow::BitUtil::SetBit(&bitmap[0], 121);
arrow::BitUtil::SetBit(&bitmap[0], 220);

status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, max_slots - 1);
status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, max_slots - 1);
EXPECT_EQ(status.ok(), true) << status.message();

EXPECT_EQ(selection->GetNumSlots(), 3);
Expand All @@ -127,15 +127,14 @@ TEST_F(TestSelectionVector, TestInt16PopulateFromBitMapNegative) {
EXPECT_EQ(status.ok(), true) << status.message();

int bitmap_size = 16;
std::unique_ptr<uint8_t> bitmap(new uint8_t[bitmap_size]);
memset(bitmap.get(), 0, bitmap_size);
std::vector<uint8_t> bitmap(bitmap_size);

arrow::BitUtil::SetBit(bitmap.get(), 0);
arrow::BitUtil::SetBit(bitmap.get(), 1);
arrow::BitUtil::SetBit(bitmap.get(), 2);
arrow::BitUtil::SetBit(&bitmap[0], 0);
arrow::BitUtil::SetBit(&bitmap[0], 1);
arrow::BitUtil::SetBit(&bitmap[0], 2);

// The bitmap has three set bits, whereas the selection vector has capacity for only 2.
status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, 2);
status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, 2);
EXPECT_EQ(status.IsInvalid(), true);
}

Expand Down Expand Up @@ -175,15 +174,14 @@ TEST_F(TestSelectionVector, TestInt32PopulateFromBitMap) {
EXPECT_EQ(status.ok(), true) << status.message();

int bitmap_size = RoundUpNumi64(max_slots) * 8;
std::unique_ptr<uint8_t> bitmap(new uint8_t[bitmap_size]);
memset(bitmap.get(), 0, bitmap_size);
std::vector<uint8_t> bitmap(bitmap_size);

arrow::BitUtil::SetBit(bitmap.get(), 0);
arrow::BitUtil::SetBit(bitmap.get(), 5);
arrow::BitUtil::SetBit(bitmap.get(), 121);
arrow::BitUtil::SetBit(bitmap.get(), 220);
arrow::BitUtil::SetBit(&bitmap[0], 0);
arrow::BitUtil::SetBit(&bitmap[0], 5);
arrow::BitUtil::SetBit(&bitmap[0], 121);
arrow::BitUtil::SetBit(&bitmap[0], 220);

status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, max_slots - 1);
status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, max_slots - 1);
EXPECT_EQ(status.ok(), true) << status.message();

EXPECT_EQ(selection->GetNumSlots(), 3);
Expand Down Expand Up @@ -243,15 +241,14 @@ TEST_F(TestSelectionVector, TestInt64PopulateFromBitMap) {
EXPECT_EQ(status.ok(), true) << status.message();

int bitmap_size = RoundUpNumi64(max_slots) * 8;
std::unique_ptr<uint8_t> bitmap(new uint8_t[bitmap_size]);
memset(bitmap.get(), 0, bitmap_size);
std::vector<uint8_t> bitmap(bitmap_size);

arrow::BitUtil::SetBit(bitmap.get(), 0);
arrow::BitUtil::SetBit(bitmap.get(), 5);
arrow::BitUtil::SetBit(bitmap.get(), 121);
arrow::BitUtil::SetBit(bitmap.get(), 220);
arrow::BitUtil::SetBit(&bitmap[0], 0);
arrow::BitUtil::SetBit(&bitmap[0], 5);
arrow::BitUtil::SetBit(&bitmap[0], 121);
arrow::BitUtil::SetBit(&bitmap[0], 220);

status = selection->PopulateFromBitMap(bitmap.get(), bitmap_size, max_slots - 1);
status = selection->PopulateFromBitMap(&bitmap[0], bitmap_size, max_slots - 1);
EXPECT_EQ(status.ok(), true) << status.message();

EXPECT_EQ(selection->GetNumSlots(), 3);
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/gandiva/tests/projector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -493,14 +493,15 @@ TEST_F(TestProjector, TestZeroCopy) {

// allocate output buffers
int64_t bitmap_sz = arrow::BitUtil::BytesForBits(num_records);
std::unique_ptr<uint8_t[]> bitmap(new uint8_t[bitmap_sz]);
int64_t bitmap_capacity = arrow::BitUtil::RoundUpToMultipleOf64(bitmap_sz);
std::vector<uint8_t> bitmap(bitmap_capacity);
std::shared_ptr<arrow::MutableBuffer> bitmap_buf =
std::make_shared<arrow::MutableBuffer>(bitmap.get(), bitmap_sz);
std::make_shared<arrow::MutableBuffer>(&bitmap[0], bitmap_capacity);

int64_t data_sz = sizeof(float) * num_records;
std::unique_ptr<uint8_t[]> data(new uint8_t[data_sz]);
std::vector<uint8_t> data(bitmap_capacity);
std::shared_ptr<arrow::MutableBuffer> data_buf =
std::make_shared<arrow::MutableBuffer>(data.get(), data_sz);
std::make_shared<arrow::MutableBuffer>(&data[0], data_sz);

auto array_data =
arrow::ArrayData::Make(float32(), num_records, {bitmap_buf, data_buf});
Expand Down
13 changes: 12 additions & 1 deletion cpp/valgrind.supp
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,15 @@
Memcheck:Cond
fun:*CastFunctor*BooleanType*
}

# Suppress Memcheck "Conditional jump or move depends on uninitialised value(s)"
# reports (error kind Cond) whose call stack contains a frame matching the
# mangled-name glob _ZN3re23RE2C1E*, i.e. an re2::RE2 constructor. The "..."
# line is a frame-level wildcard, so the constructor need not be the innermost
# frame of the reported stack.
{
<re2>:Conditional jump or move depends on uninitialised value(s)
Memcheck:Cond
...
fun:_ZN3re23RE2C1E*
}
# Suppress Memcheck "Use of uninitialised value of size 8" reports (error kind
# Value8) whose call stack contains a frame matching the mangled-name glob
# _ZN3re23RE2C1E*, i.e. an re2::RE2 constructor. The "..." line is a
# frame-level wildcard, so any number of frames may sit above the constructor.
{
<re2>:Use of uninitialised value of size 8
Memcheck:Value8
...
fun:_ZN3re23RE2C1E*
}