Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions cpp/cmake_modules/FindClangTools.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,43 @@
# This module defines
# CLANG_TIDY_BIN, The path to the clang tidy binary
# CLANG_TIDY_FOUND, Whether clang tidy was found
# CLANG_FORMAT_BIN, The path to the clang format binary
# CLANG_FORMAT_BIN, The path to the clang format binary
# CLANG_TIDY_FOUND, Whether clang format was found

find_program(CLANG_TIDY_BIN
NAMES clang-tidy-3.8 clang-tidy-3.7 clang-tidy-3.6 clang-tidy
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
find_program(CLANG_TIDY_BIN
NAMES clang-tidy-4.0
clang-tidy-3.9
clang-tidy-3.8
clang-tidy-3.7
clang-tidy-3.6
clang-tidy
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
NO_DEFAULT_PATH
)

if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
set(CLANG_TIDY_FOUND 0)
message("clang-tidy not found")
else()
set(CLANG_TIDY_FOUND 1)
message("clang-tidy found at ${CLANG_TIDY_BIN}")
endif()

find_program(CLANG_FORMAT_BIN
NAMES clang-format-3.8 clang-format-3.7 clang-format-3.6 clang-format
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
find_program(CLANG_FORMAT_BIN
NAMES clang-format-4.0
clang-format-3.9
clang-format-3.8
clang-format-3.7
clang-format-3.6
clang-format
PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
NO_DEFAULT_PATH
)

if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
set(CLANG_FORMAT_FOUND 0)
message("clang-format not found")
else()
set(CLANG_FORMAT_FOUND 1)
message("clang-format found at ${CLANG_FORMAT_BIN}")
endif()

7 changes: 3 additions & 4 deletions cpp/src/arrow/io/io-hdfs-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,9 @@ class TestHdfsClient : public ::testing::Test {
LibHdfsShim* driver_shim;

client_ = nullptr;
scratch_dir_ =
boost::filesystem::unique_path(
boost::filesystem::temp_directory_path() / "arrow-hdfs/scratch-%%%%")
.string();
scratch_dir_ = boost::filesystem::unique_path(
boost::filesystem::temp_directory_path() / "arrow-hdfs/scratch-%%%%")
.string();

loaded_driver_ = false;

Expand Down
76 changes: 53 additions & 23 deletions cpp/src/arrow/table-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ TEST_F(TestColumn, BasicAPI) {
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
arrays.push_back(MakePrimitive<Int32Array>(100, 20));

auto field = std::make_shared<Field>("c0", int32());
column_.reset(new Column(field, arrays));
auto f0 = field("c0", int32());
column_.reset(new Column(f0, arrays));

ASSERT_EQ("c0", column_->name());
ASSERT_TRUE(column_->type()->Equals(int32()));
Expand All @@ -137,7 +137,7 @@ TEST_F(TestColumn, BasicAPI) {
ASSERT_EQ(3, column_->data()->num_chunks());

// nullptr array should not break
column_.reset(new Column(field, std::shared_ptr<Array>(nullptr)));
column_.reset(new Column(f0, std::shared_ptr<Array>(nullptr)));
ASSERT_NE(column_.get(), nullptr);
}

Expand All @@ -146,13 +146,13 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
arrays.push_back(MakePrimitive<Int32Array>(100));
arrays.push_back(MakePrimitive<Int32Array>(100, 10));

auto field = std::make_shared<Field>("c0", int32());
column_.reset(new Column(field, arrays));
auto f0 = field("c0", int32());
column_.reset(new Column(f0, arrays));

ASSERT_OK(column_->ValidateData());

arrays.push_back(MakePrimitive<Int16Array>(100, 10));
column_.reset(new Column(field, arrays));
column_.reset(new Column(f0, arrays));
ASSERT_RAISES(Invalid, column_->ValidateData());
}

Expand All @@ -164,8 +164,8 @@ TEST_F(TestColumn, Equals) {
arrays_one_.push_back(array);
arrays_another_.push_back(array);

one_field_ = std::make_shared<Field>("column", int32());
another_field_ = std::make_shared<Field>("column", int32());
one_field_ = field("column", int32());
another_field_ = field("column", int32());

Construct();
ASSERT_TRUE(one_col_->Equals(one_col_));
Expand All @@ -174,13 +174,13 @@ TEST_F(TestColumn, Equals) {
ASSERT_TRUE(one_col_->Equals(*another_col_.get()));

// Field is different
another_field_ = std::make_shared<Field>("two", int32());
another_field_ = field("two", int32());
Construct();
ASSERT_FALSE(one_col_->Equals(another_col_));
ASSERT_FALSE(one_col_->Equals(*another_col_.get()));

// ChunkedArray is different
another_field_ = std::make_shared<Field>("column", int32());
another_field_ = field("column", int32());
arrays_another_.push_back(array);
Construct();
ASSERT_FALSE(one_col_->Equals(another_col_));
Expand All @@ -190,9 +190,9 @@ TEST_F(TestColumn, Equals) {
class TestTable : public TestBase {
public:
void MakeExample1(int length) {
auto f0 = std::make_shared<Field>("f0", int32());
auto f1 = std::make_shared<Field>("f1", uint8());
auto f2 = std::make_shared<Field>("f2", int16());
auto f0 = field("f0", int32());
auto f1 = field("f1", uint8());
auto f2 = field("f2", int16());

vector<shared_ptr<Field>> fields = {f0, f1, f2};
schema_ = std::make_shared<Schema>(fields);
Expand Down Expand Up @@ -279,9 +279,9 @@ TEST_F(TestTable, Equals) {

ASSERT_TRUE(table_->Equals(*table_));
// Differing schema
auto f0 = std::make_shared<Field>("f3", int32());
auto f1 = std::make_shared<Field>("f4", uint8());
auto f2 = std::make_shared<Field>("f5", int16());
auto f0 = field("f3", int32());
auto f1 = field("f4", uint8());
auto f2 = field("f5", int16());
vector<shared_ptr<Field>> fields = {f0, f1, f2};
auto other_schema = std::make_shared<Schema>(fields);
ASSERT_FALSE(table_->Equals(Table(other_schema, columns_)));
Expand Down Expand Up @@ -389,9 +389,9 @@ class TestRecordBatch : public TestBase {};
TEST_F(TestRecordBatch, Equals) {
const int length = 10;

auto f0 = std::make_shared<Field>("f0", int32());
auto f1 = std::make_shared<Field>("f1", uint8());
auto f2 = std::make_shared<Field>("f2", int16());
auto f0 = field("f0", int32());
auto f1 = field("f1", uint8());
auto f2 = field("f2", int16());

vector<shared_ptr<Field>> fields = {f0, f1, f2};
auto schema = std::make_shared<Schema>(fields);
Expand All @@ -401,21 +401,51 @@ TEST_F(TestRecordBatch, Equals) {
auto a2 = MakePrimitive<Int16Array>(length);

RecordBatch b1(schema, length, {a0, a1, a2});
RecordBatch b2(schema, 5, {a0, a1, a2});
RecordBatch b3(schema, length, {a0, a1});
RecordBatch b4(schema, length, {a0, a1, a1});

ASSERT_TRUE(b1.Equals(b1));
ASSERT_FALSE(b1.Equals(b2));
ASSERT_FALSE(b1.Equals(b3));
ASSERT_FALSE(b1.Equals(b4));
}

#ifdef NDEBUG
// In debug builds, RecordBatch ctor aborts if you construct an invalid one

TEST_F(TestRecordBatch, Validate) {
const int length = 10;

auto f0 = field("f0", int32());
auto f1 = field("f1", uint8());
auto f2 = field("f2", int16());

auto schema = std::shared_ptr<Schema>(new Schema({f0, f1, f2}));

auto a0 = MakePrimitive<Int32Array>(length);
auto a1 = MakePrimitive<UInt8Array>(length);
auto a2 = MakePrimitive<Int16Array>(length);
auto a3 = MakePrimitive<Int16Array>(5);

RecordBatch b1(schema, length, {a0, a1, a2});

ASSERT_OK(b1.Validate());

// Length mismatch
RecordBatch b2(schema, length, {a0, a1, a3});
ASSERT_RAISES(Invalid, b2.Validate());

// Type mismatch
RecordBatch b3(schema, length, {a0, a1, a0});
ASSERT_RAISES(Invalid, b3.Validate());
}

#endif

TEST_F(TestRecordBatch, Slice) {
const int length = 10;

auto f0 = std::make_shared<Field>("f0", int32());
auto f1 = std::make_shared<Field>("f1", uint8());
auto f0 = field("f0", int32());
auto f1 = field("f1", uint8());

vector<shared_ptr<Field>> fields = {f0, f1};
auto schema = std::make_shared<Schema>(fields);
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/arrow/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ Status Column::ValidateData() {
// ----------------------------------------------------------------------
// RecordBatch methods

void AssertBatchValid(const RecordBatch& batch) {
Status s = batch.Validate();
if (!s.ok()) { DCHECK(false) << s.ToString(); }
}

RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
const std::vector<std::shared_ptr<Array>>& columns)
: schema_(schema), num_rows_(num_rows), columns_(columns) {}
Expand Down Expand Up @@ -190,6 +195,26 @@ std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset, int64_t length)
return std::make_shared<RecordBatch>(schema_, num_rows, arrays);
}

Status RecordBatch::Validate() const {
for (int i = 0; i < num_columns(); ++i) {
const Array& arr = *columns_[i];
if (arr.length() != num_rows_) {
std::stringstream ss;
ss << "Number of rows in column " << i << " did not match batch: " << arr.length()
<< " vs " << num_rows_;
return Status::Invalid(ss.str());
}
const auto& schema_type = *schema_->field(i)->type;
if (!arr.type()->Equals(schema_type)) {
std::stringstream ss;
ss << "Column " << i << " type not match schema: " << arr.type()->ToString()
<< " vs " << schema_type.ToString();
return Status::Invalid(ss.str());
}
}
return Status::OK();
}

// ----------------------------------------------------------------------
// Table methods

Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ class ARROW_EXPORT RecordBatch {
std::shared_ptr<RecordBatch> Slice(int64_t offset);
std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length);

/// Returns error status is there is something wrong with the record batch
/// contents, like a schema/array mismatch or inconsistent lengths
Status Validate() const;

private:
std::shared_ptr<Schema> schema_;
int64_t num_rows_;
Expand Down