Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 17 additions & 33 deletions c++/src/Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -895,8 +895,7 @@ namespace orc {
proto::StripeFooter getStripeFooter(const proto::StripeInformation& info);
void startNextStripe();
void checkOrcVersion();
void selectTypeParent(size_t columnId);
void selectTypeChildren(size_t columnId);
void selectType(const Type& type);
void readMetadata() const;
std::unique_ptr<ColumnVectorBatch> createRowBatch(const Type& type,
uint64_t capacity
Expand Down Expand Up @@ -1069,9 +1068,22 @@ namespace orc {
const std::list<int64_t>& included = options.getInclude();
for(std::list<int64_t>::const_iterator columnId = included.begin();
columnId != included.end(); ++columnId) {
if (*columnId <= static_cast<int64_t>(schema->getSubtypeCount())) {
selectTypeParent(static_cast<size_t>(*columnId));
selectTypeChildren(static_cast<size_t>(*columnId));
if (*columnId == 0) {
selectType(*(schema.get()));
} else if (*columnId <= static_cast<int64_t>(schema->getSubtypeCount())) {
selectType(schema->getSubtype(*columnId-1));
}
}
if (included.size() > 0) {
selectedColumns[0] = true;
}
}

// Marks the column of `type` as selected and then applies the same step to
// each of its subtypes. If the column is already marked, returns immediately
// without visiting the subtypes.
void ReaderImpl::selectType(const Type& type) {
  const auto columnId = type.getColumnId();
  if (selectedColumns[columnId]) {
    return;
  }
  selectedColumns[columnId] = true;

  const uint64_t subtypeCount = type.getSubtypeCount();
  for (uint64_t child = 0; child < subtypeCount; ++child) {
    selectType(type.getSubtype(child));
  }
}
Expand Down Expand Up @@ -1184,34 +1196,6 @@ namespace orc {
return false;
}

void ReaderImpl::selectTypeParent(size_t columnId) {
for(size_t parent=0; parent < columnId; ++parent) {
const proto::Type& parentType = footer->types(static_cast<int>(parent));
for(int idx=0; idx < parentType.subtypes_size(); ++idx) {
uint64_t child = parentType.subtypes(idx);
if (child == columnId) {
if (!selectedColumns[parent]) {
selectedColumns[parent] = true;
selectTypeParent(parent);
return;
}
}
}
}
}

void ReaderImpl::selectTypeChildren(size_t columnId) {
if (!selectedColumns[columnId]) {
selectedColumns[columnId] = true;
const proto::Type& parentType =
footer->types(static_cast<int>(columnId));
for(int idx=0; idx < parentType.subtypes_size(); ++idx) {
uint64_t child = parentType.subtypes(idx);
selectTypeChildren(child);
}
}
}

// Returns, by value, a copy of the selectedColumns flags; entry i is true
// when column id i has been marked as selected.
const std::vector<bool> ReaderImpl::getSelectedColumns() const {
return selectedColumns;
}
Expand Down
70 changes: 70 additions & 0 deletions tools/test/TestReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,76 @@ TEST(Reader, futureFormatVersion) {
EXPECT_EQ("19.99", reader->getFormatVersion());
}

// Checks which column ids getSelectedColumns() reports for several include
// lists applied to TestOrcFile.testSeek.orc.
TEST(Reader, selectColumns) {
  std::ostringstream filename;
  filename << exampleDirectory << "/TestOrcFile.testSeek.orc";
  orc::ReaderOptions opts;
  std::list<int64_t> cols;

  // Including id 0 is expected to select all 24 columns.
  cols.push_back(0);
  opts.include(cols);
  std::unique_ptr<orc::Reader> reader =
    orc::createReader(orc::readLocalFile(filename.str()), opts);
  std::vector<bool> c = reader->getSelectedColumns();
  EXPECT_EQ(24, c.size());
  for (unsigned int i = 0; i < c.size(); ++i) {
    EXPECT_TRUE(c[i]);
  }

  // Each scenario includes a single id and expects exactly the column ids in
  // [first, last] to be selected; id 0 is not inspected by these checks.
  struct Scenario {
    int64_t included;
    unsigned int first;
    unsigned int last;
  };
  const Scenario scenarios[] = {
    {2, 2, 2},      // Int column #2
    {10, 10, 14},   // Struct column #10
    {11, 15, 18},   // Array column #11
    {12, 19, 23}    // Map column #12
  };
  const size_t scenarioCount = sizeof(scenarios) / sizeof(scenarios[0]);

  for (size_t s = 0; s < scenarioCount; ++s) {
    const Scenario& scenario = scenarios[s];
    cols.clear();
    cols.push_back(scenario.included);
    opts.include(cols);
    reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
    c = reader->getSelectedColumns();
    for (unsigned int i = 1; i < c.size(); ++i) {
      const bool expectSelected = (i >= scenario.first && i <= scenario.last);
      if (expectSelected) {
        EXPECT_TRUE(c[i]);
      } else {
        EXPECT_TRUE(!c[i]);
      }
    }
  }
}

std::map<std::string, std::string> makeMetadata() {
std::map<std::string, std::string> result;
result["my.meta"] = "\x01\x02\x03\x04\x05\x06\x07\xff\xfe\x7f\x80";
Expand Down