diff --git a/c++/include/orc/ColumnPrinter.hh b/c++/include/orc/ColumnPrinter.hh index 17c1901b32..95e965fafc 100644 --- a/c++/include/orc/ColumnPrinter.hh +++ b/c++/include/orc/ColumnPrinter.hh @@ -47,6 +47,7 @@ namespace orc { }; ORC_UNIQUE_PTR createColumnPrinter(std::string&, - const Type& type); + const Type& type, + /* Patch overview: an optional column-selection vector is threaded through createColumnPrinter so that StructColumnPrinter can leave out struct fields that were not selected. The new parameter defaults to ORC_NULLPTR, so calls that pass only (buffer, type) remain valid. */ const std::vector* selectedColumns = ORC_NULLPTR); } #endif diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc index aa90be61d1..5d1703235f 100644 --- a/c++/src/ColumnPrinter.cc +++ b/c++/src/ColumnPrinter.cc @@ -173,8 +173,10 @@ namespace orc { class StructColumnPrinter: public ColumnPrinter { private: std::vector fieldPrinter; + /* Names of the fields that were kept; the constructor pushes to fieldNames and fieldPrinter together, so the two stay index-aligned. */ std::vector fieldNames; public: - StructColumnPrinter(std::string&, const Type& type); + StructColumnPrinter(std::string&, const Type& type, + const std::vector* selectedColumns); virtual ~StructColumnPrinter(); void printRow(uint64_t rowId) override; void reset(const ColumnVectorBatch& batch) override; @@ -209,9 +211,11 @@ namespace orc { } } - std::unique_ptr createColumnPrinter(std::string& buffer, - const Type& type) { - ColumnPrinter *result; + std::unique_ptr createColumnPrinter( + std::string& buffer, + const Type& type, + const std::vector* selectedColumns) { + ColumnPrinter *result = nullptr; switch(static_cast(type.getKind())) { case BOOLEAN: result = new BooleanColumnPrinter(buffer, type); @@ -252,7 +256,7 @@ namespace orc { break; case STRUCT: - result = new StructColumnPrinter(buffer, type); + /* STRUCT is the only case in the hunks shown here that is handed selectedColumns. */ result = new StructColumnPrinter(buffer, type, selectedColumns); break; case DECIMAL: @@ -558,12 +562,17 @@ namespace orc { } } - StructColumnPrinter::StructColumnPrinter(std::string& buffer, - const Type& type - ): ColumnPrinter(buffer, type) { + StructColumnPrinter::StructColumnPrinter( + std::string& buffer, + const Type& type, + const std::vector* selectedColumns + ): ColumnPrinter(buffer, type) { for(unsigned int i=0; i < type.getSubtypeCount(); ++i) { - 
fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i)) - .release()); + /* Field i is kept when no selection vector was supplied or when the entry at the subtype's column id is truthy. NOTE(review): the lookup uses at(); presumably this is std::vector::at on a vector of bool, which throws std::out_of_range if the vector is shorter than the column id; the element type is not visible in this text, confirm. The child printer below is created without selectedColumns, so it falls back to the ORC_NULLPTR default from the header and a nested struct prints all of its fields. */ if (selectedColumns==nullptr || selectedColumns->at(type.getSubtype(i).getColumnId())) { + fieldNames.push_back(type.getFieldName(i)); + fieldPrinter.push_back(createColumnPrinter(buffer, + type.getSubtype(i)).release()); + } } } @@ -592,7 +601,7 @@ namespace orc { writeString(buffer, ", "); } writeChar(buffer, '"'); - writeString(buffer, type.getFieldName(i).c_str()); + writeString(buffer, fieldNames[i].c_str()); writeString(buffer, "\": "); fieldPrinter[i]->printRow(rowId); } diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc index 694fea3ba9..c2e2c358f0 100644 --- a/tools/src/FileContents.cc +++ b/tools/src/FileContents.cc @@ -31,8 +31,9 @@ void printContents(const char* filename, const orc::ReaderOptions opts) { std::unique_ptr batch = reader->createRowBatch(1000); std::string line; + const std::vector selectedColumns = reader->getSelectedColumns(); std::unique_ptr printer = - createColumnPrinter(line, reader->getType()); + createColumnPrinter(line, reader->getType(), &selectedColumns); while (reader->next(*batch)) { printer->reset(*batch); @@ -48,12 +49,36 @@ void printContents(const char* filename, const orc::ReaderOptions opts) { int main(int argc, char* argv[]) { if (argc < 2) { - std::cout << "Usage: file-contents \n"; + std::cout << "Usage: file-contents [--columns=1,2,...]\n" + << "Print contents of .\n" + << "If columns are specified, only these top-level (logical) columns are printed.\n" ; return 1; } try { + const std::string COLUMNS_PREFIX = "--columns="; + std::list cols; + char* filename = ORC_NULLPTR; + + // Read command-line options + char *param, *value; + for (int i = 1; i < argc; i++) { + if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) { + value = std::strtok(param+COLUMNS_PREFIX.length(), "," ); + while (value) { + cols.push_back(std::atoi(value)); + value = std::strtok(nullptr, "," ); + } + } else { + filename = argv[i]; + } 
+ } /* Review notes on the option loop that ends here: std::strstr matches COLUMNS_PREFIX anywhere inside an argument, not only at its start; std::strtok splits the value in place, writing terminators into argv[i]; std::atoi yields 0 for a token with no leading number, so malformed ids are not rejected; any argument that does not contain the prefix overwrites filename, so the last one wins. */ orc::ReaderOptions opts; - printContents(argv[1], opts); + if (cols.size() > 0) { + opts.include(cols); + } + /* When no argument was taken as the filename, nothing is printed and no message is emitted in this branch. */ if (filename != ORC_NULLPTR) { + printContents(filename, opts); + } } catch (std::exception& ex) { std::cerr << "Caught exception: " << ex.what() << "\n"; return 1; diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc index 92fa10ae62..773e6dfcd7 100644 --- a/tools/test/TestReader.cc +++ b/tools/test/TestReader.cc @@ -923,6 +923,26 @@ TEST(Reader, selectColumns) { for (unsigned int i=0; i < c.size(); i++) { EXPECT_TRUE(c[i]); } + /* Print row 0 through a printer built with the selection vector c and compare it with the expected text. The result of reader->next() is not checked before printing. */ std::unique_ptr batch = reader->createRowBatch(1); + std::string line; + std::unique_ptr printer = + createColumnPrinter(line, reader->getType(), &c); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::ostringstream expected; + expected << "{\"boolean1\": true, \"byte1\": -76, " + << "\"short1\": 21684, \"int1\": -941468492, " + << "\"long1\": -6863419716327549772, \"float1\": 0.7762409, " + << "\"double1\": 0.77624090391187, \"bytes1\": [123, 108, 207, 27, 93, " + << "157, 139, 233, 181, 90, 14, 60, 34, 120, 26, 119, 231, 50, 155, 121], " + << "\"string1\": \"887336a7\", \"middle\": {\"list\": [{\"int1\": " + << "-941468492, \"string1\": \"887336a7\"}, {\"int1\": -1598014431, " + << "\"string1\": \"ba419d35-x\"}]}, \"list\": [], \"map\": [{\"key\": " + << "\"ba419d35-x\", \"value\": {\"int1\": -1598014431, \"string1\": " + << "\"ba419d35-x\"}}, {\"key\": \"887336a7\", \"value\": {\"int1\": " + << "-941468492, \"string1\": \"887336a7\"}}]}"; + EXPECT_EQ(expected.str(), line); // Int column #2 cols.clear(); @@ -936,6 +956,15 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, reader->getType(), &c); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::string expectedInt("{\"byte1\": -76}"); + EXPECT_EQ(expectedInt, line); + // Struct column #10 cols.clear(); @@ 
-949,6 +978,17 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + /* Same pattern for each later selection: line is cleared and a fresh batch and printer are created before row 0 is printed and compared. */ batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, reader->getType(), &c); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::ostringstream expectedStruct; + expectedStruct << "{\"middle\": {\"list\": " + << "[{\"int1\": -941468492, \"string1\": \"887336a7\"}, " + << "{\"int1\": -1598014431, \"string1\": \"ba419d35-x\"}]}}"; + EXPECT_EQ(expectedStruct.str(), line); // Array column #11 cols.clear(); @@ -962,6 +1002,14 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, reader->getType(), &c); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::string expectedArray("{\"list\": []}"); + EXPECT_EQ(expectedArray, line); // Map column #12 cols.clear(); @@ -975,6 +1023,18 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, reader->getType(), &c); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::ostringstream expectedMap; + expectedMap << "{\"map\": [{\"key\": \"ba419d35-x\", \"value\": {\"int1\":" + << " -1598014431, \"string1\": \"ba419d35-x\"}}, {\"key\": " + << "\"887336a7\", \"value\": {\"int1\": -941468492, \"string1\": " + << "\"887336a7\"}}]}"; + EXPECT_EQ(expectedMap.str(), line); } std::map makeMetadata() {