Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 31 additions & 13 deletions c++/src/ColumnPrinter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,9 @@ namespace orc {

LongColumnPrinter::LongColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
// pass
): ColumnPrinter(buffer, type),
data(nullptr) {
// PASS
}

void LongColumnPrinter::reset(const ColumnVectorBatch& batch) {
Expand All @@ -302,6 +303,7 @@ namespace orc {
// Builds a printer for a floating-point column.
//
// buffer and type are forwarded unchanged to the ColumnPrinter base.
// data starts out as nullptr so the pointer never holds an indeterminate
// value (presumably it is pointed at a batch by reset(), which is not
// visible in this hunk -- confirm). isFloat records whether the column's
// kind is FLOAT, as opposed to any other kind routed to this printer.
DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type),
data(nullptr),
isFloat(type.getKind() == FLOAT){
// Nothing to do here: all state is set up in the initializer list.
}
Expand All @@ -325,7 +327,9 @@ namespace orc {
Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer,
type) {
type),
data(nullptr),
scale(0) {
// PASS
}

Expand Down Expand Up @@ -375,14 +379,16 @@ namespace orc {
Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer,
type) {
type),
data(nullptr),
scale(0) {
// PASS
}

void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) {
ColumnPrinter::reset(batch);
data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data();
scale =dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
}

void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
Expand All @@ -395,7 +401,9 @@ namespace orc {

StringColumnPrinter::StringColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
start(nullptr),
length(nullptr) {
// PASS
}

Expand Down Expand Up @@ -445,7 +453,8 @@ namespace orc {

ListColumnPrinter::ListColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
offsets(nullptr) {
elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
}

Expand Down Expand Up @@ -473,7 +482,8 @@ namespace orc {

MapColumnPrinter::MapColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
offsets(nullptr) {
keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
}
Expand Down Expand Up @@ -507,7 +517,9 @@ namespace orc {

UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
tags(nullptr),
offsets(nullptr) {
for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))
.release());
Expand Down Expand Up @@ -590,7 +602,8 @@ namespace orc {

DateColumnPrinter::DateColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
data(nullptr) {
// PASS
}

Expand All @@ -616,7 +629,8 @@ namespace orc {

BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
data(nullptr) {
// PASS
}

Expand All @@ -635,7 +649,9 @@ namespace orc {

BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type) {
): ColumnPrinter(buffer, type),
start(nullptr),
length(nullptr) {
// PASS
}

Expand Down Expand Up @@ -666,7 +682,9 @@ namespace orc {
TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer,
type) {
type),
seconds(nullptr),
nanoseconds(nullptr) {
// PASS
}

Expand Down
2 changes: 1 addition & 1 deletion c++/src/MemoryPool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ namespace orc {

template <class T>
void DataBuffer<T>::reserve(uint64_t newCapacity){
if (newCapacity > currentCapacity) {
if (newCapacity > currentCapacity || !buf) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a redundant condition that will never be used. Does Coverity complain about this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, Coverity was complaining about missing null checks on buf in other methods. With this change, even if newCapacity is 0, buf is always non-null.

if (buf) {
T* buf_old = buf;
buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity));
Expand Down
5 changes: 4 additions & 1 deletion c++/src/RLEv1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
: inputStream(std::move(input)),
isSigned(hasSigned),
remainingValues(0),
value(0),
bufferStart(nullptr),
bufferEnd(bufferStart) {
bufferEnd(bufferStart),
delta(0),
repeating(false) {
}

void RleDecoderV1::seek(PositionProvider& location) {
Expand Down
6 changes: 4 additions & 2 deletions c++/src/RLEv2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ struct FixedBitSizes {
};

inline uint32_t decodeBitWidth(uint32_t n) {
if (n >= FixedBitSizes::ONE &&
n <= FixedBitSizes::TWENTYFOUR) {
if (n <= FixedBitSizes::TWENTYFOUR) {
return n + 1;
} else if (n == FixedBitSizes::TWENTYSIX) {
return 26;
Expand Down Expand Up @@ -125,8 +124,11 @@ RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
bitsLeft(0),
curByte(0),
patchBitSize(0),
unpackedIdx(0),
patchIdx(0),
base(0),
curGap(0),
curPatch(0),
patchMask(0),
actualGap(0),
unpacked(pool, 0),
Expand Down
62 changes: 44 additions & 18 deletions c++/src/Reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1525,12 +1525,16 @@ namespace orc {
result = new TimestampVectorBatch(capacity, memoryPool);
break;
case STRUCT:
result = new StructVectorBatch(capacity, memoryPool);
for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
subtype = &(type.getSubtype(i));
if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
dynamic_cast<StructVectorBatch*>(result)->fields.push_back
(createRowBatch(*subtype, capacity).release());
{
StructVectorBatch *structResult =
new StructVectorBatch(capacity, memoryPool);
result = structResult;
for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
subtype = &(type.getSubtype(i));
if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
structResult->fields.push_back(createRowBatch(*subtype,
capacity).release());
}
}
}
break;
Expand Down Expand Up @@ -1563,12 +1567,16 @@ namespace orc {
}
break;
case UNION:
result = new UnionVectorBatch(capacity, memoryPool);
for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
subtype = &(type.getSubtype(i));
if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
dynamic_cast<UnionVectorBatch*>(result)->children.push_back
(createRowBatch(*subtype, capacity).release());
{
UnionVectorBatch *unionResult =
new UnionVectorBatch(capacity, memoryPool);
result = unionResult;
for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
subtype = &(type.getSubtype(i));
if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
unionResult->children.push_back(createRowBatch(*subtype,
capacity).release());
}
}
}
break;
Expand Down Expand Up @@ -1804,6 +1812,8 @@ namespace orc {
valueCount = pb.numberofvalues();
if (!pb.has_binarystatistics() || !correctStats) {
_hasTotalLength = false;

totalLength = 0;
}else{
_hasTotalLength = pb.binarystatistics().has_sum();
totalLength = static_cast<uint64_t>(pb.binarystatistics().sum());
Expand All @@ -1815,6 +1825,7 @@ namespace orc {
valueCount = pb.numberofvalues();
if (!pb.has_bucketstatistics() || !correctStats) {
_hasCount = false;
trueCount = 0;
}else{
_hasCount = true;
trueCount = pb.bucketstatistics().count(0);
Expand All @@ -1827,11 +1838,14 @@ namespace orc {
if (!pb.has_datestatistics() || !correctStats) {
_hasMinimum = false;
_hasMaximum = false;
}else{
_hasMinimum = pb.datestatistics().has_minimum();
_hasMaximum = pb.datestatistics().has_maximum();
minimum = pb.datestatistics().minimum();
maximum = pb.datestatistics().maximum();

minimum = 0;
maximum = 0;
} else {
_hasMinimum = pb.datestatistics().has_minimum();
_hasMaximum = pb.datestatistics().has_maximum();
minimum = pb.datestatistics().minimum();
maximum = pb.datestatistics().maximum();
}
}

Expand Down Expand Up @@ -1861,6 +1875,10 @@ namespace orc {
_hasMinimum = false;
_hasMaximum = false;
_hasSum = false;

minimum = 0;
maximum = 0;
sum = 0;
}else{
const proto::DoubleStatistics& stats = pb.doublestatistics();
_hasMinimum = stats.has_minimum();
Expand All @@ -1880,6 +1898,10 @@ namespace orc {
_hasMinimum = false;
_hasMaximum = false;
_hasSum = false;

minimum = 0;
maximum = 0;
sum = 0;
}else{
const proto::IntegerStatistics& stats = pb.intstatistics();
_hasMinimum = stats.has_minimum();
Expand All @@ -1899,6 +1921,8 @@ namespace orc {
_hasMinimum = false;
_hasMaximum = false;
_hasTotalLength = false;

totalLength = 0;
}else{
const proto::StringStatistics& stats = pb.stringstatistics();
_hasMinimum = stats.has_minimum();
Expand All @@ -1912,11 +1936,13 @@ namespace orc {
}

TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
(const proto::ColumnStatistics& pb, bool correctStats){
(const proto::ColumnStatistics& pb, bool correctStats) {
valueCount = pb.numberofvalues();
if (!pb.has_timestampstatistics() || !correctStats) {
_hasMinimum = false;
_hasMaximum = false;
minimum = 0;
maximum = 0;
}else{
const proto::TimestampStatistics& stats = pb.timestampstatistics();
_hasMinimum = stats.has_minimum();
Expand Down
4 changes: 4 additions & 0 deletions c++/src/Vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ namespace orc {

Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool
): ColumnVectorBatch(cap, pool),
precision(0),
scale(0),
values(pool, cap),
readScales(pool, cap) {
// PASS
Expand All @@ -258,6 +260,8 @@ namespace orc {

Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool
): ColumnVectorBatch(cap, pool),
precision(0),
scale(0),
values(pool, cap),
readScales(pool, cap) {
// PASS
Expand Down
8 changes: 7 additions & 1 deletion c++/test/TestCompression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ namespace orc {
// Per-test-case set-up.
static void SetUpTestCase() {
simpleFile = "simple-file.binary";
remove(simpleFile);
if (remove(simpleFile) != 0) {
if (errno != ENOENT) {
std::cerr << "Can't remove simple-file.binary: "
<< strerror(errno) << "\n";
throw std::runtime_error("Can't remove file");
}
}
std::ofstream file;
file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
file.open(simpleFile,
Expand Down
2 changes: 1 addition & 1 deletion c++/test/TestInt128.cc
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ namespace orc {

TEST(Int128, testBuildFromArray) {
Int128 result;
uint32_t array[4]={0x12345678, 0x9abcdef0, 0xfedcba98, 0x76543210};
uint32_t array[5]={0x12345678, 0x9abcdef0, 0xfedcba98, 0x76543210, 0};

buildFromArray(result, array, 0);
EXPECT_EQ(0, result.toLong());
Expand Down
33 changes: 16 additions & 17 deletions tools/src/FileContents.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,9 @@
#include <iostream>
#include <string>

int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "Usage: file-contents <filename>\n";
return 1;
}
orc::ReaderOptions opts;
std::list<int64_t> cols;
cols.push_back(0);
opts.include(cols);

void printContents(const char* filename, const orc::ReaderOptions opts) {
std::unique_ptr<orc::Reader> reader;
try{
reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
} catch (orc::ParseError e) {
std::cout << "Error reading file " << argv[1] << "! "
<< e.what() << std::endl;
return -1;
}
reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);

std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);
std::string line;
Expand All @@ -59,5 +44,19 @@ int main(int argc, char* argv[]) {
fwrite(str, 1, strlen(str), stdout);
}
}
}

/**
 * Entry point of the file-contents tool.
 *
 * Takes the path of the file to print as the first command-line argument
 * and passes it, together with a default-constructed orc::ReaderOptions,
 * to printContents().
 *
 * @param argc number of command-line arguments, including the program name
 * @param argv command-line arguments; argv[1] is the file to print
 * @return 0 on success; 1 when no filename was given or when an exception
 *         derived from std::exception is thrown inside the try block
 */
int main(int argc, char* argv[]) {
  if (argc < 2) {
    std::cout << "Usage: file-contents <filename>\n";
    return 1;
  }
  try {
    orc::ReaderOptions opts;
    printContents(argv[1], opts);
  } catch (const std::exception& ex) {
    // Caught by const reference: the handler only reads the message and
    // never modifies the exception object.
    std::cerr << "Caught exception: " << ex.what() << "\n";
    return 1;
  }
  return 0;
}
Loading