Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions cpp/velox/shuffle/VeloxShuffleReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,13 +317,13 @@ std::shared_ptr<ColumnarBatch> VeloxHashShuffleReaderDeserializer::next() {
auto arrowBuffers,
BlockPayload::deserialize(in_.get(), codec_, memoryPool_, numRows, deserializeTime_, decompressTime_));
if (arrowBuffers.empty()) {
// Reach EOS.
reachEos();
return nullptr;
}
return makeColumnarBatch(rowType_, numRows, std::move(arrowBuffers), veloxPool_, deserializeTime_);
}

if (reachEos_) {
if (reachedEos_) {
if (merged_) {
return makeColumnarBatch(rowType_, std::move(merged_), veloxPool_, deserializeTime_);
}
Expand All @@ -337,7 +337,7 @@ std::shared_ptr<ColumnarBatch> VeloxHashShuffleReaderDeserializer::next() {
arrowBuffers,
BlockPayload::deserialize(in_.get(), codec_, memoryPool_, numRows, deserializeTime_, decompressTime_));
if (arrowBuffers.empty()) {
reachEos_ = true;
reachEos();
break;
}
if (!merged_) {
Expand All @@ -356,7 +356,7 @@ std::shared_ptr<ColumnarBatch> VeloxHashShuffleReaderDeserializer::next() {
}

// Reach EOS.
if (reachEos_ && !merged_) {
if (reachedEos_ && !merged_) {
return nullptr;
}

Expand All @@ -369,6 +369,11 @@ std::shared_ptr<ColumnarBatch> VeloxHashShuffleReaderDeserializer::next() {
return columnarBatch;
}

// Records that the end of the shuffle stream has been reached and gives up this
// deserializer's reference to the input stream. The stream object itself is only
// destroyed if no other shared_ptr owner remains.
void VeloxHashShuffleReaderDeserializer::reachEos() {
  reachedEos_ = true;
  // Assigning nullptr releases ownership exactly like shared_ptr::reset().
  in_ = nullptr;
}

VeloxSortShuffleReaderDeserializer::VeloxSortShuffleReaderDeserializer(
std::shared_ptr<arrow::io::InputStream> in,
const std::shared_ptr<arrow::Schema>& schema,
Expand Down Expand Up @@ -410,7 +415,7 @@ std::shared_ptr<ColumnarBatch> VeloxSortShuffleReaderDeserializer::next() {
BlockPayload::deserialize(in_.get(), codec_, arrowPool_, numRows, deserializeTime_, decompressTime_));

if (arrowBuffers.empty()) {
reachedEos_ = true;
reachEos();
if (cachedRows_ > 0) {
return deserializeToBatch();
}
Expand Down Expand Up @@ -493,6 +498,11 @@ void VeloxSortShuffleReaderDeserializer::readLargeRow(std::vector<std::shared_pt
cachedRows_++;
}

// Flags end-of-stream for the sort-shuffle reader and releases this object's
// shared reference to the input stream. Whether the stream is actually torn down
// depends on whether other owners still hold it.
void VeloxSortShuffleReaderDeserializer::reachEos() {
  reachedEos_ = true;
  // Equivalent to in_.reset(): leaves in_ empty.
  in_ = nullptr;
}

class VeloxRssSortShuffleReaderDeserializer::VeloxInputStream : public facebook::velox::GlutenByteInputStream {
public:
VeloxInputStream(std::shared_ptr<arrow::io::InputStream> input, facebook::velox::BufferPtr buffer);
Expand Down Expand Up @@ -561,6 +571,7 @@ std::shared_ptr<ColumnarBatch> VeloxRssSortShuffleReaderDeserializer::next() {
}

if (!in_->hasNext()) {
reachEos();
return nullptr;
}

Expand All @@ -582,6 +593,10 @@ std::shared_ptr<ColumnarBatch> VeloxRssSortShuffleReaderDeserializer::next() {
return std::make_shared<VeloxColumnarBatch>(std::move(rowVector));
}

// End-of-stream hook for the RSS sort-shuffle reader: clears `in_` via reset().
// next() invokes this once in_->hasNext() reports that no more data is available.
// NOTE(review): the declaration of `in_` for this class is not visible here;
// presumably it is a smart pointer to VeloxInputStream, so reset() releases the
// stream — confirm against the header.
void VeloxRssSortShuffleReaderDeserializer::reachEos() {
in_.reset();
}

// Returns the number of bytes still available from this stream.
//
// This implementation never reports a real count; it always returns the largest
// value representable in size_t, which acts as an "unbounded" sentinel.
// NOTE(review): presumably the remaining length of the wrapped input is not known
// in advance — confirm with callers that rely on remainingSize().
size_t VeloxRssSortShuffleReaderDeserializer::VeloxInputStream::remainingSize() const {
  // Take the limit from size_t (the declared return type) rather than
  // unsigned long: the two types differ in width on LLP64 targets, where the
  // unsigned long maximum would not be the size_t maximum.
  return std::numeric_limits<size_t>::max();
}
Expand Down
8 changes: 7 additions & 1 deletion cpp/velox/shuffle/VeloxShuffleReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class VeloxHashShuffleReaderDeserializer final : public ColumnarBatchIterator {
std::shared_ptr<ColumnarBatch> next() override;

private:
void reachEos();

std::shared_ptr<arrow::io::InputStream> in_;
std::shared_ptr<arrow::Schema> schema_;
std::shared_ptr<arrow::util::Codec> codec_;
Expand All @@ -60,7 +62,7 @@ class VeloxHashShuffleReaderDeserializer final : public ColumnarBatchIterator {
int64_t& decompressTime_;

std::unique_ptr<InMemoryPayload> merged_{nullptr};
bool reachEos_{false};
bool reachedEos_{false};
};

class VeloxSortShuffleReaderDeserializer final : public ColumnarBatchIterator {
Expand All @@ -86,6 +88,8 @@ class VeloxSortShuffleReaderDeserializer final : public ColumnarBatchIterator {

void readLargeRow(std::vector<std::shared_ptr<arrow::Buffer>>& arrowBuffers);

void reachEos();

std::shared_ptr<arrow::io::InputStream> in_;
std::shared_ptr<arrow::Schema> schema_;
std::shared_ptr<arrow::util::Codec> codec_;
Expand Down Expand Up @@ -119,6 +123,8 @@ class VeloxRssSortShuffleReaderDeserializer : public ColumnarBatchIterator {
private:
class VeloxInputStream;

void reachEos();

std::shared_ptr<facebook::velox::memory::MemoryPool> veloxPool_;
facebook::velox::RowTypePtr rowType_;
std::vector<facebook::velox::RowVectorPtr> batches_;
Expand Down
Loading