GH-39377: [C++] IO: Reuse same buffer in CompressedInputStream #39807
Merged

Changes from all 23 commits:
76341d5  Reuse same buffer in CompressedInputStream (mapleFU)
119866a  Merge branch 'main' into compress-stream-update (mapleFU)
6f9d865  Resolve comments (mapleFU)
fb2bb77  fix stupid error (mapleFU)
584fc2e  Merge branch 'main' into compress-stream-update (mapleFU)
de100e9  Add benchmark for compress input (mapleFU)
4a19061  re-impl NonZeroCopyBufferReader (mapleFU)
86a2a62  DatasetWriter: Check num_rows() before allocate batch (mapleFU)
344aca9  Merge branch 'main' into compress-stream-update (mapleFU)
7cb23a3  Update testing impl (mapleFU)
0521d1f  Reducing the calling to ResizableBuffer::Resize (mapleFU)
e50a404  Fix lint (mapleFU)
fabb5b4  add arrow/util/config.h for macro (mapleFU)
f3a6f29  Merge branch 'main' into compress-stream-update (mapleFU)
01db14c  resolve comment (mapleFU)
441cd18  remove included header (mapleFU)
a69a299  Merge branch 'main' into compress-stream-update (mapleFU)
8d04f5e  Merge branch 'compress-stream-update' of github.com:mapleFU/arrow int… (mapleFU)
47212e3  Merge branch 'main' into compress-stream-update (mapleFU)
dff8f9c  fix win ci (mapleFU)
e50c3fc  Merge branch 'main' into compress-stream-update (pitrou)
4c97871  Count decompressed bytes; naming nits (pitrou)
8ca0840  Fix compile error (pitrou)
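For context, `CompressedInputStream` wraps a raw `InputStream` and decompresses on the fly; the benchmark file added below exercises both of its read paths. The following is a minimal sketch of that usage, not part of the diff: the helper name and buffer sizes are illustrative, and `compressed` is assumed to hold a valid LZ4-frame-compressed payload.

#include <memory>

#include "arrow/buffer.h"
#include "arrow/io/compressed.h"
#include "arrow/io/memory.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/compression.h"

arrow::Status ReadDecompressed(std::shared_ptr<arrow::Buffer> compressed) {
  // Create an LZ4 codec and wrap an in-memory source in a decompressing stream.
  ARROW_ASSIGN_OR_RAISE(auto codec,
                        arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME));
  auto source = std::make_shared<arrow::io::BufferReader>(std::move(compressed));
  ARROW_ASSIGN_OR_RAISE(auto stream,
                        arrow::io::CompressedInputStream::Make(codec.get(), source));

  // Read path 1: the caller provides the output buffer ("ProvidedByCaller" below).
  ARROW_ASSIGN_OR_RAISE(auto scratch, arrow::AllocateBuffer(8 * 1024));
  ARROW_ASSIGN_OR_RAISE(int64_t nread,
                        stream->Read(scratch->size(), scratch->mutable_data()));
  (void)nread;

  // Read path 2: the stream allocates and returns a buffer ("ReturnedByCallee" below).
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> chunk, stream->Read(8 * 1024));
  (void)chunk;

  return stream->Close();
}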
New file (+200 lines):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "benchmark/benchmark.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include "arrow/buffer.h"
#include "arrow/io/compressed.h"
#include "arrow/io/memory.h"
#include "arrow/result.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/compression.h"
#include "arrow/util/config.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"

namespace arrow::io {

using ::arrow::Compression;

std::vector<uint8_t> MakeCompressibleData(int data_size) {
  // XXX This isn't a real-world corpus so doesn't really represent the
  // comparative qualities of the algorithms

  // First make highly compressible data
  std::string base_data =
      "Apache Arrow is a cross-language development platform for in-memory data";
  int nrepeats = static_cast<int>(1 + data_size / base_data.size());

  std::vector<uint8_t> data(base_data.size() * nrepeats);
  for (int i = 0; i < nrepeats; ++i) {
    std::memcpy(data.data() + i * base_data.size(), base_data.data(), base_data.size());
  }
  data.resize(data_size);

  // Then randomly mutate some bytes so as to make things harder
  std::mt19937 engine(42);
  std::exponential_distribution<> offsets(0.05);
  std::uniform_int_distribution<> values(0, 255);

  int64_t pos = 0;
  while (pos < data_size) {
    data[pos] = static_cast<uint8_t>(values(engine));
    pos += static_cast<int64_t>(offsets(engine));
  }

  return data;
}

// A non-zero-copy buffer reader, used to benchmark the non-zero-copy path.
class NonZeroCopyBufferReader final : public InputStream {
 public:
  explicit NonZeroCopyBufferReader(std::shared_ptr<Buffer> buffer)
      : reader_(std::move(buffer)) {}

  bool supports_zero_copy() const override { return false; }

  Result<int64_t> Read(int64_t nbytes, void* out) override {
    return reader_.Read(nbytes, out);
  }

  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override {
    // Emulate the non-zero-copy path (e.g. reading from a local file or an
    // object store) by allocating a fresh buffer and copying the data into it.
    ARROW_ASSIGN_OR_RAISE(auto buf, ::arrow::AllocateResizableBuffer(nbytes));
    ARROW_ASSIGN_OR_RAISE(int64_t size, Read(nbytes, buf->mutable_data()));
    ARROW_RETURN_NOT_OK(buf->Resize(size));
    return buf;
  }

  Status Close() override { return reader_.Close(); }
  Result<int64_t> Tell() const override { return reader_.Tell(); }
  bool closed() const override { return reader_.closed(); }

 private:
  ::arrow::io::BufferReader reader_;
};

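// Whether the benchmark's read loop passes a caller-allocated buffer to
// Read(nbytes, out), or lets Read(nbytes) allocate and return a new buffer.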
enum class BufferReadMode { ProvidedByCaller, ReturnedByCallee };

template <typename BufReader, BufferReadMode Mode>
static void CompressedInputStreamBenchmark(::benchmark::State& state,
                                           Compression::type compression) {
  const int64_t input_size = state.range(0);
  const int64_t batch_size = state.range(1);

  const std::vector<uint8_t> data = MakeCompressibleData(static_cast<int>(input_size));
  auto codec = ::arrow::util::Codec::Create(compression).ValueOrDie();
  int64_t max_compress_len =
      codec->MaxCompressedLen(static_cast<int64_t>(data.size()), data.data());
  std::shared_ptr<::arrow::ResizableBuffer> buf =
      ::arrow::AllocateResizableBuffer(max_compress_len).ValueOrDie();
  const int64_t compressed_length =
      codec
          ->Compress(static_cast<int64_t>(data.size()), data.data(), max_compress_len,
                     buf->mutable_data())
          .ValueOrDie();
  ABORT_NOT_OK(buf->Resize(compressed_length));
  for (auto _ : state) {
    state.PauseTiming();
    auto reader = std::make_shared<BufReader>(buf);
    [[maybe_unused]] std::unique_ptr<Buffer> read_buffer;
    if constexpr (Mode == BufferReadMode::ProvidedByCaller) {
      read_buffer = ::arrow::AllocateBuffer(batch_size).ValueOrDie();
    }
    state.ResumeTiming();
    // Include `CompressedInputStream::Make` in the timed region.
    auto input_stream =
        ::arrow::io::CompressedInputStream::Make(codec.get(), reader).ValueOrDie();
    auto remaining_size = input_size;
    while (remaining_size > 0) {
      if constexpr (Mode == BufferReadMode::ProvidedByCaller) {
        auto value = input_stream->Read(batch_size, read_buffer->mutable_data());
        ABORT_NOT_OK(value);
        remaining_size -= value.ValueOrDie();
      } else {
        auto value = input_stream->Read(batch_size);
        ABORT_NOT_OK(value);
        remaining_size -= value.ValueOrDie()->size();
      }
    }
  }
  state.SetBytesProcessed(input_size * state.iterations());
}

template <Compression::type kCompression>
static void CompressedInputStreamZeroCopyBufferProvidedByCaller(
    ::benchmark::State& state) {
  CompressedInputStreamBenchmark<::arrow::io::BufferReader,
                                 BufferReadMode::ProvidedByCaller>(state, kCompression);
}

template <Compression::type kCompression>
static void CompressedInputStreamNonZeroCopyBufferProvidedByCaller(
    ::benchmark::State& state) {
  CompressedInputStreamBenchmark<NonZeroCopyBufferReader,
                                 BufferReadMode::ProvidedByCaller>(state, kCompression);
}

template <Compression::type kCompression>
static void CompressedInputStreamZeroCopyBufferReturnedByCallee(
    ::benchmark::State& state) {
  CompressedInputStreamBenchmark<::arrow::io::BufferReader,
                                 BufferReadMode::ReturnedByCallee>(state, kCompression);
}

template <Compression::type kCompression>
static void CompressedInputStreamNonZeroCopyBufferReturnedByCallee(
    ::benchmark::State& state) {
  CompressedInputStreamBenchmark<NonZeroCopyBufferReader,
                                 BufferReadMode::ReturnedByCallee>(state, kCompression);
}

static void CompressedInputArguments(::benchmark::internal::Benchmark* b) {
  b->ArgNames({"num_bytes", "batch_size"})
      ->Args({8 * 1024, 8 * 1024})
      ->Args({64 * 1024, 8 * 1024})
      ->Args({64 * 1024, 64 * 1024})
      ->Args({1024 * 1024, 8 * 1024})
      ->Args({1024 * 1024, 64 * 1024})
      ->Args({1024 * 1024, 1024 * 1024});
}

#ifdef ARROW_WITH_LZ4
// Benchmark with LZ4 because it is lightweight, so the results focus on the
// overhead of the compressed input stream rather than the codec itself.
BENCHMARK_TEMPLATE(CompressedInputStreamZeroCopyBufferProvidedByCaller,
                   Compression::LZ4_FRAME)
    ->Apply(CompressedInputArguments);
BENCHMARK_TEMPLATE(CompressedInputStreamNonZeroCopyBufferProvidedByCaller,
                   Compression::LZ4_FRAME)
    ->Apply(CompressedInputArguments);
BENCHMARK_TEMPLATE(CompressedInputStreamZeroCopyBufferReturnedByCallee,
                   Compression::LZ4_FRAME)
    ->Apply(CompressedInputArguments);
BENCHMARK_TEMPLATE(CompressedInputStreamNonZeroCopyBufferReturnedByCallee,
                   Compression::LZ4_FRAME)
    ->Apply(CompressedInputArguments);
#endif

}  // namespace arrow::io
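As a usage note (build and invocation details are assumptions, not part of this diff): when Arrow's C++ benchmarks are enabled at configure time with -DARROW_BUILD_BENCHMARKS=ON, the cases above are registered with Google Benchmark, so the resulting benchmark binary can run just this suite via the standard --benchmark_filter flag, e.g. --benchmark_filter=CompressedInputStream.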