From fd6d3caa4b75671cdaec1e67c3582df440ce1561 Mon Sep 17 00:00:00 2001 From: MacVincent Agha-Oko Date: Mon, 1 Dec 2025 19:47:46 -0800 Subject: [PATCH 1/3] Fix Nimble OSS Build (#340) Summary: D87964917 made a change to a CMakeLists.txt file that broke OSS builds. This diff fixes that. Differential Revision: D88117968 --- dwio/nimble/tablet/CMakeLists.txt | 2 +- dwio/nimble/tablet/tests/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dwio/nimble/tablet/CMakeLists.txt b/dwio/nimble/tablet/CMakeLists.txt index 49d1c314..eeb23805 100644 --- a/dwio/nimble/tablet/CMakeLists.txt +++ b/dwio/nimble/tablet/CMakeLists.txt @@ -29,7 +29,7 @@ target_include_directories( ) add_dependencies(nimble_footer_fb nimble_footer_schema_fb) -add_library(nimble_tablet_common Compression.cpp, MetadataBuffer.cpp) +add_library(nimble_tablet_common Compression.cpp MetadataBuffer.cpp) target_link_libraries(nimble_tablet_common nimble_footer_fb Folly::folly) add_library(nimble_tablet_reader TabletReader.cpp) diff --git a/dwio/nimble/tablet/tests/CMakeLists.txt b/dwio/nimble/tablet/tests/CMakeLists.txt index f4613020..ad5372d2 100644 --- a/dwio/nimble/tablet/tests/CMakeLists.txt +++ b/dwio/nimble/tablet/tests/CMakeLists.txt @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -add_executable( - nimble_tabletReader_tests TabletTest.cpp MetadataBufferTest.cpp) +add_executable(nimble_tabletReader_tests TabletTest.cpp MetadataBufferTest.cpp) add_test(nimble_tabletReader_tests nimble_tabletReader_tests) From a3cd2135f38b21c0726271c8bf4f78a2a431b8ea Mon Sep 17 00:00:00 2001 From: MacVincent Agha-Oko Date: Mon, 1 Dec 2025 19:47:46 -0800 Subject: [PATCH 2/3] Fix GetRawDatasize Test Util for Nullable Encoding (#339) Summary: The test util used to calculate encoding sizes in NimbleDump and VeloxWriter tests only includes the size of the nulls bits for nullable encoded data. This is different from how sizes are nullable content stream sizes are calculated in the rest of our code base and introduced flakiness to a VeloxWriter test. This diff fixes that. Differential Revision: D88113894 --- dwio/nimble/encodings/tests/EncodingSelectionTests.cpp | 2 +- dwio/nimble/encodings/tests/TestUtils.cpp | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dwio/nimble/encodings/tests/EncodingSelectionTests.cpp b/dwio/nimble/encodings/tests/EncodingSelectionTests.cpp index 1ba93ae9..d20350a9 100644 --- a/dwio/nimble/encodings/tests/EncodingSelectionTests.cpp +++ b/dwio/nimble/encodings/tests/EncodingSelectionTests.cpp @@ -989,7 +989,7 @@ TEST(EncodingSelectionTests, TestNullable) { // test getRawDataSize auto size = facebook::nimble::test::TestUtils::getRawDataSize(*pool, serialized); - auto expectedSize = 15 + 6; // 15 bytes for string data, 6 bytes for nulls + auto expectedSize = 15 + 10; // 15 bytes for string data, 10 bytes for nulls ASSERT_EQ(size, expectedSize); LOG(INFO) << "Final size: " << serialized.size(); diff --git a/dwio/nimble/encodings/tests/TestUtils.cpp b/dwio/nimble/encodings/tests/TestUtils.cpp index 81014df2..4e7f54f5 100644 --- a/dwio/nimble/encodings/tests/TestUtils.cpp +++ b/dwio/nimble/encodings/tests/TestUtils.cpp @@ -38,11 +38,8 @@ uint64_t TestUtils::getRawDataSize( if (encodingType == EncodingType::Nullable) { auto pos = encodingStr.data() + kPrefixSize; auto nonNullsSize = encoding::readUint32(pos); - auto nonNullsCount = encoding::peek(pos + kRowCountOffset); - // We do not count the bits indicating non-null, therefore we only - // include the size of the null bits and the non-null values. - return getRawDataSize(memoryPool, {pos, nonNullsSize}) + - (rowCount - nonNullsCount); + // Sum of the nulls count and size of the non-null child encoding. + return getRawDataSize(memoryPool, {pos, nonNullsSize}) + rowCount; } else { if (dataType != DataType::String) { auto typeSize = nimble::detail::dataTypeSize(dataType); From 21078acf08fe1d2375a9a0af2e38d6396f7d7ade Mon Sep 17 00:00:00 2001 From: MacVincent Agha-Oko Date: Mon, 1 Dec 2025 19:47:46 -0800 Subject: [PATCH 3/3] (fix): Fix Flakiness in Writer Unit Test Summary: In `VeloxWriterTests.fuzzComplex` random min and max stream chunk size values are selected. However, flaky errors may occur when these values are close. For instance in this case: ``` buck run mode/opt dwio/nimble/velox/tests/ -- --gtest_filter=VeloxWriterTests.fuzzComplex --writer_tests_seed 3173165506 ``` a min chunk size of 263 and a max chunk size of 266 is selected. In the soft chunking phase, when `ensureFullChunks` is true, the stream chunker accumulates the maximum number of values with total size up to but not exceeding the max chunk size. Differential Revision: D88092017 --- dwio/nimble/velox/tests/VeloxWriterTests.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dwio/nimble/velox/tests/VeloxWriterTests.cpp b/dwio/nimble/velox/tests/VeloxWriterTests.cpp index 86601fdf..eddd33eb 100644 --- a/dwio/nimble/velox/tests/VeloxWriterTests.cpp +++ b/dwio/nimble/velox/tests/VeloxWriterTests.cpp @@ -2239,12 +2239,15 @@ TEST_F(VeloxWriterTests, fuzzComplex) { } const auto iterations = 20; + // provide sufficient buffer between min and max chunk size thresholds + constexpr uint64_t chunkThresholdBuffer = sizeof(int64_t) + sizeof(bool); for (auto i = 0; i < iterations; ++i) { writerOptions.minStreamChunkRawSize = std::uniform_int_distribution(10, 4096)(rng); writerOptions.maxStreamChunkRawSize = std::uniform_int_distribution( - writerOptions.minStreamChunkRawSize, 8192)(rng); + writerOptions.minStreamChunkRawSize + chunkThresholdBuffer, + 8192)(rng); const auto batchSize = std::uniform_int_distribution(10, 400)(rng); const auto batchCount = 5;