From 3c77bbae33385b8d090056a4f75e2d99595c0739 Mon Sep 17 00:00:00 2001 From: Andre Luis Anastacio Date: Mon, 8 Sep 2025 20:06:04 -0300 Subject: [PATCH 1/5] Parquet: Fix UUID ClassCastException java.util.UUID cannot be cast to class java.nio.ByteBuffer --- .../data/TestMetricsRowGroupFilter.java | 30 ++++++++++++++++-- .../iceberg/parquet/ParquetConversions.java | 2 ++ .../parquet/TestDictionaryRowGroupFilter.java | 31 +++++++++++++++++-- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index e12015d5eb73..a30fa723c3da 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -119,7 +119,8 @@ public static List parameters() { optional(14, "all_nans", DoubleType.get()), optional(15, "some_nans", FloatType.get()), optional(16, "no_nans", DoubleType.get()), - optional(17, "some_double_nans", DoubleType.get())); + optional(17, "some_double_nans", DoubleType.get()), + optional(18, "uuid_col", Types.UUIDType.get())); private static final Types.StructType UNDERSCORE_STRUCT_FIELD_TYPE = Types.StructType.of(Types.NestedField.required(8, "_int_field", IntegerType.get())); @@ -137,7 +138,8 @@ public static List parameters() { optional(14, "_all_nans", Types.DoubleType.get()), optional(15, "_some_nans", FloatType.get()), optional(16, "_no_nans", Types.DoubleType.get()), - optional(17, "_some_double_nans", Types.DoubleType.get())); + optional(17, "_some_double_nans", Types.DoubleType.get()), + optional(18, "_uuid_col", Types.UUIDType.get())); private static final Schema VARIANT_SCHEMA = new Schema( @@ -157,6 +159,11 @@ public static List parameters() { private static final int INT_MIN_VALUE = 30; private static final int INT_MAX_VALUE = 79; + private static final UUID UUID_WITH_ZEROS = + UUID.fromString("00000000-0000-0000-0000-000000000000"); + private static final UUID UUID_WITH_ONES = + UUID.fromString("11111111-1111-1111-1111-111111111111"); + private File orcFile = null; private MessageType parquetSchema = null; private BlockMetaData rowGroupMetadata = null; @@ -211,6 +218,9 @@ public void createOrcInputFile() throws IOException { structNotNull.setField("_int_field", INT_MIN_VALUE + i); record.setField("_struct_not_null", structNotNull); // struct with int + record.setField( + "_uuid_col", (i % 3 == 0) ? UUID_WITH_ZEROS : (i % 3 == 1) ? UUID_WITH_ONES : null); + appender.add(record); } } @@ -248,6 +258,10 @@ private void createParquetInputFile() throws IOException { GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); structNotNull.setField("_int_field", INT_MIN_VALUE + i); builder.setField("_struct_not_null", structNotNull); // struct with int + + builder.setField( + "_uuid_col", (i % 3 == 0) ? UUID_WITH_ZEROS : (i % 3 == 1) ? UUID_WITH_ONES : null); + records.add(builder); } @@ -1063,6 +1077,18 @@ public void testVariantFieldAllNullsNotNull() throws IOException { } } + @TestTemplate + public void testUUIDEq() { + assumeThat(format).as("Only valid for Parquet").isEqualTo(FileFormat.PARQUET); + + boolean shouldRead = shouldRead(equal("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should read: UUID value exists in row group").isTrue(); + + UUID nonExistentUuid = UUID.fromString("99999999-9999-9999-9999-999999999999"); + boolean shouldSkip = shouldRead(equal("uuid_col", nonExistentUuid)); + assertThat(shouldSkip).as("Should skip: UUID value does not exist in row group").isFalse(); + } + private boolean shouldRead(Expression expression) { return shouldRead(expression, true); } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java index 3a70198a1a57..1e5ed1fb9b87 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java @@ -83,6 +83,8 @@ static Function converterFromParquet( } else if (icebergType.typeId() == Type.TypeID.DOUBLE && parquetType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.FLOAT) { return value -> ((Float) fromParquet.apply(value)).doubleValue(); + } else if (icebergType.typeId() == Type.TypeID.UUID) { + return binary -> UUIDUtil.convert(((Binary) binary).toByteBuffer()); } } diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java index ac6e41347d47..c0dc5f21775b 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java @@ -111,7 +111,8 @@ public class TestDictionaryRowGroupFilter { 14, "decimal_fixed", DecimalType.of(20, 10)), // >18 precision to enforce FIXED_LEN_BYTE_ARRAY - optional(15, "_nans_and_nulls", DoubleType.get())); + optional(15, "_nans_and_nulls", DoubleType.get()), + optional(16, "uuid_col", Types.UUIDType.get())); private static final Types.StructType UNDERSCORE_STRUCT_FIELD_TYPE = Types.StructType.of(Types.NestedField.required(9, "_int_field", IntegerType.get())); @@ -133,7 +134,8 @@ public class TestDictionaryRowGroupFilter { 14, "_decimal_fixed", DecimalType.of(20, 10)), // >18 precision to enforce FIXED_LEN_BYTE_ARRAY - optional(15, "_nans_and_nulls", DoubleType.get())); + optional(15, "_nans_and_nulls", DoubleType.get()), + optional(16, "_uuid_col", Types.UUIDType.get())); private static final String TOO_LONG_FOR_STATS; @@ -153,6 +155,11 @@ public class TestDictionaryRowGroupFilter { .subtract(DECIMAL_MIN_VALUE) .divide(new BigDecimal(INT_MAX_VALUE - INT_MIN_VALUE), RoundingMode.HALF_UP); + private static final UUID UUID_WITH_ZEROS = + UUID.fromString("00000000-0000-0000-0000-000000000000"); + private static final UUID UUID_WITH_ONES = + UUID.fromString("11111111-1111-1111-1111-111111111111"); + private MessageType parquetSchema = null; private BlockMetaData rowGroupMetadata = null; private DictionaryPageReadStore dictionaryStore = null; @@ -203,6 +210,9 @@ public void createInputFile() throws IOException { structNotNull.put("_int_field", INT_MIN_VALUE + i); builder.set("_struct_not_null", structNotNull); // struct with int + builder.set( + "_uuid_col", (i % 3 == 0) ? UUID_WITH_ZEROS : (i % 3 == 1) ? UUID_WITH_ONES : null); + appender.add(builder.build()); } } @@ -1267,6 +1277,23 @@ public void testTransformFilter() { .isTrue(); } + @TestTemplate + public void testUUIDDictionaryFilter() { + assumeThat(getColumnForName(rowGroupMetadata, "_uuid_col").getEncodings()) + .contains(Encoding.RLE_DICTIONARY); + + boolean shouldReadExisting = + new ParquetDictionaryRowGroupFilter(SCHEMA, equal("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldReadExisting).as("Should read: Dictionary contains a matching entry").isTrue(); + + UUID nonExistentUUID = UUID.fromString("22222222-2222-2222-2222-222222222222"); + boolean shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, equal("uuid_col", nonExistentUUID)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should skip: UUID not found in dictionary").isFalse(); + } + private ColumnChunkMetaData getColumnForName(BlockMetaData rowGroup, String columnName) { ColumnPath columnPath = ColumnPath.fromDotString(columnName); for (ColumnChunkMetaData column : rowGroup.getColumns()) { From ebd82de3bf23cca276c8630412198a141bd5f29a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Anast=C3=A1cio?= Date: Fri, 3 Oct 2025 15:39:44 -0300 Subject: [PATCH 2/5] fixup! Parquet: Fix UUID ClassCastException --- .../data/TestMetricsRowGroupFilter.java | 41 ++++++++++---- .../parquet/TestDictionaryRowGroupFilter.java | 55 +++++++++++++++---- 2 files changed, 76 insertions(+), 20 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index a30fa723c3da..77a51542f228 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -218,8 +218,7 @@ public void createOrcInputFile() throws IOException { structNotNull.setField("_int_field", INT_MIN_VALUE + i); record.setField("_struct_not_null", structNotNull); // struct with int - record.setField( - "_uuid_col", (i % 3 == 0) ? UUID_WITH_ZEROS : (i % 3 == 1) ? UUID_WITH_ONES : null); + record.setField("_uuid_col", (i % 2 == 0) ? UUID_WITH_ZEROS : null); appender.add(record); } @@ -259,8 +258,7 @@ private void createParquetInputFile() throws IOException { structNotNull.setField("_int_field", INT_MIN_VALUE + i); builder.setField("_struct_not_null", structNotNull); // struct with int - builder.setField( - "_uuid_col", (i % 3 == 0) ? UUID_WITH_ZEROS : (i % 3 == 1) ? UUID_WITH_ONES : null); + builder.setField("_uuid_col", (i % 2 == 0) ? UUID_WITH_ZEROS : null); records.add(builder); } @@ -1078,15 +1076,38 @@ public void testVariantFieldAllNullsNotNull() throws IOException { } @TestTemplate - public void testUUIDEq() { + public void testUUID() { assumeThat(format).as("Only valid for Parquet").isEqualTo(FileFormat.PARQUET); - boolean shouldRead = shouldRead(equal("uuid_col", UUID_WITH_ZEROS)); - assertThat(shouldRead).as("Should read: UUID value exists in row group").isTrue(); - UUID nonExistentUuid = UUID.fromString("99999999-9999-9999-9999-999999999999"); - boolean shouldSkip = shouldRead(equal("uuid_col", nonExistentUuid)); - assertThat(shouldSkip).as("Should skip: UUID value does not exist in row group").isFalse(); + + boolean shouldRead = shouldRead(notEqual("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should read: column contains nulls").isTrue(); + + shouldRead = shouldRead(notEqual("uuid_col", nonExistentUuid)); + assertThat(shouldRead).as("Should read: column contains non-matching values").isTrue(); + + shouldRead = shouldRead(isNull("uuid_col")); + assertThat(shouldRead).as("Should read: column contains null values").isTrue(); + + shouldRead = shouldRead(notNull("uuid_col")); + assertThat(shouldRead).as("Should read: column contains non-null values").isTrue(); + + shouldRead = shouldRead(in("uuid_col", UUID_WITH_ZEROS, nonExistentUuid)); + assertThat(shouldRead).as("Should read: column contains one of the values").isTrue(); + + shouldRead = shouldRead(in("uuid_col", nonExistentUuid)); + assertThat(shouldRead).as("Should skip: column contains none of the values").isFalse(); + + shouldRead = shouldRead(notIn("uuid_col", nonExistentUuid)); + assertThat(shouldRead) + .as("Should read: column contains values not in the exclusion list") + .isTrue(); + + shouldRead = shouldRead(notIn("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead) + .as("Should read: column contains null values not in the exclusion list") + .isTrue(); } private boolean shouldRead(Expression expression) { diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java index c0dc5f21775b..c08a19524af8 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java @@ -210,8 +210,7 @@ public void createInputFile() throws IOException { structNotNull.put("_int_field", INT_MIN_VALUE + i); builder.set("_struct_not_null", structNotNull); // struct with int - builder.set( - "_uuid_col", (i % 3 == 0) ? UUID_WITH_ZEROS : (i % 3 == 1) ? UUID_WITH_ONES : null); + builder.set("_uuid_col", (i % 2 == 0) ? UUID_WITH_ZEROS : null); appender.add(builder.build()); } @@ -1278,20 +1277,56 @@ public void testTransformFilter() { } @TestTemplate - public void testUUIDDictionaryFilter() { + public void testUUID() { assumeThat(getColumnForName(rowGroupMetadata, "_uuid_col").getEncodings()) .contains(Encoding.RLE_DICTIONARY); - boolean shouldReadExisting = - new ParquetDictionaryRowGroupFilter(SCHEMA, equal("uuid_col", UUID_WITH_ZEROS)) - .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); - assertThat(shouldReadExisting).as("Should read: Dictionary contains a matching entry").isTrue(); + UUID nonExistentUuid = UUID.fromString("99999999-9999-9999-9999-999999999999"); - UUID nonExistentUUID = UUID.fromString("22222222-2222-2222-2222-222222222222"); boolean shouldRead = - new ParquetDictionaryRowGroupFilter(SCHEMA, equal("uuid_col", nonExistentUUID)) + new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: column contains nulls").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("uuid_col", nonExistentUuid)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: column contains non-matching values").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("uuid_col")) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: column contains null values").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, notNull("uuid_col")) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: column contains non-null values").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter( + SCHEMA, in("uuid_col", UUID_WITH_ZEROS, nonExistentUuid)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: column contains one of the values").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, in("uuid_col", nonExistentUuid)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should skip: column contains none of the values").isFalse(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, notIn("uuid_col", nonExistentUuid)) .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); - assertThat(shouldRead).as("Should skip: UUID not found in dictionary").isFalse(); + assertThat(shouldRead) + .as("Should read: column contains values not in the exclusion list") + .isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, notIn("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead) + .as("Should read: column contains null values not in the exclusion list") + .isTrue(); } private ColumnChunkMetaData getColumnForName(BlockMetaData rowGroup, String columnName) { From b51a298326f406392c1461de1810dfe1ae413f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Anast=C3=A1cio?= Date: Tue, 7 Oct 2025 10:45:22 -0300 Subject: [PATCH 3/5] Remove unused variables --- .../java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java | 2 -- .../apache/iceberg/parquet/TestDictionaryRowGroupFilter.java | 2 -- 2 files changed, 4 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index 77a51542f228..d545c0324729 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -161,8 +161,6 @@ public static List parameters() { private static final UUID UUID_WITH_ZEROS = UUID.fromString("00000000-0000-0000-0000-000000000000"); - private static final UUID UUID_WITH_ONES = - UUID.fromString("11111111-1111-1111-1111-111111111111"); private File orcFile = null; private MessageType parquetSchema = null; diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java index c08a19524af8..3112945458e3 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java @@ -157,8 +157,6 @@ public class TestDictionaryRowGroupFilter { private static final UUID UUID_WITH_ZEROS = UUID.fromString("00000000-0000-0000-0000-000000000000"); - private static final UUID UUID_WITH_ONES = - UUID.fromString("11111111-1111-1111-1111-111111111111"); private MessageType parquetSchema = null; private BlockMetaData rowGroupMetadata = null; From 5aa04cfc3b297b15f734d6020e6cbe437912c84d Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Fri, 10 Oct 2025 11:29:23 +0200 Subject: [PATCH 4/5] Apply suggestion from @nastra --- .../java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index d545c0324729..4bd7786c54aa 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -255,7 +255,6 @@ private void createParquetInputFile() throws IOException { GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); structNotNull.setField("_int_field", INT_MIN_VALUE + i); builder.setField("_struct_not_null", structNotNull); // struct with int - builder.setField("_uuid_col", (i % 2 == 0) ? UUID_WITH_ZEROS : null); records.add(builder); From 1a51553be223f7d5546b974e2c79ae7038192d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Anast=C3=A1cio?= Date: Tue, 14 Oct 2025 10:43:38 -0300 Subject: [PATCH 5/5] fixup! Parquet: Fix UUID ClassCastException --- .../data/TestMetricsRowGroupFilter.java | 21 +++++++++++-- .../parquet/TestDictionaryRowGroupFilter.java | 30 +++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index 4bd7786c54aa..c871c25c931d 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -215,7 +215,6 @@ public void createOrcInputFile() throws IOException { GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); structNotNull.setField("_int_field", INT_MIN_VALUE + i); record.setField("_struct_not_null", structNotNull); // struct with int - record.setField("_uuid_col", (i % 2 == 0) ? UUID_WITH_ZEROS : null); appender.add(record); @@ -1078,12 +1077,30 @@ public void testUUID() { UUID nonExistentUuid = UUID.fromString("99999999-9999-9999-9999-999999999999"); - boolean shouldRead = shouldRead(notEqual("uuid_col", UUID_WITH_ZEROS)); + boolean shouldRead = shouldRead(equal("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should read: column contains the value").isTrue(); + + shouldRead = shouldRead(equal("uuid_col", nonExistentUuid)); + assertThat(shouldRead).as("Should skip: column does not contain the value").isFalse(); + + shouldRead = shouldRead(notEqual("uuid_col", UUID_WITH_ZEROS)); assertThat(shouldRead).as("Should read: column contains nulls").isTrue(); shouldRead = shouldRead(notEqual("uuid_col", nonExistentUuid)); assertThat(shouldRead).as("Should read: column contains non-matching values").isTrue(); + shouldRead = shouldRead(lessThan("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should skip: no values lower").isFalse(); + + shouldRead = shouldRead(lessThanOrEqual("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should read: column contains the value").isTrue(); + + shouldRead = shouldRead(greaterThan("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should skip: no values greater").isFalse(); + + shouldRead = shouldRead(greaterThanOrEqual("uuid_col", UUID_WITH_ZEROS)); + assertThat(shouldRead).as("Should read: column contains the value").isTrue(); + shouldRead = shouldRead(isNull("uuid_col")); assertThat(shouldRead).as("Should read: column contains null values").isTrue(); diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java index 3112945458e3..48cc2b0992a5 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java @@ -1282,6 +1282,16 @@ public void testUUID() { UUID nonExistentUuid = UUID.fromString("99999999-9999-9999-9999-999999999999"); boolean shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, equal("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: column contains the value").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, equal("uuid_col", nonExistentUuid)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should skip: column does not contain the value").isFalse(); + + shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, notEqual("uuid_col", UUID_WITH_ZEROS)) .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); assertThat(shouldRead).as("Should read: column contains nulls").isTrue(); @@ -1291,6 +1301,26 @@ public void testUUID() { .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); assertThat(shouldRead).as("Should read: column contains non-matching values").isTrue(); + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, lessThan("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should skip: no uuid less than lower bound").isFalse(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, lessThanOrEqual("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: one possible uuid").isTrue(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThan("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should skip: no uuid greater than upper bound").isFalse(); + + shouldRead = + new ParquetDictionaryRowGroupFilter(SCHEMA, greaterThanOrEqual("uuid_col", UUID_WITH_ZEROS)) + .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore); + assertThat(shouldRead).as("Should read: one possible uuid").isTrue(); + shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, isNull("uuid_col")) .shouldRead(parquetSchema, rowGroupMetadata, dictionaryStore);