From d022451f5e58e86b39f0a312d6251a0dc6bb7468 Mon Sep 17 00:00:00 2001 From: Gabor Szadovszky Date: Wed, 22 Sep 2021 17:36:03 +0200 Subject: [PATCH 1/4] PARQUET-2094: Handle negative values in page headers --- .../parquet/format/MetadataValidator.java | 56 +++++++++++++++++++ .../java/org/apache/parquet/format/Util.java | 3 +- .../org/apache/parquet/format/TestUtil.java | 21 ++++++- 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java new file mode 100644 index 0000000000..52fec0edea --- /dev/null +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.format; + +import java.io.IOException; +import java.util.function.Predicate; + +/** + * Utility class to validate different types of Parquet metadata (e.g. footer, page headers etc.). + */ +public class MetadataValidator { + + /** + * A specific IOException thrown when invalid values are found in the Parquet file metadata (including the footer, + * page header etc.). + */ + public static class InvalidParquetMetadataException extends IOException { + private InvalidParquetMetadataException(String metaName, T value) { + super("Metadata " + metaName + " is invalid: " + value); + } + } + + static PageHeader validate(PageHeader pageHeader) throws InvalidParquetMetadataException { + validateValue(size -> size >= 0, pageHeader.getCompressed_page_size(), "pageHeader.compressed_page_size"); + return pageHeader; + } + + private static void validateValue(Predicate validator, T value, String metaName) + throws InvalidParquetMetadataException { + if (!validator.test(value)) { + throw new InvalidParquetMetadataException(metaName, value); + } + } + + private MetadataValidator() { + // Private constructor to prevent instantiation + } + +} diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java index 32c1986a91..600835ba95 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java @@ -27,6 +27,7 @@ import static org.apache.parquet.format.FileMetaData._Fields.ROW_GROUPS; import static org.apache.parquet.format.FileMetaData._Fields.SCHEMA; import static org.apache.parquet.format.FileMetaData._Fields.VERSION; +import static org.apache.parquet.format.MetadataValidator.validate; import static org.apache.parquet.format.event.Consumers.fieldConsumer; import static org.apache.parquet.format.event.Consumers.listElementsOf; import static org.apache.parquet.format.event.Consumers.listOf; @@ -130,7 +131,7 @@ public static PageHeader readPageHeader(InputStream from) throws IOException { public static PageHeader readPageHeader(InputStream from, BlockCipher.Decryptor decryptor, byte[] AAD) throws IOException { - return read(from, new PageHeader(), decryptor, AAD); + return validate(read(from, new PageHeader(), decryptor, AAD)); } public static void writeFileMetaData(org.apache.parquet.format.FileMetaData fileMetadata, diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java index 1adf0998fb..dedd55b7ea 100644 --- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java +++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java @@ -23,13 +23,17 @@ import static junit.framework.Assert.assertNull; import static org.apache.parquet.format.Util.readFileMetaData; import static org.apache.parquet.format.Util.writeFileMetaData; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.IOException; import org.junit.Test; - +import org.apache.parquet.format.MetadataValidator.InvalidParquetMetadataException; import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer; + public class TestUtil { @Test @@ -77,6 +81,21 @@ public void testReadFileMetadata() throws Exception { assertEquals(md, md6); } + @Test + public void testInvalidPageHeader() throws IOException { + PageHeader ph = new PageHeader(PageType.DATA_PAGE, 100, -50); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Util.writePageHeader(ph, out); + + try { + Util.readPageHeader(in(out)); + fail("Expected exception but did not thrown"); + } catch (InvalidParquetMetadataException e) { + assertTrue("Exception message does not contain the expected parts", + e.getMessage().contains("pageHeader.compressed_page_size")); + } + } + private ByteArrayInputStream in(ByteArrayOutputStream baos) { return new ByteArrayInputStream(baos.toByteArray()); } From c06a2c1685a00b1419328fae310e822ffea021a4 Mon Sep 17 00:00:00 2001 From: Gabor Szadovszky Date: Mon, 27 Sep 2021 10:20:55 +0200 Subject: [PATCH 2/4] PARQUET-2094: Address comments --- .../parquet/format/MetadataValidator.java | 26 +++++++++---------- .../java/org/apache/parquet/format/Util.java | 3 +-- .../org/apache/parquet/format/TestUtil.java | 2 +- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java index 52fec0edea..09381bb0c1 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java @@ -19,33 +19,31 @@ package org.apache.parquet.format; -import java.io.IOException; -import java.util.function.Predicate; - /** * Utility class to validate different types of Parquet metadata (e.g. footer, page headers etc.). */ public class MetadataValidator { /** - * A specific IOException thrown when invalid values are found in the Parquet file metadata (including the footer, - * page header etc.). + * A specific RuntimeException thrown when invalid values are found in the Parquet file metadata (including the + * footer, page header etc.). */ - public static class InvalidParquetMetadataException extends IOException { - private InvalidParquetMetadataException(String metaName, T value) { - super("Metadata " + metaName + " is invalid: " + value); + public static class InvalidParquetMetadataException extends RuntimeException { + private InvalidParquetMetadataException(String message) { + super(message); } } - static PageHeader validate(PageHeader pageHeader) throws InvalidParquetMetadataException { - validateValue(size -> size >= 0, pageHeader.getCompressed_page_size(), "pageHeader.compressed_page_size"); + static PageHeader validate(PageHeader pageHeader) { + int compressed_page_size = pageHeader.getCompressed_page_size(); + validateValue(compressed_page_size >= 0, + String.format("Compressed page size must not be negative but was: %s", compressed_page_size)); return pageHeader; } - private static void validateValue(Predicate validator, T value, String metaName) - throws InvalidParquetMetadataException { - if (!validator.test(value)) { - throw new InvalidParquetMetadataException(metaName, value); + private static void validateValue(boolean valid, String message) { + if (!valid) { + throw new InvalidParquetMetadataException(message); } } diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java index 600835ba95..4d4c893ca3 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java @@ -27,7 +27,6 @@ import static org.apache.parquet.format.FileMetaData._Fields.ROW_GROUPS; import static org.apache.parquet.format.FileMetaData._Fields.SCHEMA; import static org.apache.parquet.format.FileMetaData._Fields.VERSION; -import static org.apache.parquet.format.MetadataValidator.validate; import static org.apache.parquet.format.event.Consumers.fieldConsumer; import static org.apache.parquet.format.event.Consumers.listElementsOf; import static org.apache.parquet.format.event.Consumers.listOf; @@ -131,7 +130,7 @@ public static PageHeader readPageHeader(InputStream from) throws IOException { public static PageHeader readPageHeader(InputStream from, BlockCipher.Decryptor decryptor, byte[] AAD) throws IOException { - return validate(read(from, new PageHeader(), decryptor, AAD)); + return MetadataValidator.validate(read(from, new PageHeader(), decryptor, AAD)); } public static void writeFileMetaData(org.apache.parquet.format.FileMetaData fileMetadata, diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java index dedd55b7ea..198f3c6bac 100644 --- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java +++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java @@ -92,7 +92,7 @@ public void testInvalidPageHeader() throws IOException { fail("Expected exception but did not thrown"); } catch (InvalidParquetMetadataException e) { assertTrue("Exception message does not contain the expected parts", - e.getMessage().contains("pageHeader.compressed_page_size")); + e.getMessage().contains("Compressed page size")); } } From 1346999a6f16d79eeafdf3bddbf76b5b15aa3aec Mon Sep 17 00:00:00 2001 From: Gabor Szadovszky Date: Tue, 28 Sep 2021 10:55:18 +0200 Subject: [PATCH 3/4] PARQUET-2094: Move exception to its own file --- .../InvalidParquetMetadataException.java | 30 +++++++++++++++++++ .../parquet/format/MetadataValidator.java | 10 ------- .../org/apache/parquet/format/TestUtil.java | 3 +- 3 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java new file mode 100644 index 0000000000..e642525cbc --- /dev/null +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.format; + +/** + * A specific RuntimeException thrown when invalid values are found in the Parquet file metadata (including the footer, + * page header etc.). + */ +public class InvalidParquetMetadataException extends RuntimeException { + InvalidParquetMetadataException(String message) { + super(message); + } +} diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java index 09381bb0c1..b3738ec48f 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java @@ -24,16 +24,6 @@ */ public class MetadataValidator { - /** - * A specific RuntimeException thrown when invalid values are found in the Parquet file metadata (including the - * footer, page header etc.). - */ - public static class InvalidParquetMetadataException extends RuntimeException { - private InvalidParquetMetadataException(String message) { - super(message); - } - } - static PageHeader validate(PageHeader pageHeader) { int compressed_page_size = pageHeader.getCompressed_page_size(); validateValue(compressed_page_size >= 0, diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java index 198f3c6bac..271b3db1a2 100644 --- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java +++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java @@ -30,9 +30,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; -import org.junit.Test; -import org.apache.parquet.format.MetadataValidator.InvalidParquetMetadataException; import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer; +import org.junit.Test; public class TestUtil { From 3d64e3407baf02849401364d88ce572eafac3bfc Mon Sep 17 00:00:00 2001 From: Gabor Szadovszky Date: Wed, 29 Sep 2021 09:17:47 +0200 Subject: [PATCH 4/4] PARQUET-2094: Fix findings --- .../apache/parquet/format/InvalidParquetMetadataException.java | 2 +- .../src/test/java/org/apache/parquet/format/TestUtil.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java index e642525cbc..c0852bbbd2 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java @@ -24,7 +24,7 @@ * page header etc.). */ public class InvalidParquetMetadataException extends RuntimeException { - InvalidParquetMetadataException(String message) { + InvalidParquetMetadataException(String message) { super(message); } } diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java index 271b3db1a2..685e2514b0 100644 --- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java +++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java @@ -30,8 +30,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; -import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer; import org.junit.Test; +import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer; public class TestUtil {