diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java new file mode 100644 index 0000000000..c0852bbbd2 --- /dev/null +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.format; + +/** + * A specific RuntimeException thrown when invalid values are found in the Parquet file metadata (including the footer, + * page header etc.). 
+ */ +public class InvalidParquetMetadataException extends RuntimeException { + InvalidParquetMetadataException(String message) { + super(message); + } +} diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java new file mode 100644 index 0000000000..b3738ec48f --- /dev/null +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.format; + +/** + * Utility class to validate different types of Parquet metadata (e.g. footer, page headers etc.). 
+ */ +public class MetadataValidator { + + /** + * Checks the values of a page header that has just been read. + * + * @param pageHeader the page header to check + * @return the given {@code pageHeader}, so that the call can wrap the expression that reads it + * @throws InvalidParquetMetadataException if the compressed page size is negative + */ + static PageHeader validate(PageHeader pageHeader) { + int compressedPageSize = pageHeader.getCompressed_page_size(); + if (compressedPageSize < 0) { + // Build the message only on failure: Util.readPageHeader passes each header it reads to this method. + throw new InvalidParquetMetadataException( + "Compressed page size must not be negative but was: " + compressedPageSize); + } + return pageHeader; + } + + private MetadataValidator() { + // Private constructor to prevent instantiation + } + +} diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java index 32c1986a91..4d4c893ca3 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java @@ -130,7 +130,7 @@ public static PageHeader readPageHeader(InputStream from) throws IOException { public static PageHeader readPageHeader(InputStream from, BlockCipher.Decryptor decryptor, byte[] AAD) throws IOException { - return read(from, new PageHeader(), decryptor, AAD); + return MetadataValidator.validate(read(from, new PageHeader(), decryptor, AAD)); } public static void writeFileMetaData(org.apache.parquet.format.FileMetaData fileMetadata, diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java index 1adf0998fb..685e2514b0 100644 --- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java +++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java @@ -23,13 +23,16 @@ import static junit.framework.Assert.assertNull; import static org.apache.parquet.format.Util.readFileMetaData; import static org.apache.parquet.format.Util.writeFileMetaData; +import static 
org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.IOException; import org.junit.Test; - import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer; + public class TestUtil { @Test @@ -77,6 +80,24 @@ public void testReadFileMetadata() throws Exception { assertEquals(md, md6); } + /** + * Reading back a page header whose compressed page size is negative must fail with InvalidParquetMetadataException. + */ + @Test + public void testInvalidPageHeader() throws IOException { + PageHeader ph = new PageHeader(PageType.DATA_PAGE, 100, -50); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Util.writePageHeader(ph, out); + + try { + Util.readPageHeader(in(out)); + fail("Expected InvalidParquetMetadataException was not thrown"); + } catch (InvalidParquetMetadataException e) { + assertTrue("Exception message does not contain the expected parts", + e.getMessage().contains("Compressed page size")); + } + } + private ByteArrayInputStream in(ByteArrayOutputStream baos) { return new ByteArrayInputStream(baos.toByteArray()); }