diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 77856eea53..4409d5f689 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -1450,15 +1450,19 @@ public FileMetaDataAndRowGroupOffsetInfo visit(SkipMetadataFilter filter) throws
       @Override
       public FileMetaDataAndRowGroupOffsetInfo visit(OffsetMetadataFilter filter) throws IOException {
         FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
+        // We must generate the map *before* filtering because it modifies `fileMetadata`.
+        Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
         FileMetaData filteredFileMetadata = filterFileMetaDataByStart(fileMetadata, filter);
-        return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, generateRowGroupOffsets(fileMetadata));
+        return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
       }
 
       @Override
       public FileMetaDataAndRowGroupOffsetInfo visit(RangeMetadataFilter filter) throws IOException {
         FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
+        // We must generate the map *before* filtering because it modifies `fileMetadata`.
+        Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
         FileMetaData filteredFileMetadata = filterFileMetaDataByMidpoint(fileMetadata, filter);
-        return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, generateRowGroupOffsets(fileMetadata));
+        return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
       }
     });
     FileMetaData fileMetaData = fileMetaDataAndRowGroupInfo.fileMetadata;
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java
index 1e74353e2c..f68dce4256 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java
@@ -359,7 +359,7 @@ public static List<User> readUsers(ParquetReader.Builder<Group> builder, boolean
       User u = userFromGroup(group);
       users.add(u);
       if (validateRowIndexes) {
-        assertEquals(reader.getCurrentRowIndex(), u.id);
+        assertEquals("Row index should be equal to User id", u.id, reader.getCurrentRowIndex());
       }
     }
     return users;
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
index 86f14a8628..2cb1f54528 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetReader.java
@@ -51,6 +51,7 @@ public class TestParquetReader {
   private static final List<User> DATA = Collections.unmodifiableList(makeUsers(1000));
 
   private final Path file;
+  private final long fileSize;
 
   private static Path createPathFromCP(String path) {
     try {
@@ -60,8 +61,9 @@ private static Path createPathFromCP(String path) {
     }
   }
 
-  public TestParquetReader(Path file) {
+  public TestParquetReader(Path file) throws IOException {
     this.file = file;
+    this.fileSize = file.getFileSystem(new Configuration()).getFileStatus(file).getLen();
   }
 
   @Parameterized.Parameters
@@ -126,13 +128,19 @@ private static void writePhoneBookToFile(Path file, ParquetProperties.WriterVers
   }
 
   private List<User> readUsers(FilterCompat.Filter filter, boolean useOtherFiltering, boolean useColumnIndexFilter)
+      throws IOException {
+    return readUsers(filter, useOtherFiltering, useColumnIndexFilter, 0, this.fileSize);
+  }
+
+  private List<User> readUsers(FilterCompat.Filter filter, boolean useOtherFiltering, boolean useColumnIndexFilter, long rangeStart, long rangeEnd)
       throws IOException {
     return PhoneBookWriter.readUsers(ParquetReader.builder(new GroupReadSupport(), file)
         .withFilter(filter)
         .useDictionaryFilter(useOtherFiltering)
         .useStatsFilter(useOtherFiltering)
         .useRecordFilter(useOtherFiltering)
-        .useColumnIndexFilter(useColumnIndexFilter), true);
+        .useColumnIndexFilter(useColumnIndexFilter)
+        .withFileRange(rangeStart, rangeEnd), true);
   }
 
   @Test
@@ -157,6 +165,15 @@ public void testCurrentRowIndex() throws Exception {
     assertEquals(reader.getCurrentRowIndex(), -1);
   }
 
+  @Test
+  public void testRangeFiltering() throws Exception {
+    // The readUsers also validates the rowIndex for each returned row.
+    readUsers(FilterCompat.NOOP, false, false, this.fileSize / 2, this.fileSize);
+    readUsers(FilterCompat.NOOP, true, false, this.fileSize / 3, this.fileSize * 3 / 4);
+    readUsers(FilterCompat.NOOP, false, true, this.fileSize / 4, this.fileSize / 2);
+    readUsers(FilterCompat.NOOP, true, true, this.fileSize * 3 / 4, this.fileSize);
+  }
+
   @Test
   public void testSimpleFiltering() throws Exception {
     Set<Long> idSet = new HashSet<>();