Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1450,15 +1450,19 @@ public FileMetaDataAndRowGroupOffsetInfo visit(SkipMetadataFilter filter) throws
@Override
public FileMetaDataAndRowGroupOffsetInfo visit(OffsetMetadataFilter filter) throws IOException {
FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
// We must generate the map *before* filtering because it modifies `fileMetadata`.
Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
FileMetaData filteredFileMetadata = filterFileMetaDataByStart(fileMetadata, filter);
return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, generateRowGroupOffsets(fileMetadata));
return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
}

@Override
public FileMetaDataAndRowGroupOffsetInfo visit(RangeMetadataFilter filter) throws IOException {
FileMetaData fileMetadata = readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
// We must generate the map *before* filtering because it modifies `fileMetadata`.
Map<RowGroup, Long> rowGroupToRowIndexOffsetMap = generateRowGroupOffsets(fileMetadata);
FileMetaData filteredFileMetadata = filterFileMetaDataByMidpoint(fileMetadata, filter);
return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, generateRowGroupOffsets(fileMetadata));
return new FileMetaDataAndRowGroupOffsetInfo(filteredFileMetadata, rowGroupToRowIndexOffsetMap);
}
});
FileMetaData fileMetaData = fileMetaDataAndRowGroupInfo.fileMetadata;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ public static List<User> readUsers(ParquetReader.Builder<Group> builder, boolean
User u = userFromGroup(group);
users.add(u);
if (validateRowIndexes) {
assertEquals(reader.getCurrentRowIndex(), u.id);
assertEquals("Row index should be equal to User id", u.id, reader.getCurrentRowIndex());
}
}
return users;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public class TestParquetReader {
private static final List<PhoneBookWriter.User> DATA = Collections.unmodifiableList(makeUsers(1000));

private final Path file;
private final long fileSize;

private static Path createPathFromCP(String path) {
try {
Expand All @@ -60,8 +61,9 @@ private static Path createPathFromCP(String path) {
}
}

public TestParquetReader(Path file) {
public TestParquetReader(Path file) throws IOException {
this.file = file;
this.fileSize = file.getFileSystem(new Configuration()).getFileStatus(file).getLen();
}

@Parameterized.Parameters
Expand Down Expand Up @@ -126,13 +128,19 @@ private static void writePhoneBookToFile(Path file, ParquetProperties.WriterVers
}

private List<PhoneBookWriter.User> readUsers(FilterCompat.Filter filter, boolean useOtherFiltering, boolean useColumnIndexFilter)
throws IOException {
return readUsers(filter, useOtherFiltering, useColumnIndexFilter, 0, this.fileSize);
}

private List<PhoneBookWriter.User> readUsers(FilterCompat.Filter filter, boolean useOtherFiltering, boolean useColumnIndexFilter, long rangeStart, long rangeEnd)
throws IOException {
return PhoneBookWriter.readUsers(ParquetReader.builder(new GroupReadSupport(), file)
.withFilter(filter)
.useDictionaryFilter(useOtherFiltering)
.useStatsFilter(useOtherFiltering)
.useRecordFilter(useOtherFiltering)
.useColumnIndexFilter(useColumnIndexFilter), true);
.useColumnIndexFilter(useColumnIndexFilter)
.withFileRange(rangeStart, rangeEnd), true);
}

@Test
Expand All @@ -157,6 +165,15 @@ public void testCurrentRowIndex() throws Exception {
assertEquals(reader.getCurrentRowIndex(), -1);
}

@Test
public void testRangeFiltering() throws Exception {
// The readUsers also validates the rowIndex for each returned row.
readUsers(FilterCompat.NOOP, false, false, this.fileSize / 2, this.fileSize);
readUsers(FilterCompat.NOOP, true, false, this.fileSize / 3, this.fileSize * 3 / 4);
readUsers(FilterCompat.NOOP, false, true, this.fileSize / 4, this.fileSize / 2);
readUsers(FilterCompat.NOOP, true, true, this.fileSize * 3 / 4, this.fileSize);
}

@Test
public void testSimpleFiltering() throws Exception {
Set<Long> idSet = new HashSet<>();
Expand Down