From 016d63aecb7d1a77350a676989667f242173cd01 Mon Sep 17 00:00:00 2001 From: aiden Date: Wed, 4 Dec 2024 16:05:24 +0800 Subject: [PATCH 1/3] fix bug --- .../org/apache/paimon/format/parquet/ParquetReaderFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java index 0c996531201a..6f8cab2202d6 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java @@ -491,7 +491,7 @@ private long getNextRowPosition(int num) { nextIndex = this.currentRowGroupReadState.currentRangeStart(); } - return nextIndex; + return this.currentRowGroupFirstRowIndex + nextIndex; } } From aa01bdcab4cc800752e0d12d71fd0abe34368bb9 Mon Sep 17 00:00:00 2001 From: aiden Date: Wed, 4 Dec 2024 17:35:05 +0800 Subject: [PATCH 2/3] add test --- .../table/PrimaryKeyFileStoreTableTest.java | 63 ++++++++++++++++++- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java index e80b49a0f05d..1e46041a180c 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java @@ -812,6 +812,27 @@ public void testDeletionVectorsWithFileIndexInFile() throws Exception { @Test public void testDeletionVectorsWithParquetFilter() throws Exception { + // RowGroup record range [pk] : + // + // RowGroup-0 : [0-93421) + // RowGroup-1 : [93421-187794) + // RowGroup-2 : [187794-200000) + // + // ColumnPage record count : + // + // col-0 : 300 + // col-1 : 200 + // col-2 : 300 + // col-3 : 300 + // col-4 : 300 + // col-5 : 200 + // col-6 : 100 + // col-7 : 100 + // col-8 : 100 + // col-9 : 100 + // col-10 : 100 + // col-11 : 300 + FileStoreTable table = createFileStoreTable( conf -> { @@ -822,6 +843,8 @@ public void testDeletionVectorsWithParquetFilter() throws Exception { conf.set("parquet.page.size", "1024"); }); + System.out.println(((AbstractFileStoreTable) table).path); + BatchWriteBuilder writeBuilder = table.newBatchWriteBuilder(); BatchTableWrite write = @@ -842,7 +865,11 @@ public void testDeletionVectorsWithParquetFilter() throws Exception { writeBuilder .newWrite() .withIOManager(new IOManagerImpl(tempDir.toString())); - for (int i = 180000; i < 200000; i++) { + for (int i = 110000; i < 115000; i++) { + write.write(rowDataWithKind(RowKind.DELETE, 1, i, i * 100L)); + } + + for (int i = 130000; i < 135000; i++) { write.write(rowDataWithKind(RowKind.DELETE, 1, i, i * 100L)); } @@ -854,8 +881,10 @@ public void testDeletionVectorsWithParquetFilter() throws Exception { List splits = toSplits(table.newSnapshotReader().read().dataSplits()); Random random = new Random(); + // point filter + for (int i = 0; i < 10; i++) { - int value = random.nextInt(180000); + int value = random.nextInt(110000); TableRead read = table.newRead().withFilter(builder.equal(1, value)).executeFilter(); assertThat(getResult(read, splits, BATCH_ROW_TO_STRING)) .isEqualTo( @@ -866,10 +895,38 @@ public void testDeletionVectorsWithParquetFilter() throws Exception { } for (int i = 0; i < 10; i++) { - int value = 180000 + random.nextInt(20000); + int value = 130000 + random.nextInt(5000); TableRead read = table.newRead().withFilter(builder.equal(1, value)).executeFilter(); assertThat(getResult(read, splits, BATCH_ROW_TO_STRING)).isEmpty(); } + + TableRead tableRead = + table.newRead() + .withFilter( + PredicateBuilder.and( + builder.greaterOrEqual(1, 100000), + builder.lessThan(1, 150000))) + .executeFilter(); + + List result = getResult(tableRead, splits, BATCH_ROW_TO_STRING); + + assertThat(result.size()).isEqualTo(40000); // filter 10000 + + assertThat(result) + .doesNotContain("1|110000|11000000|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result) + .doesNotContain("1|114999|11499900|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result) + .doesNotContain("1|130000|13000000|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result) + .doesNotContain("1|134999|13499900|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result).contains("1|100000|10000000|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result).contains("1|149999|14999900|binary|varbinary|mapKey:mapVal|multiset"); + + assertThat(result).contains("1|101099|10109900|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result).contains("1|115000|11500000|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result).contains("1|129999|12999900|binary|varbinary|mapKey:mapVal|multiset"); + assertThat(result).contains("1|135000|13500000|binary|varbinary|mapKey:mapVal|multiset"); } @Test From 892cf4df2b3581d819dd912309c64f6cde6ca943 Mon Sep 17 00:00:00 2001 From: aiden Date: Wed, 4 Dec 2024 18:11:45 +0800 Subject: [PATCH 3/3] delete debug msg --- .../org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java index 1e46041a180c..fa635e2ab666 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java @@ -843,8 +843,6 @@ public void testDeletionVectorsWithParquetFilter() throws Exception { conf.set("parquet.page.size", "1024"); }); - System.out.println(((AbstractFileStoreTable) table).path); - BatchWriteBuilder writeBuilder = table.newBatchWriteBuilder(); BatchTableWrite write =