From bb03baa0df76efa84ec4e010f358ef3b09b7154d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Wed, 6 Nov 2024 11:48:43 +0100 Subject: [PATCH 1/9] HBASE-28634 Fix FuzzyRowFilter may not return data on reverse scans Reproduced the issue with a mini cluster test. --- .../TestFuzzyRowFilterWithMiniCluster.java | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java new file mode 100644 index 000000000000..936039444ffb --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +// TODO: Move this into TestFuzzyRowFilterEndToEnd?! +@Category({ FilterTests.class, MediumTests.class }) +public class TestFuzzyRowFilterWithMiniCluster { + + private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static final String CF = "f"; + private static final String CQ = "name"; + private static final String TABLE = "abcd"; + private static Table ht; + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestFuzzyRowFilterWithMiniCluster.class); + + @Rule + public TestName name = new TestName(); + + private static final byte[][] ROWS_ONE = { Bytes.toBytes("111311"), Bytes.toBytes("111444"), + Bytes.toBytes("111511"), Bytes.toBytes("111611"), Bytes.toBytes("111446"), + Bytes.toBytes("111777"), Bytes.toBytes("111777"), }; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + conf.setInt("hbase.client.scanner.caching", 1000); + conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, + ConstantSizeRegionSplitPolicy.class.getName()); + // set no splits + conf.setLong(HConstants.HREGION_MAX_FILESIZE, (1024L) * 1024 * 1024 * 10); + + TEST_UTIL.startMiniCluster(); + + // load the mini cluster with a single table with 20 rows, with rowkeys of a single byte, 0-19. + ht = TEST_UTIL.createTable(TableName.valueOf(TABLE), Bytes.toBytes(CF), Integer.MAX_VALUE); + // Insert first half + for (byte[] ROW : ROWS_ONE) { + Put p = new Put(ROW); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), Bytes.toBytes("aaaaa")); + ht.put(p); + } + TEST_UTIL.flush(); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testNormalScanForwardsSeek() throws IOException { + LinkedList> fuzzyList = new LinkedList<>(); + byte[] fuzzyRowKey = Bytes.toBytes("111433"); + byte[] mask = Bytes.toBytesBinary("\\xFF\\xFF\\xFF\\xFF\\x02\\x02"); + fuzzyList.add(new Pair<>(fuzzyRowKey, mask)); + FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyList); + + Scan scan = new Scan(); + scan.setFilter(filter); + + ResultScanner scanner = ht.getScanner(scan); + List actualRowsList = new ArrayList<>(); + for (Result result : scanner) { + byte[] row = result.getRow(); + actualRowsList.add(row); + } + + assertEquals(2, actualRowsList.size()); + } + + @Test + public void testReversedScanBackwardsSeek() throws IOException { + LinkedList> fuzzyList = new LinkedList<>(); + byte[] fuzzyRowKey = Bytes.toBytes("111433"); + byte[] mask = Bytes.toBytesBinary("\\xFF\\xFF\\xFF\\xFF\\x02\\x02"); + fuzzyList.add(new Pair<>(fuzzyRowKey, mask)); + FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyList); + + Scan scan = new Scan(); + scan.setFilter(filter); + scan.setReversed(true); + + ResultScanner scanner = ht.getScanner(scan); + List actualRowsList = new ArrayList<>(); + for (Result result : scanner) { + byte[] row = result.getRow(); + actualRowsList.add(row); + } + + assertEquals(2, actualRowsList.size()); + } +} From 00eafcc118f0ab2399308883d154f2c3893faa36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Mon, 11 Nov 2024 09:05:51 +0100 Subject: [PATCH 2/9] HBASE-28634 Corrected reverse unit tests in TestFuzzyRowFilter Some expectations were not correct. Also extracted PrefixFilter.increaseLastNonMaxByte() method to PrivateCellUtil so that it can be reused in FuzzyRowFilter. --- .../hadoop/hbase/filter/FuzzyRowFilter.java | 14 +- .../hadoop/hbase/filter/PrefixFilter.java | 15 +- .../apache/hadoop/hbase/PrivateCellUtil.java | 13 ++ .../hbase/filter/TestFuzzyRowFilter.java | 177 +++++++++++------- 4 files changed, 139 insertions(+), 80 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index f8c35b465284..5f60f7f138d5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -574,8 +574,11 @@ public static Order orderFor(boolean reverse) { } /** - * @return greater byte array than given (row) which satisfies the fuzzy rule if it exists, null - * otherwise + * Find out the closes next byte array that satisfies fuzzy rule and is after the given one. In + * the reverse case it returns increased byte array to make sure that the proper row is selected + * next. + * @return byte array which is after the given row and which satisfies the fuzzy rule if it + * exists, null otherwise */ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { @@ -638,7 +641,12 @@ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int l } } - return reverse ? result : trimTrailingZeroes(result, fuzzyKeyMeta, toInc); + byte[] trailingZerosTrimmed = trimTrailingZeroes(result, fuzzyKeyMeta, toInc); + if (reverse) { + return PrivateCellUtil.increaseLastNonMaxByte(trailingZerosTrimmed); + } else { + return trailingZerosTrimmed; + } } /** diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java index 5ef48c62fc6a..f6bdf4358e26 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/PrefixFilter.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.filter; import java.util.ArrayList; -import java.util.Arrays; import org.apache.hadoop.hbase.ByteBufferExtendedCell; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.PrivateCellUtil; @@ -57,7 +56,7 @@ private void createCellHints() { return; } // On reversed scan hint should be the prefix with last byte incremented - byte[] reversedHintBytes = increaseLastNonMaxByte(this.prefix); + byte[] reversedHintBytes = PrivateCellUtil.increaseLastNonMaxByte(this.prefix); this.reversedNextCellHint = PrivateCellUtil.createFirstOnRow(reversedHintBytes, 0, (short) reversedHintBytes.length); // On forward scan hint should be the prefix @@ -132,18 +131,6 @@ public Cell getNextCellHint(Cell cell) { } } - private byte[] increaseLastNonMaxByte(byte[] bytes) { - byte[] result = Arrays.copyOf(bytes, bytes.length); - for (int i = bytes.length - 1; i >= 0; i--) { - byte b = bytes[i]; - if (b < Byte.MAX_VALUE) { - result[i] = (byte) (b + 1); - break; - } - } - return result; - } - public static Filter createFilterFromArguments(ArrayList filterArguments) { Preconditions.checkArgument(filterArguments.size() == 1, "Expected 1 but got: %s", filterArguments.size()); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java index 08160145455c..33a821d75db4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java @@ -27,6 +27,7 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map.Entry; @@ -3095,4 +3096,16 @@ public static long getSequenceId(Cell c) { return HConstants.NO_SEQNUM; } } + + public static byte[] increaseLastNonMaxByte(byte[] bytes) { + byte[] result = Arrays.copyOf(bytes, bytes.length); + for (int i = bytes.length - 1; i >= 0; i--) { + byte b = bytes[i]; + if (b < Byte.MAX_VALUE) { + result[i] = (byte) (b + 1); + break; + } + } + return result; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index 8079f3b54cda..71a643666782 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -221,94 +221,145 @@ public void testGetNextForFuzzyRuleForward() { @Test public void testGetNextForFuzzyRuleReverse() { + // ?12 + // 12101 + // 112 -> 113 assertNext(true, new byte[] { 0, 1, 2 }, // fuzzy row new byte[] { 0, -1, -1 }, // mask new byte[] { 1, 2, 1, 0, 1 }, // current - // TODO: should be {1, 1, 3} ? - new byte[] { 1, 1, 2, (byte) 0xFF, (byte) 0xFF }); // expected next + new byte[] { 1, 1, 3 }); // expected next + // ?1?2? + // 12131 + // 11(0xFF)2 -> 11(0xFF)3 assertNext(true, new byte[] { 0, 1, 0, 2, 0 }, // fuzzy row new byte[] { 0, -1, 0, -1, 0 }, // mask new byte[] { 1, 2, 1, 3, 1 }, // current - // TODO: should be {1, 1, 1, 3} ? - new byte[] { 1, 1, 0, 2, 0 }); // expected next + new byte[] { 1, 1, (byte) 255, 3 }); // expected next - assertNext(true, new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 }, - new byte[] { 1, (byte) 128, 2, 0, 1 }, - // TODO: should be {1, (byte) 128, 2} ? - new byte[] { 1, (byte) 128, 1, (byte) 0xFF, (byte) 0xFF }); + // 1?1 + // 1(128)201 + // 1(128)1 -> 1(128)2 + assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row + new byte[] { -1, 0, -1 }, // mask + new byte[] { 1, (byte) 128, 2, 0, 1 }, // current + new byte[] { 1, (byte) 128, 2 }); // expected next - assertNext(true, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 }, - new byte[] { 5, 1, 0, 2, 1 }, - // TODO: should be {5, 1, 0, 2} ? - new byte[] { 5, 1, 0, 1, (byte) 0xFF }); + // ?1?1 + // 51021 + // 5101 -> 5102 + assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row + new byte[] { 0, -1, 0, -1 }, // mask + new byte[] { 5, 1, 0, 2, 1 }, // current + new byte[] { 5, 1, 0, 2 }); // expected next + // ?1?? + // 51(255)1 + // 51(255)0 -> 51(255)1 assertNext(true, new byte[] { 0, 1, 0, 0 }, // fuzzy row new byte[] { 0, -1, 0, 0 }, // mask new byte[] { 5, 1, (byte) 255, 1 }, // current - new byte[] { 5, 1, (byte) 255, 0 }); // expected next + new byte[] { 5, 1, (byte) 255, 1 }); // expected next + // ?1?1 + // 5101 + // 41(255)1 -> 41(255)2 assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row new byte[] { 0, -1, 0, -1 }, // mask new byte[] { 5, 1, 0, 1 }, // current - new byte[] { 4, 1, (byte) 255, 1 }); // expected next + new byte[] { 4, 1, (byte) 255, 2 }); // expected next + // ?1?1 + // 51(255)0 + // 51(254)1 -> 51(254)2 assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row new byte[] { 0, -1, 0, -1 }, // mask new byte[] { 5, 1, (byte) 255, 0 }, // current - new byte[] { 5, 1, (byte) 254, 1 }); // expected next - - assertNext(true, new byte[] { 1, 1, 0, 0 }, new byte[] { -1, -1, 0, 0 }, - new byte[] { 2, 1, 3, 2 }, - // TODO: should be {1, 0} ? - new byte[] { 1, 1, 0, 0 }); - + new byte[] { 5, 1, (byte) 254, 2 }); // expected next + + // 11?? + // 2132 + // 11(0xFF)(0xFF) -> 12 + assertNext(true, new byte[] { 1, 1, 0, 0 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 2, 1, 3, 2 }, // current + new byte[] { 1, 2 }); // expected next + + // 1?1 + // 23111 + // 1(0xFF)1 -> 1(0xFF)2 assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row new byte[] { -1, 0, -1 }, // mask new byte[] { 2, 3, 1, 1, 1 }, // row to check - // TODO: should be {1, (byte) 0xFF, 2} ? - new byte[] { 1, 0, 1, (byte) 0xFF, (byte) 0xFF }); - - assertNext(true, new byte[] { 1, 1, 0, 3 }, new byte[] { -1, -1, 0, -1 }, - new byte[] { 1, (byte) 245, 1, 3, 0 }, - // TODO: should be {1, 1, (byte) 255, 4} ? - new byte[] { 1, 1, 0, 3, (byte) 0xFF }); - - assertNext(true, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 }, - new byte[] { 1, 3, 1, 3, 0 }, - // TODO: should be 1, 2, (byte) 255, 4 ? - new byte[] { 1, 2, 0, 3, (byte) 0xFF }); - - assertNext(true, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 }, - new byte[] { 2, 1, 1, 1, 0 }, - // TODO: should be {1, 2, (byte) 255, 4} ? - new byte[] { 1, 2, 0, 3, (byte) 0xFF }); - - assertNext(true, - // TODO: should be null? - new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 }, new byte[] { 1, (byte) 128, 2 }, - new byte[] { 1, (byte) 128, 1 }); - - assertNext(true, - // TODO: should be null? - new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 }, new byte[] { 5, 1, 0, 2 }, - new byte[] { 5, 1, 0, 1 }); - - assertNext(true, - // TODO: should be null? - new byte[] { 5, 1, 1, 0 }, new byte[] { -1, -1, 0, 0 }, new byte[] { 5, 1, (byte) 0xFF, 1 }, - new byte[] { 5, 1, (byte) 0xFF, 0 }); - - assertNext(true, - // TODO: should be null? - new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 }, new byte[] { 1, 1, 2, 2 }, - new byte[] { 1, 1, 2, 1 }); - - assertNext(true, - // TODO: should be null? - new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 }, new byte[] { 1, 1, 2, 3 }, - new byte[] { 1, 1, 2, 2 }); + new byte[] { 1, (byte) 255, 2 }); // expected next + + // 11?3 + // 1(245)130 + // 11(0xFF)3 -> 11(0xFF)4 + assertNext(true, new byte[] { 1, 1, 0, 3 }, // fuzzy row + new byte[] { -1, -1, 0, -1 }, // mask + new byte[] { 1, (byte) 245, 1, 3, 0 }, // row to check + new byte[] { 1, 1, (byte) 255, 4 }); // expected next + + // 12?3 + // 13130 + // 12(0xFF)3 -> 12(0xFF)4 + assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row + new byte[] { -1, -1, 0, -1 }, // mask + new byte[] { 1, 3, 1, 3, 0 }, // row to check + new byte[] { 1, 2, (byte) 255, 4 }); // expected next + + // 12?3 + // 21110 + // 12(0xFF)3 -> 12(0xFF)4 + assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row + new byte[] { -1, -1, 0, -1 }, // mask + new byte[] { 2, 1, 1, 1, 0 }, // row to check + new byte[] { 1, 2, (byte) 255, 4 }); // expected next + + // 1?1 + // 1(128)2 + // 1(128)1 -> 1(128)2 + assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row + new byte[] { -1, 0, -1 }, // mask + new byte[] { 1, (byte) 128, 2 }, // row to check + new byte[] { 1, (byte) 128, 2 }); // expected next + // ?1?1 + // 5102 + // 5101 -> 5102 or null? + assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row + new byte[] { 0, -1, 0, -1 }, // mask + new byte[] { 5, 1, 0, 2 }, // row to check + new byte[] { 5, 1, 0, 2 }); // expected next + + // 51?? + // 51(0xFF)1 + // 51(0xFF)0 -> 51(0xFF)1 + assertNext(true, new byte[] { 5, 1, 1, 0 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 5, 1, (byte) 0xFF, 1 }, // row to check + new byte[] { 5, 1, (byte) 0xFF, 1 }); // expected next + + // 11?? + // 1122 + // 1121 -> 1122 + assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 1, 1, 2, 2 }, // row to check + new byte[] { 1, 1, 2, 2 }); // expected next + + // ???? + // 1123 + // 1122 -> 1123 + assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row + new byte[] { 0, 0, 0, 0 }, // mask + new byte[] { 1, 1, 2, 3 }, // row to check + new byte[] { 1, 1, 2, 3 }); // expected next + + // 12?3 + // 11130 + // no before cell than current which satisfies the fuzzy row -> null Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(true, new byte[] { 1, 1, 1, 3, 0 }, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 })); } From 019f6d8d4329f7188b33f1ccf4e913c34fca62c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Tue, 12 Nov 2024 13:57:01 +0100 Subject: [PATCH 3/9] HBASE-28634 Fixed hint returned by FuzzyRowFilter.getNextForFuzzyRule() when the hint should contain a 0xFF As a start, when we have a reverse scan, we not only need trailing 0xff's instead of trailing 0x00's, but it seems we always need 0xff's instead of the 0x00's. --- .../org/apache/hadoop/hbase/filter/FuzzyRowFilter.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index 5f60f7f138d5..ec9014e6f9d6 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -592,10 +592,12 @@ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int l // values than otherwise. byte[] result = Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length); - if (reverse && length > fuzzyKeyBytes.length) { - // we need trailing 0xff's instead of trailing 0x00's - for (int i = fuzzyKeyBytes.length; i < result.length; i++) { - result[i] = (byte) 0xFF; + if (reverse) { + // we need 0xff's instead of 0x00's + for (int i = 0; i < result.length; i++) { + if (result[i] == 0) { + result[i] = (byte) 0xFF; + } } } int toInc = -1; From 85fd4318422152f5c80819579fda4b67c75842ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Tue, 12 Nov 2024 14:04:03 +0100 Subject: [PATCH 4/9] HBASE-28634 Removed unneeded comments --- .../hadoop/hbase/filter/FuzzyRowFilter.java | 2 + .../hbase/filter/TestFuzzyRowFilter.java | 56 +------------------ 2 files changed, 5 insertions(+), 53 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index ec9014e6f9d6..7fac452bd28b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -645,6 +645,8 @@ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int l byte[] trailingZerosTrimmed = trimTrailingZeroes(result, fuzzyKeyMeta, toInc); if (reverse) { + // In the reverse case we increase last non-max byte to make sure that the proper row is + // selected next. return PrivateCellUtil.increaseLastNonMaxByte(trailingZerosTrimmed); } else { return trailingZerosTrimmed; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index 71a643666782..eda9b8e07797 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -221,144 +221,94 @@ public void testGetNextForFuzzyRuleForward() { @Test public void testGetNextForFuzzyRuleReverse() { - // ?12 - // 12101 - // 112 -> 113 + // In the reverse case the last non-max byte should be increased to make sure that the proper + // row is selected next. + // e.g. 112 -> 113 or 11(0xFF)2 -> 11(0xFF)3, 5101 -> 5102 etc. assertNext(true, new byte[] { 0, 1, 2 }, // fuzzy row new byte[] { 0, -1, -1 }, // mask new byte[] { 1, 2, 1, 0, 1 }, // current new byte[] { 1, 1, 3 }); // expected next - // ?1?2? - // 12131 - // 11(0xFF)2 -> 11(0xFF)3 assertNext(true, new byte[] { 0, 1, 0, 2, 0 }, // fuzzy row new byte[] { 0, -1, 0, -1, 0 }, // mask new byte[] { 1, 2, 1, 3, 1 }, // current new byte[] { 1, 1, (byte) 255, 3 }); // expected next - // 1?1 - // 1(128)201 - // 1(128)1 -> 1(128)2 assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row new byte[] { -1, 0, -1 }, // mask new byte[] { 1, (byte) 128, 2, 0, 1 }, // current new byte[] { 1, (byte) 128, 2 }); // expected next - // ?1?1 - // 51021 - // 5101 -> 5102 assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row new byte[] { 0, -1, 0, -1 }, // mask new byte[] { 5, 1, 0, 2, 1 }, // current new byte[] { 5, 1, 0, 2 }); // expected next - // ?1?? - // 51(255)1 - // 51(255)0 -> 51(255)1 assertNext(true, new byte[] { 0, 1, 0, 0 }, // fuzzy row new byte[] { 0, -1, 0, 0 }, // mask new byte[] { 5, 1, (byte) 255, 1 }, // current new byte[] { 5, 1, (byte) 255, 1 }); // expected next - // ?1?1 - // 5101 - // 41(255)1 -> 41(255)2 assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row new byte[] { 0, -1, 0, -1 }, // mask new byte[] { 5, 1, 0, 1 }, // current new byte[] { 4, 1, (byte) 255, 2 }); // expected next - // ?1?1 - // 51(255)0 - // 51(254)1 -> 51(254)2 assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row new byte[] { 0, -1, 0, -1 }, // mask new byte[] { 5, 1, (byte) 255, 0 }, // current new byte[] { 5, 1, (byte) 254, 2 }); // expected next - // 11?? - // 2132 - // 11(0xFF)(0xFF) -> 12 assertNext(true, new byte[] { 1, 1, 0, 0 }, // fuzzy row new byte[] { -1, -1, 0, 0 }, // mask new byte[] { 2, 1, 3, 2 }, // current new byte[] { 1, 2 }); // expected next - // 1?1 - // 23111 - // 1(0xFF)1 -> 1(0xFF)2 assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row new byte[] { -1, 0, -1 }, // mask new byte[] { 2, 3, 1, 1, 1 }, // row to check new byte[] { 1, (byte) 255, 2 }); // expected next - // 11?3 - // 1(245)130 - // 11(0xFF)3 -> 11(0xFF)4 assertNext(true, new byte[] { 1, 1, 0, 3 }, // fuzzy row new byte[] { -1, -1, 0, -1 }, // mask new byte[] { 1, (byte) 245, 1, 3, 0 }, // row to check new byte[] { 1, 1, (byte) 255, 4 }); // expected next - // 12?3 - // 13130 - // 12(0xFF)3 -> 12(0xFF)4 assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row new byte[] { -1, -1, 0, -1 }, // mask new byte[] { 1, 3, 1, 3, 0 }, // row to check new byte[] { 1, 2, (byte) 255, 4 }); // expected next - // 12?3 - // 21110 - // 12(0xFF)3 -> 12(0xFF)4 assertNext(true, new byte[] { 1, 2, 0, 3 }, // fuzzy row new byte[] { -1, -1, 0, -1 }, // mask new byte[] { 2, 1, 1, 1, 0 }, // row to check new byte[] { 1, 2, (byte) 255, 4 }); // expected next - // 1?1 - // 1(128)2 - // 1(128)1 -> 1(128)2 assertNext(true, new byte[] { 1, 0, 1 }, // fuzzy row new byte[] { -1, 0, -1 }, // mask new byte[] { 1, (byte) 128, 2 }, // row to check new byte[] { 1, (byte) 128, 2 }); // expected next - // ?1?1 - // 5102 - // 5101 -> 5102 or null? assertNext(true, new byte[] { 0, 1, 0, 1 }, // fuzzy row new byte[] { 0, -1, 0, -1 }, // mask new byte[] { 5, 1, 0, 2 }, // row to check new byte[] { 5, 1, 0, 2 }); // expected next - // 51?? - // 51(0xFF)1 - // 51(0xFF)0 -> 51(0xFF)1 assertNext(true, new byte[] { 5, 1, 1, 0 }, // fuzzy row new byte[] { -1, -1, 0, 0 }, // mask new byte[] { 5, 1, (byte) 0xFF, 1 }, // row to check new byte[] { 5, 1, (byte) 0xFF, 1 }); // expected next - // 11?? - // 1122 - // 1121 -> 1122 assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row new byte[] { -1, -1, 0, 0 }, // mask new byte[] { 1, 1, 2, 2 }, // row to check new byte[] { 1, 1, 2, 2 }); // expected next - // ???? - // 1123 - // 1122 -> 1123 assertNext(true, new byte[] { 1, 1, 1, 1 }, // fuzzy row new byte[] { 0, 0, 0, 0 }, // mask new byte[] { 1, 1, 2, 3 }, // row to check new byte[] { 1, 1, 2, 3 }); // expected next - // 12?3 - // 11130 // no before cell than current which satisfies the fuzzy row -> null Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(true, new byte[] { 1, 1, 1, 3, 0 }, new byte[] { 1, 2, 0, 3 }, new byte[] { -1, -1, 0, -1 })); From 719eb63c541e712475f3b1b061aca3f1a84b5ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Tue, 12 Nov 2024 14:10:33 +0100 Subject: [PATCH 5/9] HBASE-28634 Added more separating comments to TestFuzzyRowFilter.testGetNextForFuzzyRuleForward --- .../hbase/filter/TestFuzzyRowFilter.java | 54 ++++++++++++------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index eda9b8e07797..989bb5d3bc50 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -164,14 +164,20 @@ public void testGetNextForFuzzyRuleForward() { new byte[] { 1, 0, 2, 0, 1 }, // current new byte[] { 1, 1, 0, 2 }); // expected next - assertNext(false, new byte[] { 1, 0, 1 }, new byte[] { -1, 0, -1 }, - new byte[] { 1, (byte) 128, 2, 0, 1 }, new byte[] { 1, (byte) 129, 1 }); + assertNext(false, new byte[] { 1, 0, 1 }, // fuzzy row + new byte[] { -1, 0, -1 }, // mask + new byte[] { 1, (byte) 128, 2, 0, 1 }, // current + new byte[] { 1, (byte) 129, 1 }); // expected next - assertNext(false, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 }, - new byte[] { 5, 1, 0, 1 }, new byte[] { 5, 1, 1, 1 }); + assertNext(false, new byte[] { 0, 1, 0, 1 }, // fuzzy row + new byte[] { 0, -1, 0, -1 }, // mask + new byte[] { 5, 1, 0, 1 }, // current + new byte[] { 5, 1, 1, 1 }); // expected next - assertNext(false, new byte[] { 0, 1, 0, 1 }, new byte[] { 0, -1, 0, -1 }, - new byte[] { 5, 1, 0, 1, 1 }, new byte[] { 5, 1, 0, 1, 2 }); + assertNext(false, new byte[] { 0, 1, 0, 1 }, // fuzzy row + new byte[] { 0, -1, 0, -1 }, // mask + new byte[] { 5, 1, 0, 1, 1 }, // current + new byte[] { 5, 1, 0, 1, 2 }); // expected next assertNext(false, new byte[] { 0, 1, 0, 0 }, // fuzzy row new byte[] { 0, -1, 0, 0 }, // mask @@ -188,23 +194,35 @@ public void testGetNextForFuzzyRuleForward() { new byte[] { 5, 1, (byte) 255, 0 }, // current new byte[] { 5, 1, (byte) 255, 1 }); // expected next - assertNext(false, new byte[] { 5, 1, 1, 0 }, new byte[] { -1, -1, 0, 0 }, - new byte[] { 5, 1, (byte) 255, 1 }, new byte[] { 5, 1, (byte) 255, 2 }); + assertNext(false, new byte[] { 5, 1, 1, 0 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 5, 1, (byte) 255, 1 }, // current + new byte[] { 5, 1, (byte) 255, 2 }); // expected next - assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 }, - new byte[] { 1, 1, 2, 2 }, new byte[] { 1, 1, 2, 3 }); + assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 1, 1, 2, 2 }, // current + new byte[] { 1, 1, 2, 3 }); // expected next - assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { -1, -1, 0, 0 }, - new byte[] { 1, 1, 3, 2 }, new byte[] { 1, 1, 3, 3 }); + assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 1, 1, 3, 2 }, // current + new byte[] { 1, 1, 3, 3 }); // expected next - assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 }, - new byte[] { 1, 1, 2, 3 }, new byte[] { 1, 1, 2, 4 }); + assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row + new byte[] { 0, 0, 0, 0 }, // mask + new byte[] { 1, 1, 2, 3 }, // current + new byte[] { 1, 1, 2, 4 }); // expected next - assertNext(false, new byte[] { 1, 1, 1, 1 }, new byte[] { 0, 0, 0, 0 }, - new byte[] { 1, 1, 3, 2 }, new byte[] { 1, 1, 3, 3 }); + assertNext(false, new byte[] { 1, 1, 1, 1 }, // fuzzy row + new byte[] { 0, 0, 0, 0 }, // mask + new byte[] { 1, 1, 3, 2 }, // current + new byte[] { 1, 1, 3, 3 }); // expected next - assertNext(false, new byte[] { 1, 1, 0, 0 }, new byte[] { -1, -1, 0, 0 }, - new byte[] { 0, 1, 3, 2 }, new byte[] { 1, 1 }); + assertNext(false, new byte[] { 1, 1, 0, 0 }, // fuzzy row + new byte[] { -1, -1, 0, 0 }, // mask + new byte[] { 0, 1, 3, 2 }, // current + new byte[] { 1, 1 }); // expected next // No next for this one Assert.assertNull(FuzzyRowFilter.getNextForFuzzyRule(new byte[] { 2, 3, 1, 1, 1 }, // row to From 5fa2074142a700477dec6b351da78328e0e2c02c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Tue, 12 Nov 2024 14:39:47 +0100 Subject: [PATCH 6/9] HBASE-28634 Move reproduction test into TestFuzzyRowFilterEndToEnd to not start another minicluster needlessly. --- .../filter/TestFuzzyRowFilterEndToEnd.java | 64 ++++++++ .../TestFuzzyRowFilterWithMiniCluster.java | 141 ------------------ 2 files changed, 64 insertions(+), 141 deletions(-) delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java index 7ab741b70343..8337009fbadc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java @@ -24,6 +24,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; @@ -337,4 +338,67 @@ public void testHBASE26967() throws IOException { TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName())); } + + @Test + public void testHBASE28634() throws IOException { + final String CF = "f"; + final String CQ = "name"; + + Table ht = TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), Bytes.toBytes(CF)); + + // Put data + List puts = Lists.newArrayList(); + puts.add(new Put(Bytes.toBytes("111311")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a1"))); + puts.add(new Put(Bytes.toBytes("111444")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a2"))); + puts.add(new Put(Bytes.toBytes("111511")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a3"))); + puts.add(new Put(Bytes.toBytes("111611")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a4"))); + puts.add(new Put(Bytes.toBytes("111446")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a5"))); + puts.add(new Put(Bytes.toBytes("111777")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a6"))); + puts.add(new Put(Bytes.toBytes("111777")).addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), + Bytes.toBytes("a"))); + ht.put(puts); + + TEST_UTIL.flush(); + + // Forward scan + LinkedList> fuzzyList = new LinkedList<>(); + byte[] fuzzyRowKey = Bytes.toBytes("111433"); + byte[] mask = Bytes.toBytesBinary("\\xFF\\xFF\\xFF\\xFF\\x02\\x02"); + fuzzyList.add(new Pair<>(fuzzyRowKey, mask)); + FuzzyRowFilter fuzzyRowFilter = new FuzzyRowFilter(fuzzyList); + + Scan scan = new Scan(); + scan.setFilter(fuzzyRowFilter); + + ResultScanner scanner = ht.getScanner(scan); + List actualRowsList = new ArrayList<>(); + for (Result result : scanner) { + byte[] row = result.getRow(); + actualRowsList.add(row); + } + + assertEquals(2, actualRowsList.size()); + + // Reverse scan + scan = new Scan(); + scan.setFilter(fuzzyRowFilter); + scan.setReversed(true); + + scanner = ht.getScanner(scan); + actualRowsList = new ArrayList<>(); + for (Result result : scanner) { + byte[] row = result.getRow(); + actualRowsList.add(row); + } + + assertEquals(2, actualRowsList.size()); + + TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName())); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java deleted file mode 100644 index 936039444ffb..000000000000 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWithMiniCluster.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.filter; - -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseClassTestRule; -import org.apache.hadoop.hbase.HBaseTestingUtil; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.Durability; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.client.Table; -import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; -import org.apache.hadoop.hbase.testclassification.FilterTests; -import org.apache.hadoop.hbase.testclassification.MediumTests; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Pair; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.TestName; - -// TODO: Move this into TestFuzzyRowFilterEndToEnd?! -@Category({ FilterTests.class, MediumTests.class }) -public class TestFuzzyRowFilterWithMiniCluster { - - private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); - private static final String CF = "f"; - private static final String CQ = "name"; - private static final String TABLE = "abcd"; - private static Table ht; - - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestFuzzyRowFilterWithMiniCluster.class); - - @Rule - public TestName name = new TestName(); - - private static final byte[][] ROWS_ONE = { Bytes.toBytes("111311"), Bytes.toBytes("111444"), - Bytes.toBytes("111511"), Bytes.toBytes("111611"), Bytes.toBytes("111446"), - Bytes.toBytes("111777"), Bytes.toBytes("111777"), }; - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - Configuration conf = TEST_UTIL.getConfiguration(); - conf.setInt("hbase.client.scanner.caching", 1000); - conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, - ConstantSizeRegionSplitPolicy.class.getName()); - // set no splits - conf.setLong(HConstants.HREGION_MAX_FILESIZE, (1024L) * 1024 * 1024 * 10); - - TEST_UTIL.startMiniCluster(); - - // load the mini cluster with a single table with 20 rows, with rowkeys of a single byte, 0-19. - ht = TEST_UTIL.createTable(TableName.valueOf(TABLE), Bytes.toBytes(CF), Integer.MAX_VALUE); - // Insert first half - for (byte[] ROW : ROWS_ONE) { - Put p = new Put(ROW); - p.setDurability(Durability.SKIP_WAL); - p.addColumn(Bytes.toBytes(CF), Bytes.toBytes(CQ), Bytes.toBytes("aaaaa")); - ht.put(p); - } - TEST_UTIL.flush(); - } - - @AfterClass - public static void tearDownAfterClass() throws Exception { - TEST_UTIL.shutdownMiniCluster(); - } - - @Test - public void testNormalScanForwardsSeek() throws IOException { - LinkedList> fuzzyList = new LinkedList<>(); - byte[] fuzzyRowKey = Bytes.toBytes("111433"); - byte[] mask = Bytes.toBytesBinary("\\xFF\\xFF\\xFF\\xFF\\x02\\x02"); - fuzzyList.add(new Pair<>(fuzzyRowKey, mask)); - FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyList); - - Scan scan = new Scan(); - scan.setFilter(filter); - - ResultScanner scanner = ht.getScanner(scan); - List actualRowsList = new ArrayList<>(); - for (Result result : scanner) { - byte[] row = result.getRow(); - actualRowsList.add(row); - } - - assertEquals(2, actualRowsList.size()); - } - - @Test - public void testReversedScanBackwardsSeek() throws IOException { - LinkedList> fuzzyList = new LinkedList<>(); - byte[] fuzzyRowKey = Bytes.toBytes("111433"); - byte[] mask = Bytes.toBytesBinary("\\xFF\\xFF\\xFF\\xFF\\x02\\x02"); - fuzzyList.add(new Pair<>(fuzzyRowKey, mask)); - FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyList); - - Scan scan = new Scan(); - scan.setFilter(filter); - scan.setReversed(true); - - ResultScanner scanner = ht.getScanner(scan); - List actualRowsList = new ArrayList<>(); - for (Result result : scanner) { - byte[] row = result.getRow(); - actualRowsList.add(row); - } - - assertEquals(2, actualRowsList.size()); - } -} From 188f4a125bef88f7ebc9b3b368ff78e751e6e350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Tue, 12 Nov 2024 14:50:41 +0100 Subject: [PATCH 7/9] HBASE-28634 Small improvements to FuzzyRowFilter --- .../apache/hadoop/hbase/filter/FuzzyRowFilter.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index 7fac452bd28b..2626dcfa6d8e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -67,7 +67,7 @@ @InterfaceAudience.Public public class FuzzyRowFilter extends FilterBase implements HintingFilter { private static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned(); - private List> fuzzyKeysData; + private final List> fuzzyKeysData; // Used to record whether we want to skip the current row. // Usually we should use filterRowKey here but in the current scan implementation, if filterRowKey // returns true, we will just skip to next row, instead of calling getNextCellHint to determine @@ -89,7 +89,7 @@ public class FuzzyRowFilter extends FilterBase implements HintingFilter { /** * Row tracker (keeps all next rows after SEEK_NEXT_USING_HINT was returned) */ - private RowTracker tracker; + private final RowTracker tracker; public FuzzyRowFilter(List> fuzzyKeysData) { List> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size()); @@ -200,7 +200,7 @@ public boolean filterRow() throws IOException { @Override public ReturnCode filterCell(final Cell c) { - final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0; + final int startIndex = Math.max(lastFoundIndex, 0); final int size = fuzzyKeysData.size(); for (int i = startIndex; i < size + startIndex; i++) { final int index = i % size; @@ -226,7 +226,7 @@ public ReturnCode filterCell(final Cell c) { @Override public Cell getNextCellHint(Cell currentCell) { boolean result = tracker.updateTracker(currentCell); - if (result == false) { + if (!result) { done = true; return null; } @@ -590,8 +590,7 @@ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int l // It is easier to perform this by using fuzzyKeyBytes copy and setting "non-fixed" position // values than otherwise. - byte[] result = - Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length); + byte[] result = Arrays.copyOf(fuzzyKeyBytes, Math.max(length, fuzzyKeyBytes.length)); if (reverse) { // we need 0xff's instead of 0x00's for (int i = 0; i < result.length; i++) { From 348b364c207161d55e2c5fcda8c95e72f411b5bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Wed, 13 Nov 2024 09:37:02 +0100 Subject: [PATCH 8/9] HBASE-28634 Small comment improvement in FuzzyRowFilter --- .../java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index 2626dcfa6d8e..006ddc8f1c30 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -582,8 +582,8 @@ public static Order orderFor(boolean reverse) { */ static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { - // To find out the next "smallest" byte array that satisfies fuzzy rule and "greater" than - // the given one we do the following: + // To find out the closest next byte array that satisfies fuzzy rule and is after the given one + // we do the following: // 1. setting values on all "fixed" positions to the values from fuzzyKeyBytes // 2. if during the first step given row did not increase, then we increase the value at // the first "non-fixed" position (where it is not maximum already) From 9cacc58e750fc41343dcf4523ed5a61bffed306a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Paksy?= Date: Wed, 20 Nov 2024 09:00:06 +0100 Subject: [PATCH 9/9] HBASE-28634 Added more explanation to comment in unit test As the comment did not make much sense without the mask and current row. --- .../hadoop/hbase/filter/TestFuzzyRowFilter.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index 989bb5d3bc50..d184bf2a3183 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -239,9 +239,14 @@ public void testGetNextForFuzzyRuleForward() { @Test public void testGetNextForFuzzyRuleReverse() { - // In the reverse case the last non-max byte should be increased to make sure that the proper - // row is selected next. - // e.g. 112 -> 113 or 11(0xFF)2 -> 11(0xFF)3, 5101 -> 5102 etc. + // In these reverse cases for the next row key the last non-max byte should be increased + // to make sure that the proper row is selected next by the scanner. + // For example: + // fuzzy row: 0,1,2 + // mask: 0,-1,-1 + // current: 1,2,1,0,1 + // next would be: 1,1,2 + // this has to be increased to 1,1,3 to make sure that the proper row is selected next. assertNext(true, new byte[] { 0, 1, 2 }, // fuzzy row new byte[] { 0, -1, -1 }, // mask new byte[] { 1, 2, 1, 0, 1 }, // current