From 7f834cff7061978296f7f97a1fbda83e2cdd42cf Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Mon, 4 Nov 2024 14:24:27 +0530 Subject: [PATCH 01/15] Added feature to count delete markers in RowCounter. (cherry picked from commit 8b9787cc81d40a264d64c3936700ba36e1d23e46) --- .../hadoop/hbase/mapreduce/RowCounter.java | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 6d163e82e8cd..0b0ac972760d 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; @@ -33,6 +34,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -65,12 +67,14 @@ public class RowCounter extends AbstractHBaseTool { private final static String OPT_END_TIME = "endtime"; private final static String OPT_RANGE = "range"; private final static String OPT_EXPECTED_COUNT = "expectedCount"; + private final static String OPT_COUNT_DELETE_MARKERS = "countDeleteMarkers"; private String tableName; private List rowRangeList; private long startTime; private long endTime; private long expectedCount; + private boolean countDeleteMarkers; private List columns = new ArrayList<>(); /** @@ -78,9 +82,19 @@ public class RowCounter extends AbstractHBaseTool { */ static class RowCounterMapper extends TableMapper { - /** Counter enumeration to count the actual rows. */ + /** Counter enumeration to count the actual rows, cells and delete markers. */ public static enum Counters { - ROWS + ROWS, CELLS, DELETE, DELETE_COLUMN, DELETE_FAMILY, DELETE_FAMILY_VERSION, + ROWS_WITH_DELETE_MARKER + } + + private boolean countDeleteMarkers; + + @Override + protected void setup(Mapper.Context context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + countDeleteMarkers = conf.getBoolean(OPT_COUNT_DELETE_MARKERS, false); } /** @@ -95,6 +109,36 @@ public static enum Counters { public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException { // Count every row containing data, whether it's in qualifiers or values context.getCounter(Counters.ROWS).increment(1); + context.getCounter(Counters.CELLS).increment(values.size()); + + boolean rowContainsDeleteMarker = true; + + if (countDeleteMarkers) { + for (Cell cell : values.rawCells()) { + Cell.Type type = cell.getType(); + switch (type) { + case Delete: + context.getCounter(Counters.DELETE).increment(1); + break; + case DeleteColumn: + context.getCounter(Counters.DELETE_COLUMN).increment(1); + break; + case DeleteFamily: + context.getCounter(Counters.DELETE_FAMILY).increment(1); + break; + case DeleteFamilyVersion: + context.getCounter(Counters.DELETE_FAMILY_VERSION).increment(1); + break; + default: + rowContainsDeleteMarker = false; + break; + } + } + } + + if (rowContainsDeleteMarker) { + context.getCounter(Counters.ROWS_WITH_DELETE_MARKER).increment(1); + } } } @@ -105,9 +149,11 @@ public void map(ImmutableBytesWritable row, Result values, Context context) thro * @throws IOException When setting up the job fails. */ public Job createSubmittableJob(Configuration conf) throws IOException { + conf.setBoolean(OPT_COUNT_DELETE_MARKERS, this.countDeleteMarkers); Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(RowCounter.class); Scan scan = new Scan(); + scan.setRaw(this.countDeleteMarkers); scan.setCacheBlocks(false); setScanFilter(scan, rowRangeList); @@ -295,10 +341,14 @@ protected void addOptions() { .desc("[startKey],[endKey][;[startKey],[endKey]...]]").longOpt(OPT_RANGE).build(); Option expectedOption = Option.builder(null).valueSeparator('=').hasArg(true) .desc("expected number of rows to be count.").longOpt(OPT_EXPECTED_COUNT).build(); + Option countDeleteMarkersOption = Option.builder(null).hasArg(false) + .desc("counts the number of Delete Markers of all types, i.e. (DELETE, DELETE_COLUMN, DELETE_FAMILY, DELETE_FAMILY_VERSION)") + .longOpt(OPT_COUNT_DELETE_MARKERS).build(); addOption(startTimeOption); addOption(endTimeOption); addOption(rangeOption); addOption(expectedOption); + addOption(countDeleteMarkersOption); } @Override @@ -316,6 +366,7 @@ protected void processOptions(CommandLine cmd) throws IllegalArgumentException { this.startTime = cmd.getOptionValue(OPT_START_TIME) == null ? 0 : Long.parseLong(cmd.getOptionValue(OPT_START_TIME)); + this.countDeleteMarkers = cmd.hasOption(OPT_COUNT_DELETE_MARKERS); for (int i = 1; i < cmd.getArgList().size(); i++) { String argument = cmd.getArgList().get(i); From 8ceede0a9d42bf579db7d8f02480ad386cbdb8a1 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Tue, 5 Nov 2024 09:00:55 +0530 Subject: [PATCH 02/15] Put all new features under the flag for correct working. (cherry picked from commit fa107712da895aae553d8ac9d3b993abff461e67) --- .../apache/hadoop/hbase/mapreduce/RowCounter.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 0b0ac972760d..f5e3af4429f8 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -109,11 +109,11 @@ protected void setup(Mapper Date: Tue, 5 Nov 2024 11:51:34 +0530 Subject: [PATCH 03/15] Added UT. (cherry picked from commit 0cb98d8f8c9a8597a53236f18131866bccb876c3) --- .../hadoop/hbase/mapreduce/RowCounter.java | 10 +- .../hbase/mapreduce/TestRowCounter.java | 96 +++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index f5e3af4429f8..77ff6ed825a5 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; @@ -77,6 +78,8 @@ public class RowCounter extends AbstractHBaseTool { private boolean countDeleteMarkers; private List columns = new ArrayList<>(); + private Job job; + /** * Mapper that runs the count. */ @@ -398,7 +401,7 @@ protected void processOldArgs(List args) { @Override protected int doWork() throws Exception { - Job job = createSubmittableJob(getConf()); + job = createSubmittableJob(getConf()); if (job == null) { return -1; } @@ -439,4 +442,9 @@ protected CommandLineParser newParser() { return new RowCounterCommandLineParser(); } + @VisibleForTesting + public Job getMapReduceJob() { + return job; + } + } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 4404e3aee877..8596afd7e899 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.mapreduce; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -29,6 +30,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.testclassification.LargeTests; @@ -37,6 +39,7 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.LauncherSecurityManager; import org.apache.hadoop.mapreduce.Counter; +import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -524,6 +527,99 @@ public void testInvalidTable() throws Exception { } } + /** + * Step 1: Add 6 rows(row1, row2, row3, row4, row5 and row6) to a table. Each row contains 1 column family and 4 columns. + * Step 2: Delete a column for row1. + * Step 3: Delete a column family for row2 and row4. + * Step 4: Delete all versions of a specific column for row3, row5 and row6. + *

+ * Case 1: Run row counter without countDeleteMarkers flag + * Step a: Validate counter values. + * Step b: Assert that the countOfDeleteMarker variable is false. + *

+ * Case 2: Run row counter with countDeleteMarkers flag + * Step a: Validate counter values. + * Step b: Assert that the countOfDeleteMarker variable is true. + * @throws Exception + */ + @Test + public void testRowCounterWithCountDeleteMarkersOption() throws Exception { + // Test Setup + + final TableName tableName = TableName.valueOf(TABLE_NAME + "_" + "withCountDeleteMarkersOption"); + final byte[][] rowKeys = { + Bytes.toBytes("row1"), Bytes.toBytes("row2"), + Bytes.toBytes("row3"), Bytes.toBytes("row4"), + Bytes.toBytes("row5"), Bytes.toBytes("row6") + }; + final byte[] columnFamily = Bytes.toBytes("cf"); + final byte[][] columns = { + Bytes.toBytes("A"), Bytes.toBytes("B"), + Bytes.toBytes("C"), Bytes.toBytes("D") + }; + final byte[] value = Bytes.toBytes("a"); + + try (Table table = TEST_UTIL.createTable(tableName, columnFamily)) { + // Step 1: Insert rows with columns + for (byte[] rowKey : rowKeys) { + Put put = new Put(rowKey); + for (byte[] col : columns) { + put.addColumn(columnFamily, col, value); + } + table.put(put); + } + TEST_UTIL.getAdmin().flush(tableName); + + // Steps 2, 3, and 4: Delete columns, families, and all versions of columns + Delete deleteA = new Delete(rowKeys[0]).addColumn(columnFamily, columns[0]); + Delete deleteB = new Delete(rowKeys[1]).addFamily(columnFamily); + Delete deleteC = new Delete(rowKeys[2]).addColumns(columnFamily, columns[0]); + Delete deleteD = new Delete(rowKeys[3]).addFamily(columnFamily); + Delete deleteE = new Delete(rowKeys[4]).addColumns(columnFamily, columns[0]); + Delete deleteF = new Delete(rowKeys[5]).addColumns(columnFamily, columns[0]); + + table.delete(deleteA); + table.delete(deleteB); + table.delete(deleteC); + table.delete(deleteD); + table.delete(deleteE); + table.delete(deleteF); + TEST_UTIL.getAdmin().flush(tableName); + } + + RowCounter rowCounterWithoutCountDeleteMarkers = new RowCounter(); + RowCounter rowCounterWithCountDeleteMarkers = new RowCounter(); + rowCounterWithoutCountDeleteMarkers.setConf(TEST_UTIL.getConfiguration()); + rowCounterWithCountDeleteMarkers.setConf(TEST_UTIL.getConfiguration()); + + // Invocation + + rowCounterWithoutCountDeleteMarkers.run(new String[]{tableName.getNameAsString()}); + rowCounterWithCountDeleteMarkers.run(new String[]{ tableName.getNameAsString(), "--countDeleteMarkers"}); + + // Validation + + // Case 1: + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 4, 0, 0, 0, 0, 0, 0); + assertFalse(rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", true)); + + // Case 2: + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 24, 6, 1, 3, 2, 0); + assertTrue(rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", false)); + } + + private void validateCounterCounts(Counters counters, long rowCount, long cellCount, + long rowsWithDeleteMarkersCount, long deleteCount, long deleteColumnCount, + long deleteFamilyCount, long deleteFamilyVersionCount) { + assertEquals(rowCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue()); + assertEquals(cellCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue()); + assertEquals(rowsWithDeleteMarkersCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue()); + assertEquals(deleteCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue()); + assertEquals(deleteColumnCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue()); + assertEquals(deleteFamilyCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue()); + assertEquals(deleteFamilyVersionCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue()); + } + private void assertUsageContent(String usage) { assertTrue(usage .contains("usage: hbase rowcounter " + " [options] [ ...]")); From d3a255a4c37ad2f4ba1bed31e84dfcfea10733c1 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Tue, 5 Nov 2024 11:53:34 +0530 Subject: [PATCH 04/15] Fixed style. (cherry picked from commit 972d3b7449a61acd603d3d4e02c18e729b5edc5b) --- .../java/org/apache/hadoop/hbase/mapreduce/RowCounter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 77ff6ed825a5..70ed32c481cb 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -345,7 +345,8 @@ protected void addOptions() { Option expectedOption = Option.builder(null).valueSeparator('=').hasArg(true) .desc("expected number of rows to be count.").longOpt(OPT_EXPECTED_COUNT).build(); Option countDeleteMarkersOption = Option.builder(null).hasArg(false) - .desc("counts the number of Delete Markers of all types, i.e. (DELETE, DELETE_COLUMN, DELETE_FAMILY, DELETE_FAMILY_VERSION)") + .desc("counts the number of Delete Markers of all types, i.e. " + + "(DELETE, DELETE_COLUMN, DELETE_FAMILY, DELETE_FAMILY_VERSION)") .longOpt(OPT_COUNT_DELETE_MARKERS).build(); addOption(startTimeOption); addOption(endTimeOption); From d020606bf53829d62c035e1a303faecb7056a1e9 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Tue, 5 Nov 2024 11:59:00 +0530 Subject: [PATCH 05/15] Fixed style by applying spotless plugin. (cherry picked from commit db72e7c6165c160c7b5ae5e5aa307471ea51e1d1) --- .../hadoop/hbase/mapreduce/RowCounter.java | 14 ++-- .../hbase/mapreduce/TestRowCounter.java | 72 ++++++++++--------- 2 files changed, 48 insertions(+), 38 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 70ed32c481cb..73010482e7fd 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -17,10 +17,10 @@ */ package org.apache.hadoop.hbase.mapreduce; +import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; @@ -87,15 +87,21 @@ static class RowCounterMapper extends TableMapper.Context context) throws IOException, InterruptedException { + protected void + setup(Mapper.Context context) + throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); countDeleteMarkers = conf.getBoolean(OPT_COUNT_DELETE_MARKERS, false); } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 8596afd7e899..5307d453177a 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -528,35 +528,27 @@ public void testInvalidTable() throws Exception { } /** - * Step 1: Add 6 rows(row1, row2, row3, row4, row5 and row6) to a table. Each row contains 1 column family and 4 columns. - * Step 2: Delete a column for row1. - * Step 3: Delete a column family for row2 and row4. - * Step 4: Delete all versions of a specific column for row3, row5 and row6. + * Step 1: Add 6 rows(row1, row2, row3, row4, row5 and row6) to a table. Each row contains 1 + * column family and 4 columns. Step 2: Delete a column for row1. Step 3: Delete a column family + * for row2 and row4. Step 4: Delete all versions of a specific column for row3, row5 and row6. *

- * Case 1: Run row counter without countDeleteMarkers flag - * Step a: Validate counter values. - * Step b: Assert that the countOfDeleteMarker variable is false. + * Case 1: Run row counter without countDeleteMarkers flag Step a: Validate counter values. Step + * b: Assert that the countOfDeleteMarker variable is false. *

- * Case 2: Run row counter with countDeleteMarkers flag - * Step a: Validate counter values. - * Step b: Assert that the countOfDeleteMarker variable is true. - * @throws Exception + * Case 2: Run row counter with countDeleteMarkers flag Step a: Validate counter values. Step b: + * Assert that the countOfDeleteMarker variable is true. */ @Test public void testRowCounterWithCountDeleteMarkersOption() throws Exception { // Test Setup - final TableName tableName = TableName.valueOf(TABLE_NAME + "_" + "withCountDeleteMarkersOption"); - final byte[][] rowKeys = { - Bytes.toBytes("row1"), Bytes.toBytes("row2"), - Bytes.toBytes("row3"), Bytes.toBytes("row4"), - Bytes.toBytes("row5"), Bytes.toBytes("row6") - }; + final TableName tableName = + TableName.valueOf(TABLE_NAME + "_" + "withCountDeleteMarkersOption"); + final byte[][] rowKeys = { Bytes.toBytes("row1"), Bytes.toBytes("row2"), Bytes.toBytes("row3"), + Bytes.toBytes("row4"), Bytes.toBytes("row5"), Bytes.toBytes("row6") }; final byte[] columnFamily = Bytes.toBytes("cf"); - final byte[][] columns = { - Bytes.toBytes("A"), Bytes.toBytes("B"), - Bytes.toBytes("C"), Bytes.toBytes("D") - }; + final byte[][] columns = + { Bytes.toBytes("A"), Bytes.toBytes("B"), Bytes.toBytes("C"), Bytes.toBytes("D") }; final byte[] value = Bytes.toBytes("a"); try (Table table = TEST_UTIL.createTable(tableName, columnFamily)) { @@ -594,30 +586,42 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { // Invocation - rowCounterWithoutCountDeleteMarkers.run(new String[]{tableName.getNameAsString()}); - rowCounterWithCountDeleteMarkers.run(new String[]{ tableName.getNameAsString(), "--countDeleteMarkers"}); + rowCounterWithoutCountDeleteMarkers.run(new String[] { tableName.getNameAsString() }); + rowCounterWithCountDeleteMarkers + .run(new String[] { tableName.getNameAsString(), "--countDeleteMarkers" }); // Validation // Case 1: - validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 4, 0, 0, 0, 0, 0, 0); - assertFalse(rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", true)); + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 4, 0, + 0, 0, 0, 0, 0); + assertFalse( + rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", true)); // Case 2: - validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 24, 6, 1, 3, 2, 0); - assertTrue(rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", false)); + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 6, + 24, 6, 1, 3, 2, 0); + assertTrue( + rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", false)); } private void validateCounterCounts(Counters counters, long rowCount, long cellCount, long rowsWithDeleteMarkersCount, long deleteCount, long deleteColumnCount, long deleteFamilyCount, long deleteFamilyVersionCount) { - assertEquals(rowCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue()); - assertEquals(cellCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue()); - assertEquals(rowsWithDeleteMarkersCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue()); - assertEquals(deleteCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue()); - assertEquals(deleteColumnCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue()); - assertEquals(deleteFamilyCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue()); - assertEquals(deleteFamilyVersionCount, counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue()); + assertEquals(rowCount, + counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue()); + assertEquals(cellCount, + counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue()); + assertEquals(rowsWithDeleteMarkersCount, counters + .findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue()); + assertEquals(deleteCount, + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue()); + assertEquals(deleteColumnCount, + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue()); + assertEquals(deleteFamilyCount, + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue()); + assertEquals(deleteFamilyVersionCount, + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue()); } private void assertUsageContent(String usage) { From 30139f471b29b59ff5dfe3f079905c31d4ece7f6 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Thu, 7 Nov 2024 17:11:06 +0530 Subject: [PATCH 06/15] Removed banned dependency and used shaded one. (cherry picked from commit f2e988628fc4cfb17d73af3a604ef9e656318476) --- .../main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 73010482e7fd..c3f31694c02a 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -17,11 +17,11 @@ */ package org.apache.hadoop.hbase.mapreduce; -import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; +import org.apache.curator.shaded.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; From 706715a31887ac343ba6a2a54072a551917efffe Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Thu, 7 Nov 2024 18:33:07 +0530 Subject: [PATCH 07/15] Removed annotation. (cherry picked from commit f3db2d5728292373142dac6124248c1f6c76608d) --- .../java/org/apache/hadoop/hbase/mapreduce/RowCounter.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index c3f31694c02a..a00fefd08327 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.apache.curator.shaded.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; @@ -449,7 +448,7 @@ protected CommandLineParser newParser() { return new RowCounterCommandLineParser(); } - @VisibleForTesting + // Visible for testing public Job getMapReduceJob() { return job; } From 3bf7ff3dc85d743f37a1d001d48b7bd3258c955c Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 8 Nov 2024 09:28:11 +0530 Subject: [PATCH 08/15] Fixed test. (cherry picked from commit 12405945503ce91350fe495fbf25d652e494031f) --- .../hadoop/hbase/mapreduce/TestRowCounter.java | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 5307d453177a..77bf80faf871 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.mapreduce; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -532,11 +531,9 @@ public void testInvalidTable() throws Exception { * column family and 4 columns. Step 2: Delete a column for row1. Step 3: Delete a column family * for row2 and row4. Step 4: Delete all versions of a specific column for row3, row5 and row6. *

- * Case 1: Run row counter without countDeleteMarkers flag Step a: Validate counter values. Step - * b: Assert that the countOfDeleteMarker variable is false. + * Case 1: Run row counter without countDeleteMarkers flag Step a: Validate counter values. *

- * Case 2: Run row counter with countDeleteMarkers flag Step a: Validate counter values. Step b: - * Assert that the countOfDeleteMarker variable is true. + * Case 2: Run row counter with countDeleteMarkers flag Step a: Validate counter values. */ @Test public void testRowCounterWithCountDeleteMarkersOption() throws Exception { @@ -595,14 +592,10 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { // Case 1: validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 4, 0, 0, 0, 0, 0, 0); - assertFalse( - rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", true)); // Case 2: - validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 6, - 24, 6, 1, 3, 2, 0); - assertTrue( - rowCounterWithoutCountDeleteMarkers.getConf().getBoolean("countDeleteMarkers", false)); + validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 6, + 6, 6, 1, 3, 2, 0); } private void validateCounterCounts(Counters counters, long rowCount, long cellCount, From b7106ceeb7c92f33a567a2d0ea2ac297c446a02b Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 8 Nov 2024 10:50:10 +0530 Subject: [PATCH 09/15] Changed filter behaviour for the scan in case of --countDeleteMarkers flag is set. (cherry picked from commit 5754e536ffd56a6f8ae04891fe957a4447229052) --- .../hadoop/hbase/mapreduce/RowCounter.java | 23 +++++++++---- .../hbase/mapreduce/TestRowCounter.java | 32 ++++++++++--------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index a00fefd08327..a9124de93679 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -121,24 +121,27 @@ public void map(ImmutableBytesWritable row, Result values, Context context) thro if (countDeleteMarkers) { context.getCounter(Counters.CELLS).increment(values.size()); - boolean rowContainsDeleteMarker = true; + boolean rowContainsDeleteMarker = false; for (Cell cell : values.rawCells()) { Cell.Type type = cell.getType(); switch (type) { case Delete: + rowContainsDeleteMarker = true; context.getCounter(Counters.DELETE).increment(1); break; case DeleteColumn: + rowContainsDeleteMarker = true; context.getCounter(Counters.DELETE_COLUMN).increment(1); break; case DeleteFamily: + rowContainsDeleteMarker = true; context.getCounter(Counters.DELETE_FAMILY).increment(1); break; case DeleteFamilyVersion: + rowContainsDeleteMarker = true; context.getCounter(Counters.DELETE_FAMILY_VERSION).increment(1); break; default: - rowContainsDeleteMarker = false; break; } } @@ -163,7 +166,7 @@ public Job createSubmittableJob(Configuration conf) throws IOException { Scan scan = new Scan(); scan.setRaw(this.countDeleteMarkers); scan.setCacheBlocks(false); - setScanFilter(scan, rowRangeList); + setScanFilter(scan, rowRangeList, this.countDeleteMarkers); for (String columnName : this.columns) { String family = StringUtils.substringBefore(columnName, ":"); @@ -201,6 +204,7 @@ public static Job createSubmittableJob(Configuration conf, String[] args) throws List rowRangeList = null; long startTime = 0; long endTime = 0; + boolean countDeleteMarkers = false; StringBuilder sb = new StringBuilder(); @@ -208,6 +212,7 @@ public static Job createSubmittableJob(Configuration conf, String[] args) throws final String startTimeArgKey = "--starttime="; final String endTimeArgKey = "--endtime="; final String expectedCountArg = "--expected-count="; + final String countDeleteMarkersArg = "--countDeleteMarkers"; // First argument is table name, starting from second for (int i = 1; i < args.length; i++) { @@ -233,10 +238,15 @@ public static Job createSubmittableJob(Configuration conf, String[] args) throws Long.parseLong(args[i].substring(expectedCountArg.length()))); continue; } + if (args[i].startsWith(countDeleteMarkersArg)) { + countDeleteMarkers = true; + continue; + } // if no switch, assume column names sb.append(args[i]); sb.append(" "); } + conf.setBoolean(OPT_COUNT_DELETE_MARKERS, countDeleteMarkers); if (endTime < startTime) { printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime); return null; @@ -246,7 +256,8 @@ public static Job createSubmittableJob(Configuration conf, String[] args) throws job.setJarByClass(RowCounter.class); Scan scan = new Scan(); scan.setCacheBlocks(false); - setScanFilter(scan, rowRangeList); + scan.setRaw(countDeleteMarkers); + setScanFilter(scan, rowRangeList, countDeleteMarkers); if (sb.length() > 0) { for (String columnName : sb.toString().trim().split(" ")) { String family = StringUtils.substringBefore(columnName, ":"); @@ -304,9 +315,9 @@ private static List parseRowRangeParameter(String * Otherwise, method sets filter which is instance of {@link FirstKeyOnlyFilter}. If rowRangeList * contains exactly one element, startRow and stopRow are set to the scan. */ - private static void setScanFilter(Scan scan, List rowRangeList) { + private static void setScanFilter(Scan scan, List rowRangeList, boolean countDeleteMarkers) { final int size = rowRangeList == null ? 0 : rowRangeList.size(); - if (size <= 1) { + if (size <= 1 && !countDeleteMarkers) { scan.setFilter(new FirstKeyOnlyFilter()); } if (size == 1) { diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 77bf80faf871..55b13eaf5969 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -595,26 +595,28 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { // Case 2: validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 6, - 6, 6, 1, 3, 2, 0); + 30, 6, 1, 3, 2, 0); } private void validateCounterCounts(Counters counters, long rowCount, long cellCount, long rowsWithDeleteMarkersCount, long deleteCount, long deleteColumnCount, long deleteFamilyCount, long deleteFamilyVersionCount) { - assertEquals(rowCount, - counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue()); - assertEquals(cellCount, - counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue()); - assertEquals(rowsWithDeleteMarkersCount, counters - .findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue()); - assertEquals(deleteCount, - counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue()); - assertEquals(deleteColumnCount, - counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue()); - assertEquals(deleteFamilyCount, - counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue()); - assertEquals(deleteFamilyVersionCount, - counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue()); + + long actualRowCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue(); + long actualCellCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue(); + long actualRowsWithDeleteMarkersCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue(); + long actualDeleteCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue(); + long actualDeleteColumnCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue(); + long actualDeleteFamilyCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue(); + long actualDeleteFamilyVersionCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue(); + + assertEquals(rowCount, actualRowCount); + assertEquals(cellCount, actualCellCount); + assertEquals(rowsWithDeleteMarkersCount, actualRowsWithDeleteMarkersCount); + assertEquals(deleteCount, actualDeleteCount); + assertEquals(deleteColumnCount, actualDeleteColumnCount); + assertEquals(deleteFamilyCount, actualDeleteFamilyCount); + assertEquals(deleteFamilyVersionCount, actualDeleteFamilyVersionCount); } private void assertUsageContent(String usage) { From bff2212463ea5e1efde129cf48b2c0c43de3d5d1 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Fri, 8 Nov 2024 10:55:30 +0530 Subject: [PATCH 10/15] Applied spotless plugin. (cherry picked from commit 0e69048a4fb6f32a7b9facac94ed44e2cfa5a022) --- .../hadoop/hbase/mapreduce/RowCounter.java | 6 ++++- .../hbase/mapreduce/TestRowCounter.java | 27 ++++++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index a9124de93679..11105e87b325 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -164,6 +164,7 @@ public Job createSubmittableJob(Configuration conf) throws IOException { Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(RowCounter.class); Scan scan = new Scan(); + // raw scan will be needed to account for delete markers when --countDeleteMarkers flag is set scan.setRaw(this.countDeleteMarkers); scan.setCacheBlocks(false); setScanFilter(scan, rowRangeList, this.countDeleteMarkers); @@ -256,6 +257,7 @@ public static Job createSubmittableJob(Configuration conf, String[] args) throws job.setJarByClass(RowCounter.class); Scan scan = new Scan(); scan.setCacheBlocks(false); + // raw scan will be needed to account for delete markers when --countDeleteMarkers flag is set scan.setRaw(countDeleteMarkers); setScanFilter(scan, rowRangeList, countDeleteMarkers); if (sb.length() > 0) { @@ -315,8 +317,10 @@ private static List parseRowRangeParameter(String * Otherwise, method sets filter which is instance of {@link FirstKeyOnlyFilter}. If rowRangeList * contains exactly one element, startRow and stopRow are set to the scan. */ - private static void setScanFilter(Scan scan, List rowRangeList, boolean countDeleteMarkers) { + private static void setScanFilter(Scan scan, List rowRangeList, + boolean countDeleteMarkers) { final int size = rowRangeList == null ? 0 : rowRangeList.size(); + // all cells will be needed if --countDeleteMarkers flag is set, hence, skipping filter if (size <= 1 && !countDeleteMarkers) { scan.setFilter(new FirstKeyOnlyFilter()); } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 55b13eaf5969..1e0679a70cfa 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -594,21 +594,28 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { 0, 0, 0, 0, 0); // Case 2: - validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 6, - 30, 6, 1, 3, 2, 0); + validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 30, + 6, 1, 3, 2, 0); } private void validateCounterCounts(Counters counters, long rowCount, long cellCount, long rowsWithDeleteMarkersCount, long deleteCount, long deleteColumnCount, long deleteFamilyCount, long deleteFamilyVersionCount) { - - long actualRowCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue(); - long actualCellCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue(); - long actualRowsWithDeleteMarkersCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue(); - long actualDeleteCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue(); - long actualDeleteColumnCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue(); - long actualDeleteFamilyCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue(); - long actualDeleteFamilyVersionCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue(); + + long actualRowCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue(); + long actualCellCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue(); + long actualRowsWithDeleteMarkersCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue(); + long actualDeleteCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE).getValue(); + long actualDeleteColumnCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_COLUMN).getValue(); + long actualDeleteFamilyCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY).getValue(); + long actualDeleteFamilyVersionCount = + counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue(); assertEquals(rowCount, actualRowCount); assertEquals(cellCount, actualCellCount); From fc9bee1f5d5078c2c7b75af09c55d2b47489a6d6 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Mon, 18 Nov 2024 08:15:32 +0530 Subject: [PATCH 11/15] Altered postion of setting conf property. (cherry picked from commit 7f1d9baaff6b1e9ae24b660f777606b0ed9dce71) --- .../main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 11105e87b325..c281d3397e6b 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -160,7 +160,6 @@ public void map(ImmutableBytesWritable row, Result values, Context context) thro * @throws IOException When setting up the job fails. */ public Job createSubmittableJob(Configuration conf) throws IOException { - conf.setBoolean(OPT_COUNT_DELETE_MARKERS, this.countDeleteMarkers); Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(RowCounter.class); Scan scan = new Scan(); @@ -182,6 +181,7 @@ public Job createSubmittableJob(Configuration conf) throws IOException { if (this.expectedCount >= 0) { conf.setLong(EXPECTED_COUNT_KEY, this.expectedCount); } + conf.setBoolean(OPT_COUNT_DELETE_MARKERS, this.countDeleteMarkers); scan.setTimeRange(startTime, endTime); job.setOutputFormatClass(NullOutputFormat.class); From 6ebb4561af6b8dbb34d2ef79665fe7d9662810da Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Mon, 18 Nov 2024 10:33:45 +0530 Subject: [PATCH 12/15] Added more test coverage for all types of delete markers. (cherry picked from commit 1904ed6df731e7c6ba0b5423e4b266acb577f35c) --- hbase-mapreduce/pom.xml | 8 +++ .../hadoop/hbase/mapreduce/RowCounter.java | 5 +- .../hbase/mapreduce/TestRowCounter.java | 59 ++++++++++++------- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml index 40b6cae60da4..b1237bd569cc 100644 --- a/hbase-mapreduce/pom.xml +++ b/hbase-mapreduce/pom.xml @@ -274,6 +274,14 @@ net.revelc.code warbucks-maven-plugin + + org.apache.maven.plugins + maven-compiler-plugin + + 8 + 8 + + diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index c281d3397e6b..097fd15f1b3d 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -87,7 +87,6 @@ static class RowCounterMapper extends TableMapper= 0) { conf.setLong(EXPECTED_COUNT_KEY, this.expectedCount); } - conf.setBoolean(OPT_COUNT_DELETE_MARKERS, this.countDeleteMarkers); scan.setTimeRange(startTime, endTime); job.setOutputFormatClass(NullOutputFormat.class); diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index 1e0679a70cfa..b8c0551b535c 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -527,13 +527,22 @@ public void testInvalidTable() throws Exception { } /** - * Step 1: Add 6 rows(row1, row2, row3, row4, row5 and row6) to a table. Each row contains 1 - * column family and 4 columns. Step 2: Delete a column for row1. Step 3: Delete a column family - * for row2 and row4. Step 4: Delete all versions of a specific column for row3, row5 and row6. + * Step 1: Add 8 rows(row1, row2, row3, row4, row5, row6, row7, row8) to a table. Each row + * contains 1 column family and 4 columns and values for two different timestamps - 5 & 10. *

- * Case 1: Run row counter without countDeleteMarkers flag Step a: Validate counter values. + * Step 2: Delete the latest version of column A for row1. --> 1 X Delete *

- * Case 2: Run row counter with countDeleteMarkers flag Step a: Validate counter values. + * Step 3: Delete the cell for timestamp 5 of column B for row1. --> 1 X Delete + *

+ * Step 4: Delete a column family for row2 and row4. --> 2 X DeleteFamily + *

+ * Step 5: Delete all versions of a specific column for row3, row5 and row6. --> 3 X DeleteColumn + *

+ * Step 6: Delete all columns for timestamp 5 for row 7. --> 1 X DeleteFamilyVersion + *

+ * Case 1: Run row counter without countDeleteMarkers and validate counter values. + *

+ * Case 2: Run row counter with countDeleteMarkers flag and validate counter values. */ @Test public void testRowCounterWithCountDeleteMarkersOption() throws Exception { @@ -542,30 +551,37 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { final TableName tableName = TableName.valueOf(TABLE_NAME + "_" + "withCountDeleteMarkersOption"); final byte[][] rowKeys = { Bytes.toBytes("row1"), Bytes.toBytes("row2"), Bytes.toBytes("row3"), - Bytes.toBytes("row4"), Bytes.toBytes("row5"), Bytes.toBytes("row6") }; + Bytes.toBytes("row4"), Bytes.toBytes("row5"), Bytes.toBytes("row6"), Bytes.toBytes("row7"), + Bytes.toBytes("row8") }; final byte[] columnFamily = Bytes.toBytes("cf"); final byte[][] columns = { Bytes.toBytes("A"), Bytes.toBytes("B"), Bytes.toBytes("C"), Bytes.toBytes("D") }; - final byte[] value = Bytes.toBytes("a"); + final byte[][] values = { Bytes.toBytes("a"), Bytes.toBytes("b") }; try (Table table = TEST_UTIL.createTable(tableName, columnFamily)) { // Step 1: Insert rows with columns for (byte[] rowKey : rowKeys) { Put put = new Put(rowKey); for (byte[] col : columns) { - put.addColumn(columnFamily, col, value); + long timestamp = 5L; + for (byte[] value : values) { + put.addColumn(columnFamily, col, timestamp, value); + timestamp += 5L; + } } table.put(put); } TEST_UTIL.getAdmin().flush(tableName); - // Steps 2, 3, and 4: Delete columns, families, and all versions of columns + // Steps 2-6 Delete deleteA = new Delete(rowKeys[0]).addColumn(columnFamily, columns[0]); - Delete deleteB = new Delete(rowKeys[1]).addFamily(columnFamily); - Delete deleteC = new Delete(rowKeys[2]).addColumns(columnFamily, columns[0]); - Delete deleteD = new Delete(rowKeys[3]).addFamily(columnFamily); - Delete deleteE = new Delete(rowKeys[4]).addColumns(columnFamily, columns[0]); - Delete deleteF = new Delete(rowKeys[5]).addColumns(columnFamily, columns[0]); + Delete deleteB = new Delete(rowKeys[0]).addColumn(columnFamily, columns[1], 5L); + Delete deleteC = new Delete(rowKeys[1]).addFamily(columnFamily); + Delete deleteD = new Delete(rowKeys[2]).addColumns(columnFamily, columns[0]); + Delete deleteE = new Delete(rowKeys[3]).addFamily(columnFamily); + Delete deleteF = new Delete(rowKeys[4]).addColumns(columnFamily, columns[0]); + Delete deleteG = new Delete(rowKeys[5]).addColumns(columnFamily, columns[0]); + Delete deleteH = new Delete(rowKeys[6]).addFamilyVersion(columnFamily, 5L); table.delete(deleteA); table.delete(deleteB); @@ -573,6 +589,8 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { table.delete(deleteD); table.delete(deleteE); table.delete(deleteF); + table.delete(deleteG); + table.delete(deleteH); TEST_UTIL.getAdmin().flush(tableName); } @@ -590,22 +608,20 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { // Validation // Case 1: - validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 4, 0, - 0, 0, 0, 0, 0); + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 0, + 0, 0, 0, 0); // Case 2: - validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 30, - 6, 1, 3, 2, 0); + validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 8, 7, 2, + 3, 2, 1); } - private void validateCounterCounts(Counters counters, long rowCount, long cellCount, + private void validateCounterCounts(Counters counters, long rowCount, long rowsWithDeleteMarkersCount, long deleteCount, long deleteColumnCount, long deleteFamilyCount, long deleteFamilyVersionCount) { long actualRowCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue(); - long actualCellCount = - counters.findCounter(RowCounter.RowCounterMapper.Counters.CELLS).getValue(); long actualRowsWithDeleteMarkersCount = counters.findCounter(RowCounter.RowCounterMapper.Counters.ROWS_WITH_DELETE_MARKER).getValue(); long actualDeleteCount = @@ -618,7 +634,6 @@ private void validateCounterCounts(Counters counters, long rowCount, long cellCo counters.findCounter(RowCounter.RowCounterMapper.Counters.DELETE_FAMILY_VERSION).getValue(); assertEquals(rowCount, actualRowCount); - assertEquals(cellCount, actualCellCount); assertEquals(rowsWithDeleteMarkersCount, actualRowsWithDeleteMarkersCount); assertEquals(deleteCount, actualDeleteCount); assertEquals(deleteColumnCount, actualDeleteColumnCount); From 3a4f00865bac52a6867101e0c531428c9b409d8b Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Wed, 20 Nov 2024 10:42:46 +0530 Subject: [PATCH 13/15] Removed pom change. (cherry picked from commit 478ead401148fcf37311907d3ed8413ef8845ce3) --- hbase-mapreduce/pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml index b1237bd569cc..40b6cae60da4 100644 --- a/hbase-mapreduce/pom.xml +++ b/hbase-mapreduce/pom.xml @@ -274,14 +274,6 @@ net.revelc.code warbucks-maven-plugin - - org.apache.maven.plugins - maven-compiler-plugin - - 8 - 8 - - From 19ced25c6409d715db53a7a2cfb8d1c7ab1f572d Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Wed, 20 Nov 2024 12:09:08 +0530 Subject: [PATCH 14/15] Added UT for range row case. (cherry picked from commit 3b103ef1fb93ee8a9c9f10b18dba76af116d1eed) --- .../hadoop/hbase/mapreduce/RowCounter.java | 2 +- .../hbase/mapreduce/TestRowCounter.java | 37 ++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index 097fd15f1b3d..ff27bd49f5f0 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -461,7 +461,7 @@ protected CommandLineParser newParser() { } // Visible for testing - public Job getMapReduceJob() { + Job getMapReduceJob() { return job; } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java index b8c0551b535c..559bdd3e2633 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java @@ -26,6 +26,7 @@ import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; @@ -527,8 +528,9 @@ public void testInvalidTable() throws Exception { } /** - * Step 1: Add 8 rows(row1, row2, row3, row4, row5, row6, row7, row8) to a table. Each row - * contains 1 column family and 4 columns and values for two different timestamps - 5 & 10. + * Step 1: Add 10 rows(row1, row2, row3, row4, row5, row6, row7, row8, row9, row10) to a table. + * Each row contains 1 column family and 4 columns and values for two different timestamps - 5 & + * 10. *

* Step 2: Delete the latest version of column A for row1. --> 1 X Delete *

@@ -543,6 +545,9 @@ public void testInvalidTable() throws Exception { * Case 1: Run row counter without countDeleteMarkers and validate counter values. *

* Case 2: Run row counter with countDeleteMarkers flag and validate counter values. + *

+ * Case 3: Run row counter with countDeleteMarkers flag for a row range and validate counter + * values. */ @Test public void testRowCounterWithCountDeleteMarkersOption() throws Exception { @@ -550,9 +555,12 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { final TableName tableName = TableName.valueOf(TABLE_NAME + "_" + "withCountDeleteMarkersOption"); - final byte[][] rowKeys = { Bytes.toBytes("row1"), Bytes.toBytes("row2"), Bytes.toBytes("row3"), - Bytes.toBytes("row4"), Bytes.toBytes("row5"), Bytes.toBytes("row6"), Bytes.toBytes("row7"), - Bytes.toBytes("row8") }; + // Row keys are represented in this way because of HBASE-15287 + final byte[][] rowKeys = { Bytes.toBytesBinary("\\x00row1"), Bytes.toBytesBinary("\\x00row2"), + Bytes.toBytesBinary("\\x00row3"), Bytes.toBytesBinary("\\x00row4"), + Bytes.toBytesBinary("\\x00row5"), Bytes.toBytesBinary("\\x00row6"), + Bytes.toBytesBinary("\\x00row7"), Bytes.toBytesBinary("\\x00row8"), + Bytes.toBytesBinary("\\x00row9"), Bytes.toBytesBinary("\\x00row10") }; final byte[] columnFamily = Bytes.toBytes("cf"); final byte[][] columns = { Bytes.toBytes("A"), Bytes.toBytes("B"), Bytes.toBytes("C"), Bytes.toBytes("D") }; @@ -596,24 +604,33 @@ public void testRowCounterWithCountDeleteMarkersOption() throws Exception { RowCounter rowCounterWithoutCountDeleteMarkers = new RowCounter(); RowCounter rowCounterWithCountDeleteMarkers = new RowCounter(); - rowCounterWithoutCountDeleteMarkers.setConf(TEST_UTIL.getConfiguration()); - rowCounterWithCountDeleteMarkers.setConf(TEST_UTIL.getConfiguration()); + RowCounter rowCounterForRangeWithCountDeleteMarkers = new RowCounter(); + rowCounterWithoutCountDeleteMarkers.setConf(new Configuration(TEST_UTIL.getConfiguration())); + rowCounterWithCountDeleteMarkers.setConf(new Configuration(TEST_UTIL.getConfiguration())); + rowCounterForRangeWithCountDeleteMarkers + .setConf(new Configuration(TEST_UTIL.getConfiguration())); // Invocation rowCounterWithoutCountDeleteMarkers.run(new String[] { tableName.getNameAsString() }); rowCounterWithCountDeleteMarkers .run(new String[] { tableName.getNameAsString(), "--countDeleteMarkers" }); + rowCounterForRangeWithCountDeleteMarkers.run(new String[] { tableName.getNameAsString(), + "--countDeleteMarkers", "--range=\\x00row8,\\x00row9" }); // Validation // Case 1: - validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 6, 0, + validateCounterCounts(rowCounterWithoutCountDeleteMarkers.getMapReduceJob().getCounters(), 8, 0, 0, 0, 0, 0); // Case 2: - validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 8, 7, 2, - 3, 2, 1); + validateCounterCounts(rowCounterWithCountDeleteMarkers.getMapReduceJob().getCounters(), 10, 7, + 2, 3, 2, 1); + + // Case 3: + validateCounterCounts(rowCounterForRangeWithCountDeleteMarkers.getMapReduceJob().getCounters(), + 1, 0, 0, 0, 0, 0); } private void validateCounterCounts(Counters counters, long rowCount, From 1568fc8a751d96ac836a15a5b3e736f03b3966ae Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Thu, 21 Nov 2024 16:14:45 +0530 Subject: [PATCH 15/15] Added annotation. (cherry picked from commit 947662246af6552fe919542dc6efcd462850458f) --- .../java/org/apache/hadoop/hbase/mapreduce/RowCounter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index ff27bd49f5f0..88337ebedb76 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.mapreduce; +import com.google.errorprone.annotations.RestrictedApi; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -460,7 +461,8 @@ protected CommandLineParser newParser() { return new RowCounterCommandLineParser(); } - // Visible for testing + @RestrictedApi(explanation = "Only visible for testing", link = "", + allowedOnPath = ".*/src/test/.*") Job getMapReduceJob() { return job; }