From b14e28ce9109f4c45c93896fa98c7474f291128f Mon Sep 17 00:00:00 2001
From: huiruan
Date: Tue, 16 Aug 2022 21:31:17 +0800
Subject: [PATCH 1/2] add an option to skip file splitting when bulkload hfiles

---
 .../hadoop/hbase/tool/BulkLoadHFilesTool.java | 10 +++++++
 .../hadoop/hbase/tool/TestBulkLoadHFiles.java | 28 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
index d1c99fc6334e..0ff42c02ff6b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
@@ -124,6 +124,9 @@ public class BulkLoadHFilesTool extends Configured implements BulkLoadHFiles, To
    */
   public static final String BULK_LOAD_HFILES_BY_FAMILY = "hbase.mapreduce.bulkload.by.family";
 
+  public static final String SKIP_STORE_FILE_SPLITTING =
+    "hbase.loadincremental.skip.storefile.splitting";
+
   // We use a '.' prefix which is ignored when walking directory trees
   // above. It is invalid family name.
   static final String TMP_DIR = ".tmp";
@@ -141,6 +144,7 @@ public class BulkLoadHFilesTool extends Configured implements BulkLoadHFiles, To
 
   private List<String> clusterIds = new ArrayList<>();
   private boolean replicate = true;
+  private boolean skipStoreFileSplitting = false;
 
   public BulkLoadHFilesTool(Configuration conf) {
     // make a copy, just to be sure we're not overriding someone else's config
@@ -159,6 +163,7 @@ public void initialize() {
     nrThreads = conf.getInt("hbase.loadincremental.threads.max",
       Runtime.getRuntime().availableProcessors());
     bulkLoadByFamily = conf.getBoolean(BULK_LOAD_HFILES_BY_FAMILY, false);
+    skipStoreFileSplitting = conf.getBoolean(SKIP_STORE_FILE_SPLITTING, false);
   }
 
   // Initialize a thread pool
@@ -699,6 +704,11 @@ CacheConfig.DISABLED, true, getConf())) {
         Bytes.compareTo(last.get(), startEndKeys.get(firstKeyRegionIdx).getSecond()) < 0 || Bytes
           .equals(startEndKeys.get(firstKeyRegionIdx).getSecond(), HConstants.EMPTY_BYTE_ARRAY);
       if (!lastKeyInRange) {
+        if (skipStoreFileSplitting) {
+          throw new IOException("The key range of hfile=" + hfilePath + " fits into no region. "
+            + "And because " + SKIP_STORE_FILE_SPLITTING + " was set to true, "
+            + "we just skip the next steps.");
+        }
         int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
         int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) / 2;
         // make sure the splitPoint is valid in case region overlap occur, maybe the splitPoint bigger
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java
index f15ba688b13b..5a074f0c9935 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java
@@ -20,6 +20,7 @@
 import static org.apache.hadoop.hbase.HBaseTestingUtil.countRows;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -782,4 +783,31 @@ protected CompletableFuture<Collection<LoadQueueItem>> tryAtomicRegionLoad(
       util.getConfiguration().setBoolean(BulkLoadHFilesTool.BULK_LOAD_HFILES_BY_FAMILY, false);
     }
   }
+
+  @Test
+  public void testSkipStoreFileSplitting() throws IOException {
+    TableName tableName = TableName.valueOf(tn.getMethodName());
+    Table table = util.createTable(tableName, FAMILY);
+
+    util.loadTable(table, FAMILY);
+
+    FileSystem fs = util.getTestFileSystem();
+    Path sfPath = new Path(fs.getWorkingDirectory(), new Path(Bytes.toString(FAMILY), "file"));
+    HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, QUALIFIER,
+      Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
+
+
+    util.getAdmin().split(tableName);
+    util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1);
+
+    Configuration config = util.getConfiguration();
+    config.setBoolean(BulkLoadHFilesTool.SKIP_STORE_FILE_SPLITTING, true);
+    BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);
+
+    String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() };
+    assertThrows(IOException.class, () -> tool.run(args));
+    util.getHBaseCluster().getRegions(tableName).forEach(r ->
+      assertEquals(1, r.getStore(FAMILY).getStorefiles().size())
+    );
+  }
 }

From 259776bed74a72685a6317babf707ffa89a4b790 Mon Sep 17 00:00:00 2001
From: huiruan
Date: Wed, 17 Aug 2022 21:44:03 +0800
Subject: [PATCH 2/2] rename skipStoreFileSplitting to failIfNeedSplitHFile

---
 .../hadoop/hbase/tool/BulkLoadHFilesTool.java | 16 ++++++++--------
 .../hadoop/hbase/tool/TestBulkLoadHFiles.java | 12 +++++-------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
index 0ff42c02ff6b..06f97cf0aff6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
@@ -124,8 +124,8 @@ public class BulkLoadHFilesTool extends Configured implements BulkLoadHFiles, To
    */
   public static final String BULK_LOAD_HFILES_BY_FAMILY = "hbase.mapreduce.bulkload.by.family";
 
-  public static final String SKIP_STORE_FILE_SPLITTING =
-    "hbase.loadincremental.skip.storefile.splitting";
+  public static final String FAIL_IF_NEED_SPLIT_HFILE =
+    "hbase.loadincremental.fail.if.need.split.hfile";
 
   // We use a '.' prefix which is ignored when walking directory trees
   // above. It is invalid family name.
@@ -144,7 +144,7 @@ public class BulkLoadHFilesTool extends Configured implements BulkLoadHFiles, To
 
   private List<String> clusterIds = new ArrayList<>();
   private boolean replicate = true;
-  private boolean skipStoreFileSplitting = false;
+  private boolean failIfNeedSplitHFile = false;
 
   public BulkLoadHFilesTool(Configuration conf) {
     // make a copy, just to be sure we're not overriding someone else's config
@@ -163,7 +163,7 @@ public void initialize() {
     nrThreads = conf.getInt("hbase.loadincremental.threads.max",
       Runtime.getRuntime().availableProcessors());
     bulkLoadByFamily = conf.getBoolean(BULK_LOAD_HFILES_BY_FAMILY, false);
-    skipStoreFileSplitting = conf.getBoolean(SKIP_STORE_FILE_SPLITTING, false);
+    failIfNeedSplitHFile = conf.getBoolean(FAIL_IF_NEED_SPLIT_HFILE, false);
   }
 
   // Initialize a thread pool
@@ -704,10 +704,10 @@ CacheConfig.DISABLED, true, getConf())) {
         Bytes.compareTo(last.get(), startEndKeys.get(firstKeyRegionIdx).getSecond()) < 0 || Bytes
           .equals(startEndKeys.get(firstKeyRegionIdx).getSecond(), HConstants.EMPTY_BYTE_ARRAY);
       if (!lastKeyInRange) {
-        if (skipStoreFileSplitting) {
-          throw new IOException("The key range of hfile=" + hfilePath + " fits into no region. "
-            + "And because " + SKIP_STORE_FILE_SPLITTING + " was set to true, "
-            + "we just skip the next steps.");
+        if (failIfNeedSplitHFile) {
+          throw new IOException(
+            "The key range of hfile=" + hfilePath + " fits into no region. " + "And because "
+              + FAIL_IF_NEED_SPLIT_HFILE + " was set to true, we just skip the next steps.");
         }
         int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
         int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) / 2;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java
index 5a074f0c9935..591d807c0da4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java
@@ -785,7 +785,7 @@ protected CompletableFuture<Collection<LoadQueueItem>> tryAtomicRegionLoad(
   }
 
   @Test
-  public void testSkipStoreFileSplitting() throws IOException {
+  public void testFailIfNeedSplitHFile() throws IOException {
     TableName tableName = TableName.valueOf(tn.getMethodName());
     Table table = util.createTable(tableName, FAMILY);
 
@@ -796,18 +796,16 @@ public void testFailIfNeedSplitHFile() throws IOException {
     HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, QUALIFIER,
       Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
 
-
     util.getAdmin().split(tableName);
     util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1);
 
-    Configuration config = util.getConfiguration();
-    config.setBoolean(BulkLoadHFilesTool.SKIP_STORE_FILE_SPLITTING, true);
+    Configuration config = new Configuration(util.getConfiguration());
+    config.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);
     BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);
 
     String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() };
     assertThrows(IOException.class, () -> tool.run(args));
-    util.getHBaseCluster().getRegions(tableName).forEach(r ->
-      assertEquals(1, r.getStore(FAMILY).getStorefiles().size())
-    );
+    util.getHBaseCluster().getRegions(tableName)
+      .forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size()));
   }
 }
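
For reference, below is a minimal usage sketch (not part of either patch) showing how a caller could enable the fail-fast behavior introduced here. It relies only on identifiers that appear in the diffs (BulkLoadHFilesTool, FAIL_IF_NEED_SPLIT_HFILE, the Configuration-based constructor, and Tool.run); the staging directory and table name are made-up placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;

public class BulkLoadFailFastExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // With this flag set, the tool throws an IOException instead of splitting an
    // HFile whose key range does not fit into a single region (see the hunk above).
    conf.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);

    BulkLoadHFilesTool tool = new BulkLoadHFilesTool(conf);
    // Same argument form as in testFailIfNeedSplitHFile: <hfile dir> <table name>.
    // "/staging/bulk_output" and "my_table" are placeholders, not values from the patch.
    int exitCode = tool.run(new String[] { "/staging/bulk_output", "my_table" });
    System.exit(exitCode);
  }
}

When the tool is launched from the command line through ToolRunner, the same setting can presumably be passed as -Dhbase.loadincremental.fail.if.need.split.hfile=true rather than set programmatically.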