From 58dcee01590d2364348066f0648a3cabacfec94c Mon Sep 17 00:00:00 2001 From: Mihir Monani Date: Fri, 9 Feb 2024 20:11:20 -0800 Subject: [PATCH 1/2] HBASE-28204 : Canary can take lot more time If any region (except the first region) starts with delete markers --- .../apache/hadoop/hbase/tool/CanaryTool.java | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java index d5676263c820..082d2ac1e681 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java @@ -510,19 +510,38 @@ public Void call() { private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { byte[] startKey = null; - Get get = null; Scan scan = null; ResultScanner rs = null; StopWatch stopWatch = new StopWatch(); startKey = region.getStartKey(); // Can't do a get on empty start row so do a Scan of first element if any instead. if (startKey.length > 0) { - get = new Get(startKey); + Get get = new Get(startKey); get.setCacheBlocks(false); get.setFilter(new FirstKeyOnlyFilter()); get.addFamily(column.getName()); + // Converting get object to scan to enable RAW SCAN. + // This will work for all the regions of the HBase tables except first region of the table. + scan = new Scan(get); + scan.setRaw(rawScanEnabled); } else { scan = new Scan(); + // In case of first region of the HBase Table, we do not have start-key for the region. + // For Region Canary, we only need scan a single row/cell in the region to make sure that + // region is accessible. + // + // When HBase table has more than 1 empty regions at start of the row-key space, Canary will + // create multiple scan object to find first available row in the table by scanning all the + // regions in sequence until it can find first available row. 
+ // + // This could result in multiple millions of scans based on the size of table and number of + // empty regions in sequence. In test environment, A table no data and 1000 empty regions, + // Single canary run was creating close to half million to 1 million scans to successfully + // do canary run for the table. + // + // Since First region of the table doesn't have any start key, We should set End Key as + // stop row and set inclusive=false to limit scan to single region only. + scan.withStopRow(region.getEndKey(), false); LOG.debug("rawScan {} for {}", rawScanEnabled, region.getTable()); scan.setRaw(rawScanEnabled); scan.setCaching(1); @@ -536,12 +555,8 @@ private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { column.getNameAsString(), Bytes.toStringBinary(startKey)); try { stopWatch.start(); - if (startKey.length > 0) { - table.get(get); - } else { - rs = table.getScanner(scan); - rs.next(); - } + rs = table.getScanner(scan); + rs.next(); stopWatch.stop(); this.readWriteLatency.add(stopWatch.getTime()); sink.publishReadTiming(serverName, region, column, stopWatch.getTime()); From 222b34f8e45ee48321d9a0445696bd760ea2c2c6 Mon Sep 17 00:00:00 2001 From: Mihir Monani Date: Sat, 10 Feb 2024 10:12:58 -0800 Subject: [PATCH 2/2] Added more comments --- .../org/apache/hadoop/hbase/tool/CanaryTool.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java index 082d2ac1e681..a3caf1b24c74 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java @@ -527,7 +527,7 @@ private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { } else { scan = new Scan(); // In case of first region of the HBase Table, we do not have start-key for the region. 
- // For Region Canary, we only need scan a single row/cell in the region to make sure that + // For Region Canary, we only need to scan a single row/cell in the region to make sure that // region is accessible. // // When HBase table has more than 1 empty regions at start of the row-key space, Canary will @@ -535,12 +535,18 @@ private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) { // regions in sequence until it can find first available row. // // This could result in multiple millions of scans based on the size of table and number of - // empty regions in sequence. In test environment, A table no data and 1000 empty regions, - // Single canary run was creating close to half million to 1 million scans to successfully - // do canary run for the table. + // empty regions in sequence. In a test environment, for a table with no data and 1100 + // empty regions, a single canary run was creating close to half a million to 1 million + // scans to successfully complete the canary run for the table. // // Since First region of the table doesn't have any start key, We should set End Key as // stop row and set inclusive=false to limit scan to single region only. + // + // TODO: In the future, we can streamline Canary behaviour for all regions by doing a scan + // with startRow inclusive and stopRow exclusive instead of different behaviour for the + // first region of the table and the rest of the regions of the table. This way the + // implementation is simplified. As of now, this change has been kept minimal to avoid any + // unnecessary perf impact. scan.withStopRow(region.getEndKey(), false); LOG.debug("rawScan {} for {}", rawScanEnabled, region.getTable()); scan.setRaw(rawScanEnabled);