From e861e335b7aecac67d8bb2b444019d4e70ea93d0 Mon Sep 17 00:00:00 2001
From: Rajeshbabu Chintaguntla
Date: Thu, 11 May 2023 08:08:43 +0530
Subject: [PATCH] HBASE-27793 Make HBCK be able to report unknown servers

---
 .../apache/hadoop/hbase/util/HBaseFsck.java   | 16 +++++++++++
 .../hadoop/hbase/util/HbckErrorReporter.java  |  3 ++-
 .../apache/hadoop/hbase/client/TestAdmin.java | 27 +++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 7e10fd786a45..9e012ba45d0f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -655,6 +655,8 @@ public int onlineConsistencyRepair() throws IOException, KeeperException, Interr
     loadDeployedRegions();
     // check whether hbase:meta is deployed and online
     recordMetaRegion();
+    // Report inconsistencies if there are any unknown servers.
+    reportUnknownServers();
     // Check if hbase:meta is found only once and in the right place
     if (!checkMetaRegion()) {
       String errorMsg = "hbase:meta table is not consistent. ";
@@ -707,6 +709,20 @@ public int onlineConsistencyRepair() throws IOException, KeeperException, Interr
     return errors.getErrorList().size();
   }
 
+  /**
+   * Reports an {@code ERROR_CODE.UNKNOWN_SERVER} error for every server returned by
+   * {@code admin.listUnknownServers()}. Each message carries the HBCK2 command for that server.
+   * @throws IOException propagated from {@code admin.listUnknownServers()}
+   */
+  private void reportUnknownServers() throws IOException {
+    List<ServerName> unknownServers = admin.listUnknownServers();
+    for (ServerName serverName : unknownServers) {
+      errors.reportError(ERROR_CODE.UNKNOWN_SERVER,
+        "Found unknown server, some of the regions held by this server may not get assigned. "
+          + String.format("Use HBCK2 scheduleRecoveries %s to recover.", serverName));
+    }
+  }
+
   /**
    * This method maintains an ephemeral znode. If the creation fails we return false or throw
    * exception
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckErrorReporter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckErrorReporter.java
index 0735809424ed..11f99aa53824 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckErrorReporter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HbckErrorReporter.java
@@ -65,7 +65,8 @@ enum ERROR_CODE {
     UNDELETED_REPLICATION_QUEUE,
     DUPE_ENDKEYS,
     UNSUPPORTED_OPTION,
-    INVALID_TABLE
+    INVALID_TABLE,
+    UNKNOWN_SERVER
   }
 
   void clear();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
index ff928dc90b51..68a841b7d671 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
@@ -40,6 +40,9 @@
 import org.apache.hadoop.hbase.TableExistsException;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
 import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -552,4 +555,28 @@ public void testOnlineChangeTableSchema() throws IOException, InterruptedExcepti
     ADMIN.listTableDescriptors();
     assertFalse(ADMIN.tableExists(tableName));
   }
+
+  /**
+   * Rewrites the in-memory location of one region of a new table to a made-up "dummyserver" and
+   * checks that {@code listUnknownServers()} returns it, then is empty once the location is reset.
+   */
+  @Test
+  public void testUnknownServers() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY);
+    ADMIN.createTable(TableDescriptorBuilder.newBuilder(table).setColumnFamily(cfd).build());
+    final List<RegionInfo> regions = ADMIN.getRegions(table);
+    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    final AssignmentManager am = master.getAssignmentManager();
+    RegionStateNode rsNode = am.getRegionStates().getRegionStateNode(regions.get(0));
+    ServerName regionLocation = rsNode.getRegionLocation();
+    rsNode.setRegionLocation(ServerName.valueOf("dummyserver", 1234, System.currentTimeMillis()));
+    try {
+      assertTrue(ADMIN.listUnknownServers().get(0).getHostname().equals("dummyserver"));
+    } finally {
+      // Put the original location back even if the assertion above fails.
+      rsNode.setRegionLocation(regionLocation);
+    }
+    assertTrue(ADMIN.listUnknownServers().isEmpty());
+  }
 }