From 781bc1175862142cb3945fee0f121f3524e6341d Mon Sep 17 00:00:00 2001 From: Victor Li Date: Tue, 25 May 2021 15:40:41 -0700 Subject: [PATCH 1/9] HBASE-25910: Fix the port assignment test by: disabling the port auto assignment and check the exception root cause against BindException. --- .../hbase/regionserver/HRegionServer.java | 1 + .../hbase/TestClusterPortAssignment.java | 22 ++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 1e23591d51da..52ac42a8f31f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -2268,6 +2268,7 @@ private void putUpWebUI() throws IOException { // auto bind enabled, try to use another port LOG.info("Failed binding http info server to port: " + port); port++; + LOG.info("Retry starting http info server with port: " + port); } } port = this.infoServer.getPort(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 933cdc661d10..1da0285daee6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -19,7 +19,10 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import java.lang.Throwable; import java.net.BindException; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.junit.ClassRule; import org.junit.Test; @@ -27,7 +30,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@org.junit.Ignore // See HBASE-24342. This test can't pass 100% of time as written so disabling @Category(MediumTests.class) public class TestClusterPortAssignment { @ClassRule @@ -44,16 +46,19 @@ public class TestClusterPortAssignment { @Test public void testClusterPortAssignment() throws Exception { boolean retry = false; + int retryCount = 0; do { int masterPort = HBaseTestingUtility.randomFreePort(); int masterInfoPort = HBaseTestingUtility.randomFreePort(); int rsPort = HBaseTestingUtility.randomFreePort(); int rsInfoPort = HBaseTestingUtility.randomFreePort(); TEST_UTIL.getConfiguration().setBoolean(LocalHBaseCluster.ASSIGN_RANDOM_PORTS, false); + TEST_UTIL.getConfiguration().setBoolean(HConstants.REGIONSERVER_INFO_PORT_AUTO, false); TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_PORT, masterPort); TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_INFO_PORT, masterInfoPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_PORT, rsPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_INFO_PORT, rsInfoPort); + LOG.info("Ports: " + masterPort + ", " + masterInfoPort + ", " + rsPort + ", " + rsInfoPort); try { MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(); assertTrue("Cluster failed to come up", cluster.waitForActiveAndReadyMaster(30000)); @@ -67,17 +72,22 @@ public void testClusterPortAssignment() throws Exception { assertEquals("RS info port is incorrect", rsInfoPort, cluster.getRegionServer(0).getInfoServer().getPort()); } catch (Exception e) { - if (e instanceof BindException || e.getCause() != null && - (e.getCause() instanceof BindException || e.getCause().getCause() != null && - e.getCause().getCause() instanceof BindException)) { + Throwable rootCause = ExceptionUtils.getRootCause(e); + if (rootCause instanceof BindException) { LOG.info("Failed bind, need to retry", e); retry = true; + retryCount++; } else { - throw e; + retry = false; + fail("Failed to start mini cluster" + e); } } finally { TEST_UTIL.shutdownMiniCluster(); } - } while (retry); + } while (retry && retryCount < 10); + + if (retry) { + fail("Retry exhausted."); + } } } From a428bb2b80fbb8cf6dd1d1ed59db5d81ee52a58a Mon Sep 17 00:00:00 2001 From: Victor Li Date: Tue, 25 May 2021 18:51:35 -0700 Subject: [PATCH 2/9] Add retry test with hijacking a port: test succeeded with hdfs cache being disabled. --- .../hadoop/hbase/TestClusterPortAssignment.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 1da0285daee6..a1a4f62f15b2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -22,6 +22,7 @@ import static org.junit.Assert.fail; import java.lang.Throwable; import java.net.BindException; +import java.net.ServerSocket; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.junit.ClassRule; @@ -54,11 +55,20 @@ public void testClusterPortAssignment() throws Exception { int rsInfoPort = HBaseTestingUtility.randomFreePort(); TEST_UTIL.getConfiguration().setBoolean(LocalHBaseCluster.ASSIGN_RANDOM_PORTS, false); TEST_UTIL.getConfiguration().setBoolean(HConstants.REGIONSERVER_INFO_PORT_AUTO, false); + TEST_UTIL.getConfiguration().setBoolean("fs.hdfs.impl.disable.cache", true); TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_PORT, masterPort); TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_INFO_PORT, masterInfoPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_PORT, rsPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_INFO_PORT, rsInfoPort); LOG.info("Ports: " + masterPort + ", " + masterInfoPort + ", " + rsPort + ", " + rsInfoPort); + ServerSocket sock = null; + if (retryCount < 2) { + try { + LOG.info("Hijack port: " + rsInfoPort); + sock = new ServerSocket(rsInfoPort); + } catch (Exception e) { + } + } try { MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(); assertTrue("Cluster failed to come up", cluster.waitForActiveAndReadyMaster(30000)); @@ -84,6 +94,12 @@ public void testClusterPortAssignment() throws Exception { } finally { TEST_UTIL.shutdownMiniCluster(); } + if (sock != null) { + try { + sock.close(); + } catch (Exception e) { + } + } } while (retry && retryCount < 10); if (retry) { From c1862c923f6e70654795c4583f532c93acfc49a0 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Tue, 25 May 2021 18:55:45 -0700 Subject: [PATCH 3/9] Removed test code for this test case. --- .../hadoop/hbase/TestClusterPortAssignment.java | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index a1a4f62f15b2..d67f399820c5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -22,7 +22,6 @@ import static org.junit.Assert.fail; import java.lang.Throwable; import java.net.BindException; -import java.net.ServerSocket; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.junit.ClassRule; @@ -61,14 +60,6 @@ public void testClusterPortAssignment() throws Exception { TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_PORT, rsPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_INFO_PORT, rsInfoPort); LOG.info("Ports: " + masterPort + ", " + masterInfoPort + ", " + rsPort + ", " + rsInfoPort); - ServerSocket sock = null; - if (retryCount < 2) { - try { - LOG.info("Hijack port: " + rsInfoPort); - sock = new ServerSocket(rsInfoPort); - } catch (Exception e) { - } - } try { MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(); assertTrue("Cluster failed to come up", cluster.waitForActiveAndReadyMaster(30000)); @@ -94,12 +85,6 @@ public void testClusterPortAssignment() throws Exception { } finally { TEST_UTIL.shutdownMiniCluster(); } - if (sock != null) { - try { - sock.close(); - } catch (Exception e) { - } - } } while (retry && retryCount < 10); if (retry) { From b734923414cddb6d88ca91fc4d09b055b9482991 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Wed, 26 May 2021 11:05:32 -0700 Subject: [PATCH 4/9] Fix checkstyle warning by removing import of Throwable. --- .../java/org/apache/hadoop/hbase/TestClusterPortAssignment.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index d67f399820c5..4c212a01692e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -20,7 +20,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import java.lang.Throwable; import java.net.BindException; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.hbase.testclassification.MediumTests; From 6a8f56f2eb32207d50bde10cb19cffab1e293814 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Thu, 27 May 2021 10:25:41 -0700 Subject: [PATCH 5/9] Address comments with a retry test. --- .../hbase/TestClusterPortAssignment.java | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 4c212a01692e..616745201788 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.net.BindException; +import java.net.ServerSocket; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.junit.ClassRule; @@ -45,7 +46,6 @@ public class TestClusterPortAssignment { @Test public void testClusterPortAssignment() throws Exception { boolean retry = false; - int retryCount = 0; do { int masterPort = HBaseTestingUtility.randomFreePort(); int masterInfoPort = HBaseTestingUtility.randomFreePort(); @@ -58,7 +58,15 @@ public void testClusterPortAssignment() throws Exception { TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_INFO_PORT, masterInfoPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_PORT, rsPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_INFO_PORT, rsInfoPort); - LOG.info("Ports: " + masterPort + ", " + masterInfoPort + ", " + rsPort + ", " + rsInfoPort); + LOG.info("Ports: {}, {}, {}, {}", masterPort, masterInfoPort, rsPort, rsInfoPort); + ServerSocket sock = null; + if (!retry) { + try { + LOG.info("Hijack port: " + rsInfoPort); + sock = new ServerSocket(rsInfoPort); + } catch (Exception e) { + } + } try { MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(); assertTrue("Cluster failed to come up", cluster.waitForActiveAndReadyMaster(30000)); @@ -76,18 +84,20 @@ public void testClusterPortAssignment() throws Exception { if (rootCause instanceof BindException) { LOG.info("Failed bind, need to retry", e); retry = true; - retryCount++; } else { + LOG.info("Failed to start mini cluster" + e); retry = false; - fail("Failed to start mini cluster" + e); + fail("Failed to start mini cluster with assigned ports."); } } finally { TEST_UTIL.shutdownMiniCluster(); } - } while (retry && retryCount < 10); - - if (retry) { - fail("Retry exhausted."); - } + if (sock != null) { + try { + sock.close(); + } catch (Exception e) { + } + } + } while (retry); } } From 6495858d2203a0a6a8a27e9cbee3ad33b96d87f1 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Thu, 27 May 2021 10:28:58 -0700 Subject: [PATCH 6/9] Removed the retry test. --- .../hadoop/hbase/TestClusterPortAssignment.java | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 616745201788..6ce20b24c18f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -21,7 +21,6 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.net.BindException; -import java.net.ServerSocket; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.junit.ClassRule; @@ -59,14 +58,6 @@ public void testClusterPortAssignment() throws Exception { TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_PORT, rsPort); TEST_UTIL.getConfiguration().setInt(HConstants.REGIONSERVER_INFO_PORT, rsInfoPort); LOG.info("Ports: {}, {}, {}, {}", masterPort, masterInfoPort, rsPort, rsInfoPort); - ServerSocket sock = null; - if (!retry) { - try { - LOG.info("Hijack port: " + rsInfoPort); - sock = new ServerSocket(rsInfoPort); - } catch (Exception e) { - } - } try { MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(); assertTrue("Cluster failed to come up", cluster.waitForActiveAndReadyMaster(30000)); @@ -92,12 +83,6 @@ public void testClusterPortAssignment() throws Exception { } finally { TEST_UTIL.shutdownMiniCluster(); } - if (sock != null) { - try { - sock.close(); - } catch (Exception e) { - } - } } while (retry); } } From 6cd572624ef2a4c62abd6e19ac306fccc8b6b390 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Thu, 27 May 2021 11:00:22 -0700 Subject: [PATCH 7/9] Add a timeout for 5 minutes with should be able to accomondate about 10+ retries. --- .../java/org/apache/hadoop/hbase/TestClusterPortAssignment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 6ce20b24c18f..981defd2644b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -42,7 +42,7 @@ public class TestClusterPortAssignment { * Check that we can start an HBase cluster specifying a custom set of * RPC and infoserver ports. */ - @Test + @Test(timeout = 300000) public void testClusterPortAssignment() throws Exception { boolean retry = false; do { From 40fa243b0c6f44544b9df6a55a4f81e6da39cac1 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Thu, 27 May 2021 11:08:14 -0700 Subject: [PATCH 8/9] cosmetics changes on logs. --- .../java/org/apache/hadoop/hbase/TestClusterPortAssignment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 981defd2644b..664d30d99a1c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -76,7 +76,7 @@ public void testClusterPortAssignment() throws Exception { LOG.info("Failed bind, need to retry", e); retry = true; } else { - LOG.info("Failed to start mini cluster" + e); + LOG.info("Failed to start mini cluster", e); retry = false; fail("Failed to start mini cluster with assigned ports."); } From fd07726c43723e4debc463ee8d1e1649be26aeb6 Mon Sep 17 00:00:00 2001 From: Victor Li Date: Fri, 28 May 2021 08:14:11 -0700 Subject: [PATCH 9/9] log error message when test fails. --- .../java/org/apache/hadoop/hbase/TestClusterPortAssignment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java index 664d30d99a1c..4d37ca5dcfc6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClusterPortAssignment.java @@ -76,7 +76,7 @@ public void testClusterPortAssignment() throws Exception { LOG.info("Failed bind, need to retry", e); retry = true; } else { - LOG.info("Failed to start mini cluster", e); + LOG.error("Failed to start mini cluster", e); retry = false; fail("Failed to start mini cluster with assigned ports."); }