From e66abf1698d0b022d03397179a94e48c72e3928b Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Fri, 6 Sep 2019 14:23:45 +0200 Subject: [PATCH 1/5] HBASE-22982 Add chaos monkey action for suspend/resume region servers Add chaos monkey action for graceful rolling restart Add these to relevant chaos monkeys --- .../hadoop/hbase/DistributedHBaseCluster.java | 14 +++ .../hadoop/hbase/chaos/actions/Action.java | 42 +++++++ .../GracefulRollingRestartRsAction.java | 75 ++++++++++++ .../actions/RestartActionBaseAction.java | 17 +++ .../RollingBatchSuspendResumeRsAction.java | 115 ++++++++++++++++++ .../chaos/factories/MonkeyConstants.java | 6 + ...erAndDependenciesKillingMonkeyFactory.java | 23 +++- .../factories/ServerKillingMonkeyFactory.java | 23 +++- .../SlowDeterministicMonkeyFactory.java | 16 +++ .../StressAssignmentManagerMonkeyFactory.java | 23 ++++ .../chaos/monkies/PolicyBasedChaosMonkey.java | 18 ++- .../org/apache/hadoop/hbase/HBaseCluster.java | 14 +++ .../apache/hadoop/hbase/MiniHBaseCluster.java | 36 ++++++ 13 files changed, 409 insertions(+), 13 deletions(-) create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java index cb6069541fab..2426775617da 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java @@ -126,6 +126,20 @@ public void waitForRegionServerToStop(ServerName serverName, long timeout) throw waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout); } + @Override + public void suspendRegionServer(ServerName serverName) throws IOException { + LOG.info("Suspend RS: " + 
serverName.getServerName()); + clusterManager.suspend(ServiceType.HBASE_REGIONSERVER, + serverName.getHostname(), serverName.getPort()); + } + + @Override + public void resumeRegionServer(ServerName serverName) throws IOException { + LOG.info("Resume RS: " + serverName.getServerName()); + clusterManager.resume(ServiceType.HBASE_REGIONSERVER, + serverName.getHostname(), serverName.getPort()); + } + @Override public void startZkNode(String hostname, int port) throws IOException { LOG.info("Starting ZooKeeper node on: " + hostname); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java index 13b6b9d8edf7..6f0c1bcc2694 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java @@ -31,6 +31,7 @@ import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetrics; +import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.HBaseCluster; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HRegionInfo; @@ -163,6 +164,34 @@ protected void startMaster(ServerName server) throws IOException { LOG.info("Started master " + server.getHostname()); } + protected void stopRs(ServerName server) throws IOException { + LOG.info("Stopping regionserver " + server); + cluster.stopRegionServer(server); + cluster.waitForRegionServerToStop(server, killRsTimeout); + LOG.info("Stoppiong regionserver " + server + ". 
Reported num of rs:" + + cluster.getClusterMetrics().getLiveServerMetrics().size()); + } + + protected void suspendRs(ServerName server) throws IOException { + LOG.info("Suspending regionserver " + server); + cluster.suspendRegionServer(server); + if(!(cluster instanceof MiniHBaseCluster)){ + cluster.waitForRegionServerToStop(server, killRsTimeout); + } + LOG.info("Suspending regionserver " + server + ". Reported num of rs:" + + cluster.getClusterMetrics().getLiveServerMetrics().size()); + } + + protected void resumeRs(ServerName server) throws IOException { + LOG.info("Resuming regionserver " + server); + cluster.resumeRegionServer(server); + if(!(cluster instanceof MiniHBaseCluster)){ + cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout); + } + LOG.info("Resuming regionserver " + server + ". Reported num of rs:" + + cluster.getClusterMetrics().getLiveServerMetrics().size()); + } + protected void killRs(ServerName server) throws IOException { LOG.info("Killing regionserver " + server); cluster.killRegionServer(server); @@ -269,6 +298,19 @@ protected void forceBalancer() throws Exception { } } + protected void setBalancer(boolean onOrOff, boolean synchronous) throws Exception { + Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin(); + boolean result = false; + try { + result = admin.balancerSwitch(onOrOff, synchronous); + } catch (Exception e) { + LOG.warn("Got exception while switching balancee ", e); + } + if (!result) { + LOG.error("Balancer switch didn't succeed"); + } + } + public Configuration getConf() { return cluster.getConf(); } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java new file mode 100644 index 000000000000..3ec1ba192618 --- /dev/null +++ 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.util.RegionMover; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Gracefully restarts every non-admin regionserver in a rolling fashion. At each step, it unloads, restarts + * the loads every rs server sleeping randomly (0-sleepTime) in between servers. 
+ */ +public class GracefulRollingRestartRsAction extends RestartActionBaseAction { + private static final Logger LOG = LoggerFactory.getLogger(GracefulRollingRestartRsAction.class); + + public GracefulRollingRestartRsAction(long sleepTime) { + super(sleepTime); + } + + @Override + public void perform() throws Exception { + LOG.info("Performing action: Rolling restarting non-master region servers"); + List selectedServers = selectServers(); + + LOG.info("Disabling balancer to make unloading possible"); + setBalancer(false, false); + + for(ServerName server : selectedServers){ + String rsName = server.getAddress().toString(); + try (RegionMover rm = + new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) { + LOG.info("Unloading " + server); + rm.unload(); + LOG.info("Restarting " + server); + gracefulRestartRs(server, sleepTime); + LOG.info("Loading " + server); + rm.load(); + } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + LOG.info("Problem restarting but presume successful; code=" + e.getExitCode(), e); + } + sleep(RandomUtils.nextInt(0, (int)sleepTime)); + } + LOG.info("Enabling balancer"); + setBalancer(true, false); + } + + protected List selectServers() throws IOException { + return Arrays.asList(getCurrentServers()); + } + +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java index 6e589aeaa2da..262926a15adf 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java @@ -50,6 +50,23 @@ void restartMaster(ServerName server, long sleepTime) throws IOException { startMaster(server); } + /** + * Stop and then restart the region server instaedof killing it. 
+ * @param server + * @param sleepTime + * @throws IOException + */ + void gracefulRestartRs(ServerName server, long sleepTime) throws IOException { + sleepTime = Math.max(sleepTime, 1000); + // Don't try the stop if we're stopping already + if (context.isStopping()) { + return; + } + stopRs(server); + sleep(sleepTime); + startRs(server); + } + void restartRs(ServerName server, long sleepTime) throws IOException { sleepTime = Math.max(sleepTime, 1000); // Don't try the kill if we're stopping diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java new file mode 100644 index 000000000000..0db089ac5d97 --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.util.Threads; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +/** + * Suspend then resume a ratio of the regionservers in a rolling fashion. At each step, either suspend a + * server, or resume one, sleeping (sleepTime) in between steps. The parameter maxSuspendedServers + * limits the maximum number of servers that can be down at the same time during rolling restarts. + */ +public class RollingBatchSuspendResumeRsAction extends Action { + private static final Logger LOG = LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class); + private float ratio; + private long sleepTime; + private int maxSuspendedServers; // number of maximum suspended servers at any given time. 
Defaults to 5 + + public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio) { + this(sleepTime, ratio, 5); + } + + public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio, int maxSuspendedServers) { + this.ratio = ratio; + this.sleepTime = sleepTime; + this.maxSuspendedServers = maxSuspendedServers; + } + + enum SuspendOrResume { + SUSPEND, RESUME + } + + @Override + public void perform() throws Exception { + LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", + (int)(ratio * 100))); + List selectedServers = selectServers(); + + Queue serversToBeSuspended = new LinkedList<>(selectedServers); + Queue suspendedServers = new LinkedList<>(); + + // loop while there are servers to be suspended or suspended servers to be resumed + while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context.isStopping()) { + SuspendOrResume action; + + if (serversToBeSuspended.isEmpty()) { // no more servers to suspend + action = SuspendOrResume.RESUME; + } else if (suspendedServers.isEmpty()) { + action = SuspendOrResume.SUSPEND; // no more servers to resume + } else if (suspendedServers.size() >= maxSuspendedServers) { + // we have too many suspended servers. Don't suspend any more + action = SuspendOrResume.RESUME; + } else { + // do a coin toss + action = RandomUtils.nextBoolean() ? 
SuspendOrResume.SUSPEND : SuspendOrResume.RESUME; + } + + ServerName server; + switch (action) { + case SUSPEND: + server = serversToBeSuspended.remove(); + try { + suspendRs(server); + } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + LOG.info("Problem suspending but presume successful; code=" + e.getExitCode(), e); + } + suspendedServers.add(server); + break; + case RESUME: + server = suspendedServers.remove(); + try { + resumeRs(server); + } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + LOG.info("Problem resuming, will retry; code=" + e.getExitCode(), e); + } + break; + } + + LOG.info("Sleeping for:" + sleepTime); + Threads.sleep(sleepTime); + } + } + + protected List selectServers() throws IOException { + return PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), ratio); + } + +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java index 5657d3962e87..9051e98ff2c2 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java @@ -45,6 +45,9 @@ public interface MonkeyConstants { String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period"; String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs"; String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time"; + String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time"; + String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time"; + String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio"; /** * A Set of prefixes which encompasses all of the configuration properties for the ChaosMonky. 
@@ -75,4 +78,7 @@ public interface MonkeyConstants { long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000; boolean DEFAULT_UNBALANCE_KILL_META_RS = true; long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000; + long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000; + long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000; + float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java index 4faa786bcd20..18a6fbaf6649 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java @@ -21,11 +21,13 @@ import org.apache.hadoop.hbase.chaos.actions.Action; import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction; import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction; +import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction; import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction; import org.apache.hadoop.hbase.chaos.actions.RestartRandomDataNodeAction; import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction; import org.apache.hadoop.hbase.chaos.actions.RestartRandomZKNodeAction; import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsExceptMetaAction; +import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction; import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy; @@ -38,8 +40,13 @@ */ public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory { + private long gracefulRollingRestartTSSLeepTime; + 
private long rollingBatchSuspendRSSleepTime; + private float rollingBatchSuspendtRSRatio; + @Override public ChaosMonkey build() { + loadProperties(); // Destructive actions to mess things around. Cannot run batch restart. Action[] actions1 = new Action[]{ @@ -48,7 +55,9 @@ public ChaosMonkey build() { new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), // only allow 2 servers to be dead. new ForceBalancerAction(), new RestartRandomDataNodeAction(60000), - new RestartRandomZKNodeAction(60000) + new RestartRandomZKNodeAction(60000), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging @@ -62,4 +71,16 @@ public ChaosMonkey build() { new PeriodicRandomActionPolicy(60 * 1000, actions1)), new PeriodicRandomActionPolicy(60 * 1000, actions2)); } + + private void loadProperties() { + gracefulRollingRestartTSSLeepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java index 02b59140c280..dfab4ff22de6 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java +++ 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java @@ -21,9 +21,11 @@ import org.apache.hadoop.hbase.chaos.actions.Action; import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction; import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction; +import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction; import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction; import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction; import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsExceptMetaAction; +import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction; import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy; @@ -36,15 +38,22 @@ */ public class ServerKillingMonkeyFactory extends MonkeyFactory { + private long gracefulRollingRestartTSSLeepTime; + private long rollingBatchSuspendRSSleepTime; + private float rollingBatchSuspendtRSRatio; + @Override public ChaosMonkey build() { + loadProperties(); // Destructive actions to mess things around. 
Cannot run batch restart Action[] actions1 = new Action[] { new RestartRandomRsExceptMetaAction(60000), new RestartActiveMasterAction(5000), new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), //only allow 2 servers to be dead - new ForceBalancerAction() + new ForceBalancerAction(), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging @@ -58,4 +67,16 @@ public ChaosMonkey build() { new PeriodicRandomActionPolicy(60 * 1000, actions1)), new PeriodicRandomActionPolicy(60 * 1000, actions2)); } + + private void loadProperties() { + gracefulRollingRestartTSSLeepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java index 101a7d5d9655..b2bad4381dae 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction; import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction; import org.apache.hadoop.hbase.chaos.actions.FlushTableAction; 
+import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction; import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction; import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction; import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction; @@ -39,6 +40,7 @@ import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction; import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction; import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction; +import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction; import org.apache.hadoop.hbase.chaos.actions.SnapshotTableAction; import org.apache.hadoop.hbase.chaos.actions.SplitAllRegionOfTableAction; import org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction; @@ -66,6 +68,9 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory { private float compactTableRatio; private float compactRandomRegionRatio; private long decreaseHFileSizeSleepTime; + private long gracefulRollingRestartTSSLeepTime; + private long rollingBatchSuspendRSSleepTime; + private float rollingBatchSuspendtRSRatio; @Override public ChaosMonkey build() { @@ -110,6 +115,8 @@ public ChaosMonkey build() { new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), new SplitAllRegionOfTableAction(tableName), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging @@ -179,5 +186,14 @@ private void loadProperties() { decreaseHFileSizeSleepTime = Long.parseLong(this.properties.getProperty( MonkeyConstants.DECREASE_HFILE_SIZE_SLEEP_TIME, MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME + "")); + gracefulRollingRestartTSSLeepTime = Long.parseLong(this.properties.getProperty( + 
MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java index 2c6bb4664283..d4d3395759e9 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction; import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction; import org.apache.hadoop.hbase.chaos.actions.FlushTableAction; +import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction; import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction; import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction; import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction; @@ -34,6 +35,7 @@ import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction; import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction; import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction; +import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction; import org.apache.hadoop.hbase.chaos.actions.SplitAllRegionOfTableAction; import 
org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction; import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; @@ -43,8 +45,15 @@ import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; public class StressAssignmentManagerMonkeyFactory extends MonkeyFactory { + + private long gracefulRollingRestartTSSLeepTime; + private long rollingBatchSuspendRSSleepTime; + private float rollingBatchSuspendtRSRatio; + @Override public ChaosMonkey build() { + loadProperties(); + // Actions that could slow down region movement. // These could also get regions stuck if there are issues. Action[] actions1 = new Action[]{ @@ -72,6 +81,8 @@ public ChaosMonkey build() { new SplitAllRegionOfTableAction(tableName), new DecreaseMaxHFileSizeAction(MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME, tableName), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging @@ -87,4 +98,16 @@ public ChaosMonkey build() { new PeriodicRandomActionPolicy(90 * 1000, actions3) ); } + + private void loadProperties() { + gracefulRollingRestartTSSLeepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java index 
70636dd3e84d..fed51491af3f 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java @@ -18,8 +18,9 @@ package org.apache.hadoop.hbase.chaos.monkies; -import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import org.apache.commons.lang3.RandomUtils; @@ -90,18 +91,13 @@ public static T selectWeightedRandomItem(List> items) { /** Selects and returns ceil(ratio * items.length) random items from the given array */ public static List selectRandomItems(T[] items, float ratio) { - int remaining = (int)Math.ceil(items.length * ratio); + int selectedNumber = (int)Math.ceil(items.length * ratio); - List selectedItems = new ArrayList<>(remaining); + List originalItems = Arrays.asList(items); + Collections.shuffle(originalItems); - for (int i=0; i 0; i++) { - if (RandomUtils.nextFloat() < ((float)remaining/(items.length-i))) { - selectedItems.add(items[i]); - remaining--; - } - } - - return selectedItems; + int startIndex = RandomUtils.nextInt(0, items.length - selectedNumber); + return originalItems.subList(startIndex, startIndex + selectedNumber); } private Policy[] policies; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java index 3a1c8945f2a0..43a704ddc91c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java @@ -148,6 +148,20 @@ public void waitForRegionServerToStart(String hostname, int port, long timeout) public abstract void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException; + /** + * Suspend the region server + * @param serverName + * @throws IOException + */ + public abstract void suspendRegionServer(ServerName 
serverName) throws IOException; + + /** + * Resume the region server + * @param serverName + * @throws IOException + */ + public abstract void resumeRegionServer(ServerName serverName) throws IOException; + /** * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, * silently logs warning message. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java index 99dca1df66b0..666a65b9f6f5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java @@ -291,6 +291,16 @@ public void stopRegionServer(ServerName serverName) throws IOException { stopRegionServer(getRegionServerIndex(serverName)); } + @Override + public void suspendRegionServer(ServerName serverName) throws IOException { + suspendRegionServer(getRegionServerIndex(serverName)); + } + + @Override + public void resumeRegionServer(ServerName serverName) throws IOException { + resumeRegionServer(getRegionServerIndex(serverName)); + } + @Override public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException { //ignore timeout for now @@ -489,6 +499,32 @@ public JVMClusterUtil.RegionServerThread stopRegionServer(int serverNumber, return server; } + /** + * Suspend the specified region server + * @param serverNumber Used as index into a list. + * @return + */ + public JVMClusterUtil.RegionServerThread suspendRegionServer(int serverNumber) { + JVMClusterUtil.RegionServerThread server = + hbaseCluster.getRegionServers().get(serverNumber); + LOG.info("Suspending " + server.toString()); + server.suspend(); + return server; + } + + /** + * Resume the specified region server + * @param serverNumber Used as index into a list. 
+ * @return + */ + public JVMClusterUtil.RegionServerThread resumeRegionServer(int serverNumber) { + JVMClusterUtil.RegionServerThread server = + hbaseCluster.getRegionServers().get(serverNumber); + LOG.info("Resuming " + server.toString()); + server.resume(); + return server; + } + /** * Wait for the specified region server to stop. Removes this thread from list * of running threads. From f8b6b0fc02c97aa9803501a96b5f6c825d3908a2 Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Mon, 9 Sep 2019 10:55:51 +0200 Subject: [PATCH 2/5] HBASE-22982 checkstyle fixes --- .../hadoop/hbase/chaos/actions/Action.java | 2 +- .../GracefulRollingRestartRsAction.java | 13 +- .../actions/RestartActionBaseAction.java | 6 +- .../RollingBatchSuspendResumeRsAction.java | 66 ++++---- ...erAndDependenciesKillingMonkeyFactory.java | 3 +- .../factories/ServerKillingMonkeyFactory.java | 16 +- .../SlowDeterministicMonkeyFactory.java | 155 +++++++++--------- .../StressAssignmentManagerMonkeyFactory.java | 51 +++--- .../org/apache/hadoop/hbase/HBaseCluster.java | 8 +- 9 files changed, 162 insertions(+), 158 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java index 6f0c1bcc2694..c676b22b53c9 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java @@ -31,11 +31,11 @@ import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetrics; -import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.HBaseCluster; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerMetrics; import 
org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java index 3ec1ba192618..00c30987a982 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java @@ -18,21 +18,18 @@ package org.apache.hadoop.hbase.chaos.actions; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; import org.apache.commons.lang3.RandomUtils; -import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.RegionMover; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - /** - * Gracefully restarts every non-admin regionserver in a rolling fashion. At each step, it unloads, restarts - * the loads every rs server sleeping randomly (0-sleepTime) in between servers. + * Gracefully restarts every non-admin regionserver in a rolling fashion. At each step, it unloads, + * restarts the loads every rs server sleeping randomly (0-sleepTime) in between servers. 
*/ public class GracefulRollingRestartRsAction extends RestartActionBaseAction { private static final Logger LOG = LoggerFactory.getLogger(GracefulRollingRestartRsAction.class); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java index 262926a15adf..5f39c7fd7640 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java @@ -52,9 +52,9 @@ void restartMaster(ServerName server, long sleepTime) throws IOException { /** * Stop and then restart the region server instaedof killing it. - * @param server - * @param sleepTime - * @throws IOException + * @param server hostname to restart the regionserver on + * @param sleepTime number of milliseconds between stop and restart + * @throws IOException if something goes wrong */ void gracefulRestartRs(ServerName server, long sleepTime) throws IOException { sleepTime = Math.max(sleepTime, 1000); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java index 0db089ac5d97..78af04da308e 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java @@ -18,30 +18,30 @@ package org.apache.hadoop.hbase.chaos.actions; +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + import org.apache.commons.lang3.RandomUtils; -import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import 
org.apache.hadoop.hbase.util.Threads; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Queue; - /** - * Suspend then resume a ratio of the regionservers in a rolling fashion. At each step, either suspend a - * server, or resume one, sleeping (sleepTime) in between steps. The parameter maxSuspendedServers - * limits the maximum number of servers that can be down at the same time during rolling restarts. + * Suspend then resume a ratio of the regionservers in a rolling fashion. At each step, either + * suspend a server, or resume one, sleeping (sleepTime) in between steps. The parameter + * maxSuspendedServers limits the maximum number of servers that can be down at the same time + * during rolling restarts. */ public class RollingBatchSuspendResumeRsAction extends Action { - private static final Logger LOG = LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class); + private static final Logger LOG = + LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class); private float ratio; private long sleepTime; - private int maxSuspendedServers; // number of maximum suspended servers at any given time. Defaults to 5 + private int maxSuspendedServers; // number of maximum suspended servers at any given time. 
public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio) { this(sleepTime, ratio, 5); @@ -57,17 +57,17 @@ enum SuspendOrResume { SUSPEND, RESUME } - @Override - public void perform() throws Exception { + @Override public void perform() throws Exception { LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", - (int)(ratio * 100))); + (int) (ratio * 100))); List selectedServers = selectServers(); Queue serversToBeSuspended = new LinkedList<>(selectedServers); Queue suspendedServers = new LinkedList<>(); // loop while there are servers to be suspended or suspended servers to be resumed - while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context.isStopping()) { + while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context + .isStopping()) { SuspendOrResume action; if (serversToBeSuspended.isEmpty()) { // no more servers to suspend @@ -84,23 +84,23 @@ public void perform() throws Exception { ServerName server; switch (action) { - case SUSPEND: - server = serversToBeSuspended.remove(); - try { - suspendRs(server); - } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { - LOG.info("Problem suspending but presume successful; code=" + e.getExitCode(), e); - } - suspendedServers.add(server); - break; - case RESUME: - server = suspendedServers.remove(); - try { - resumeRs(server); - } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { - LOG.info("Problem resuming, will retry; code=" + e.getExitCode(), e); - } - break; + case SUSPEND: + server = serversToBeSuspended.remove(); + try { + suspendRs(server); + } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + LOG.info("Problem suspending but presume successful; code=" + e.getExitCode(), e); + } + suspendedServers.add(server); + break; + case RESUME: + server = suspendedServers.remove(); + try { + resumeRs(server); + } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + LOG.info("Problem 
resuming, will retry; code=" + e.getExitCode(), e); + } + break; } LOG.info("Sleeping for:" + sleepTime); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java index 18a6fbaf6649..2e763adbfd7b 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java @@ -57,7 +57,8 @@ public ChaosMonkey build() { new RestartRandomDataNodeAction(60000), new RestartRandomZKNodeAction(60000), new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), - new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, + rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java index dfab4ff22de6..2a9394d26cc4 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java @@ -48,17 +48,19 @@ public ChaosMonkey build() { // Destructive actions to mess things around. 
Cannot run batch restart Action[] actions1 = new Action[] { - new RestartRandomRsExceptMetaAction(60000), - new RestartActiveMasterAction(5000), - new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), //only allow 2 servers to be dead - new ForceBalancerAction(), - new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), - new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) + new RestartRandomRsExceptMetaAction(60000), + new RestartActiveMasterAction(5000), + //only allow 2 servers to be dead + new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), + new ForceBalancerAction(), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, + rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging Action[] actions2 = new Action[] { - new DumpClusterStatusAction() + new DumpClusterStatusAction() }; return new PolicyBasedChaosMonkey(util, diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java index b2bad4381dae..784a6ce45b38 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java @@ -79,49 +79,50 @@ public ChaosMonkey build() { // move one region around. They are not so destructive, // can be executed more frequently. 
Action[] actions1 = new Action[] { - new CompactTableAction(tableName, compactTableRatio), - new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio), - new FlushTableAction(tableName), - new FlushRandomRegionOfTableAction(tableName), - new MoveRandomRegionOfTableAction(tableName) + new CompactTableAction(tableName, compactTableRatio), + new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName), + new MoveRandomRegionOfTableAction(tableName) }; // Actions such as split/merge/snapshot. // They should not cause data loss, or unreliability // such as region stuck in transition. Action[] actions2 = new Action[] { - new SplitRandomRegionOfTableAction(tableName), - new MergeRandomAdjacentRegionsOfTableAction(tableName), - new SnapshotTableAction(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName, columnFamilies), - new ChangeEncodingAction(tableName), - new ChangeCompressionAction(tableName), - new ChangeBloomFilterAction(tableName), - new ChangeVersionsAction(tableName), - new ChangeSplitPolicyAction(tableName), + new SplitRandomRegionOfTableAction(tableName), + new MergeRandomAdjacentRegionsOfTableAction(tableName), + new SnapshotTableAction(tableName), + new AddColumnAction(tableName), + new RemoveColumnAction(tableName, columnFamilies), + new ChangeEncodingAction(tableName), + new ChangeCompressionAction(tableName), + new ChangeBloomFilterAction(tableName), + new ChangeVersionsAction(tableName), + new ChangeSplitPolicyAction(tableName), }; // Destructive actions to mess things around. 
Action[] actions3 = new Action[] { - new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, - tableName), - new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName), - new RestartRandomRsAction(restartRandomRSSleepTime), - new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio), - new RestartActiveMasterAction(restartActiveMasterSleepTime), - new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, - rollingBatchRestartRSRatio), - new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), - new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), - new SplitAllRegionOfTableAction(tableName), - new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), - new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) + new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, + tableName), + new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName), + new RestartRandomRsAction(restartRandomRSSleepTime), + new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio), + new RestartActiveMasterAction(restartActiveMasterSleepTime), + new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, + rollingBatchRestartRSRatio), + new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), + new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), + new SplitAllRegionOfTableAction(tableName), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, + rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging Action[] actions4 = new Action[] { - new DumpClusterStatusAction() + new DumpClusterStatusAction() }; return new PolicyBasedChaosMonkey(util, @@ -135,54 +136,54 @@ public ChaosMonkey build() { private void loadProperties() { - action1Period = Long.parseLong(this.properties.getProperty( - 
MonkeyConstants.PERIODIC_ACTION1_PERIOD, - MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); - action2Period = Long.parseLong(this.properties.getProperty( - MonkeyConstants.PERIODIC_ACTION2_PERIOD, - MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + "")); - action3Period = Long.parseLong(this.properties.getProperty( - MonkeyConstants.COMPOSITE_ACTION3_PERIOD, - MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD + "")); - action4Period = Long.parseLong(this.properties.getProperty( - MonkeyConstants.PERIODIC_ACTION4_PERIOD, - MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD + "")); - moveRegionsMaxTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.MOVE_REGIONS_MAX_TIME, - MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME + "")); - moveRegionsSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.MOVE_REGIONS_SLEEP_TIME, - MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME + "")); - moveRandomRegionSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME, - MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME + "")); - restartRandomRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME + "")); - batchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME + "")); - batchRestartRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO + "")); - restartActiveMasterSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME, - MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + "")); - rollingBatchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - 
MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); - rollingBatchRestartRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); - restartRsHoldingMetaSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME, - MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME + "")); - compactTableRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.COMPACT_TABLE_ACTION_RATIO, - MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO + "")); - compactRandomRegionRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.COMPACT_RANDOM_REGION_RATIO, - MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO + "")); + action1Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION1_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); + action2Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION2_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + "")); + action3Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.COMPOSITE_ACTION3_PERIOD, + MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD + "")); + action4Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION4_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD + "")); + moveRegionsMaxTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.MOVE_REGIONS_MAX_TIME, + MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME + "")); + moveRegionsSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.MOVE_REGIONS_SLEEP_TIME, + MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME + "")); + moveRandomRegionSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME, + MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME + "")); + 
restartRandomRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME + "")); + batchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME + "")); + batchRestartRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO + "")); + restartActiveMasterSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME, + MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + "")); + rollingBatchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + rollingBatchRestartRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + restartRsHoldingMetaSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME, + MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME + "")); + compactTableRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.COMPACT_TABLE_ACTION_RATIO, + MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO + "")); + compactRandomRegionRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.COMPACT_RANDOM_REGION_RATIO, + MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO + "")); decreaseHFileSizeSleepTime = Long.parseLong(this.properties.getProperty( MonkeyConstants.DECREASE_HFILE_SIZE_SLEEP_TIME, MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME + "")); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java index d4d3395759e9..4ae980579ecf 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java @@ -57,37 +57,40 @@ public ChaosMonkey build() { // Actions that could slow down region movement. // These could also get regions stuck if there are issues. Action[] actions1 = new Action[]{ - new CompactTableAction(tableName, 0.5f), - new CompactRandomRegionOfTableAction(tableName, 0.6f), - new FlushTableAction(tableName), - new FlushRandomRegionOfTableAction(tableName) + new CompactTableAction(tableName, 0.5f), + new CompactRandomRegionOfTableAction(tableName, 0.6f), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName) }; Action[] actions2 = new Action[]{ - new SplitRandomRegionOfTableAction(tableName), - new MergeRandomAdjacentRegionsOfTableAction(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName, columnFamilies), - new MoveRegionsOfTableAction(MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME, - 1600, - tableName), - new MoveRandomRegionOfTableAction(MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME, - tableName), - new RestartRandomRsAction(MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME), - new BatchRestartRsAction(MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME, 0.5f), - new RollingBatchRestartRsAction(MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME, 1.0f), - new RestartRsHoldingMetaAction(MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME), - new ChangeSplitPolicyAction(tableName), - new SplitAllRegionOfTableAction(tableName), - new DecreaseMaxHFileSizeAction(MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME, - tableName), - new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), - new 
RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) + new SplitRandomRegionOfTableAction(tableName), + new MergeRandomAdjacentRegionsOfTableAction(tableName), + new AddColumnAction(tableName), + new RemoveColumnAction(tableName, columnFamilies), + new MoveRegionsOfTableAction(MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME, + 1600, + tableName), + new MoveRandomRegionOfTableAction(MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME, + tableName), + new RestartRandomRsAction(MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME), + new BatchRestartRsAction(MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + 0.5f), + new RollingBatchRestartRsAction(MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME, + 1.0f), + new RestartRsHoldingMetaAction(MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME), + new ChangeSplitPolicyAction(tableName), + new SplitAllRegionOfTableAction(tableName), + new DecreaseMaxHFileSizeAction(MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME, + tableName), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, + rollingBatchSuspendtRSRatio) }; // Action to log more info for debugging Action[] actions3 = new Action[]{ - new DumpClusterStatusAction() + new DumpClusterStatusAction() }; return new PolicyBasedChaosMonkey(util, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java index 43a704ddc91c..85dff357ca62 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java @@ -150,15 +150,15 @@ public abstract void waitForRegionServerToStop(ServerName serverName, long timeo /** * Suspend the region server - * @param serverName - * @throws IOException + * @param serverName the hostname to suspend the 
regionserver on + * @throws IOException if something goes wrong */ public abstract void suspendRegionServer(ServerName serverName) throws IOException; /** * Resume the region server - * @param serverName - * @throws IOException + * @param serverName the hostname to resume the regionserver on + * @throws IOException if something goes wrong */ public abstract void resumeRegionServer(ServerName serverName) throws IOException; From 5cc76d147373205d077fda28a0b7a05fd06ba195 Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Wed, 18 Sep 2019 13:20:00 +0200 Subject: [PATCH 3/5] HBASE-22982 use parametrized logging in affected classes other small fixes --- .../hadoop/hbase/DistributedHBaseCluster.java | 72 +++++++++---------- .../hadoop/hbase/chaos/actions/Action.java | 72 +++++++++---------- .../GracefulRollingRestartRsAction.java | 2 +- .../RollingBatchSuspendResumeRsAction.java | 5 +- 4 files changed, 76 insertions(+), 75 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java index 2426775617da..796bc1f27e00 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java @@ -97,13 +97,13 @@ public void close() throws IOException { @Override public void startRegionServer(String hostname, int port) throws IOException { - LOG.info("Starting RS on: " + hostname); + LOG.info("Starting RS on: {}", hostname); clusterManager.start(ServiceType.HBASE_REGIONSERVER, hostname, port); } @Override public void killRegionServer(ServerName serverName) throws IOException { - LOG.info("Aborting RS: " + serverName.getServerName()); + LOG.info("Aborting RS: {}", serverName.getServerName()); killedRegionServers.add(serverName); clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname(), serverName.getPort()); @@ -116,7 +116,7 @@ public boolean 
isKilledRS(ServerName serverName) { @Override public void stopRegionServer(ServerName serverName) throws IOException { - LOG.info("Stopping RS: " + serverName.getServerName()); + LOG.info("Stopping RS: {}", serverName.getServerName()); clusterManager.stop(ServiceType.HBASE_REGIONSERVER, serverName.getHostname(), serverName.getPort()); } @@ -128,34 +128,34 @@ public void waitForRegionServerToStop(ServerName serverName, long timeout) throw @Override public void suspendRegionServer(ServerName serverName) throws IOException { - LOG.info("Suspend RS: " + serverName.getServerName()); + LOG.info("Suspend RS: {}", serverName.getServerName()); clusterManager.suspend(ServiceType.HBASE_REGIONSERVER, serverName.getHostname(), serverName.getPort()); } @Override public void resumeRegionServer(ServerName serverName) throws IOException { - LOG.info("Resume RS: " + serverName.getServerName()); + LOG.info("Resume RS: {}", serverName.getServerName()); clusterManager.resume(ServiceType.HBASE_REGIONSERVER, serverName.getHostname(), serverName.getPort()); } @Override public void startZkNode(String hostname, int port) throws IOException { - LOG.info("Starting ZooKeeper node on: " + hostname); + LOG.info("Starting ZooKeeper node on: {}", hostname); clusterManager.start(ServiceType.ZOOKEEPER_SERVER, hostname, port); } @Override public void killZkNode(ServerName serverName) throws IOException { - LOG.info("Aborting ZooKeeper node on: " + serverName.getServerName()); + LOG.info("Aborting ZooKeeper node on: {}", serverName.getServerName()); clusterManager.kill(ServiceType.ZOOKEEPER_SERVER, serverName.getHostname(), serverName.getPort()); } @Override public void stopZkNode(ServerName serverName) throws IOException { - LOG.info("Stopping ZooKeeper node: " + serverName.getServerName()); + LOG.info("Stopping ZooKeeper node: {}", serverName.getServerName()); clusterManager.stop(ServiceType.ZOOKEEPER_SERVER, serverName.getHostname(), serverName.getPort()); } @@ -172,21 +172,21 @@ public void 
waitForZkNodeToStop(ServerName serverName, long timeout) throws IOEx @Override public void startDataNode(ServerName serverName) throws IOException { - LOG.info("Starting data node on: " + serverName.getServerName()); + LOG.info("Starting data node on: {}", serverName.getServerName()); clusterManager.start(ServiceType.HADOOP_DATANODE, serverName.getHostname(), serverName.getPort()); } @Override public void killDataNode(ServerName serverName) throws IOException { - LOG.info("Aborting data node on: " + serverName.getServerName()); + LOG.info("Aborting data node on: {}", serverName.getServerName()); clusterManager.kill(ServiceType.HADOOP_DATANODE, serverName.getHostname(), serverName.getPort()); } @Override public void stopDataNode(ServerName serverName) throws IOException { - LOG.info("Stopping data node on: " + serverName.getServerName()); + LOG.info("Stopping data node on: {}", serverName.getServerName()); clusterManager.stop(ServiceType.HADOOP_DATANODE, serverName.getHostname(), serverName.getPort()); } @@ -203,21 +203,21 @@ public void waitForDataNodeToStop(ServerName serverName, long timeout) throws IO @Override public void startNameNode(ServerName serverName) throws IOException { - LOG.info("Starting name node on: " + serverName.getServerName()); + LOG.info("Starting name node on: {}", serverName.getServerName()); clusterManager.start(ServiceType.HADOOP_NAMENODE, serverName.getHostname(), serverName.getPort()); } @Override public void killNameNode(ServerName serverName) throws IOException { - LOG.info("Aborting name node on: " + serverName.getServerName()); + LOG.info("Aborting name node on: {}", serverName.getServerName()); clusterManager.kill(ServiceType.HADOOP_NAMENODE, serverName.getHostname(), serverName.getPort()); } @Override public void stopNameNode(ServerName serverName) throws IOException { - LOG.info("Stopping name node on: " + serverName.getServerName()); + LOG.info("Stopping name node on: {}", serverName.getServerName()); 
clusterManager.stop(ServiceType.HADOOP_NAMENODE, serverName.getHostname(), serverName.getPort()); } @@ -234,7 +234,7 @@ public void waitForNameNodeToStop(ServerName serverName, long timeout) throws IO private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout) throws IOException { - LOG.info("Waiting for service: " + service + " to stop: " + serverName.getServerName()); + LOG.info("Waiting for service: {} to stop: {}", service, serverName.getServerName()); long start = System.currentTimeMillis(); while ((System.currentTimeMillis() - start) < timeout) { @@ -248,7 +248,7 @@ private void waitForServiceToStop(ServiceType service, ServerName serverName, lo private void waitForServiceToStart(ServiceType service, ServerName serverName, long timeout) throws IOException { - LOG.info("Waiting for service: " + service + " to start: " + serverName.getServerName()); + LOG.info("Waiting for service: {} to start: {}", service, serverName.getServerName()); long start = System.currentTimeMillis(); while ((System.currentTimeMillis() - start) < timeout) { @@ -262,19 +262,19 @@ private void waitForServiceToStart(ServiceType service, ServerName serverName, l @Override public void startMaster(String hostname, int port) throws IOException { - LOG.info("Starting Master on: " + hostname + ":" + port); + LOG.info("Starting Master on: {}:{}", hostname, port); clusterManager.start(ServiceType.HBASE_MASTER, hostname, port); } @Override public void killMaster(ServerName serverName) throws IOException { - LOG.info("Aborting Master: " + serverName.getServerName()); + LOG.info("Aborting Master: {}", serverName.getServerName()); clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname(), serverName.getPort()); } @Override public void stopMaster(ServerName serverName) throws IOException { - LOG.info("Stopping Master: " + serverName.getServerName()); + LOG.info("Stopping Master: {}", serverName.getServerName()); clusterManager.stop(ServiceType.HBASE_MASTER,
serverName.getHostname(), serverName.getPort()); } @@ -308,7 +308,7 @@ public ServerName getServerHoldingRegion(TableName tn, byte[] regionName) throws regionLoc = locator.getRegionLocation(startKey, true); } if (regionLoc == null) { - LOG.warn("Cannot find region server holding region " + Bytes.toStringBinary(regionName)); + LOG.warn("Cannot find region server holding region {}", Bytes.toStringBinary(regionName)); return null; } return regionLoc.getServerName(); @@ -352,15 +352,15 @@ protected boolean restoreMasters(ClusterMetrics initial, ClusterMetrics current) //check whether current master has changed final ServerName initMaster = initial.getMasterName(); if (!ServerName.isSameAddress(initMaster, current.getMasterName())) { - LOG.info("Restoring cluster - Initial active master : " + initMaster.getAddress() + - " has changed to : " + current.getMasterName().getAddress()); + LOG.info("Restoring cluster - Initial active master : {} has changed to : {}", + initMaster.getAddress(), current.getMasterName().getAddress()); // If initial master is stopped, start it, before restoring the state. // It will come up as a backup master, if there is already an active master. try { if (!clusterManager.isRunning(ServiceType.HBASE_MASTER, initMaster.getHostname(), initMaster.getPort())) { - LOG.info("Restoring cluster - starting initial active master at:" - + initMaster.getAddress()); + LOG.info("Restoring cluster - starting initial active master at:{}", + initMaster.getAddress()); startMaster(initMaster.getHostname(), initMaster.getPort()); } @@ -370,11 +370,11 @@ protected boolean restoreMasters(ClusterMetrics initial, ClusterMetrics current) // 3. 
Start backup masters for (ServerName currentBackup : current.getBackupMasterNames()) { if (!ServerName.isSameAddress(currentBackup, initMaster)) { - LOG.info("Restoring cluster - stopping backup master: " + currentBackup); + LOG.info("Restoring cluster - stopping backup master: {}", currentBackup); stopMaster(currentBackup); } } - LOG.info("Restoring cluster - stopping active master: " + current.getMasterName()); + LOG.info("Restoring cluster - stopping active master: {}", current.getMasterName()); stopMaster(current.getMasterName()); waitForActiveAndReadyMaster(); // wait so that active master takes over } catch (IOException ex) { @@ -390,8 +390,8 @@ protected boolean restoreMasters(ClusterMetrics initial, ClusterMetrics current) if (!clusterManager.isRunning(ServiceType.HBASE_MASTER, backup.getHostname(), backup.getPort())) { - LOG.info("Restoring cluster - starting initial backup master: " - + backup.getAddress()); + LOG.info("Restoring cluster - starting initial backup master: {}", + backup.getAddress()); startMaster(backup.getHostname(), backup.getPort()); } } catch (IOException ex) { @@ -415,7 +415,7 @@ protected boolean restoreMasters(ClusterMetrics initial, ClusterMetrics current) for (ServerName sn:toStart) { try { if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, sn.getHostname(), sn.getPort())) { - LOG.info("Restoring cluster - starting initial backup master: " + sn.getAddress()); + LOG.info("Restoring cluster - starting initial backup master: {}", sn.getAddress()); startMaster(sn.getHostname(), sn.getPort()); } } catch (IOException ex) { @@ -426,7 +426,7 @@ protected boolean restoreMasters(ClusterMetrics initial, ClusterMetrics current) for (ServerName sn:toKill) { try { if(clusterManager.isRunning(ServiceType.HBASE_MASTER, sn.getHostname(), sn.getPort())) { - LOG.info("Restoring cluster - stopping backup master: " + sn.getAddress()); + LOG.info("Restoring cluster - stopping backup master: {}", sn.getAddress()); stopMaster(sn); } } catch 
(IOException ex) { @@ -435,8 +435,8 @@ protected boolean restoreMasters(ClusterMetrics initial, ClusterMetrics current) } } if (!deferred.isEmpty()) { - LOG.warn("Restoring cluster - restoring region servers reported " - + deferred.size() + " errors:"); + LOG.warn("Restoring cluster - restoring region servers reported {} errors:", + deferred.size()); for (int i=0; i fromServers, List toServers, @@ -263,7 +263,7 @@ protected void unbalanceRegions(ClusterMetrics clusterStatus, // Ugh. List regions = new LinkedList<>(serverLoad.getRegionMetrics().keySet()); int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); - LOG.debug("Removing " + victimRegionCount + " regions from " + sn); + LOG.debug("Removing {} regions from {}", victimRegionCount, sn); for (int i = 0; i < victimRegionCount; ++i) { int victimIx = RandomUtils.nextInt(0, regions.size()); String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); @@ -271,8 +271,8 @@ protected void unbalanceRegions(ClusterMetrics clusterStatus, } } - LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() - + " servers to " + toServers.size() + " different servers"); + LOG.info("Moving {} regions from {} servers to {} different servers", victimRegions.size(), + fromServers.size(), toServers.size()); Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin(); for (byte[] victimRegion : victimRegions) { // Don't keep moving regions if we're @@ -304,7 +304,7 @@ protected void setBalancer(boolean onOrOff, boolean synchronous) throws Exceptio try { result = admin.balancerSwitch(onOrOff, synchronous); } catch (Exception e) { - LOG.warn("Got exception while switching balancee ", e); + LOG.warn("Got exception while switching balance ", e); } if (!result) { LOG.error("Balancer switch didn't succeed"); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java index 00c30987a982..fd2c36464bfa 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java @@ -57,7 +57,7 @@ public void perform() throws Exception { LOG.info("Loading " + server); rm.load(); } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { - LOG.info("Problem restarting but presume successful; code=" + e.getExitCode(), e); + LOG.info("Problem restarting but presume successful; code={}", e.getExitCode(), e); } sleep(RandomUtils.nextInt(0, (int)sleepTime)); } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java index 78af04da308e..975846a7d6d3 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.util.Shell; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -88,7 +89,7 @@ enum SuspendOrResume { server = serversToBeSuspended.remove(); try { suspendRs(server); - } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + } catch (Shell.ExitCodeException e) { LOG.info("Problem suspending but presume successful; code=" + e.getExitCode(), e); } suspendedServers.add(server); @@ -97,7 +98,7 @@ enum SuspendOrResume { server = suspendedServers.remove(); try { resumeRs(server); - } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + } catch (Shell.ExitCodeException e) { 
LOG.info("Problem resuming, will retry; code=" + e.getExitCode(), e); } break; From a86975ab245db057f42c6dbe30c9e7845ca43952 Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Wed, 25 Sep 2019 16:45:02 +0200 Subject: [PATCH 4/5] HBASE-22982 reverse formatting for otherwise untouched code additional logging added typos fixed --- .../hadoop/hbase/chaos/actions/Action.java | 6 +- .../GracefulRollingRestartRsAction.java | 17 +- .../actions/RestartActionBaseAction.java | 14 +- .../RollingBatchSuspendResumeRsAction.java | 9 +- .../factories/ServerKillingMonkeyFactory.java | 11 +- .../SlowDeterministicMonkeyFactory.java | 150 +++++++++--------- .../StressAssignmentManagerMonkeyFactory.java | 46 +++--- .../apache/hadoop/hbase/MiniHBaseCluster.java | 4 +- 8 files changed, 132 insertions(+), 125 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java index ff1e65cc1fe5..b0e7078397ec 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java @@ -300,15 +300,11 @@ protected void forceBalancer() throws Exception { protected void setBalancer(boolean onOrOff, boolean synchronous) throws Exception { Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin(); - boolean result = false; try { - result = admin.balancerSwitch(onOrOff, synchronous); + admin.balancerSwitch(onOrOff, synchronous); } catch (Exception e) { LOG.warn("Got exception while switching balance ", e); } - if (!result) { - LOG.error("Balancer switch didn't succeed"); - } } public Configuration getConf() { diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java index fd2c36464bfa..82005bbbd4e7 100644 --- 
a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/GracefulRollingRestartRsAction.java @@ -24,11 +24,12 @@ import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.RegionMover; +import org.apache.hadoop.util.Shell; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Gracefully restarts every non-admin regionserver in a rolling fashion. At each step, it unloads, + * Gracefully restarts every regionserver in a rolling fashion. At each step, it unloads, * restarts the loads every rs server sleeping randomly (0-sleepTime) in between servers. */ public class GracefulRollingRestartRsAction extends RestartActionBaseAction { @@ -44,25 +45,25 @@ public void perform() throws Exception { List selectedServers = selectServers(); LOG.info("Disabling balancer to make unloading possible"); - setBalancer(false, false); + setBalancer(false, true); - for(ServerName server : selectedServers){ + for (ServerName server : selectedServers) { String rsName = server.getAddress().toString(); try (RegionMover rm = new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) { - LOG.info("Unloading " + server); + LOG.info("Unloading {}", server); rm.unload(); - LOG.info("Restarting " + server); + LOG.info("Restarting {}", server); gracefulRestartRs(server, sleepTime); - LOG.info("Loading " + server); + LOG.info("Loading {}", server); rm.load(); - } catch (org.apache.hadoop.util.Shell.ExitCodeException e) { + } catch (Shell.ExitCodeException e) { LOG.info("Problem restarting but presume successful; code={}", e.getExitCode(), e); } sleep(RandomUtils.nextInt(0, (int)sleepTime)); } LOG.info("Enabling balancer"); - setBalancer(true, false); + setBalancer(true, true); } protected List selectServers() throws IOException { diff --git 
a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java index 5f39c7fd7640..d964d6272d59 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java @@ -45,13 +45,15 @@ void restartMaster(ServerName server, long sleepTime) throws IOException { return; } + LOG.info("Killing master: {}", server); killMaster(server); sleep(sleepTime); + LOG.info("Starting master: {}", server); startMaster(server); } /** - * Stop and then restart the region server instaedof killing it. + * Stop and then restart the region server instead of killing it. * @param server hostname to restart the regionserver on * @param sleepTime number of milliseconds between stop and restart * @throws IOException if something goes wrong @@ -62,8 +64,10 @@ void gracefulRestartRs(ServerName server, long sleepTime) throws IOException { if (context.isStopping()) { return; } + LOG.info("Stopping region server: {}", server); stopRs(server); sleep(sleepTime); + LOG.info("Starting region server: {}", server); startRs(server); } @@ -73,8 +77,10 @@ void restartRs(ServerName server, long sleepTime) throws IOException { if (context.isStopping()) { return; } + LOG.info("Killing region server: {}", server); killRs(server); sleep(sleepTime); + LOG.info("Starting region server: {}", server); startRs(server); } @@ -84,8 +90,10 @@ void restartZKNode(ServerName server, long sleepTime) throws IOException { if (context.isStopping()) { return; } + LOG.info("Killing zookeeper node: {}", server); killZKNode(server); sleep(sleepTime); + LOG.info("Starting zookeeper node: {}", server); startZKNode(server); } @@ -95,8 +103,10 @@ void restartDataNode(ServerName server, long sleepTime) throws IOException { if (context.isStopping()) { return; } + LOG.info("Killing data 
node: {}", server); killDataNode(server); sleep(sleepTime); + LOG.info("Starting data node: {}", server); startDataNode(server); } @@ -106,8 +116,10 @@ void restartNameNode(ServerName server, long sleepTime) throws IOException { if (context.isStopping()) { return; } + LOG.info("Killing name node: {}", server); killNameNode(server); sleep(sleepTime); + LOG.info("Starting name node: {}", server); startNameNode(server); } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java index 975846a7d6d3..d4ad3e40b5f8 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchSuspendResumeRsAction.java @@ -58,7 +58,8 @@ enum SuspendOrResume { SUSPEND, RESUME } - @Override public void perform() throws Exception { + @Override + public void perform() throws Exception { LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", (int) (ratio * 100))); List selectedServers = selectServers(); @@ -90,7 +91,7 @@ enum SuspendOrResume { try { suspendRs(server); } catch (Shell.ExitCodeException e) { - LOG.info("Problem suspending but presume successful; code=" + e.getExitCode(), e); + LOG.warn("Problem suspending but presume successful; code={}", e.getExitCode(), e); } suspendedServers.add(server); break; @@ -99,12 +100,12 @@ enum SuspendOrResume { try { resumeRs(server); } catch (Shell.ExitCodeException e) { - LOG.info("Problem resuming, will retry; code=" + e.getExitCode(), e); + LOG.info("Problem resuming, will retry; code={}", e.getExitCode(), e); } break; } - LOG.info("Sleeping for:" + sleepTime); + LOG.info("Sleeping for:{}", sleepTime); Threads.sleep(sleepTime); } } diff --git 
a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java index 2a9394d26cc4..4a5f69e8c229 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java @@ -48,11 +48,10 @@ public ChaosMonkey build() { // Destructive actions to mess things around. Cannot run batch restart Action[] actions1 = new Action[] { - new RestartRandomRsExceptMetaAction(60000), - new RestartActiveMasterAction(5000), - //only allow 2 servers to be dead - new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), - new ForceBalancerAction(), + new RestartRandomRsExceptMetaAction(60000), + new RestartActiveMasterAction(5000), + new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), //only allow 2 servers to be dead + new ForceBalancerAction(), new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) @@ -60,7 +59,7 @@ public ChaosMonkey build() { // Action to log more info for debugging Action[] actions2 = new Action[] { - new DumpClusterStatusAction() + new DumpClusterStatusAction() }; return new PolicyBasedChaosMonkey(util, diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java index 784a6ce45b38..22c35b96b957 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java @@ -79,42 +79,42 @@ public ChaosMonkey build() { // move one region around. 
They are not so destructive, // can be executed more frequently. Action[] actions1 = new Action[] { - new CompactTableAction(tableName, compactTableRatio), - new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio), - new FlushTableAction(tableName), - new FlushRandomRegionOfTableAction(tableName), - new MoveRandomRegionOfTableAction(tableName) + new CompactTableAction(tableName, compactTableRatio), + new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName), + new MoveRandomRegionOfTableAction(tableName) }; // Actions such as split/merge/snapshot. // They should not cause data loss, or unreliability // such as region stuck in transition. Action[] actions2 = new Action[] { - new SplitRandomRegionOfTableAction(tableName), - new MergeRandomAdjacentRegionsOfTableAction(tableName), - new SnapshotTableAction(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName, columnFamilies), - new ChangeEncodingAction(tableName), - new ChangeCompressionAction(tableName), - new ChangeBloomFilterAction(tableName), - new ChangeVersionsAction(tableName), - new ChangeSplitPolicyAction(tableName), + new SplitRandomRegionOfTableAction(tableName), + new MergeRandomAdjacentRegionsOfTableAction(tableName), + new SnapshotTableAction(tableName), + new AddColumnAction(tableName), + new RemoveColumnAction(tableName, columnFamilies), + new ChangeEncodingAction(tableName), + new ChangeCompressionAction(tableName), + new ChangeBloomFilterAction(tableName), + new ChangeVersionsAction(tableName), + new ChangeSplitPolicyAction(tableName), }; // Destructive actions to mess things around. 
Action[] actions3 = new Action[] { - new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, - tableName), - new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName), - new RestartRandomRsAction(restartRandomRSSleepTime), - new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio), - new RestartActiveMasterAction(restartActiveMasterSleepTime), - new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, - rollingBatchRestartRSRatio), - new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), - new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), - new SplitAllRegionOfTableAction(tableName), + new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, + tableName), + new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName), + new RestartRandomRsAction(restartRandomRSSleepTime), + new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio), + new RestartActiveMasterAction(restartActiveMasterSleepTime), + new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, + rollingBatchRestartRSRatio), + new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), + new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), + new SplitAllRegionOfTableAction(tableName), new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) @@ -122,7 +122,7 @@ public ChaosMonkey build() { // Action to log more info for debugging Action[] actions4 = new Action[] { - new DumpClusterStatusAction() + new DumpClusterStatusAction() }; return new PolicyBasedChaosMonkey(util, @@ -136,54 +136,54 @@ public ChaosMonkey build() { private void loadProperties() { - action1Period = Long.parseLong(this.properties.getProperty( - MonkeyConstants.PERIODIC_ACTION1_PERIOD, - MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); - action2Period = 
Long.parseLong(this.properties.getProperty( - MonkeyConstants.PERIODIC_ACTION2_PERIOD, - MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + "")); - action3Period = Long.parseLong(this.properties.getProperty( - MonkeyConstants.COMPOSITE_ACTION3_PERIOD, - MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD + "")); - action4Period = Long.parseLong(this.properties.getProperty( - MonkeyConstants.PERIODIC_ACTION4_PERIOD, - MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD + "")); - moveRegionsMaxTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.MOVE_REGIONS_MAX_TIME, - MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME + "")); - moveRegionsSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.MOVE_REGIONS_SLEEP_TIME, - MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME + "")); - moveRandomRegionSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME, - MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME + "")); - restartRandomRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME + "")); - batchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME + "")); - batchRestartRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO + "")); - restartActiveMasterSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME, - MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + "")); - rollingBatchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); - rollingBatchRestartRSRatio = 
Float.parseFloat(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); - restartRsHoldingMetaSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME, - MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME + "")); - compactTableRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.COMPACT_TABLE_ACTION_RATIO, - MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO + "")); - compactRandomRegionRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.COMPACT_RANDOM_REGION_RATIO, - MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO + "")); + action1Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION1_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); + action2Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION2_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + "")); + action3Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.COMPOSITE_ACTION3_PERIOD, + MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD + "")); + action4Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION4_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD + "")); + moveRegionsMaxTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.MOVE_REGIONS_MAX_TIME, + MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME + "")); + moveRegionsSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.MOVE_REGIONS_SLEEP_TIME, + MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME + "")); + moveRandomRegionSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME, + MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME + "")); + restartRandomRSSleepTime = Long.parseLong(this.properties.getProperty( + 
MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME + "")); + batchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME + "")); + batchRestartRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO + "")); + restartActiveMasterSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME, + MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + "")); + rollingBatchRestartRSSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + rollingBatchRestartRSRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + restartRsHoldingMetaSleepTime = Long.parseLong(this.properties.getProperty( + MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME, + MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME + "")); + compactTableRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.COMPACT_TABLE_ACTION_RATIO, + MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO + "")); + compactRandomRegionRatio = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.COMPACT_RANDOM_REGION_RATIO, + MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO + "")); decreaseHFileSizeSleepTime = Long.parseLong(this.properties.getProperty( MonkeyConstants.DECREASE_HFILE_SIZE_SLEEP_TIME, MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME + "")); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java index 4ae980579ecf..4e304fbd2a6c 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java @@ -57,32 +57,30 @@ public ChaosMonkey build() { // Actions that could slow down region movement. // These could also get regions stuck if there are issues. Action[] actions1 = new Action[]{ - new CompactTableAction(tableName, 0.5f), - new CompactRandomRegionOfTableAction(tableName, 0.6f), - new FlushTableAction(tableName), - new FlushRandomRegionOfTableAction(tableName) + new CompactTableAction(tableName, 0.5f), + new CompactRandomRegionOfTableAction(tableName, 0.6f), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName) }; Action[] actions2 = new Action[]{ - new SplitRandomRegionOfTableAction(tableName), - new MergeRandomAdjacentRegionsOfTableAction(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName, columnFamilies), - new MoveRegionsOfTableAction(MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME, - 1600, - tableName), - new MoveRandomRegionOfTableAction(MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME, - tableName), - new RestartRandomRsAction(MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME), - new BatchRestartRsAction(MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - 0.5f), - new RollingBatchRestartRsAction(MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME, - 1.0f), - new RestartRsHoldingMetaAction(MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME), - new ChangeSplitPolicyAction(tableName), - new SplitAllRegionOfTableAction(tableName), - new DecreaseMaxHFileSizeAction(MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME, - tableName), + new SplitRandomRegionOfTableAction(tableName), + new 
MergeRandomAdjacentRegionsOfTableAction(tableName), + new AddColumnAction(tableName), + new RemoveColumnAction(tableName, columnFamilies), + new MoveRegionsOfTableAction(MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME, + 1600, + tableName), + new MoveRandomRegionOfTableAction(MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME, + tableName), + new RestartRandomRsAction(MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME), + new BatchRestartRsAction(MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME, 0.5f), + new RollingBatchRestartRsAction(MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME, 1.0f), + new RestartRsHoldingMetaAction(MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME), + new ChangeSplitPolicyAction(tableName), + new SplitAllRegionOfTableAction(tableName), + new DecreaseMaxHFileSizeAction(MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME, + tableName), new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio) @@ -90,7 +88,7 @@ public ChaosMonkey build() { // Action to log more info for debugging Action[] actions3 = new Action[]{ - new DumpClusterStatusAction() + new DumpClusterStatusAction() }; return new PolicyBasedChaosMonkey(util, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java index 666a65b9f6f5..948ba1f2c4e3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java @@ -507,7 +507,7 @@ public JVMClusterUtil.RegionServerThread stopRegionServer(int serverNumber, public JVMClusterUtil.RegionServerThread suspendRegionServer(int serverNumber) { JVMClusterUtil.RegionServerThread server = hbaseCluster.getRegionServers().get(serverNumber); - LOG.info("Suspending " + server.toString()); + 
LOG.info("Suspending {}", server.toString()); server.suspend(); return server; } @@ -520,7 +520,7 @@ public JVMClusterUtil.RegionServerThread suspendRegionServer(int serverNumber) { public JVMClusterUtil.RegionServerThread resumeRegionServer(int serverNumber) { JVMClusterUtil.RegionServerThread server = hbaseCluster.getRegionServers().get(serverNumber); - LOG.info("Resuming " + server.toString()); + LOG.info("Resuming {}", server.toString()); server.resume(); return server; } From 6285cb1031c6fb0d32cf1761db2efcf9637fc4de Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Thu, 26 Sep 2019 09:43:17 +0200 Subject: [PATCH 5/5] HBASE-22982 checkstyle fix --- .../hbase/chaos/factories/ServerKillingMonkeyFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java index 4a5f69e8c229..68d11f9a6405 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java @@ -51,7 +51,7 @@ public ChaosMonkey build() { new RestartRandomRsExceptMetaAction(60000), new RestartActiveMasterAction(5000), new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), //only allow 2 servers to be dead - new ForceBalancerAction(), + new ForceBalancerAction(), new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, rollingBatchSuspendtRSRatio)