From c7f6bc7976e9402b1549894bc81cd0fd8471f4ad Mon Sep 17 00:00:00 2001
From: Bukros Szabolcs
Date: Mon, 30 Sep 2019 11:16:13 +0200
Subject: [PATCH 1/4] HBASE-23085: Network and Data related Actions

Add monkey actions:
- manipulate network packages with tc (reorder, lose,...)
- add CPU load
- fill the disk
- corrupt or delete regionserver data files

Create monkey factories for the new actions
Extend HBaseClusterManager to allow sudo calls
Fix a copy/paste issue with monkey constants in some factories
---
 .../hadoop/hbase/HBaseClusterManager.java     |  86 +++++++++++++
 .../hbase/chaos/actions/AddCPULoadAction.java |  69 +++++++++++
 .../hbase/chaos/actions/CommandAction.java    |  70 +++++++++++
 .../chaos/actions/CorruptDataFilesAction.java |  69 +++++++++++
 .../actions/CorruptPackagesCommandAction.java |  70 +++++++++++
 .../actions/DelayPackagesCommandAction.java   |  71 +++++++++++
 .../chaos/actions/DeleteDataFilesAction.java  |  61 +++++++++
 .../DuplicatePackagesCommandAction.java       |  70 +++++++++++
 .../chaos/actions/FillDiskCommandAction.java  |  83 +++++++++++++
 .../actions/LosePackagesCommandAction.java    |  70 +++++++++++
 .../actions/ReorderPackagesCommandAction.java |  74 +++++++++++
 .../factories/DataIssuesMonkeyFactory.java    |  72 +++++++++++
 .../DistributedIssuesMonkeyFactory.java       | 117 ++++++++++++++++++
 .../chaos/factories/MonkeyConstants.java      |  23 ++++
 .../hbase/chaos/factories/MonkeyFactory.java  |   4 +
 ...erAndDependenciesKillingMonkeyFactory.java |   8 +-
 .../factories/ServerKillingMonkeyFactory.java |   8 +-
 .../SlowDeterministicMonkeyFactory.java       |   8 +-
 .../StressAssignmentManagerMonkeyFactory.java |   8 +-
 19 files changed, 1025 insertions(+), 16 deletions(-)
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DataIssuesMonkeyFactory.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java

diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index f7c2fc652805..d44630ca405f 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -62,6 +62,15 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
       "timeout 30 /usr/bin/ssh %1$s %2$s%3$s%4$s \"sudo -u %6$s %5$s\"";
   private String tunnelCmd;
 
+  /**
+   * The 
command format that is used to execute the remote command with sudo. Arguments: + * 1 SSH options, 2 user name , 3 "@" if username is set, 4 host, + * 5 original command, 6 timeout. + */ + private static final String DEFAULT_TUNNEL_SUDO_CMD = + "timeout %6$s /usr/bin/ssh %1$s %2$s%3$s%4$s \"sudo %5$s\""; + private String tunnelSudoCmd; + private static final String RETRY_ATTEMPTS_KEY = "hbase.it.clustermanager.retry.attempts"; private static final int DEFAULT_RETRY_ATTEMPTS = 5; @@ -86,6 +95,7 @@ public void setConf(Configuration conf) { sshOptions = (sshOptions == null) ? "" : sshOptions; sshUserName = (sshUserName == null) ? "" : sshUserName; tunnelCmd = conf.get("hbase.it.clustermanager.ssh.cmd", DEFAULT_TUNNEL_CMD); + tunnelSudoCmd = conf.get("hbase.it.clustermanager.ssh.sudo.cmd", DEFAULT_TUNNEL_SUDO_CMD); // Print out ssh special config if any. if ((sshUserName != null && sshUserName.length() > 0) || (sshOptions != null && sshOptions.length() > 0)) { @@ -159,6 +169,38 @@ public void execute() throws IOException { } } + /** + * Executes commands over SSH + */ + protected class RemoteSudoShell extends Shell.ShellCommandExecutor { + private String hostname; + + public RemoteSudoShell(String hostname, String[] execString, long timeout) { + this(hostname, execString, null, null, timeout); + } + + public RemoteSudoShell(String hostname, String[] execString, File dir, Map env, + long timeout) { + super(execString, dir, env, timeout); + this.hostname = hostname; + } + + @Override + public String[] getExecString() { + String at = sshUserName.isEmpty() ? "" : "@"; + String remoteCmd = StringUtils.join(super.getExecString(), " "); + String cmd = String.format(tunnelSudoCmd, sshOptions, sshUserName, at, hostname, remoteCmd, + timeOutInterval/1000f); + LOG.info("Executing full command [" + cmd + "]"); + return new String[] { "/usr/bin/env", "bash", "-c", cmd }; + } + + @Override + public void execute() throws IOException { + super.execute(); + } + } + /** * Provides command strings for services to be executed by Shell. CommandProviders are * pluggable, and different deployments(windows, bigtop, etc) can be managed by @@ -336,6 +378,50 @@ private Pair execWithRetries(String hostname, ServiceType servi } } + /** + * Execute the given command on the host using SSH + * @return pair of exit code and command output + * @throws IOException if something goes wrong. + */ + public Pair execSudo(String hostname, long timeout, String... cmd) + throws IOException { + LOG.info("Executing remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname); + + RemoteSudoShell shell = new RemoteSudoShell(hostname, cmd, timeout); + try { + shell.execute(); + } catch (Shell.ExitCodeException ex) { + // capture the stdout of the process as well. + String output = shell.getOutput(); + // add output for the ExitCodeException. + throw new Shell.ExitCodeException(ex.getExitCode(), "stderr: " + ex.getMessage() + + ", stdout: " + output); + } + + LOG.info("Executed remote command, exit code:" + shell.getExitCode() + + " , output:" + shell.getOutput()); + + return new Pair<>(shell.getExitCode(), shell.getOutput()); + } + + public Pair execSudoWithRetries(String hostname, long timeout, String... 
cmd) + throws IOException { + RetryCounter retryCounter = retryCounterFactory.create(); + while (true) { + try { + return execSudo(hostname, timeout, cmd); + } catch (IOException e) { + retryOrThrow(retryCounter, e, hostname, cmd); + } + try { + retryCounter.sleepUntilNextRetry(); + } catch (InterruptedException ex) { + // ignore + LOG.warn("Sleep Interrupted:" + ex); + } + } + } + private void retryOrThrow(RetryCounter retryCounter, E ex, String hostname, String[] cmd) throws E { if (retryCounter.shouldRetry()) { diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java new file mode 100644 index 000000000000..6e4003d56205 --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Action that adds high cpu load to a random regionserver for a given duration + */ +public class AddCPULoadAction extends CommandAction { + protected static final Logger LOG = LoggerFactory.getLogger(AddCPULoadAction.class); + private static final String CPU_LOAD_COMMAND = + "seq 1 %s | xargs -I{} -n 1 -P %s timeout %s dd if=/dev/urandom of=/dev/null bs=1M " + + "iflag=fullblock"; + + private final long duration; + private long processes; + + /** + * Add high load to cpu + * + * @param duration Duration that this thread should generate the load for in miliseconds + * @param processes The number of parallel processes, should be equal to cpu threads for max load + */ + public AddCPULoadAction(long duration, long processes, long timeout) { + super(timeout); + this.duration = duration; + this.processes = processes; + } + + protected void localPerform() throws IOException { + LOG.info("Starting to execute AddCPULoadAction"); + ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); + String hostname = server.getHostname(); + + try { + clusterManager.execSudo(hostname, timeout, getCommand()); + } catch (IOException ex){ + //This will always happen. 
We use timeout to kill a continously running process + //after the duration expires + } + LOG.info("Finished to execute AddCPULoadAction"); + } + + private String getCommand(){ + return String.format(CPU_LOAD_COMMAND, processes, processes, duration/1000f); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java new file mode 100644 index 000000000000..91b0820f099c --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.DistributedHBaseCluster; +import org.apache.hadoop.hbase.HBaseCluster; +import org.apache.hadoop.hbase.HBaseClusterManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base class for performing Actions based on linux commands requiring sudo privileges + */ +abstract public class CommandAction extends Action { + private static final Logger LOG = LoggerFactory.getLogger(CommandAction.class); + + protected long timeout; + protected HBaseClusterManager clusterManager; + + public CommandAction(long timeout) { + this.timeout = timeout; + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + HBaseCluster cluster = context.getHBaseCluster(); + if(cluster != null && cluster instanceof DistributedHBaseCluster){ + Object manager = ((DistributedHBaseCluster)cluster).getClusterManager(); + if(manager != null && manager instanceof HBaseClusterManager){ + clusterManager = (HBaseClusterManager) manager; + } + } + } + + @Override + public void perform() throws Exception { + if(clusterManager == null){ + LOG.info("Couldn't perform command action, it requires a distributed cluster."); + return; + } + + // Don't try the modify if we're stopping + if (context.isStopping()) { + return; + } + + localPerform(); + } + + abstract protected void localPerform() throws IOException; +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java new file mode 100644 index 000000000000..1ad52217c15f --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Action corrupts region server data.
+ */
+public class CorruptDataFilesAction extends Action {
+  private static final Logger LOG = LoggerFactory.getLogger(CorruptDataFilesAction.class);
+  private float chance;
+
+  /**
+   * Corrupts region server data files with a certain chance
+   * @param chance chance to corrupt any given data file (0.5 => 50%)
+   */
+  public CorruptDataFilesAction(float chance) {
+    this.chance = chance * 100;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Start corrupting data files");
+    FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
+    Path rootDir = CommonFSUtils.getRootDir(getConf());
+    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(rootDir, true);
+    while (iterator.hasNext()){
+      LocatedFileStatus status = iterator.next();
+      if(RandomUtils.nextFloat(0, 100) > chance){
+        continue;
+      }
+
+      FSDataOutputStream out = fs.create(status.getPath(), true);
+      try {
+        out.write(0);
+      } finally {
+        out.close();
+      }
+      LOG.info("Corrupting {}", status.getPath());
+    }
+    LOG.info("Done corrupting data files");
+  }
+
+}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java
new file mode 100644
index 000000000000..4fc6bb5e26b4
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * + * Corrupt network packages on a random regionserver. + */ +public class CorruptPackagesCommandAction extends CommandAction { + private static final Logger LOG = LoggerFactory.getLogger(CorruptPackagesCommandAction.class); + private float ratio; + private long duration; + + /** + * Corrupt network packages on a random regionserver. + * + * @param ratio the ratio of packages corrupted + * @param duration the time this issue persists in milliseconds + * @param timeout the timeout for executing required commands on the region server in milliseconds + */ + public CorruptPackagesCommandAction(float ratio, long duration, long timeout) { + super(timeout); + this.ratio = ratio; + this.duration = duration; + } + + protected void localPerform() throws IOException { + LOG.info("Starting to execute CorruptPackagesCommandAction"); + ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); + String hostname = server.getHostname(); + + try { + clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + Thread.sleep(duration); + } catch (InterruptedException e) { + LOG.debug("Failed to run the command for the full duration", e); + } finally { + clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + } + + LOG.info("Finished to execute CorruptPackagesCommandAction"); + } + + private String getCommand(String operation){ + return String.format("tc qdisc %s dev eth0 root netem corrupt %s%%", operation, ratio * 100); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java new file mode 100644 index 000000000000..c3b982f13eba --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Action adds latency to communication on a random regionserver. 
+ */
+public class DelayPackagesCommandAction extends CommandAction {
+  private static final Logger LOG = LoggerFactory.getLogger(DelayPackagesCommandAction.class);
+  private long delay;
+  private long duration;
+
+  /**
+   * Adds latency to communication on a random region server
+   *
+   * @param delay the latency will be delay +/-50% in milliseconds
+   * @param duration the time this issue persists in milliseconds
+   * @param timeout the timeout for executing required commands on the region server in milliseconds
+   */
+  public DelayPackagesCommandAction(long delay, long duration, long timeout) {
+    super(timeout);
+    this.delay = delay;
+    this.duration = duration;
+  }
+
+  protected void localPerform() throws IOException {
+    LOG.info("Starting to execute DelayPackagesCommandAction");
+    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
+    String hostname = server.getHostname();
+
+    String base = getConf().get("hbase.home.dir");
+    try {
+      clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add"));
+      Thread.sleep(duration);
+    } catch (InterruptedException e) {
+      LOG.debug("Failed to run the command for the full duration", e);
+    } finally {
+      clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del"));
+    }
+
+    LOG.info("Finished to execute DelayPackagesCommandAction");
+  }
+
+  private String getCommand(String operation){
+    return String.format("tc qdisc %s dev eth0 root netem delay %sms %sms",
+        operation, delay, delay/2);
+  }
+}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java
new file mode 100644
index 000000000000..0bd2fd2468eb
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Action deletes region server data.
+ */
+public class DeleteDataFilesAction extends Action {
+  private static final Logger LOG = LoggerFactory.getLogger(DeleteDataFilesAction.class);
+  private float chance;
+
+  /**
+   * Deletes region server data files with a certain chance
+   * @param chance chance to delete any given data file (0.5 => 50%)
+   */
+  public DeleteDataFilesAction(float chance) {
+    this.chance = chance * 100;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Start deleting data files");
+    FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
+    Path rootDir = CommonFSUtils.getRootDir(getConf());
+    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(rootDir, true);
+    while (iterator.hasNext()){
+      LocatedFileStatus status = iterator.next();
+      if(RandomUtils.nextFloat(0, 100) > chance){
+        continue;
+      }
+      fs.delete(status.getPath());
+      LOG.info("Deleting {}", status.getPath());
+    }
+    LOG.info("Done deleting data files");
+  }
+}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java
new file mode 100644
index 000000000000..d57a3d7d2928
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ *
+ * Duplicate network packages on a random regionserver.
+ */
+public class DuplicatePackagesCommandAction extends CommandAction {
+  private static final Logger LOG = LoggerFactory.getLogger(DuplicatePackagesCommandAction.class);
+  private float ratio;
+  private long duration;
+
+  /**
+   * Duplicate network packages on a random regionserver.
+ * + * @param ratio the ratio of packages duplicated + * @param duration the time this issue persists in milliseconds + * @param timeout the timeout for executing required commands on the region server in milliseconds + */ + public DuplicatePackagesCommandAction(float ratio, long duration, long timeout) { + super(timeout); + this.ratio = ratio; + this.duration = duration; + } + + protected void localPerform() throws IOException { + LOG.info("Starting to execute DuplicatePackagesCommandAction"); + ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); + String hostname = server.getHostname(); + + try { + clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + Thread.sleep(duration); + } catch (InterruptedException e) { + LOG.debug("Failed to run the command for the full duration", e); + } finally { + clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + } + + LOG.info("Finished to execute DuplicatePackagesCommandAction"); + } + + private String getCommand(String operation){ + return String.format("tc qdisc %s dev eth0 root netem duplicate %s%%", operation, ratio * 100); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java new file mode 100644 index 000000000000..0de1a32ea26b --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * + * Fill the disk on a random regionserver. + */ +public class FillDiskCommandAction extends CommandAction { + private static final Logger LOG = LoggerFactory.getLogger(FillDiskCommandAction.class); + private long size; + private long duration; + private String path; + + /** + * Fill the disk on a random regionserver. + * Please note that the file will be created regardless of the set duration or timeout. + * So please use timeout and duration big enough to avoid complication caused by retries. 
+ * + * @param size size of the generated file in MB or fill the disk if set to 0 + * @param duration the time this issue persists in milliseconds + * @param path the path to the generated file + * @param timeout the timeout for executing required commands on the region server in milliseconds + */ + public FillDiskCommandAction(long size, long duration, String path, long timeout) { + super(timeout); + this.size = size; + this.duration = duration; + this.path = path; + } + + protected void localPerform() throws IOException { + LOG.info("Starting to execute FillDiskCommandAction"); + ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); + String hostname = server.getHostname(); + + try { + clusterManager.execSudoWithRetries(hostname, timeout, getFillCommand()); + Thread.sleep(duration); + } catch (InterruptedException e) { + LOG.debug("Failed to run the command for the full duration", e); + } finally { + clusterManager.execSudoWithRetries(hostname, timeout, getClearCommand()); + } + + LOG.info("Finished to execute FillDiskCommandAction"); + } + + private String getFillCommand(){ + if (size == 0){ + return String.format("dd if=/dev/urandom of=%s/garbage bs=1M iflag=fullblock", path); + } + return String.format("dd if=/dev/urandom of=%s/garbage bs=1M count=%s iflag=fullblock", + path, size); + } + + private String getClearCommand(){ + return String.format("rm -f %s/garbage", path); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java new file mode 100644 index 000000000000..4278999442c1 --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * + * Lose network packages on a random regionserver. + */ +public class LosePackagesCommandAction extends CommandAction { + private static final Logger LOG = LoggerFactory.getLogger(LosePackagesCommandAction.class); + private float ratio; + private long duration; + + /** + * Lose network packages on a random regionserver. 
+ * + * @param ratio the ratio of packages lost + * @param duration the time this issue persists in milliseconds + * @param timeout the timeout for executing required commands on the region server in milliseconds + */ + public LosePackagesCommandAction(float ratio, long duration, long timeout) { + super(timeout); + this.ratio = ratio; + this.duration = duration; + } + + protected void localPerform() throws IOException { + LOG.info("Starting to execute LosePackagesCommandAction"); + ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); + String hostname = server.getHostname(); + + try { + clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + Thread.sleep(duration); + } catch (InterruptedException e) { + LOG.debug("Failed to run the command for the full duration", e); + } finally { + clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + } + + LOG.info("Finished to execute LosePackagesCommandAction"); + } + + private String getCommand(String operation){ + return String.format("tc qdisc %s dev eth0 root netem loss %s%%", operation, ratio * 100); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java new file mode 100644 index 000000000000..4939b623f5b2 --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * + * Reorder network packages on a random regionserver. + */ +public class ReorderPackagesCommandAction extends CommandAction { + private static final Logger LOG = LoggerFactory.getLogger(ReorderPackagesCommandAction.class); + private float ratio; + private long duration; + private long delay; + + /** + * Reorder network packages on a random regionserver. 
+   *
+   * @param ratio the ratio of packages reordered
+   * @param duration the time this issue persists in milliseconds
+   * @param delay the delay between reordered and non-reordered packages in milliseconds
+   * @param timeout the timeout for executing required commands on the region server in milliseconds
+   */
+  public ReorderPackagesCommandAction(float ratio, long duration, long delay, long timeout) {
+    super(timeout);
+    this.ratio = ratio;
+    this.duration = duration;
+    this.delay = delay;
+  }
+
+  protected void localPerform() throws IOException {
+    LOG.info("Starting to execute ReorderPackagesCommandAction");
+    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
+    String hostname = server.getHostname();
+
+    try {
+      clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add"));
+      Thread.sleep(duration);
+    } catch (InterruptedException e) {
+      LOG.debug("Failed to run the command for the full duration", e);
+    } finally {
+      clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del"));
+    }
+
+    LOG.info("Finished to execute ReorderPackagesCommandAction");
+  }
+
+  private String getCommand(String operation){
+    return String.format("tc qdisc %s dev eth0 root netem delay %sms reorder %s%% 50%%",
+        operation, delay, ratio * 100);
+  }
+}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DataIssuesMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DataIssuesMonkeyFactory.java
new file mode 100644
index 000000000000..a06a9779e414
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DataIssuesMonkeyFactory.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.factories;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.chaos.actions.CorruptDataFilesAction;
+import org.apache.hadoop.hbase.chaos.actions.DeleteDataFilesAction;
+import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
+
+/**
+ * A chaos monkey to delete and corrupt regionserver data, requires a user with
+ * passwordless ssh access to the cluster and sudo privileges.
+ * Highly destructive
+ */
+public class DataIssuesMonkeyFactory extends MonkeyFactory {
+
+  private long action1Period;
+  private long action2Period;
+
+  private float chanceToAct;
+
+  @Override
+  public ChaosMonkey build() {
+    loadProperties();
+
+    // Highly destructive actions to mess things around.
+ Action[] actions1 = new Action[] { + new DeleteDataFilesAction(chanceToAct), + new CorruptDataFilesAction(chanceToAct) + }; + + // Action to log more info for debugging + Action[] actions2 = new Action[] { + new DumpClusterStatusAction() + }; + + return new PolicyBasedChaosMonkey(util, + new PeriodicRandomActionPolicy(action1Period, actions1), + new PeriodicRandomActionPolicy(action2Period, actions2)); + } + + private void loadProperties() { + action1Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION1_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); + action2Period = Long.parseLong(this.properties.getProperty( + MonkeyConstants.PERIODIC_ACTION2_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + "")); + chanceToAct = Float.parseFloat(this.properties.getProperty( + MonkeyConstants.DATA_ISSUE_CHANCE, + MonkeyConstants.DEFAULT_DATA_ISSUE_CHANCE+ "")); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java new file mode 100644 index 000000000000..7ff3376555a7 --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.factories; + +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.hbase.chaos.actions.AddCPULoadAction; +import org.apache.hadoop.hbase.chaos.actions.CorruptPackagesCommandAction; +import org.apache.hadoop.hbase.chaos.actions.DelayPackagesCommandAction; +import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction; +import org.apache.hadoop.hbase.chaos.actions.DuplicatePackagesCommandAction; +import org.apache.hadoop.hbase.chaos.actions.FillDiskCommandAction; +import org.apache.hadoop.hbase.chaos.actions.LosePackagesCommandAction; +import org.apache.hadoop.hbase.chaos.actions.ReorderPackagesCommandAction; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; + +/** + * A chaos monkey to create distributed cluster related issues, requires a user with + * passwordless ssh access to the cluster and sudo privileges. + */ +public class DistributedIssuesMonkeyFactory extends MonkeyFactory { + + private long action1Period; + private long action2Period; + + private long cpuLoadDuration; + private long cpuLoadProcesses; + private long networkIssueTimeout; + private long networkIssueDuration; + private float networkIssueRation; + private long networkIssueDelay; + private long fillDiskTimeout; + private String fillDiskPath; + private long fillDiskFileSize; + private long fillDiskIssueduration; + + @Override public ChaosMonkey build() { + loadProperties(); + + Action[] actions1 = new Action[] { + new AddCPULoadAction(cpuLoadDuration, cpuLoadProcesses, networkIssueTimeout), + new CorruptPackagesCommandAction(networkIssueRation, networkIssueDuration, + networkIssueTimeout), + new DuplicatePackagesCommandAction(networkIssueRation, networkIssueDuration, + networkIssueTimeout), + new LosePackagesCommandAction(networkIssueRation, networkIssueDuration, + networkIssueTimeout), + new DelayPackagesCommandAction(networkIssueDelay, networkIssueDuration, + networkIssueTimeout), + new ReorderPackagesCommandAction(networkIssueRation, networkIssueDuration, + networkIssueDelay, networkIssueTimeout), + new FillDiskCommandAction(fillDiskFileSize, fillDiskIssueduration, fillDiskPath, + fillDiskTimeout)}; + + // Action to log more info for debugging + Action[] actions2 = new Action[] {new DumpClusterStatusAction()}; + + return new PolicyBasedChaosMonkey(util, new PeriodicRandomActionPolicy(action1Period, actions1), + new PeriodicRandomActionPolicy(action2Period, actions2)); + } + + private void loadProperties() { + action1Period = Long.parseLong(this.properties + .getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); + action2Period = Long.parseLong(this.properties + .getProperty(MonkeyConstants.PERIODIC_ACTION2_PERIOD, + MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + "")); + cpuLoadDuration = Long.parseLong(this.properties.getProperty( + MonkeyConstants.CPU_LOAD_DURATION, + MonkeyConstants.DEFAULT_CPU_LOAD_DURATION + "")); + cpuLoadProcesses = Long.parseLong(this.properties.getProperty( + 
MonkeyConstants.CPU_LOAD_PROCESSES, + MonkeyConstants.DEFAULT_CPU_LOAD_PROCESSES + "")); + networkIssueTimeout = Long.parseLong(this.properties + .getProperty(MonkeyConstants.NETWORK_ISSUE_COMMAND_TIMEOUT, + MonkeyConstants.DEFAULT_NETWORK_ISSUE_COMMAND_TIMEOUT + "")); + networkIssueDuration = Long.parseLong(this.properties + .getProperty(MonkeyConstants.NETWORK_ISSUE_DURATION, + MonkeyConstants.DEFAULT_NETWORK_ISSUE_DURATION + "")); + networkIssueRation = Float.parseFloat(this.properties + .getProperty(MonkeyConstants.NETWORK_ISSUE_RATIO, + MonkeyConstants.DEFAULT_NETWORK_ISSUE_RATIO + "")); + networkIssueDelay = Long.parseLong(this.properties + .getProperty(MonkeyConstants.NETWORK_ISSUE_DELAY, + MonkeyConstants.DEFAULT_NETWORK_ISSUE_DELAY + "")); + fillDiskTimeout = Long.parseLong(this.properties + .getProperty(MonkeyConstants.FILL_DISK_COMMAND_TIMEOUT, + MonkeyConstants.DEFAULT_FILL_DISK_COMMAND_TIMEOUT + "")); + fillDiskPath = this.properties + .getProperty(MonkeyConstants.FILL_DISK_PATH, + MonkeyConstants.DEFAULT_FILL_DISK_PATH + ""); + fillDiskFileSize = Long.parseLong(this.properties + .getProperty(MonkeyConstants.FILL_DISK_FILE_SIZE, + MonkeyConstants.DEFAULT_FILL_DISK_FILE_SIZE + "")); + fillDiskIssueduration = Long.parseLong(this.properties + .getProperty(MonkeyConstants.FILL_DISK_ISSUE_DURATION, + MonkeyConstants.DEFAULT_FILL_DISK_ISSUE_DURATION + "")); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java index 9051e98ff2c2..f9702c292bd8 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java @@ -48,6 +48,18 @@ public interface MonkeyConstants { String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time"; String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time"; String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio"; + String CPU_LOAD_DURATION = "cpu.load.duration"; + String CPU_LOAD_PROCESSES = "cpu.load.processes"; + String NETWORK_ISSUE_COMMAND_TIMEOUT = "network.issue.command.timeout"; + String NETWORK_ISSUE_DURATION = "network.issueduration"; + String NETWORK_ISSUE_RATIO = "network.issue.ratio"; + String NETWORK_ISSUE_DELAY = "networkissue.delay"; + //should be big enough to create the file + String FILL_DISK_COMMAND_TIMEOUT = "fill.disk.command.timeout"; + String FILL_DISK_PATH = "fill.disk.path"; + String FILL_DISK_FILE_SIZE = "fill.disk.file.size"; + String FILL_DISK_ISSUE_DURATION = "fill.disk.issue.duration"; + String DATA_ISSUE_CHANCE = "data.issue.chance"; /** * A Set of prefixes which encompasses all of the configuration properties for the ChaosMonky. 
@@ -81,4 +93,15 @@ public interface MonkeyConstants { long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000; long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000; float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f; + long DEFAULT_CPU_LOAD_DURATION = 5 * 60 * 1000; + long DEFAULT_CPU_LOAD_PROCESSES = 2; + long DEFAULT_NETWORK_ISSUE_COMMAND_TIMEOUT = 30 * 1000; + long DEFAULT_NETWORK_ISSUE_DURATION = 60 * 1000; + float DEFAULT_NETWORK_ISSUE_RATIO = 0.1f; + long DEFAULT_NETWORK_ISSUE_DELAY = 100; + long DEFAULT_FILL_DISK_COMMAND_TIMEOUT = 2 * 60 * 1000; + String DEFAULT_FILL_DISK_PATH = "/tmp"; + long DEFAULT_FILL_DISK_FILE_SIZE = 0; + long DEFAULT_FILL_DISK_ISSUE_DURATION = 5 * 60 * 1000; + float DEFAULT_DATA_ISSUE_CHANCE = 0.01f; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java index f4492b3b0240..73f696824403 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java @@ -77,6 +77,8 @@ public MonkeyFactory setProperties(Properties props) { public static final String MOB_NO_KILL = "mobNoKill"; public static final String MOB_SLOW_DETERMINISTIC = "mobSlowDeterministic"; public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling"; + public static final String DISTRIBUTED_ISSUES = "distributedIssues"; + public static final String DATA_ISSUES = "dataIssues"; public static Map FACTORIES = ImmutableMap.builder() .put(CALM, new CalmMonkeyFactory()) @@ -89,6 +91,8 @@ public MonkeyFactory setProperties(Properties props) { .put(MOB_NO_KILL, new MobNoKillMonkeyFactory()) .put(MOB_SLOW_DETERMINISTIC, new MobNoKillMonkeyFactory()) .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory()) + .put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory()) + .put(DATA_ISSUES, new DataIssuesMonkeyFactory()) .build(); public static MonkeyFactory getFactory(String factoryName) { diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java index 2e763adbfd7b..5cb2d7f7b8a8 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java @@ -78,10 +78,10 @@ private void loadProperties() { MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME+ "")); rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + "")); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java index 68d11f9a6405..3f2edcc9f8fd 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java @@ -74,10 +74,10 @@ private void loadProperties() { MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME+ "")); rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + "")); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java index 22c35b96b957..deaf25640c8d 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java @@ -191,10 +191,10 @@ private void loadProperties() { MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME+ "")); rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + "")); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java index 4e304fbd2a6c..7f3aed4bb679 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java @@ -105,10 +105,10 @@ private void loadProperties() { MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); rollingBatchSuspendRSSleepTime = Long.parseLong(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, - MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); + MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME, + MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME+ "")); rollingBatchSuspendtRSRatio = Float.parseFloat(this.properties.getProperty( - MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO, - 
MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + ""));
+        MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
+        MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
   }
 }

From d6bcf6be5f05e2eb4df0331d7a6379c164b6c13e Mon Sep 17 00:00:00 2001
From: Bukros Szabolcs
Date: Wed, 9 Oct 2019 17:53:53 +0200
Subject: [PATCH 2/4] HBASE-23085: Network and Data related Actions

rename base class for new actions to better reflect its role
make network interface configurable for tc commands
fix typos and logging
---
 .../hadoop/hbase/HBaseClusterManager.java     | 28 ++++++----------
 .../hbase/chaos/actions/AddCPULoadAction.java |  4 +--
 .../actions/CorruptPackagesCommandAction.java | 12 +++----
 .../actions/DelayPackagesCommandAction.java   | 15 ++++-----
 .../DuplicatePackagesCommandAction.java       | 12 +++----
 .../chaos/actions/FillDiskCommandAction.java  |  2 +-
 .../actions/LosePackagesCommandAction.java    | 12 +++----
 .../actions/ReorderPackagesCommandAction.java | 14 ++++----
 ...mandAction.java => SudoCommandAction.java} |  6 ++--
 .../hbase/chaos/actions/TCCommandAction.java  | 33 +++++++++++++++++++
 .../DistributedIssuesMonkeyFactory.java       | 14 +++++---
 .../chaos/factories/MonkeyConstants.java      |  6 ++--
 12 files changed, 94 insertions(+), 64 deletions(-)
 rename hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/{CommandAction.java => SudoCommandAction.java} (91%)
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/TCCommandAction.java

diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
index d44630ca405f..2f75c731bffa 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java
@@ -162,11 +162,6 @@ public String[] getExecString() {
       LOG.info("Executing full command [" + cmd + "]");
       return new String[] { "/usr/bin/env", "bash", "-c", cmd };
     }
-
-    @Override
-    public void execute() throws IOException {
-      super.execute();
-    }
   }
 
   /**
@@ -194,11 +189,6 @@ public String[] getExecString() {
       LOG.info("Executing full command [" + cmd + "]");
       return new String[] { "/usr/bin/env", "bash", "-c", cmd };
     }
-
-    @Override
-    public void execute() throws IOException {
-      super.execute();
-    }
   }
 
   /**
@@ -341,7 +331,8 @@ protected CommandProvider getCommandProvider(ServiceType service) throws IOExcep
    */
   private Pair exec(String hostname, ServiceType service, String... cmd)
       throws IOException {
-    LOG.info("Executing remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname);
+    LOG.info("Executing remote command: {} , hostname:{}", StringUtils.join(cmd, " "),
+        hostname);
 
     RemoteShell shell = new RemoteShell(hostname, getServiceUser(service), cmd);
     try {
@@ -354,8 +345,8 @@ private Pair exec(String hostname, ServiceType service, String.
+ ", stdout: " + output); } - LOG.info("Executed remote command, exit code:" + shell.getExitCode() - + " , output:" + shell.getOutput()); + LOG.info("Executed remote command, exit code:{} , output:{}", shell.getExitCode(), + shell.getOutput()); return new Pair<>(shell.getExitCode(), shell.getOutput()); } @@ -373,7 +364,7 @@ private Pair execWithRetries(String hostname, ServiceType servi retryCounter.sleepUntilNextRetry(); } catch (InterruptedException ex) { // ignore - LOG.warn("Sleep Interrupted:" + ex); + LOG.warn("Sleep Interrupted:", ex); } } } @@ -385,7 +376,8 @@ private Pair execWithRetries(String hostname, ServiceType servi */ public Pair execSudo(String hostname, long timeout, String... cmd) throws IOException { - LOG.info("Executing remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname); + LOG.info("Executing remote command: {} , hostname:{}", StringUtils.join(cmd, " "), + hostname); RemoteSudoShell shell = new RemoteSudoShell(hostname, cmd, timeout); try { @@ -398,8 +390,8 @@ public Pair execSudo(String hostname, long timeout, String... c + ", stdout: " + output); } - LOG.info("Executed remote command, exit code:" + shell.getExitCode() - + " , output:" + shell.getOutput()); + LOG.info("Executed remote command, exit code:{} , output:{}", shell.getExitCode(), + shell.getOutput()); return new Pair<>(shell.getExitCode(), shell.getOutput()); } @@ -417,7 +409,7 @@ public Pair execSudoWithRetries(String hostname, long timeout, retryCounter.sleepUntilNextRetry(); } catch (InterruptedException ex) { // ignore - LOG.warn("Sleep Interrupted:" + ex); + LOG.warn("Sleep Interrupted:", ex); } } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java index 6e4003d56205..9d6437e431ba 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddCPULoadAction.java @@ -28,7 +28,7 @@ /** * Action that adds high cpu load to a random regionserver for a given duration */ -public class AddCPULoadAction extends CommandAction { +public class AddCPULoadAction extends SudoCommandAction { protected static final Logger LOG = LoggerFactory.getLogger(AddCPULoadAction.class); private static final String CPU_LOAD_COMMAND = "seq 1 %s | xargs -I{} -n 1 -P %s timeout %s dd if=/dev/urandom of=/dev/null bs=1M " + @@ -40,7 +40,7 @@ public class AddCPULoadAction extends CommandAction { /** * Add high load to cpu * - * @param duration Duration that this thread should generate the load for in miliseconds + * @param duration Duration that this thread should generate the load for in milliseconds * @param processes The number of parallel processes, should be equal to cpu threads for max load */ public AddCPULoadAction(long duration, long processes, long timeout) { diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java index 4fc6bb5e26b4..643154707fc9 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java @@ -29,7 +29,7 @@ * * Corrupt network packages on a random regionserver. 
*/ -public class CorruptPackagesCommandAction extends CommandAction { +public class CorruptPackagesCommandAction extends TCCommandAction { private static final Logger LOG = LoggerFactory.getLogger(CorruptPackagesCommandAction.class); private float ratio; private long duration; @@ -41,8 +41,8 @@ public class CorruptPackagesCommandAction extends CommandAction { * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds */ - public CorruptPackagesCommandAction(float ratio, long duration, long timeout) { - super(timeout); + public CorruptPackagesCommandAction(float ratio, long duration, long timeout, String network) { + super(timeout, network); this.ratio = ratio; this.duration = duration; } @@ -53,18 +53,18 @@ protected void localPerform() throws IOException { String hostname = server.getHostname(); try { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); Thread.sleep(duration); } catch (InterruptedException e) { LOG.debug("Failed to run the command for the full duration", e); } finally { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); } LOG.info("Finished to execute CorruptPackagesCommandAction"); } private String getCommand(String operation){ - return String.format("tc qdisc %s dev eth0 root netem corrupt %s%%", operation, ratio * 100); + return String.format("tc qdisc %s dev %s root netem corrupt %s%%", operation, network, ratio * 100); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java index c3b982f13eba..3c9e8b5eba0b 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java @@ -28,7 +28,7 @@ /** * Action adds latency to communication on a random regionserver. 
*/ -public class DelayPackagesCommandAction extends CommandAction { +public class DelayPackagesCommandAction extends TCCommandAction { private static final Logger LOG = LoggerFactory.getLogger(DelayPackagesCommandAction.class); private long delay; private long duration; @@ -40,8 +40,8 @@ public class DelayPackagesCommandAction extends CommandAction { * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds */ - public DelayPackagesCommandAction(long delay, long duration, long timeout) { - super(timeout); + public DelayPackagesCommandAction(long delay, long duration, long timeout, String network) { + super(timeout, network); this.delay = delay; this.duration = duration; } @@ -51,21 +51,20 @@ protected void localPerform() throws IOException { ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); String hostname = server.getHostname(); - String base = getConf().get("hbase.home.dir"); try { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); Thread.sleep(duration); } catch (InterruptedException e) { LOG.debug("Failed to run the command for the full duration", e); } finally { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); } LOG.info("Finished to execute DelayPackagesCommandAction"); } private String getCommand(String operation){ - return String.format("tc qdisc %s dev eth0 root netem delay %sms %sms", - operation, delay, delay/2); + return String.format("tc qdisc %s dev %s root netem delay %sms %sms", + operation, network, delay, delay/2); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java index d57a3d7d2928..2bcaa73289f6 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java @@ -29,7 +29,7 @@ * * Duplicate network packages on a random regionserver. 
*/ -public class DuplicatePackagesCommandAction extends CommandAction { +public class DuplicatePackagesCommandAction extends TCCommandAction { private static final Logger LOG = LoggerFactory.getLogger(DuplicatePackagesCommandAction.class); private float ratio; private long duration; @@ -41,8 +41,8 @@ public class DuplicatePackagesCommandAction extends CommandAction { * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds */ - public DuplicatePackagesCommandAction(float ratio, long duration, long timeout) { - super(timeout); + public DuplicatePackagesCommandAction(float ratio, long duration, long timeout, String network) { + super(timeout, network); this.ratio = ratio; this.duration = duration; } @@ -53,18 +53,18 @@ protected void localPerform() throws IOException { String hostname = server.getHostname(); try { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); Thread.sleep(duration); } catch (InterruptedException e) { LOG.debug("Failed to run the command for the full duration", e); } finally { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); } LOG.info("Finished to execute DuplicatePackagesCommandAction"); } private String getCommand(String operation){ - return String.format("tc qdisc %s dev eth0 root netem duplicate %s%%", operation, ratio * 100); + return String.format("tc qdisc %s dev %s root netem duplicate %s%%", operation, network, ratio * 100); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java index 0de1a32ea26b..b7af31fffa5a 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FillDiskCommandAction.java @@ -29,7 +29,7 @@ * * Fill the disk on a random regionserver. */ -public class FillDiskCommandAction extends CommandAction { +public class FillDiskCommandAction extends SudoCommandAction { private static final Logger LOG = LoggerFactory.getLogger(FillDiskCommandAction.class); private long size; private long duration; diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java index 4278999442c1..fa797261c991 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java @@ -29,7 +29,7 @@ * * Lose network packages on a random regionserver. 
*/ -public class LosePackagesCommandAction extends CommandAction { +public class LosePackagesCommandAction extends TCCommandAction { private static final Logger LOG = LoggerFactory.getLogger(LosePackagesCommandAction.class); private float ratio; private long duration; @@ -41,8 +41,8 @@ public class LosePackagesCommandAction extends CommandAction { * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds */ - public LosePackagesCommandAction(float ratio, long duration, long timeout) { - super(timeout); + public LosePackagesCommandAction(float ratio, long duration, long timeout, String network) { + super(timeout, network); this.ratio = ratio; this.duration = duration; } @@ -53,18 +53,18 @@ protected void localPerform() throws IOException { String hostname = server.getHostname(); try { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); Thread.sleep(duration); } catch (InterruptedException e) { LOG.debug("Failed to run the command for the full duration", e); } finally { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); } LOG.info("Finished to execute LosePackagesCommandAction"); } private String getCommand(String operation){ - return String.format("tc qdisc %s dev eth0 root netem loss %s%%", operation, ratio * 100); + return String.format("tc qdisc %s dev %s root netem loss %s%%", operation, network, ratio * 100); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java index 4939b623f5b2..8f2c37f743b9 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java @@ -29,7 +29,7 @@ * * Reorder network packages on a random regionserver. 
*/ -public class ReorderPackagesCommandAction extends CommandAction { +public class ReorderPackagesCommandAction extends TCCommandAction { private static final Logger LOG = LoggerFactory.getLogger(ReorderPackagesCommandAction.class); private float ratio; private long duration; @@ -43,8 +43,8 @@ public class ReorderPackagesCommandAction extends CommandAction { * @param delay the delay between reordered and non-reordered packages in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds */ - public ReorderPackagesCommandAction(float ratio, long duration, long delay, long timeout) { - super(timeout); + public ReorderPackagesCommandAction(float ratio, long duration, long delay, long timeout, String network) { + super(timeout, network); this.ratio = ratio; this.duration = duration; this.delay = delay; @@ -56,19 +56,19 @@ protected void localPerform() throws IOException { String hostname = server.getHostname(); try { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("add")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); Thread.sleep(duration); } catch (InterruptedException e) { LOG.debug("Failed to run the command for the full duration", e); } finally { - clusterManager.execSudoWithRetries(hostname, timeout, getCommand("del")); + clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); } LOG.info("Finished to execute ReorderPackagesCommandAction"); } private String getCommand(String operation){ - return String.format("tc qdisc %s dev eth0 root netem delay %sms reorder %s%% 50%", - operation, delay, ratio * 100); + return String.format("tc qdisc %s dev %s root netem delay %sms reorder %s%% 50%%", + operation, network, delay, ratio * 100); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SudoCommandAction.java similarity index 91% rename from hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java rename to hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SudoCommandAction.java index 91b0820f099c..6092a5dbbc6a 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SudoCommandAction.java @@ -29,13 +29,13 @@ /** * Base class for performing Actions based on linux commands requiring sudo privileges */ -abstract public class CommandAction extends Action { - private static final Logger LOG = LoggerFactory.getLogger(CommandAction.class); +abstract public class SudoCommandAction extends Action { + private static final Logger LOG = LoggerFactory.getLogger(SudoCommandAction.class); protected long timeout; protected HBaseClusterManager clusterManager; - public CommandAction(long timeout) { + public SudoCommandAction(long timeout) { this.timeout = timeout; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/TCCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/TCCommandAction.java new file mode 100644 index 000000000000..9444f876f72a --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/TCCommandAction.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +/** + * Base class for tc command actions + */ +abstract public class TCCommandAction extends SudoCommandAction { + protected static final String ADD = "add"; + protected static final String DELETE = "del"; + protected String network; + + public TCCommandAction(long timeout, String network) { + super(timeout); + this.network = network; + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java index 7ff3376555a7..9f04136dfb3c 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java @@ -46,6 +46,7 @@ public class DistributedIssuesMonkeyFactory extends MonkeyFactory { private long networkIssueDuration; private float networkIssueRation; private long networkIssueDelay; + private String networkIssueInterface; private long fillDiskTimeout; private String fillDiskPath; private long fillDiskFileSize; @@ -57,15 +58,15 @@ public class DistributedIssuesMonkeyFactory extends MonkeyFactory { Action[] actions1 = new Action[] { new AddCPULoadAction(cpuLoadDuration, cpuLoadProcesses, networkIssueTimeout), new CorruptPackagesCommandAction(networkIssueRation, networkIssueDuration, - networkIssueTimeout), + networkIssueTimeout, networkIssueInterface), new DuplicatePackagesCommandAction(networkIssueRation, networkIssueDuration, - networkIssueTimeout), + networkIssueTimeout, networkIssueInterface), new LosePackagesCommandAction(networkIssueRation, networkIssueDuration, - networkIssueTimeout), + networkIssueTimeout, networkIssueInterface), new DelayPackagesCommandAction(networkIssueDelay, networkIssueDuration, - networkIssueTimeout), + networkIssueTimeout, networkIssueInterface), new ReorderPackagesCommandAction(networkIssueRation, networkIssueDuration, - networkIssueDelay, networkIssueTimeout), + networkIssueDelay, networkIssueTimeout, networkIssueInterface), new FillDiskCommandAction(fillDiskFileSize, fillDiskIssueduration, fillDiskPath, fillDiskTimeout)}; @@ -101,6 +102,9 @@ private void loadProperties() { networkIssueDelay = Long.parseLong(this.properties .getProperty(MonkeyConstants.NETWORK_ISSUE_DELAY, MonkeyConstants.DEFAULT_NETWORK_ISSUE_DELAY + "")); + networkIssueInterface = this.properties + .getProperty(MonkeyConstants.NETWORK_ISSUE_INTERFACE, + MonkeyConstants.DEFAULT_NETWORK_ISSUE_INTERFACE + ""); fillDiskTimeout = Long.parseLong(this.properties .getProperty(MonkeyConstants.FILL_DISK_COMMAND_TIMEOUT, MonkeyConstants.DEFAULT_FILL_DISK_COMMAND_TIMEOUT + "")); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java index 
f9702c292bd8..d9793316d925 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java @@ -53,7 +53,8 @@ public interface MonkeyConstants { String NETWORK_ISSUE_COMMAND_TIMEOUT = "network.issue.command.timeout"; String NETWORK_ISSUE_DURATION = "network.issueduration"; String NETWORK_ISSUE_RATIO = "network.issue.ratio"; - String NETWORK_ISSUE_DELAY = "networkissue.delay"; + String NETWORK_ISSUE_DELAY = "network.issue.delay"; + String NETWORK_ISSUE_INTERFACE = "network.issue.interface"; //should be big enough to create the file String FILL_DISK_COMMAND_TIMEOUT = "fill.disk.command.timeout"; String FILL_DISK_PATH = "fill.disk.path"; @@ -99,7 +100,8 @@ public interface MonkeyConstants { long DEFAULT_NETWORK_ISSUE_DURATION = 60 * 1000; float DEFAULT_NETWORK_ISSUE_RATIO = 0.1f; long DEFAULT_NETWORK_ISSUE_DELAY = 100; - long DEFAULT_FILL_DISK_COMMAND_TIMEOUT = 2 * 60 * 1000; + String DEFAULT_NETWORK_ISSUE_INTERFACE = "eth0"; + long DEFAULT_FILL_DISK_COMMAND_TIMEOUT = 5 * 60 * 1000 + 30 * 1000;//duration + timeout String DEFAULT_FILL_DISK_PATH = "/tmp"; long DEFAULT_FILL_DISK_FILE_SIZE = 0; long DEFAULT_FILL_DISK_ISSUE_DURATION = 5 * 60 * 1000; From 730169b51ee25236086a1329899a4123b55c0645 Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Thu, 10 Oct 2019 09:47:33 +0200 Subject: [PATCH 3/4] HBASE-23085: Network and Data related Actions fix checkstyle --- .../hbase/chaos/actions/CorruptPackagesCommandAction.java | 3 ++- .../hbase/chaos/actions/DuplicatePackagesCommandAction.java | 3 ++- .../hadoop/hbase/chaos/actions/LosePackagesCommandAction.java | 3 ++- .../hbase/chaos/actions/ReorderPackagesCommandAction.java | 3 ++- .../hbase/chaos/factories/DistributedIssuesMonkeyFactory.java | 4 ++-- .../chaos/factories/StressAssignmentManagerMonkeyFactory.java | 4 ++-- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java index 643154707fc9..e7e7b68269f4 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java @@ -65,6 +65,7 @@ protected void localPerform() throws IOException { } private String getCommand(String operation){ - return String.format("tc qdisc %s dev %s root netem corrupt %s%%", operation, network, ratio * 100); + return String.format("tc qdisc %s dev %s root netem corrupt %s%%", operation, network, + ratio * 100); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java index 2bcaa73289f6..ca794051eea4 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java @@ -65,6 +65,7 @@ protected void localPerform() throws IOException { } private String getCommand(String operation){ - return String.format("tc qdisc %s dev %s root netem duplicate %s%%", operation, network, ratio * 100); + return String.format("tc qdisc %s dev %s root netem duplicate %s%%", operation, network, + ratio * 100); } } diff --git 
a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java index fa797261c991..38bb82a11c71 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java @@ -65,6 +65,7 @@ protected void localPerform() throws IOException { } private String getCommand(String operation){ - return String.format("tc qdisc %s dev %s root netem loss %s%%", operation, network, ratio * 100); + return String.format("tc qdisc %s dev %s root netem loss %s%%", operation, network, + ratio * 100); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java index 8f2c37f743b9..45eb0cac0216 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java @@ -43,7 +43,8 @@ public class ReorderPackagesCommandAction extends TCCommandAction { * @param delay the delay between reordered and non-reordered packages in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds */ - public ReorderPackagesCommandAction(float ratio, long duration, long delay, long timeout, String network) { + public ReorderPackagesCommandAction(float ratio, long duration, long delay, long timeout, + String network) { super(timeout, network); this.ratio = ratio; this.duration = duration; diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java index 9f04136dfb3c..745f1b9aae82 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/DistributedIssuesMonkeyFactory.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *
+ * * http://www.apache.org/licenses/LICENSE-2.0 - *
+ * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java index 7f3aed4bb679..b25bef7a334f 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *
+ * * http://www.apache.org/licenses/LICENSE-2.0 - *
+ * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. From 4be3179ea6c1b8e4669a179ef16735776de28734 Mon Sep 17 00:00:00 2001 From: Bukros Szabolcs Date: Wed, 30 Oct 2019 16:45:54 +0100 Subject: [PATCH 4/4] HBASE-23085: Network and Data related Actions restrict file based monkeys to HFiles extend javadoc make sure new monkey properties are loaded from generic properties --- .../hbase/chaos/actions/CorruptDataFilesAction.java | 12 +++++++++--- .../chaos/actions/CorruptPackagesCommandAction.java | 1 + .../chaos/actions/DelayPackagesCommandAction.java | 1 + .../hbase/chaos/actions/DeleteDataFilesAction.java | 13 +++++++++---- .../actions/DuplicatePackagesCommandAction.java | 1 + .../chaos/actions/LosePackagesCommandAction.java | 1 + .../chaos/actions/ReorderPackagesCommandAction.java | 1 + .../hbase/chaos/factories/MonkeyConstants.java | 4 ++-- 8 files changed, 25 insertions(+), 9 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java index 1ad52217c15f..83e8fe08a495 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptDataFilesAction.java @@ -24,19 +24,20 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Action corrupts region server data. + * Action corrupts HFiles with a certain chance. 
*/ public class CorruptDataFilesAction extends Action { private static final Logger LOG = LoggerFactory.getLogger(CorruptDataFilesAction.class); private float chance; /** - * Corrupts region server data file switch a certain chance + * Corrupts HFiles with a certain chance * @param chance chance to corrupt any give data file (0.5 => 50%) */ public CorruptDataFilesAction(float chance) { @@ -46,11 +47,16 @@ public CorruptDataFilesAction(float chance) { @Override public void perform() throws Exception { LOG.info("Start corrupting data files"); + FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf()); Path rootDir = CommonFSUtils.getRootDir(getConf()); - RemoteIterator iterator = fs.listFiles(rootDir, true); + Path defaultDir = rootDir.suffix("/data/default"); + RemoteIterator iterator = fs.listFiles(defaultDir, true); while (iterator.hasNext()){ LocatedFileStatus status = iterator.next(); + if(!HFile.isHFileFormat(fs, status.getPath())){ + continue; + } if(RandomUtils.nextFloat(0, 100) > chance){ continue; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java index e7e7b68269f4..a89d5587a797 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CorruptPackagesCommandAction.java @@ -40,6 +40,7 @@ public class CorruptPackagesCommandAction extends TCCommandAction { * @param ratio the ratio of packages corrupted * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds + * @param network network interface the regionserver uses for communication */ public CorruptPackagesCommandAction(float ratio, long duration, long timeout, String network) { super(timeout, network); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java index 3c9e8b5eba0b..e4de0a270a82 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DelayPackagesCommandAction.java @@ -39,6 +39,7 @@ public class DelayPackagesCommandAction extends TCCommandAction { * @param delay the latency wil be delay +/-50% in milliseconds * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds + * @param network network interface the regionserver uses for communication */ public DelayPackagesCommandAction(long delay, long duration, long timeout, String network) { super(timeout, network); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java index 0bd2fd2468eb..4919adce490c 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DeleteDataFilesAction.java @@ -23,20 +23,21 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hbase.io.hfile.HFile; import 
org.apache.hadoop.hbase.util.CommonFSUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Action deletes region server data. + * Action deletes HFiles with a certain chance. */ public class DeleteDataFilesAction extends Action { private static final Logger LOG = LoggerFactory.getLogger(DeleteDataFilesAction.class); private float chance; /** - * Delets region server data file switch a certain chance - * @param chance chance to corrupt any give data file (0.5 => 50%) + * Deletes HFiles with a certain chance + * @param chance chance to delete any given data file (0.5 => 50%) */ public DeleteDataFilesAction(float chance) { this.chance = chance * 100; @@ -47,9 +48,13 @@ public void perform() throws Exception { LOG.info("Start deleting data files"); FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf()); Path rootDir = CommonFSUtils.getRootDir(getConf()); - RemoteIterator iterator = fs.listFiles(rootDir, true); + Path defaultDir = rootDir.suffix("/data/default"); + RemoteIterator iterator = fs.listFiles(defaultDir, true); while (iterator.hasNext()){ LocatedFileStatus status = iterator.next(); + if(!HFile.isHFileFormat(fs, status.getPath())){ + continue; + } if(RandomUtils.nextFloat(0, 100) > chance){ continue; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java index ca794051eea4..f3d54f18985d 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/DuplicatePackagesCommandAction.java @@ -40,6 +40,7 @@ public class DuplicatePackagesCommandAction extends TCCommandAction { * @param ratio the ratio of packages duplicated * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds + * @param network network interface the regionserver uses for communication */ public DuplicatePackagesCommandAction(float ratio, long duration, long timeout, String network) { super(timeout, network); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java index 38bb82a11c71..e44cac7ade25 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/LosePackagesCommandAction.java @@ -40,6 +40,7 @@ public class LosePackagesCommandAction extends TCCommandAction { * @param ratio the ratio of packages lost * @param duration the time this issue persists in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds + * @param network network interface the regionserver uses for communication */ public LosePackagesCommandAction(float ratio, long duration, long timeout, String network) { super(timeout, network); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java index 45eb0cac0216..c1f196e830e0 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java +++ 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ReorderPackagesCommandAction.java @@ -42,6 +42,7 @@ public class ReorderPackagesCommandAction extends TCCommandAction { * @param duration the time this issue persists in milliseconds * @param delay the delay between reordered and non-reordered packages in milliseconds * @param timeout the timeout for executing required commands on the region server in milliseconds + * @param network network interface the regionserver uses for communication */ public ReorderPackagesCommandAction(float ratio, long duration, long delay, long timeout, String network) { diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java index d9793316d925..f4c34b59959a 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java @@ -66,8 +66,8 @@ public interface MonkeyConstants { * A Set of prefixes which encompasses all of the configuration properties for the ChaosMonky. */ Set MONKEY_CONFIGURATION_KEY_PREFIXES = new HashSet<>( - Arrays.asList("sdm.", "move.", "restart.", "batch.", "rolling.", "compact.", - "unbalance.", "decrease.")); + Arrays.asList("sdm.", "move.", "restart.", "batch.", "rolling.", "compact.", "unbalance.", + "decrease.", "graceful.", "cpu.", "network.", "fill.", "data.")); long DEFAULT_PERIODIC_ACTION1_PERIOD = 60 * 1000; long DEFAULT_PERIODIC_ACTION2_PERIOD = 90 * 1000;
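
Note for reviewers trying the patch out: the tc-based actions above all share one lifecycle, run "tc qdisc add dev <interface> root netem ..." over sudo SSH, sleep for the configured duration, then tear the qdisc down with the matching "del". A minimal sketch of wiring a few of them into a custom action list, assuming only the constructors introduced in this patch; the duration, timeout, and interface values are illustrative, not defaults taken from the code:

import org.apache.hadoop.hbase.chaos.actions.Action;
import org.apache.hadoop.hbase.chaos.actions.DelayPackagesCommandAction;
import org.apache.hadoop.hbase.chaos.actions.LosePackagesCommandAction;
import org.apache.hadoop.hbase.chaos.actions.ReorderPackagesCommandAction;

public class NetemActionsSketch {
  public static Action[] networkActions() {
    return new Action[] {
      // runs: tc qdisc add dev eth0 root netem delay 100ms 50ms
      new DelayPackagesCommandAction(100, 60000, 30000, "eth0"),
      // runs: tc qdisc add dev eth0 root netem loss 10.0%
      new LosePackagesCommandAction(0.1f, 60000, 30000, "eth0"),
      // runs: tc qdisc add dev eth0 root netem delay 50ms reorder 10.0% 50%
      new ReorderPackagesCommandAction(0.1f, 60000, 50, 30000, "eth0"),
    };
  }
}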
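
The MONKEY_CONFIGURATION_KEY_PREFIXES change above is what lets the new cpu., network., fill. and data. keys flow in from the generic monkey properties, so the tunables can be overridden without touching the factories. A sketch of overriding the new knobs, assuming the constants land as defined in this patch and that the resulting Properties are fed through the existing MonkeyFactory.setProperties(...) plumbing:

import java.util.Properties;
import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;

public class DistributedIssuesPropsSketch {
  public static Properties monkeyProps() {
    Properties props = new Properties();
    // run the netem commands against a non-default interface (default is eth0)
    props.setProperty(MonkeyConstants.NETWORK_ISSUE_INTERFACE, "ens5");
    // corrupt/duplicate/lose/reorder 20% of packages instead of the default 10%
    props.setProperty(MonkeyConstants.NETWORK_ISSUE_RATIO, "0.2");
    // center the added latency on 200ms (+/-50%) instead of the default 100ms
    props.setProperty(MonkeyConstants.NETWORK_ISSUE_DELAY, "200");
    // grow the fill-disk file somewhere other than the default /tmp
    props.setProperty(MonkeyConstants.FILL_DISK_PATH, "/data/chaos");
    return props;
  }
}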
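
Since SudoCommandAction carries both the command timeout and the HBaseClusterManager handle, one-shot privileged commands beyond tc reduce to a localPerform() override. A hypothetical example in the style of FillDiskCommandAction; the class name and the drop_caches command are mine for illustration, not part of this patch:

package org.apache.hadoop.hbase.chaos.actions;

import java.io.IOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Hypothetical action: drop the OS page cache on a random regionserver.
 */
public class DropPageCacheAction extends SudoCommandAction {
  private static final Logger LOG = LoggerFactory.getLogger(DropPageCacheAction.class);

  public DropPageCacheAction(long timeout) {
    super(timeout);
  }

  @Override
  protected void localPerform() throws IOException {
    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
    String hostname = server.getHostname();
    // sh -c keeps the redirect under sudo so the whole pipeline runs as root
    clusterManager.execSudoWithRetries(hostname, timeout,
        "sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'");
    LOG.info("Dropped page cache on {}", hostname);
  }
}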