From 5ab8994114d782d42eb39594684262269b42cee1 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Tue, 25 Mar 2025 19:56:00 +0100 Subject: [PATCH 1/5] HDDS-12694. failed to shutdown MiniOzoneChaosCluster --- .../org/apache/hadoop/ozone/MiniOzoneChaosCluster.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java index 6db7b4b016e2..5871e9b3881e 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java @@ -132,12 +132,12 @@ void startChaos(long initialDelay, long period, TimeUnit timeUnit) { public void shutdown() { try { failureManager.stop(); - //this should be called after stopChaos to be sure that the - //datanode collection is not modified during the shutdown - super.shutdown(); } catch (Exception e) { - LOG.error("failed to shutdown MiniOzoneChaosCluster", e); + LOG.error("failed to stop FailureManager", e); } + //this should be called after failureManager.stop to be sure that the + //datanode collection is not modified during the shutdown + super.shutdown(); } /** From 4340ebee8a1d643ec0c97d9188a894d201fc98db Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Tue, 25 Mar 2025 20:44:04 +0100 Subject: [PATCH 2/5] properly initialize Failures and LoadGenerators in subclasses --- .../ozone/TestAllMiniChaosOzoneCluster.java | 23 +++++------ .../TestDatanodeMiniChaosOzoneCluster.java | 12 ++++-- .../ozone/TestMiniChaosOzoneCluster.java | 39 +++++++++---------- ...TestOzoneManagerMiniChaosOzoneCluster.java | 11 +++++- ...ContainerManagerMiniChaosOzoneCluster.java | 11 +++++- 5 files changed, 58 insertions(+), 38 deletions(-) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestAllMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestAllMiniChaosOzoneCluster.java index 086a621d2bce..4275beed02fc 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestAllMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestAllMiniChaosOzoneCluster.java @@ -21,33 +21,34 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.ozone.failure.Failures; import org.apache.hadoop.ozone.loadgenerators.LoadGenerator; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; import picocli.CommandLine; /** - * Command line utility to parse and dump a datanode ratis segment file. + * Test all kinds of chaos. */ @CommandLine.Command( name = "all", description = "run chaos cluster across all daemons", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TestAllMiniChaosOzoneCluster extends TestMiniChaosOzoneCluster implements Callable { - @CommandLine.ParentCommand - private OzoneChaosCluster chaosCluster; - - @Override - public Void call() throws Exception { + @BeforeAll + void setup() { setNumManagers(3, 3, true); - LoadGenerator.getClassList().forEach( - TestMiniChaosOzoneCluster::addLoadClasses); - Failures.getClassList().forEach( - TestMiniChaosOzoneCluster::addFailureClasses); + LoadGenerator.getClassList().forEach(this::addLoadClasses); + Failures.getClassList().forEach(this::addFailureClasses); + } + @Override + public Void call() throws Exception { + setup(); startChaosCluster(); - return null; } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestDatanodeMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestDatanodeMiniChaosOzoneCluster.java index 8a803c2f78ca..828d57e3db9b 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestDatanodeMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestDatanodeMiniChaosOzoneCluster.java @@ -22,6 +22,8 @@ import org.apache.hadoop.ozone.failure.Failures; import org.apache.hadoop.ozone.loadgenerators.AgedLoadGenerator; import org.apache.hadoop.ozone.loadgenerators.RandomLoadGenerator; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; import picocli.CommandLine; /** @@ -32,19 +34,23 @@ description = "run chaos cluster across Ozone Datanodes", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TestDatanodeMiniChaosOzoneCluster extends TestMiniChaosOzoneCluster implements Callable { - @Override - public Void call() throws Exception { + @BeforeAll + void setup() { addLoadClasses(RandomLoadGenerator.class); addLoadClasses(AgedLoadGenerator.class); addFailureClasses(Failures.DatanodeStartStopFailure.class); addFailureClasses(Failures.DatanodeRestartFailure.class); + } + @Override + public Void call() throws Exception { + setup(); startChaosCluster(); - return null; } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java index 53e5b5e042cb..4b81fc00f2fd 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java @@ -33,9 +33,10 @@ import org.apache.hadoop.ozone.freon.FreonReplicationOptions; import org.apache.hadoop.ozone.loadgenerators.LoadGenerator; import org.apache.hadoop.ozone.om.helpers.BucketLayout; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; import picocli.CommandLine; import picocli.CommandLine.Command; import picocli.CommandLine.Option; @@ -45,12 +46,13 @@ */ @Command(description = "Starts IO with MiniOzoneChaosCluster", name = "chaos", mixinStandardHelpOptions = true) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TestMiniChaosOzoneCluster extends GenericCli { - private static List> failureClasses + private final List> failureClasses = new ArrayList<>(); - private static List> loadClasses + private final List> loadClasses = new ArrayList<>(); enum AllowedBucketLayouts { FILE_SYSTEM_OPTIMIZED, OBJECT_STORE } @@ -106,18 +108,18 @@ enum AllowedBucketLayouts { FILE_SYSTEM_OPTIMIZED, OBJECT_STORE } private static AllowedBucketLayouts allowedBucketLayout = AllowedBucketLayouts.FILE_SYSTEM_OPTIMIZED; - private static MiniOzoneChaosCluster cluster; - private static OzoneClient client; - private static MiniOzoneLoadGenerator loadGenerator; + private MiniOzoneChaosCluster cluster; + private OzoneClient client; + private MiniOzoneLoadGenerator loadGenerator; - private static String omServiceId = null; - private static String scmServiceId = null; + private String omServiceId = null; + private String scmServiceId = null; private static final String OM_SERVICE_ID = "ozoneChaosTest"; private static final String SCM_SERVICE_ID = "scmChaosTest"; - @BeforeAll - public static void init() throws Exception { + @BeforeEach + void init() throws Exception { OzoneConfiguration configuration = new OzoneConfiguration(); MiniOzoneChaosCluster.Builder chaosBuilder = @@ -164,19 +166,19 @@ public static void init() throws Exception { loadGenerator = loadBuilder.build(); } - static void addFailureClasses(Class clz) { + void addFailureClasses(Class clz) { failureClasses.add(clz); } - static void addLoadClasses(Class clz) { + void addLoadClasses(Class clz) { loadClasses.add(clz); } - static void setNumDatanodes(int nDns) { + void setNumDatanodes(int nDns) { numDatanodes = nDns; } - static void setNumManagers(int nOms, int numScms, boolean enableHA) { + void setNumManagers(int nOms, int numScms, boolean enableHA) { if (nOms > 1 || enableHA) { omServiceId = OM_SERVICE_ID; @@ -189,11 +191,8 @@ static void setNumManagers(int nOms, int numScms, boolean enableHA) { numStorageContainerManagerss = numScms; } - /** - * Shutdown MiniDFSCluster. - */ - @AfterAll - public static void shutdown() { + @AfterEach + void shutdown() { if (loadGenerator != null) { loadGenerator.shutdownLoadGenerator(); } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestOzoneManagerMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestOzoneManagerMiniChaosOzoneCluster.java index e7e024d49a7a..662e3f4b7c60 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestOzoneManagerMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestOzoneManagerMiniChaosOzoneCluster.java @@ -23,6 +23,8 @@ import org.apache.hadoop.ozone.loadgenerators.AgedDirLoadGenerator; import org.apache.hadoop.ozone.loadgenerators.NestedDirLoadGenerator; import org.apache.hadoop.ozone.loadgenerators.RandomDirLoadGenerator; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; import picocli.CommandLine; /** @@ -33,11 +35,12 @@ description = "run chaos cluster across Ozone Managers", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TestOzoneManagerMiniChaosOzoneCluster extends TestMiniChaosOzoneCluster implements Callable { - @Override - public Void call() throws Exception { + @BeforeAll + void setup() { setNumManagers(3, 1, true); setNumDatanodes(3); @@ -47,7 +50,11 @@ public Void call() throws Exception { addFailureClasses(Failures.OzoneManagerRestartFailure.class); addFailureClasses(Failures.OzoneManagerStartStopFailure.class); + } + @Override + public Void call() throws Exception { + setup(); startChaosCluster(); return null; } diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerMiniChaosOzoneCluster.java index bbaa00130cd4..bf0ea95663a5 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestStorageContainerManagerMiniChaosOzoneCluster.java @@ -23,6 +23,8 @@ import org.apache.hadoop.ozone.loadgenerators.AgedDirLoadGenerator; import org.apache.hadoop.ozone.loadgenerators.NestedDirLoadGenerator; import org.apache.hadoop.ozone.loadgenerators.RandomDirLoadGenerator; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; import picocli.CommandLine; /** @@ -33,11 +35,12 @@ description = "run chaos cluster across Storage Container Managers", mixinStandardHelpOptions = true, versionProvider = HddsVersionProvider.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) public class TestStorageContainerManagerMiniChaosOzoneCluster extends TestMiniChaosOzoneCluster implements Callable { - @Override - public Void call() throws Exception { + @BeforeAll + void setup() { setNumManagers(3, 3, true); setNumDatanodes(3); @@ -47,7 +50,11 @@ public Void call() throws Exception { addFailureClasses(Failures.StorageContainerManagerRestartFailure.class); addFailureClasses(Failures.StorageContainerManagerStartStopFailure.class); + } + @Override + public Void call() throws Exception { + setup(); startChaosCluster(); return null; } From bc10a3243238c2a2c4f20dbda71363ecca2ed03d Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Tue, 25 Mar 2025 20:44:25 +0100 Subject: [PATCH 3/5] disable TestMiniChaosOzoneCluster as JUnit test --- .../java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java index 4b81fc00f2fd..63dd39e95b9c 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java @@ -33,6 +33,7 @@ import org.apache.hadoop.ozone.freon.FreonReplicationOptions; import org.apache.hadoop.ozone.loadgenerators.LoadGenerator; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.ozone.test.tag.Unhealthy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -47,6 +48,7 @@ @Command(description = "Starts IO with MiniOzoneChaosCluster", name = "chaos", mixinStandardHelpOptions = true) @TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Unhealthy("HDDS-3131") public class TestMiniChaosOzoneCluster extends GenericCli { private final List> failureClasses From fc7f0ff1efce7dbf763fd5da7a9bdb2800e3613a Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Tue, 25 Mar 2025 21:03:38 +0100 Subject: [PATCH 4/5] fix findbugs --- .../ozone/TestMiniChaosOzoneCluster.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java index 63dd39e95b9c..196686cf8127 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java @@ -62,60 +62,60 @@ enum AllowedBucketLayouts { FILE_SYSTEM_OPTIMIZED, OBJECT_STORE } @Option(names = {"-d", "--num-datanodes", "--numDatanodes"}, description = "num of datanodes. Full name --numDatanodes will be" + " removed in later versions.") - private static int numDatanodes = 20; + private int numDatanodes = 20; @Option(names = {"-o", "--num-ozone-manager", "--numOzoneManager"}, description = "num of ozoneManagers. Full name --numOzoneManager will" + " be removed in later versions.") - private static int numOzoneManagers = 1; + private int numOzoneManagers = 1; @Option(names = {"-s", "--num-storage-container-manager", "--numStorageContainerManagers"}, description = "num of storageContainerManagers." + "Full name --numStorageContainerManagers will" + " be removed in later versions.") - private static int numStorageContainerManagerss = 1; + private int numStorageContainerManagerss = 1; @Option(names = {"-t", "--num-threads", "--numThreads"}, description = "num of IO threads. Full name --numThreads will be" + " removed in later versions.") - private static int numThreads = 5; + private int numThreads = 5; @Option(names = {"-b", "--num-buffers", "--numBuffers"}, description = "num of IO buffers. Full name --numBuffers will be" + " removed in later versions.") - private static int numBuffers = 16; + private int numBuffers = 16; @Option(names = {"-m", "--num-minutes", "--numMinutes"}, description = "total run time. Full name --numMinutes will be " + "removed in later versions.") - private static int numMinutes = 1440; // 1 day by default + private int numMinutes = 1440; // 1 day by default @Option(names = {"-v", "--num-data-volume", "--numDataVolume"}, description = "number of datanode volumes to create. Full name " + "--numDataVolume will be removed in later versions.") - private static int numDataVolumes = 3; + private int numDataVolumes = 3; @Option(names = {"-i", "--failure-interval", "--failureInterval"}, description = "time between failure events in seconds. Full name " + "--failureInterval will be removed in later versions.") - private static int failureInterval = 300; // 5 minute period between failures. + private int failureInterval = 300; // 5 minute period between failures. @CommandLine.Mixin - private static FreonReplicationOptions freonReplication = + private FreonReplicationOptions freonReplication = new FreonReplicationOptions(); @Option(names = {"-l", "--layout"}, description = "Allowed Bucket Layouts: ${COMPLETION-CANDIDATES}") - private static AllowedBucketLayouts allowedBucketLayout = + private AllowedBucketLayouts allowedBucketLayout = AllowedBucketLayouts.FILE_SYSTEM_OPTIMIZED; private MiniOzoneChaosCluster cluster; private OzoneClient client; private MiniOzoneLoadGenerator loadGenerator; - private String omServiceId = null; - private String scmServiceId = null; + private String omServiceId; + private String scmServiceId; private static final String OM_SERVICE_ID = "ozoneChaosTest"; private static final String SCM_SERVICE_ID = "scmChaosTest"; From 7aaf6eb27574b8b9543c486609353035731dcb2b Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Wed, 26 Mar 2025 07:59:25 +0100 Subject: [PATCH 5/5] call startChaosCluster() instead of using BeforeEach/AfterEach --- .../ozone/TestMiniChaosOzoneCluster.java | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java index 196686cf8127..f06e5433530e 100644 --- a/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java +++ b/hadoop-ozone/fault-injection-test/mini-chaos-tests/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java @@ -34,8 +34,6 @@ import org.apache.hadoop.ozone.loadgenerators.LoadGenerator; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.ozone.test.tag.Unhealthy; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import picocli.CommandLine; @@ -96,6 +94,10 @@ enum AllowedBucketLayouts { FILE_SYSTEM_OPTIMIZED, OBJECT_STORE } "--numDataVolume will be removed in later versions.") private int numDataVolumes = 3; + @Option(names = {"--initial-delay"}, + description = "time (in seconds) before first failure event") + private int initialDelay = 300; // seconds + @Option(names = {"-i", "--failure-interval", "--failureInterval"}, description = "time between failure events in seconds. Full name " + "--failureInterval will be removed in later versions.") @@ -120,8 +122,7 @@ enum AllowedBucketLayouts { FILE_SYSTEM_OPTIMIZED, OBJECT_STORE } private static final String OM_SERVICE_ID = "ozoneChaosTest"; private static final String SCM_SERVICE_ID = "scmChaosTest"; - @BeforeEach - void init() throws Exception { + private void init() throws Exception { OzoneConfiguration configuration = new OzoneConfiguration(); MiniOzoneChaosCluster.Builder chaosBuilder = @@ -193,23 +194,18 @@ void setNumManagers(int nOms, int numScms, boolean enableHA) { numStorageContainerManagerss = numScms; } - @AfterEach - void shutdown() { + private void shutdown() { if (loadGenerator != null) { loadGenerator.shutdownLoadGenerator(); } - IOUtils.closeQuietly(client); - - if (cluster != null) { - cluster.shutdown(); - } + IOUtils.closeQuietly(client, cluster); } public void startChaosCluster() throws Exception { try { init(); - cluster.startChaos(failureInterval, failureInterval, TimeUnit.SECONDS); + cluster.startChaos(initialDelay, failureInterval, TimeUnit.SECONDS); loadGenerator.startIO(numMinutes, TimeUnit.MINUTES); } finally { shutdown(); @@ -217,8 +213,10 @@ public void startChaosCluster() throws Exception { } @Test - public void testReadWriteWithChaosCluster() throws Exception { - cluster.startChaos(5, 10, TimeUnit.SECONDS); - loadGenerator.startIO(120, TimeUnit.SECONDS); + void test() throws Exception { + initialDelay = 5; // seconds + failureInterval = 10; // seconds + numMinutes = 2; + startChaosCluster(); } }