From 3be96d00a0dc6f35983d38899a01c9fa4af24bec Mon Sep 17 00:00:00 2001 From: Lokesh Jain Date: Wed, 19 May 2021 16:51:42 +0530 Subject: [PATCH 1/4] HDDS-5248. SCM HA: Continuous PipelineNotFoundException seen in SCM log. --- .../apache/hadoop/hdds/scm/ha/SCMHAUtils.java | 26 ++++++++++++++----- .../hadoop/ozone/TestContainerOperations.java | 21 ++++++++++++++- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java index c03a74ffae36..381a2e856fec 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.ratis.ServerNotLeaderException; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; import org.apache.hadoop.hdds.server.ServerUtils; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ipc.RemoteException; @@ -65,6 +66,13 @@ public final class SCMHAUtils { .add(ResourceUnavailableException.class) .build(); + private static final List> + NON_RETRIABLE_EXCEPTION_LIST = + ImmutableList.>builder() + .add(PipelineNotFoundException.class) + .add(NonRetriableException.class) + .build(); + private SCMHAUtils() { // not used } @@ -206,13 +214,14 @@ public static Collection getSCMNodeIds( return getSCMNodeIds(configuration, scmServiceId); } - private static Throwable unwrapException(Exception e) { - IOException ioException = null; + public static Throwable unwrapException(Exception e) { Throwable cause = e.getCause(); - if (cause instanceof RemoteException) { - ioException = ((RemoteException) cause).unwrapRemoteException(); + if (e instanceof RemoteException) { + return ((RemoteException) e).unwrapRemoteException(); + } else if (cause instanceof RemoteException) { + return ((RemoteException) cause).unwrapRemoteException(); } - return ioException == null ? e : ioException; + return e; } /** @@ -231,7 +240,12 @@ public static boolean isNonRetriableException(Exception e) { */ public static boolean checkNonRetriableException(Exception e) { Throwable t = unwrapException(e); - return NonRetriableException.class.isInstance(t); + for (Class clazz : NON_RETRIABLE_EXCEPTION_LIST) { + if (clazz.isInstance(t)) { + return true; + } + } + return false; } // This will return the underlying exception after unwrapping diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java index f1ca3d2c015f..55164688a16d 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerOperations.java @@ -25,10 +25,13 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; - import org.junit.Rule; import org.junit.rules.Timeout; import static org.junit.Assert.assertEquals; @@ -80,4 +83,20 @@ public void testCreate() throws Exception { .getContainerID()); } + /** + * A simple test to get Pipeline with {@link ContainerOperationClient}. + * @throws Exception + */ + @Test + public void testGetPipeline() throws Exception { + try { + storageClient.getPipeline(PipelineID.randomId().getProtobuf()); + Assert.fail("Get Pipeline should fail"); + } catch (Exception e) { + Assert.assertTrue( + SCMHAUtils.unwrapException(e) instanceof PipelineNotFoundException); + } + + Assert.assertFalse(storageClient.listPipelines().isEmpty()); + } } From 95ae0d9ddc82c6b735cacd5fd117ca46df4a838a Mon Sep 17 00:00:00 2001 From: Lokesh Jain Date: Thu, 20 May 2021 16:39:13 +0530 Subject: [PATCH 2/4] Make known exceptions subclass of SCMException --- .../hdds/scm/container/ContainerException.java | 14 ++++++++------ .../scm/container/ContainerNotFoundException.java | 4 ++-- .../ContainerReplicaNotFoundException.java | 4 ++-- .../hadoop/hdds/scm/exceptions/SCMException.java | 4 ++++ .../org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java | 4 ++-- .../scm/pipeline/PipelineNotFoundException.java | 8 ++++---- .../pipeline/UnknownPipelineStateException.java | 8 +++++--- 7 files changed, 27 insertions(+), 19 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerException.java index 9d37dfb1f335..00953df832da 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerException.java @@ -17,20 +17,21 @@ package org.apache.hadoop.hdds.scm.container; -import java.io.IOException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; /** * Signals that ContainerException of some sort has occurred. This is parent * of all the exceptions thrown by ContainerManager. */ -public class ContainerException extends IOException { +public class ContainerException extends SCMException { /** * Constructs an {@code ContainerException} with {@code null} * as its error detail message. + * @param resultCode ResultCode for the exception */ - public ContainerException() { - super(); + public ContainerException(ResultCodes resultCode) { + super(resultCode); } /** @@ -39,8 +40,9 @@ public ContainerException() { * @param message * The detail message (which is saved for later retrieval * by the {@link #getMessage()} method) + * @param resultCode ResultCode for the exception */ - public ContainerException(String message) { - super(message); + public ContainerException(String message, ResultCodes resultCode) { + super(message, resultCode); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerNotFoundException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerNotFoundException.java index 3eebcce8403c..1f1e9cf90c56 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerNotFoundException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerNotFoundException.java @@ -27,7 +27,7 @@ public class ContainerNotFoundException extends ContainerException { * as its error detail message. */ public ContainerNotFoundException() { - super(); + super(ResultCodes.CONTAINER_NOT_FOUND); } /** @@ -39,6 +39,6 @@ public ContainerNotFoundException() { * by the {@link #getMessage()} method) */ public ContainerNotFoundException(String message) { - super(message); + super(message, ResultCodes.CONTAINER_NOT_FOUND); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaNotFoundException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaNotFoundException.java index fdbc18b1191e..8b3182ef8bdd 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaNotFoundException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaNotFoundException.java @@ -28,7 +28,7 @@ public class ContainerReplicaNotFoundException extends ContainerException { * as its error detail message. */ public ContainerReplicaNotFoundException() { - super(); + super(ResultCodes.CONTAINER_REPLICA_NOT_FOUND); } /** @@ -40,6 +40,6 @@ public ContainerReplicaNotFoundException() { * by the {@link #getMessage()} method) */ public ContainerReplicaNotFoundException(String message) { - super(message); + super(message, ResultCodes.CONTAINER_REPLICA_NOT_FOUND); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java index 8c1be36406d4..20185cdfbed9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java @@ -127,5 +127,9 @@ public enum ResultCodes { FAILED_TO_INIT_LEADER_CHOOSE_POLICY, SCM_NOT_LEADER, FAILED_TO_REVOKE_CERTIFICATES, + PIPELINE_NOT_FOUND, + UNKNOWN_PIPELINE_STATE, + CONTAINER_NOT_FOUND, + CONTAINER_REPLICA_NOT_FOUND } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java index 381a2e856fec..0d182c0ac39c 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.ratis.ServerNotLeaderException; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.server.ServerUtils; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ipc.RemoteException; @@ -69,7 +69,7 @@ public final class SCMHAUtils { private static final List> NON_RETRIABLE_EXCEPTION_LIST = ImmutableList.>builder() - .add(PipelineNotFoundException.class) + .add(SCMException.class) .add(NonRetriableException.class) .build(); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineNotFoundException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineNotFoundException.java index 2a89aab5288d..1aef83af12d5 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineNotFoundException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineNotFoundException.java @@ -18,18 +18,18 @@ package org.apache.hadoop.hdds.scm.pipeline; -import java.io.IOException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; /** * Signals that a pipeline is missing from PipelineManager. */ -public class PipelineNotFoundException extends IOException{ +public class PipelineNotFoundException extends SCMException { /** * Constructs an {@code PipelineNotFoundException} with {@code null} * as its error detail message. */ public PipelineNotFoundException() { - super(); + super(ResultCodes.PIPELINE_NOT_FOUND); } /** @@ -41,6 +41,6 @@ public PipelineNotFoundException() { * by the {@link #getMessage()} method) */ public PipelineNotFoundException(String message) { - super(message); + super(message, ResultCodes.PIPELINE_NOT_FOUND); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java index 7c75fc0a1397..7b679176ed07 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java @@ -18,18 +18,20 @@ package org.apache.hadoop.hdds.scm.pipeline; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; + import java.io.IOException; /** * Signals that a pipeline state is not recognized. */ -public class UnknownPipelineStateException extends IOException { +public class UnknownPipelineStateException extends SCMException { /** * Constructs an {@code UnknownPipelineStateException} with {@code null} * as its error detail message. */ public UnknownPipelineStateException() { - super(); + super(ResultCodes.UNKNOWN_PIPELINE_STATE); } /** @@ -41,6 +43,6 @@ public UnknownPipelineStateException() { * by the {@link #getMessage()} method) */ public UnknownPipelineStateException(String message) { - super(message); + super(message, ResultCodes.UNKNOWN_PIPELINE_STATE); } } From 68f4ab049abfecca95d48e6b286b3c470b07f58b Mon Sep 17 00:00:00 2001 From: Lokesh Jain Date: Fri, 21 May 2021 10:43:24 +0530 Subject: [PATCH 3/4] Fix checkstyle issues --- .../hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java index 7b679176ed07..bf1d75546e06 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/pipeline/UnknownPipelineStateException.java @@ -20,8 +20,6 @@ import org.apache.hadoop.hdds.scm.exceptions.SCMException; -import java.io.IOException; - /** * Signals that a pipeline state is not recognized. */ From 675beb8efa49b6c954e4b298a459b81dc78d24c5 Mon Sep 17 00:00:00 2001 From: Lokesh Jain Date: Fri, 21 May 2021 11:28:20 +0530 Subject: [PATCH 4/4] Fix test failure --- .../interface-server/src/main/proto/ScmServerProtocol.proto | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index 23b667dce756..7f47b5f46f8d 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -124,6 +124,10 @@ enum Status { FAILED_TO_INIT_LEADER_CHOOSE_POLICY = 31; SCM_NOT_LEADER = 32; FAILED_TO_REVOKE_CERTIFICATES = 33; + PIPELINE_NOT_FOUND = 34; + UNKNOWN_PIPELINE_STATE = 35; + CONTAINER_NOT_FOUND = 36; + CONTAINER_REPLICA_NOT_FOUND = 37; } /**