From 7f91ea4150cea93c60bb1598180f4f14d7937104 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 20 Jun 2023 23:52:34 +0800 Subject: [PATCH 01/15] HDDS-8689. Rotate Root CA and Sub CA in SCM. --- .../apache/hadoop/hdds/HddsConfigKeys.java | 6 + .../hdds/scm/exceptions/SCMException.java | 1 + .../hadoop/hdds/security/SecurityConfig.java | 44 ++ .../security/ssl/ReloadingX509KeyManager.java | 52 +- .../ssl/ReloadingX509TrustManager.java | 27 +- .../certificate/utils/CertificateCodec.java | 3 +- .../org/apache/hadoop/ozone/OzoneConsts.java | 6 +- .../src/main/resources/ozone-default.xml | 8 + .../authority/DefaultCAServer.java | 13 +- .../client/DNCertificateClient.java | 10 +- .../client/DefaultCertificateClient.java | 98 +++- .../client/SCMCertificateClient.java | 76 ++- .../hdds/security/x509/keys/KeyCodec.java | 7 +- .../client/TestDefaultCertificateClient.java | 16 +- .../src/main/proto/SCMRatisProtocol.proto | 1 + .../src/main/proto/ScmServerProtocol.proto | 1 + .../hadoop/hdds/scm/ha/HASecurityUtils.java | 100 ++-- .../hdds/scm/ha/SCMHAInvocationHandler.java | 11 +- .../hdds/scm/ha/SequenceIdGenerator.java | 45 +- .../scm/security/RootCARotationHandler.java | 60 ++ .../security/RootCARotationHandlerImpl.java | 276 +++++++++ .../scm/security/RootCARotationManager.java | 541 ++++++++++++++++-- .../scm/security/RootCARotationMetrics.java | 93 +++ .../hadoop/hdds/scm/server/SCMCertStore.java | 2 + .../scm/server/SCMClientProtocolServer.java | 8 + .../scm/server/SCMSecurityProtocolServer.java | 27 +- .../scm/server/StorageContainerManager.java | 34 +- .../security/TestRootCARotationManager.java | 53 +- .../main/compose/ozonesecure-carotation/.env | 22 + .../docker-compose.yaml | 386 +++++++++++++ .../ozonesecure-carotation/docker-config | 176 ++++++ .../ozonesecure-carotation/docker-config-scm4 | 18 + .../compose/ozonesecure-carotation/krb5.conf | 41 ++ .../compose/ozonesecure-carotation/test.sh | 79 +++ .../ozonesecure/certificate-rotation.yaml | 1 + 
hadoop-ozone/dist/src/main/compose/testlib.sh | 1 + .../smoketest/scmha/root-ca-rotation.robot | 35 ++ .../hadoop/ozone/TestSecureOzoneCluster.java | 18 +- .../ozoneimpl/TestOzoneContainerWithTLS.java | 2 + .../ozone/security/OMCertificateClient.java | 9 +- .../security/ReconCertificateClient.java | 11 +- 41 files changed, 2217 insertions(+), 201 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf create mode 100755 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh create mode 100644 hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 1cd0a6a47391..ac6c08867be3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -207,6 +207,8 @@ public final class HddsConfigKeys { HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED_DEFAULT = true; public static final String HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX = "-next"; public static final String HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX = "-previous"; + public 
static final String HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX = + "-progress"; public static final String HDDS_X509_CA_ROTATION_CHECK_INTERNAL = "hdds.x509.ca.rotation.check.interval"; public static final String HDDS_X509_CA_ROTATION_CHECK_INTERNAL_DEFAULT = @@ -216,6 +218,10 @@ public final class HddsConfigKeys { // format hh:mm:ss, representing hour, minute, and second public static final String HDDS_X509_CA_ROTATION_TIME_OF_DAY_DEFAULT = "02:00:00"; + public static final String HDDS_X509_CA_ROTATION_ACK_TIMEOUT = + "hdds.x509.ca.rotation.ack.timeout"; + public static final String HDDS_X509_CA_ROTATION_ACK_TIMEOUT_DEFAULT = + "PT15M"; public static final String HDDS_CONTAINER_REPLICATION_COMPRESSION = "hdds.container.replication.compression"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java index 40b484988d55..1cfc28827488 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java @@ -136,5 +136,6 @@ public enum ResultCodes { INVALID_PIPELINE_STATE, DUPLICATED_PIPELINE_ID, TIMEOUT, + CA_ROTATION_IN_PROGRESS } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java index 543d59348c3e..f3e747de63b7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java @@ -44,6 +44,8 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_KEY_ALGORITHM; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_KEY_LEN; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_SECURITY_PROVIDER; +import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_TIME_OF_DAY; @@ -127,6 +129,7 @@ public class SecurityConfig { private final String caRotationTimeOfDay; private final Pattern caRotationTimeOfDayPattern = Pattern.compile("\\d{2}:\\d{2}:\\d{2}"); + private final Duration caAckTimeout; private final SslProvider grpcSSLProvider; /** @@ -218,6 +221,11 @@ public SecurityConfig(ConfigurationSource configuration) { } caRotationTimeOfDay = "1970-01-01T" + timeOfDayString; + String ackTimeString = configuration.get( + HDDS_X509_CA_ROTATION_ACK_TIMEOUT, + HDDS_X509_CA_ROTATION_ACK_TIMEOUT_DEFAULT); + caAckTimeout = Duration.parse(ackTimeString); + validateCertificateValidityConfig(); this.externalRootCaCert = configuration.get( @@ -287,12 +295,32 @@ private void validateCertificateValidityConfig() { throw new IllegalArgumentException(msg); } + if (caCheckInterval.isNegative() || caCheckInterval.isZero()) { + String msg = "Property " + HDDS_X509_CA_ROTATION_CHECK_INTERNAL + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + if (caCheckInterval.compareTo(renewalGracePeriod) >= 0) { throw new IllegalArgumentException("Property value of " + HDDS_X509_CA_ROTATION_CHECK_INTERNAL + " should be smaller than " + HDDS_X509_RENEW_GRACE_DURATION); } + if (caAckTimeout.isNegative() || caAckTimeout.isZero()) { + String msg = "Property " + HDDS_X509_CA_ROTATION_ACK_TIMEOUT + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + + if (caAckTimeout.compareTo(renewalGracePeriod) >= 0) { + throw new 
IllegalArgumentException("Property value of " + + HDDS_X509_CA_ROTATION_ACK_TIMEOUT + + " should be smaller than " + HDDS_X509_RENEW_GRACE_DURATION); + } + if (tokenSanityChecksEnabled && blockTokenExpiryDurationMs > renewalGracePeriod.toMillis()) { throw new IllegalArgumentException(" Certificate grace period " + @@ -396,6 +424,18 @@ public Path getCertificateLocation(String component) { return Paths.get(metadataDir, component, certificateDir); } + /** + * Returns the File path to where this component store key and certificates. + * + * @param component - Component Name - String. + * @return Path location. + */ + public Path getLocation(String component) { + Preconditions.checkNotNull(this.metadataDir, "Metadata directory can't be" + + " null. Please check configs."); + return Paths.get(metadataDir, component); + } + /** * Gets the Key Size, The default key size is 2048, since the default * algorithm used is RSA. User can change this by setting the "hdds.key.len" @@ -508,6 +548,10 @@ public String getCaRotationTimeOfDay() { return caRotationTimeOfDay; } + public Duration getCaAckTimeout() { + return caAckTimeout; + } + /** * Return true if using test certificates with authority as localhost. 
This * should be used only for unit test where certificates are generated by diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index 103bc462b89a..e3aba805463b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -35,9 +35,11 @@ import java.security.PrivateKey; import java.security.cert.X509Certificate; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; /** * An implementation of X509KeyManager that exposes a method, @@ -60,6 +62,11 @@ public class ReloadingX509KeyManager extends X509ExtendedKeyManager { */ static final char[] EMPTY_PASSWORD = new char[0]; private final AtomicReference keyManagerRef; + // Keep the old key managers, for currently we find that the netty + // tc-native component always query the first root certificate through + // chooseEngineClientAlias after the key manager is reloaded with a new one. + private final List oldKeyManagerRef; + /** * Current private key and cert used in keyManager. Used to detect if these * materials are changed. 
@@ -80,20 +87,53 @@ public ReloadingX509KeyManager(String type, CertificateClient caClient) this.type = type; keyManagerRef = new AtomicReference<>(); keyManagerRef.set(loadKeyManager(caClient)); + oldKeyManagerRef = new ArrayList<>(); } @Override public String chooseEngineClientAlias(String[] strings, Principal[] principals, SSLEngine sslEngine) { - return keyManagerRef.get() + String ret = keyManagerRef.get() .chooseEngineClientAlias(strings, principals, sslEngine); + if (ret == null && oldKeyManagerRef.size() != 0) { + for (X509ExtendedKeyManager manager: oldKeyManagerRef) { + ret = manager.chooseEngineClientAlias(strings, principals, sslEngine); + if (ret != null) { + break; + } + } + } + if (ret == null) { + LOG.info("Engine client aliases for {}, {}, {} is null", + strings == null ? "" : Arrays.stream(strings).map(Object::toString) + .collect(Collectors.joining(", ")), + principals == null ? "" : Arrays.stream(principals) + .map(Object::toString).collect(Collectors.joining(", ")), + sslEngine == null ? "" : sslEngine.toString()); + } + return ret; } @Override public String chooseEngineServerAlias(String s, Principal[] principals, SSLEngine sslEngine) { - return keyManagerRef.get() + String ret = keyManagerRef.get() .chooseEngineServerAlias(s, principals, sslEngine); + if (ret == null && oldKeyManagerRef.size() != 0) { + for (X509ExtendedKeyManager manager: oldKeyManagerRef) { + ret = manager.chooseEngineServerAlias(s, principals, sslEngine); + if (ret != null) { + break; + } + } + } + if (ret == null) { + LOG.info("Engine server aliases for {}, {}, {} is null", s, + principals == null ? "" : Arrays.stream(principals) + .map(Object::toString).collect(Collectors.joining(", ")), + sslEngine == null ? 
"" : sslEngine.toString()); + } + return ret; } @Override @@ -138,7 +178,8 @@ public ReloadingX509KeyManager loadFrom(CertificateClient caClient) { try { X509ExtendedKeyManager manager = loadKeyManager(caClient); if (manager != null) { - this.keyManagerRef.set(manager); + oldKeyManagerRef.add(keyManagerRef.get()); + keyManagerRef.set(manager); LOG.info("ReloadingX509KeyManager is reloaded"); } } catch (Exception ex) { @@ -170,6 +211,11 @@ private X509ExtendedKeyManager loadKeyManager(CertificateClient caClient) privateKey, EMPTY_PASSWORD, newCertList.toArray(new X509Certificate[0])); + LOG.info("New key manager is loaded with certificate chain"); + for (int i = 0; i < newCertList.size(); i++) { + LOG.info(newCertList.get(i).toString()); + } + KeyManagerFactory keyMgrFactory = KeyManagerFactory.getInstance( KeyManagerFactory.getDefaultAlgorithm()); keyMgrFactory.init(keystore, EMPTY_PASSWORD); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java index d2351b3e96ae..c64d09fcfe11 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java @@ -32,6 +32,8 @@ import java.security.KeyStoreException; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; +import java.util.ArrayList; +import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; @@ -55,7 +57,7 @@ public final class ReloadingX509TrustManager implements X509TrustManager { /** * Current Root CA cert in trustManager, to detect if certificate is changed. */ - private String currentRootCACertId = null; + private List currentRootCACertIds = new ArrayList<>(); /** * Creates a reloadable trustmanager. 
The trustmanager reloads itself @@ -127,23 +129,22 @@ public ReloadingX509TrustManager loadFrom(CertificateClient caClient) { X509TrustManager loadTrustManager(CertificateClient caClient) throws GeneralSecurityException, IOException { // SCM certificate client sets root CA as CA cert instead of root CA cert - X509Certificate rootCACert = caClient.getRootCACertificate() == null ? - caClient.getCACertificate() : caClient.getRootCACertificate(); + Set rootCACerts = caClient.getAllRootCaCerts().isEmpty() ? + caClient.getAllCaCerts() : caClient.getAllRootCaCerts(); - String rootCACertId = rootCACert.getSerialNumber().toString(); // Certificate keeps the same. - if (currentRootCACertId != null && - currentRootCACertId.equals(rootCACertId)) { + if (rootCACerts.size() > 0 && + currentRootCACertIds.size() == rootCACerts.size() && + !rootCACerts.stream().filter( + c -> !currentRootCACertIds.contains(c.getSerialNumber().toString())) + .findAny().isPresent()) { return null; } X509TrustManager trustManager = null; KeyStore ks = KeyStore.getInstance(type); ks.load(null, null); - Set caCertsToInsert = - caClient.getRootCACertificate() == null ? 
caClient.getAllCaCerts() : - caClient.getAllRootCaCerts(); - insertCertsToKeystore(caCertsToInsert, ks); + insertCertsToKeystore(rootCACerts, ks); TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance( TrustManagerFactory.getDefaultAlgorithm()); @@ -155,15 +156,19 @@ X509TrustManager loadTrustManager(CertificateClient caClient) break; } } - currentRootCACertId = rootCACertId; + currentRootCACertIds.clear(); + rootCACerts.stream().forEach( + c -> currentRootCACertIds.add(c.getSerialNumber().toString())); return trustManager; } private void insertCertsToKeystore(Iterable certs, KeyStore ks) throws KeyStoreException { + LOG.info("New trust manager is loaded with certificates"); for (X509Certificate certToInsert : certs) { String certId = certToInsert.getSerialNumber().toString(); ks.setCertificateEntry(certId, certToInsert); + LOG.info(certToInsert.toString()); } } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java index 6c7bb5389adf..c6d15ab2219f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java @@ -296,7 +296,8 @@ public synchronized void writeCertificate(Path basePath, String fileName, try (FileOutputStream file = new FileOutputStream(certificateFile)) { file.write(pemEncodedCertificate.getBytes(DEFAULT_CHARSET)); } - + LOG.info("Save certificate to {}", certificateFile.getAbsolutePath()); + LOG.info("Certificate {}", pemEncodedCertificate); Files.setPosixFilePermissions(certificateFile.toPath(), permissionSet); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java 
index f6bfcfd23705..c417062cf197 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -480,8 +480,10 @@ private OzoneConsts() { public static final String SCM_ROOT_CA_COMPONENT_NAME = Paths.get(SCM_CA_CERT_STORAGE_DIR, SCM_CA_PATH).toString(); - public static final String SCM_SUB_CA_PREFIX = "scm-sub@"; - public static final String SCM_ROOT_CA_PREFIX = "scm@"; + // %s to distinguish different certificates + public static final String SCM_SUB_CA = "scm-sub"; + public static final String SCM_SUB_CA_PREFIX = SCM_SUB_CA + "-%s@"; + public static final String SCM_ROOT_CA_PREFIX = "scm-%s@"; // Layout Version written into Meta Table ONLY during finalization. public static final String LAYOUT_VERSION_KEY = "#LAYOUTVERSION"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index a9a07371a1b8..484e5bfd3cd0 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -2260,6 +2260,14 @@ and second. + + hdds.x509.ca.rotation.ack.timeout + PT15M + OZONE, HDDS, SECURITY + Max time that SCM leader will wait for the rotation preparation acks before it believes the rotation + is failed. Default is 15 minutes. 
+ + ozone.scm.security.handler.count.key 2 diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java index c94408f08ee6..0187405f936b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java @@ -132,6 +132,7 @@ public class DefaultCAServer implements CertificateServer { private CertificateStore store; private Lock lock; private static boolean testSecureFlag; + private BigInteger rootCertificateId; /** * Create an Instance of DefaultCAServer. @@ -141,17 +142,25 @@ public class DefaultCAServer implements CertificateServer { * @param certificateStore - A store used to persist Certificates. */ public DefaultCAServer(String subject, String clusterID, String scmID, - CertificateStore certificateStore, + CertificateStore certificateStore, BigInteger rootCertId, PKIProfile pkiProfile, String componentName) { this.subject = subject; this.clusterID = clusterID; this.scmID = scmID; this.store = certificateStore; + this.rootCertificateId = rootCertId; this.profile = pkiProfile; this.componentName = componentName; lock = new ReentrantLock(); } + public DefaultCAServer(String subject, String clusterID, String scmID, + CertificateStore certificateStore, PKIProfile pkiProfile, + String componentName) { + this(subject, clusterID, scmID, certificateStore, BigInteger.ONE, + pkiProfile, componentName); + } + @Override public void init(SecurityConfig securityConfig, CAType type) throws IOException { @@ -568,7 +577,7 @@ private void generateRootCertificate( .setClusterID(this.clusterID) .setBeginDate(beginDate) .setEndDate(endDate) - .makeCA() + .makeCA(rootCertificateId) .setConfiguration(securityConfig) 
.setKey(key); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java index 8c5c91320338..60853273bd37 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java @@ -99,7 +99,7 @@ public CertificateSignRequest.Builder getCSRBuilder() @Override public String signAndStoreCertificate(PKCS10CertificationRequest csr, - Path certificatePath) throws CertificateException { + Path certificatePath, boolean renew) throws CertificateException { try { // TODO: For SCM CA we should fetch certificate from multiple SCMs. SCMSecurityProtocolProtos.SCMGetCertResponseProto response = @@ -113,16 +113,14 @@ public String signAndStoreCertificate(PKCS10CertificationRequest csr, getSecurityConfig(), certificatePath); // Certs will be added to cert map after reloadAllCertificate called storeCertificate(pemEncodedCert, CAType.NONE, - certCodec, - false); + certCodec, false, !renew); storeCertificate(response.getX509CACertificate(), - CAType.SUBORDINATE, - certCodec, false); + CAType.SUBORDINATE, certCodec, false, !renew); // Store Root CA certificate. 
if (response.hasX509RootCACertificate()) { storeCertificate(response.getX509RootCACertificate(), - CAType.ROOT, certCodec, false); + CAType.ROOT, certCodec, false, !renew); } // Return the default certificate ID return CertificateCodec.getX509Certificate(pemEncodedCert) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index d64cabf5c92a..2e4b599ab971 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -54,11 +54,13 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.stream.Stream; import java.util.stream.Collectors; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; @@ -124,6 +126,7 @@ public abstract class DefaultCertificateClient implements CertificateClient { private Runnable shutdownCallback; private SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient; private final Set notificationReceivers; + private static Semaphore semaphore = new Semaphore(1); protected DefaultCertificateClient( SecurityConfig securityConfig, @@ -550,12 +553,12 @@ public void storeCertificate(String pemEncodedCert, CertificateCodec certificateCodec = new CertificateCodec(securityConfig, component); storeCertificate(pemEncodedCert, caType, - certificateCodec, 
true); + certificateCodec, true, false); } public synchronized void storeCertificate(String pemEncodedCert, - CAType caType, CertificateCodec codec, boolean addToCertMap) - throws CertificateException { + CAType caType, CertificateCodec codec, boolean addToCertMap, + boolean updateCA) throws CertificateException { try { CertPath certificatePath = CertificateCodec.getCertPathFromPemEncodedString(pemEncodedCert); @@ -564,11 +567,13 @@ public synchronized void storeCertificate(String pemEncodedCert, String certName = String.format(CERT_FILE_NAME_FORMAT, caType.getFileNamePrefix() + cert.getSerialNumber().toString()); - if (caType == CAType.SUBORDINATE) { - caCertId = cert.getSerialNumber().toString(); - } - if (caType == CAType.ROOT) { - rootCaCertId = cert.getSerialNumber().toString(); + if (updateCA) { + if (caType == CAType.SUBORDINATE) { + caCertId = cert.getSerialNumber().toString(); + } + if (caType == CAType.ROOT) { + rootCaCertId = cert.getSerialNumber().toString(); + } } codec.writeCertificate(certName, @@ -894,12 +899,19 @@ public synchronized X509Certificate getRootCACertificate() { @Override public Set getAllRootCaCerts() { - return Collections.unmodifiableSet(rootCaCertificates); + Set certs = + Collections.unmodifiableSet(rootCaCertificates); + getLogger().info("{} has {} Root CA certificates", this.component, + certs.size()); + return certs; } @Override public Set getAllCaCerts() { - return Collections.unmodifiableSet(caCertificates); + Set certs = Collections.unmodifiableSet(caCertificates); + getLogger().info("{} has {} CA certificates", this.component, + certs.size()); + return certs; } @Override @@ -1043,7 +1055,7 @@ public String renewAndStoreKeyAndCertificate(boolean force) CertificateSignRequest.Builder csrBuilder = getCSRBuilder(); csrBuilder.setKey(newKeyPair); newCertSerialId = signAndStoreCertificate(csrBuilder.build(), - Paths.get(newCertPath)); + Paths.get(newCertPath), true); } catch (Exception e) { throw new CertificateException("Error 
while signing and storing new" + " certificates.", e, RENEW_ERROR); @@ -1183,16 +1195,20 @@ public void cleanBackupDir() { } } - synchronized void reloadKeyAndCertificate(String newCertId) { - // reset current value + public synchronized void reloadKeyAndCertificate(String newCertId) { privateKey = null; publicKey = null; certPath = null; caCertId = null; rootCaCertId = null; - updateCertSerialId(newCertId); - getLogger().info("Reset and reload key and all certificates."); + String oldCaCertId = updateCertSerialId(newCertId); + getLogger().info("Reset and reloaded key and all certificates for new " + + "certificate {}.", newCertId); + + // notify notification receivers + notificationReceivers.forEach(r -> r.notifyCertificateRenewed( + this, oldCaCertId, newCertId)); } public SecurityConfig getSecurityConfig() { @@ -1201,12 +1217,19 @@ public SecurityConfig getSecurityConfig() { private synchronized String updateCertSerialId(String newCertSerialId) { certSerialId = newCertSerialId; + getLogger().info("Certificate serial ID set to {}", certSerialId); loadAllCertificates(); return certSerialId; } - protected abstract String signAndStoreCertificate( + protected String signAndStoreCertificate( PKCS10CertificationRequest request, Path certificatePath) + throws CertificateException { + return signAndStoreCertificate(request, certificatePath, false); + } + + protected abstract String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certificatePath, boolean renew) throws CertificateException; public String signAndStoreCertificate( @@ -1220,6 +1243,14 @@ public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() return scmSecurityClient; } + public static void acquirePermit() throws InterruptedException { + semaphore.acquire(); + } + + public static void releasePermit() { + semaphore.release(); + } + public synchronized void startCertificateMonitor() { Preconditions.checkNotNull(getCertificate(), "Component certificate should not be empty"); @@ 
-1237,8 +1268,7 @@ public synchronized void startCertificateMonitor() { getComponentName() + "-CertificateLifetimeMonitor") .setDaemon(true).build()); } - this.executorService.scheduleAtFixedRate( - new CertificateLifetimeMonitor(this), + this.executorService.scheduleAtFixedRate(new CertificateLifetimeMonitor(), timeBeforeGracePeriod, interval, TimeUnit.MILLISECONDS); getLogger().info("CertificateLifetimeMonitor for {} is started with " + "first delay {} ms and interval {} ms.", component, @@ -1249,10 +1279,8 @@ public synchronized void startCertificateMonitor() { * Task to monitor certificate lifetime and renew the certificate if needed. */ public class CertificateLifetimeMonitor implements Runnable { - private CertificateClient certClient; - public CertificateLifetimeMonitor(CertificateClient client) { - this.certClient = client; + public CertificateLifetimeMonitor() { } @Override @@ -1264,14 +1292,21 @@ public void run() { // 2. switch on disk new keys and certificate with current ones // 3. save new certificate ID into service VERSION file // 4. refresh in memory certificate ID and reload all new certificates - synchronized (DefaultCertificateClient.class) { + try { + acquirePermit(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } + try { X509Certificate currentCert = getCertificate(); Duration timeLeft = timeBeforeExpiryGracePeriod(currentCert); if (timeLeft.isZero()) { String newCertId; try { - getLogger().info("Current certificate has entered the expiry" + + getLogger().info("Current certificate {} has entered the expiry" + " grace period {}. 
Starting renew key and certs.", + currentCert.getSerialNumber().toString(), timeLeft, securityConfig.getRenewalGracePeriod()); newCertId = renewAndStoreKeyAndCertificate(false); } catch (CertificateException e) { @@ -1297,11 +1332,22 @@ public void run() { reloadKeyAndCertificate(newCertId); // cleanup backup directory cleanBackupDir(); - // notify notification receivers - notificationReceivers.forEach(r -> r.notifyCertificateRenewed( - certClient, currentCert.getSerialNumber().toString(), newCertId)); } + } finally { + releasePermit(); } } } + + /** + * Set the CA certificate. For TEST only. + */ + @VisibleForTesting + public synchronized void setCACertificate(X509Certificate cert) + throws Exception { + caCertId = cert.getSerialNumber().toString(); + certificateMap.put(caCertId, + CertificateCodec.getCertPathFromPemEncodedString( + CertificateCodec.getPEMEncodedString(cert))); + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index 51eb2959a6e9..ea2924d930bd 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -20,21 +20,30 @@ import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; +import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import 
org.apache.hadoop.hdds.security.x509.exception.CertificateException; +import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.OzoneConsts; import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.security.KeyPair; +import java.security.cert.X509Certificate; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.FAILURE; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.GETCERT; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.RECOVER; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.SUCCESS; +import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest.getEncodedString; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_SUB_CA_PREFIX; /** * SCM Certificate Client which is used for generating public/private Key pair, @@ -49,12 +58,24 @@ public class SCMCertificateClient extends DefaultCertificateClient { public static final String COMPONENT_NAME = Paths.get(OzoneConsts.SCM_CA_CERT_STORAGE_DIR, OzoneConsts.SCM_SUB_CA_PATH).toString(); + private String scmId; + private String cId; + private String scmHostname; + + public SCMCertificateClient(SecurityConfig securityConfig, + SCMSecurityProtocolClientSideTranslatorPB scmClient, + String scmId, String clusterId, String scmCertId, String hostname) { + super(securityConfig, scmClient, LOG, scmCertId, + COMPONENT_NAME, null, null); + this.scmId = scmId; + this.cId = clusterId; + this.scmHostname = hostname; + } public SCMCertificateClient( SecurityConfig securityConfig, SCMSecurityProtocolClientSideTranslatorPB scmClient, - String certSerialId - ) { + String certSerialId) { super(securityConfig, scmClient, LOG, 
certSerialId, COMPONENT_NAME, null, null); } @@ -135,7 +156,16 @@ protected InitResponse handleCase(InitCase init) @Override public CertificateSignRequest.Builder getCSRBuilder() throws CertificateException { + String subject = String.format(SCM_SUB_CA_PREFIX, System.nanoTime()) + + scmHostname; + + LOG.info("Creating csr for SCM->hostName:{},scmId:{},clusterId:{}," + + "subject:{}", scmHostname, scmId, cId, subject); + return super.getCSRBuilder() + .setSubject(subject) + .setScmID(scmId) + .setClusterID(cId) .setDigitalEncryption(true) .setDigitalSignature(true) // Set CA to true, as this will be used to sign certs for OM/DN. @@ -151,8 +181,44 @@ public Logger getLogger() { @Override public String signAndStoreCertificate(PKCS10CertificationRequest request, - Path certPath) throws CertificateException { - throw new UnsupportedOperationException("signAndStoreCertificate of " + - " SCMCertificateClient is not supported currently"); + Path certPath, boolean renew) throws CertificateException { + try { + HddsProtos.ScmNodeDetailsProto scmNodeDetailsProto = + HddsProtos.ScmNodeDetailsProto.newBuilder() + .setClusterId(cId) + .setHostName(scmHostname) + .setScmNodeId(scmId).build(); + + // Get SCM sub CA cert. + SCMSecurityProtocolProtos.SCMGetCertResponseProto response = + getScmSecureClient().getSCMCertChain(scmNodeDetailsProto, + getEncodedString(request)); + + CertificateCodec certCodec = new CertificateCodec( + getSecurityConfig(), certPath); + String pemEncodedCert = response.getX509Certificate(); + + // Store SCM sub CA and root CA certificate. 
+ if (response.hasX509CACertificate()) { + String pemEncodedRootCert = response.getX509CACertificate(); + storeCertificate(pemEncodedRootCert, + CAType.SUBORDINATE, certCodec, false, !renew); + storeCertificate(pemEncodedCert, CAType.NONE, certCodec, + false, !renew); + //note: this does exactly the same as store certificate + certCodec.writeCertificate(certCodec.getLocation().toAbsolutePath(), + getSecurityConfig().getCertificateFileName(), pemEncodedCert); + + X509Certificate certificate = + CertificateCodec.getX509Certificate(pemEncodedCert); + // return new scm cert serial ID. + return certificate.getSerialNumber().toString(); + } else { + throw new RuntimeException("Unable to retrieve SCM certificate chain"); + } + } catch (Throwable e) { + LOG.error("Error while fetching/storing SCM signed certificate.", e); + throw new RuntimeException(e); + } } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java index c4e24783c3b8..1a3ef2d7f0d9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java @@ -99,6 +99,11 @@ public KeyCodec(SecurityConfig config, Path keyDir) { this.securityConfig = config; isPosixFileSystem = KeyCodec::isPosix; this.location = keyDir; + if (!location.toFile().exists()) { + if (!location.toFile().mkdirs()) { + throw new RuntimeException("Failed to create directory " + location); + } + } } /** @@ -193,7 +198,7 @@ public void writePublicKey(PublicKey key) throws IOException { securityConfig.getPublicKeyFileName()).toFile(); if (Files.exists(publicKeyFile.toPath())) { - throw new IOException("Private key already exist."); + throw new IOException("Public key already exist."); } try (PemWriter keyWriter = new PemWriter(new diff --git 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java index 2fe0bf84ea24..c0af10a3da65 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java @@ -488,6 +488,13 @@ public String signAndStoreCertificate( PKCS10CertificationRequest request, Path certificatePath) { return null; } + + @Override + public String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certificatePath, + boolean renew) throws CertificateException { + return null; + } }) { InitResponse resp = client.init(); @@ -581,7 +588,7 @@ public void testRenewAndStoreKeyAndCertificate() throws Exception { newCertDir.toPath()); dnCertClient.storeCertificate(getPEMEncodedString(cert), CAType.NONE, - certCodec, false); + certCodec, false, false); // a success renew after auto cleanup new key and cert dir dnCertClient.renewAndStoreKeyAndCertificate(true); } @@ -623,6 +630,13 @@ protected String signAndStoreCertificate( PKCS10CertificationRequest request, Path certificatePath) { return ""; } + + @Override + protected String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certificatePath, + boolean renew) throws CertificateException { + return null; + } }; Thread[] threads = new Thread[Thread.activeCount()]; diff --git a/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto index 4fb0737b3925..3ae9879f9404 100644 --- a/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto @@ -30,6 +30,7 @@ enum 
RequestType { STATEFUL_SERVICE_CONFIG = 7; FINALIZE = 8; SECRET_KEY = 9; + CERT_ROTATE = 10; } message Method { diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index 098700642172..0d468ed0ab14 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -133,6 +133,7 @@ enum Status { INVALID_PIPELINE_STATE = 40; DUPLICATED_PIPELINE_ID = 41; TIMEOUT = 42; + CA_ROTATION_IN_PROGRESS = 43; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java index a8cb1880ee06..370b5fd09497 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java @@ -53,8 +53,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.math.BigInteger; import java.net.InetAddress; -import java.net.InetSocketAddress; import java.security.cert.CertPath; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; @@ -89,12 +89,11 @@ private HASecurityUtils() { * signed certificate and persist to local disk. 
* @param scmStorageConfig * @param conf - * @param scmAddress + * @param scmHostname * @throws IOException */ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, - OzoneConfiguration conf, - InetSocketAddress scmAddress, boolean primaryscm) + OzoneConfiguration conf, String scmHostname, boolean primaryscm) throws IOException { LOG.info("Initializing secure StorageContainerManager."); @@ -102,8 +101,9 @@ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient = getScmSecurityClientWithMaxRetry(conf, getCurrentUser()); try (CertificateClient certClient = - new SCMCertificateClient( - securityConfig, scmSecurityClient, scmStorageConfig.getScmId())) { + new SCMCertificateClient(securityConfig, scmSecurityClient, + scmStorageConfig.getScmId(), scmStorageConfig.getClusterID(), + scmStorageConfig.getScmCertSerialId(), scmHostname)) { InitResponse response = certClient.init(); LOG.info("Init response: {}", response); switch (response) { @@ -113,10 +113,10 @@ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, case GETCERT: if (!primaryscm) { getRootCASignedSCMCert(conf, certClient, securityConfig, - scmStorageConfig, scmAddress); + scmStorageConfig, scmHostname); } else { getPrimarySCMSelfSignedCert(certClient, securityConfig, - scmStorageConfig, scmAddress); + scmStorageConfig, scmHostname); } LOG.info("Successfully stored SCM signed certificate."); break; @@ -141,21 +141,18 @@ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, * client. */ private static void getRootCASignedSCMCert( - OzoneConfiguration configuration, - CertificateClient client, + OzoneConfiguration configuration, CertificateClient client, SecurityConfig securityConfig, - SCMStorageConfig scmStorageConfig, - InetSocketAddress scmAddress - ) { + SCMStorageConfig scmStorageConfig, String scmHostname) { try { // Generate CSR. 
PKCS10CertificationRequest csr = generateCSR(client, scmStorageConfig, - securityConfig, scmAddress); + securityConfig, scmHostname); ScmNodeDetailsProto scmNodeDetailsProto = ScmNodeDetailsProto.newBuilder() .setClusterId(scmStorageConfig.getClusterID()) - .setHostName(scmAddress.getHostName()) + .setHostName(scmHostname) .setScmNodeId(scmStorageConfig.getScmId()).build(); // Create SCM security client. @@ -198,16 +195,15 @@ private static void getRootCASignedSCMCert( */ private static void getPrimarySCMSelfSignedCert(CertificateClient client, SecurityConfig config, SCMStorageConfig scmStorageConfig, - InetSocketAddress scmAddress) { + String scmHostname) { try { - CertificateServer rootCAServer = initializeRootCertificateServer(config, null, scmStorageConfig, new DefaultCAProfile()); PKCS10CertificationRequest csr = generateCSR(client, scmStorageConfig, - config, scmAddress); + config, scmHostname); CertPath subSCMCertHolderList = rootCAServer. requestCertificate(csr, KERBEROS_TRUSTED, SCM).get(); @@ -251,47 +247,65 @@ private static void getPrimarySCMSelfSignedCert(CertificateClient client, * @param config * @param scmCertStore * @param scmStorageConfig + * @param pkiProfile + * @param component */ public static CertificateServer initializeRootCertificateServer( SecurityConfig config, CertificateStore scmCertStore, - SCMStorageConfig scmStorageConfig, PKIProfile pkiProfile) - throws IOException { - String subject = SCM_ROOT_CA_PREFIX + + SCMStorageConfig scmStorageConfig, BigInteger rootCertId, + PKIProfile pkiProfile, String component) throws IOException { + String subject = String.format(SCM_ROOT_CA_PREFIX, rootCertId) + InetAddress.getLocalHost().getHostName(); DefaultCAServer rootCAServer = new DefaultCAServer(subject, scmStorageConfig.getClusterID(), - scmStorageConfig.getScmId(), scmCertStore, pkiProfile, - SCM_ROOT_CA_COMPONENT_NAME); + scmStorageConfig.getScmId(), scmCertStore, rootCertId, pkiProfile, + component); rootCAServer.init(config, CAType.ROOT); 
return rootCAServer; } + /** + * This function creates/initializes a certificate server as needed. + * This function is idempotent, so calling this again and again after the + * server is initialized is not a problem. + * + * @param config + * @param scmCertStore + * @param scmStorageConfig + * @param pkiProfile + */ + public static CertificateServer initializeRootCertificateServer( + SecurityConfig config, CertificateStore scmCertStore, + SCMStorageConfig scmStorageConfig, PKIProfile pkiProfile) + throws IOException { + return initializeRootCertificateServer(config, scmCertStore, + scmStorageConfig, BigInteger.ONE, pkiProfile, + SCM_ROOT_CA_COMPONENT_NAME); + } + /** * Generate CSR to obtain SCM sub CA certificate. */ private static PKCS10CertificationRequest generateCSR( CertificateClient client, SCMStorageConfig scmStorageConfig, - SecurityConfig config, InetSocketAddress scmAddress) + SecurityConfig config, String scmHostname) throws IOException { CertificateSignRequest.Builder builder = client.getCSRBuilder(); // Get host name. - String hostname = scmAddress.getHostName(); - - String subject = SCM_SUB_CA_PREFIX + hostname; + String subject = String.format(SCM_SUB_CA_PREFIX, System.nanoTime()) + + scmHostname; - builder - .setConfiguration(config) + builder.setConfiguration(config) .setScmID(scmStorageConfig.getScmId()) .setClusterID(scmStorageConfig.getClusterID()) .setSubject(subject); - LOG.info("Creating csr for SCM->hostName:{},scmId:{},clusterId:{}," + - "subject:{}", hostname, scmStorageConfig.getScmId(), + "subject:{}", scmHostname, scmStorageConfig.getScmId(), scmStorageConfig.getClusterID(), subject); return builder.build(); @@ -336,24 +350,23 @@ public static GrpcTlsConfig createSCMRatisTLSConfig(SecurityConfig conf, } /** - * Submit SCM certs request to ratis using RaftClient. + * Submit SCM request to ratis using RaftClient. * @param raftGroup * @param tlsConfig * @param message * @return SCMRatisResponse. 
* @throws Exception */ - public static SCMRatisResponse submitScmCertsToRatis(RaftGroup raftGroup, + public static SCMRatisResponse submitScmRequestToRatis(RaftGroup raftGroup, GrpcTlsConfig tlsConfig, Message message) throws Exception { // TODO: GRPC TLS only for now, netty/hadoop RPC TLS support later. final SupportedRpcType rpc = SupportedRpcType.GRPC; final RaftProperties properties = RatisHelper.newRaftProperties(rpc); - // For now not making anything configurable, RaftClient is only used // in SCM for DB updates of sub-ca certs go via Ratis. - RaftClient.Builder builder = RaftClient.newBuilder() + RaftClient.Builder builder = RaftClient.newBuilder() .setRaftGroup(raftGroup) .setLeaderId(null) .setProperties(properties) @@ -370,7 +383,7 @@ public static SCMRatisResponse submitScmCertsToRatis(RaftGroup raftGroup, } private static SCMSecurityProtocolClientSideTranslatorPB - getScmSecurityClientWithFixedDuration(OzoneConfiguration conf) + getScmSecurityClientWithFixedDuration(OzoneConfiguration conf) throws IOException { // As for OM during init, we need to wait for specific duration so that // we can give response to user performed operation init in a definite @@ -391,5 +404,20 @@ public static SCMRatisResponse submitScmCertsToRatis(RaftGroup raftGroup, return new SCMSecurityProtocolClientSideTranslatorPB( new SCMSecurityProtocolFailoverProxyProvider(conf, UserGroupInformation.getCurrentUser())); + + } + + public static boolean isSelfSignedCertificate(X509Certificate cert) { + if (cert.getIssuerX500Principal().equals(cert.getSubjectX500Principal())) { + return true; + } + return false; + } + + public static boolean isCACertificate(X509Certificate cert) { + if (cert.getBasicConstraints() != -1) { + return true; + } + return false; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java index 
f87758676d89..32ad2c2adffe 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java @@ -120,14 +120,17 @@ private Object invokeRatisImpl(Method method, Object[] args) method.getName(), method.getParameterTypes(), args); // Scm Cert DB updates should use RaftClient. - // As rootCA which is primary SCM only can issues certificates to sub-CA. + // As rootCA which is primary SCM only can issue certificates to sub-CA. // In case primary is not leader SCM, still sub-ca cert DB updates should go // via ratis. So, in this special scenario we use RaftClient. + // Or rotationPrepareAck which every SCM will send out to confirm that + // sub CA rotation preparation is done. final SCMRatisResponse response; - if (method.getName().equals("storeValidCertificate") && - args[args.length - 1].equals(HddsProtos.NodeType.SCM)) { + if ((method.getName().equals("storeValidCertificate") && + args[args.length - 1].equals(HddsProtos.NodeType.SCM)) || + method.getName().equals("rotationPrepareAck")) { response = - HASecurityUtils.submitScmCertsToRatis( + HASecurityUtils.submitScmRequestToRatis( ratisHandler.getDivision().getGroup(), ratisHandler.getGrpcTlsConfig(), scmRatisRequest.encode()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java index fba776471ace..94cbbc9297e0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.metadata.Replicate; import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore; +import 
org.apache.hadoop.hdds.security.x509.certificate.CertInfo; import org.apache.hadoop.hdds.utils.UniqueId; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.Table.KeyValue; @@ -34,6 +35,8 @@ import java.io.IOException; import java.lang.reflect.Proxy; +import java.math.BigInteger; +import java.security.cert.X509Certificate; import java.time.LocalDate; import java.util.HashMap; import java.util.Map; @@ -66,6 +69,7 @@ public class SequenceIdGenerator { public static final String LOCAL_ID = "localId"; public static final String DEL_TXN_ID = "delTxnId"; public static final String CONTAINER_ID = "containerId"; + public static final String ROOT_CERTIFICATE_ID = "rootCertificateId"; private static final long INVALID_SEQUENCE_ID = 0; @@ -121,7 +125,8 @@ public long getNextId(String sequenceIdName) throws SCMException { batch.nextId = prevLastId + 1; Preconditions.checkArgument(Long.MAX_VALUE - batch.lastId >= batchSize); - batch.lastId += batchSize; + batch.lastId += sequenceIdName.equals(ROOT_CERTIFICATE_ID) ? 
+ 1 : batchSize; if (stateManager.allocateBatch(sequenceIdName, prevLastId, batch.lastId)) { @@ -364,7 +369,7 @@ public static void upgradeToSequenceId(SCMMetadataStore scmMetadataStore) long largestContainerId = 0; try (TableIterator> iterator = - scmMetadataStore.getContainerTable().iterator()) { + scmMetadataStore.getContainerTable().iterator()) { while (iterator.hasNext()) { ContainerInfo containerInfo = iterator.next().getValue(); largestContainerId = @@ -376,5 +381,41 @@ public static void upgradeToSequenceId(SCMMetadataStore scmMetadataStore) LOG.info("upgrade {} to {}", CONTAINER_ID, sequenceIdTable.get(CONTAINER_ID)); } + + // upgrade root certificate ID + if (sequenceIdTable.get(ROOT_CERTIFICATE_ID) == null) { + long largestRootCertId = BigInteger.ONE.longValueExact(); + try (TableIterator> iterator = + scmMetadataStore.getValidSCMCertsTable().iterator()) { + while (iterator.hasNext()) { + X509Certificate cert = iterator.next().getValue(); + if (HASecurityUtils.isSelfSignedCertificate(cert) && + HASecurityUtils.isCACertificate(cert)) { + largestRootCertId = + Long.max(cert.getSerialNumber().longValueExact(), + largestRootCertId); + } + } + } + + try (TableIterator> iterator = + scmMetadataStore.getRevokedCertsV2Table().iterator()) { + while (iterator.hasNext()) { + X509Certificate cert = + iterator.next().getValue().getX509Certificate(); + if (HASecurityUtils.isSelfSignedCertificate(cert) && + HASecurityUtils.isCACertificate(cert)) { + largestRootCertId = + Long.max(cert.getSerialNumber().longValueExact(), + largestRootCertId); + } + } + } + sequenceIdTable.put(ROOT_CERTIFICATE_ID, largestRootCertId); + LOG.info("upgrade {} to {}", + ROOT_CERTIFICATE_ID, sequenceIdTable.get(ROOT_CERTIFICATE_ID)); + } } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java new file mode 100644 index 
000000000000..a27424f46149 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.security; + +import org.apache.hadoop.hdds.scm.metadata.Replicate; + +import java.io.IOException; +import java.util.concurrent.TimeoutException; + +/** + * This interface defines APIs for sub-ca rotation instructions. + */ +public interface RootCARotationHandler { + + /** + * Notify SCM peers to do sub-ca rotation preparation and replicate + * this operation through RATIS. + * @param rootCertId the new root certificate serial ID + * @throws IOException on failure to persist configuration + */ + @Replicate + void rotationPrepare(String rootCertId) + throws IOException, TimeoutException; + + @Replicate + void rotationPrepareAck(String rootCertId, String scmCertId, String scmId) + throws IOException, TimeoutException; + + @Replicate + void rotationCommit(String rootCertId) + throws IOException, TimeoutException; + + @Replicate + void rotationCommitted(String rootCertId) + throws IOException, TimeoutException; + + int rotationPrepareAcks(); + + void resetRotationPrepareAcks(); + + void setSubCACertId(String subCACertId); + + String getSubCACertId(); +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java new file mode 100644 index 000000000000..f8ffea5a1eed --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.security; + +import com.google.common.base.Preconditions; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.scm.ha.SCMHAInvocationHandler; +import org.apache.hadoop.hdds.scm.ha.SCMRatisServer; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Proxy; +import java.math.BigInteger; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.security.cert.X509Certificate; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; +import static org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.CERT_ROTATE; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME; + +/** + * Root CA Rotation Handler for ratis SCM statemachine. 
+ */ +public class RootCARotationHandlerImpl implements RootCARotationHandler { + + public static final Logger LOG = + LoggerFactory.getLogger(RootCARotationHandlerImpl.class); + + private final StorageContainerManager scm; + private final SCMCertificateClient scmCertClient; + private final SecurityConfig secConfig; + private Set<String> newScmCertIdSet = new HashSet<>(); + private String newCAComponent = SCM_ROOT_CA_COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + + HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; + private final String newSubCAPath; + private final RootCARotationManager rotationManager; + private AtomicReference<String> newSubCACertId = new AtomicReference<>(); + private AtomicReference<String> newRootCACertId = new AtomicReference<>(); + + /** + * Constructs RootCARotationHandlerImpl with the specified arguments. + * + * @param scm the storage container manager + */ + public RootCARotationHandlerImpl(StorageContainerManager scm, + RootCARotationManager manager) { + this.scm = scm; + this.rotationManager = manager; + this.scmCertClient = (SCMCertificateClient) scm.getScmCertificateClient(); + this.secConfig = scmCertClient.getSecurityConfig(); + + this.newSubCAPath = secConfig.getLocation( + scmCertClient.getComponentName()).toString() + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; + } + + @Override + public void rotationPrepare(String rootCertId) + throws IOException, TimeoutException { + LOG.info("Received rotation prepare command of root certificate {}", + rootCertId); + if (shouldSkipRootCert(rootCertId)) { + return; + } + + newRootCACertId.set(rootCertId); + newScmCertIdSet.clear(); + newSubCACertId.set(null); + rotationManager.scheduleSubCaRotationPrepareTask(rootCertId); + } + + @Override + public void rotationPrepareAck(String rootCertId, + String scmCertId, String scmId) throws IOException, TimeoutException { + LOG.info("Received rotation prepare ack of root certificate {} from scm {}", + rootCertId, scmId); + + // Only the leader counts the acks + if
(rotationManager.isRunning()) { + if (shouldSkipRootCert(rootCertId)) { + return; + } + if (rootCertId.equals(newRootCACertId.get())) { + newScmCertIdSet.add(scmCertId); + } + } + } + + @Override + public void rotationCommit(String rootCertId) + throws IOException, TimeoutException { + LOG.info("Received rotation commit command of root certificate {}", + rootCertId); + if (shouldSkipRootCert(rootCertId)) { + return; + } + + // switch sub CA key and certs directory on disk + File currentSubCaDir = new File(secConfig.getLocation( + scmCertClient.getComponentName()).toString()); + File backupSubCaDir = new File(secConfig.getLocation( + scmCertClient.getComponentName() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX).toString()); + File newSubCaDir = new File(newSubCAPath); + + try { + // move current -> backup + Files.move(currentSubCaDir.toPath(), backupSubCaDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", currentSubCaDir, backupSubCaDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when move " + currentSubCaDir + " to " + + backupSubCaDir; + scm.shutDown(message); + } + + try { + // move new -> current + Files.move(newSubCaDir.toPath(), currentSubCaDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", newSubCaDir, currentSubCaDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when move " + newSubCaDir + " to " + + currentSubCaDir; + scm.shutDown(message); + } + + try { + String certId = newSubCACertId.get(); + LOG.info("Persistent new scm certificate {}", certId); + scm.getScmStorageConfig().setScmCertSerialId(certId); + scm.getScmStorageConfig().persistCurrentState(); + } catch (IOException e) { + LOG.error("Failed to persist new SCM certificate ID", e); + String message = "Terminate SCM, 
encounter IO exception(" + + e.getMessage() + ") when persist new SCM certificate ID"; + scm.shutDown(message); + } + } + + @Override + public void rotationCommitted(String rootCertId) + throws IOException, TimeoutException { + LOG.info("Received rotation committed command of root certificate {}", + rootCertId); + if (shouldSkipRootCert(rootCertId)) { + return; + } + + // turn on new root CA certificate and sub CA certificate + scmCertClient.reloadKeyAndCertificate(newSubCACertId.get()); + + // cleanup backup directory + File backupSubCaDir = new File(secConfig.getLocation( + scmCertClient.getComponentName() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX).toString()); + try { + FileUtils.deleteDirectory(backupSubCaDir); + } catch (IOException e) { + LOG.error("Failed to delete backup dir {}", backupSubCaDir, e); + } + + // release lock + rotationManager.checkAndReleaseLock(); + + // reset state + newSubCACertId.set(null); + } + + @Override + public int rotationPrepareAcks() { + return newScmCertIdSet.size(); + } + + @Override + public void resetRotationPrepareAcks() { + newScmCertIdSet.clear(); + } + + @Override + public void setSubCACertId(String subCACertId) { + newSubCACertId.set(subCACertId); + LOG.info("Scm sub CA new certificate is {}", subCACertId); + } + + public String getSubCACertId() { + return newSubCACertId.get(); + } + + private boolean shouldSkipRootCert(String newRootCertId) throws IOException { + List<X509Certificate> scmCertChain = scmCertClient.getTrustChain(); + Preconditions.checkArgument(scmCertChain.size() > 1); + X509Certificate rootCert = scmCertChain.get(scmCertChain.size() - 1); + if (rootCert.getSerialNumber().compareTo(new BigInteger(newRootCertId)) + >= 0) { + // usually this happens when the Raft log is re-applied during SCM start + LOG.info("Sub CA certificate {} is already signed by root " + + "certificate {} or a newer root certificate.", + scmCertChain.get(0).getSerialNumber().toString(), newRootCertId); + return true; + } + return false; + } + + /** + *
Builder for RootCARotationHandlerImpl. + */ + public static class Builder { + private StorageContainerManager scm; + private SCMRatisServer ratisServer; + private RootCARotationManager rootCARotationManager; + + public Builder setRatisServer( + final SCMRatisServer scmRatisServer) { + this.ratisServer = scmRatisServer; + return this; + } + + public Builder setStorageContainerManager( + final StorageContainerManager storageContainerManager) { + scm = storageContainerManager; + return this; + } + + public Builder setRootCARotationManager( + final RootCARotationManager manager) { + rootCARotationManager = manager; + return this; + } + + public RootCARotationHandler build() { + final RootCARotationHandler impl = + new RootCARotationHandlerImpl(scm, rootCARotationManager); + + final SCMHAInvocationHandler invocationHandler + = new SCMHAInvocationHandler(CERT_ROTATE, impl, ratisServer); + + return (RootCARotationHandler) Proxy.newProxyInstance( + SCMHAInvocationHandler.class.getClassLoader(), + new Class[]{RootCARotationHandler.class}, + invocationHandler); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index bbd883cb6053..883928ecfada 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -18,17 +18,41 @@ package org.apache.hadoop.hdds.scm.security; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ha.HASecurityUtils; import 
org.apache.hadoop.hdds.scm.ha.SCMContext; import org.apache.hadoop.hdds.scm.ha.SCMService; import org.apache.hadoop.hdds.scm.ha.SCMServiceException; +import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; +import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.SecurityConfig; -import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; +import org.apache.hadoop.hdds.security.x509.certificate.authority.profile.DefaultCAProfile; +import org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; +import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; +import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; +import org.bouncycastle.cert.X509CertificateHolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.io.IOException; +import java.math.BigInteger; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.security.KeyPair; +import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.Duration; import java.time.LocalDateTime; @@ -36,34 +60,77 @@ import java.util.Date; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + 
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DIR_NAME_DEFAULT; +import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.ROOT_CERTIFICATE_ID; +import static org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateStore.CertType.VALID_CERTS; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME; /** - * Root CA Rotation Manager is a service in SCM to control the CA rotation. + * Root CA Rotation Service is a service in SCM to control the CA rotation. */ public class RootCARotationManager implements SCMService { public static final Logger LOG = LoggerFactory.getLogger(RootCARotationManager.class); - private StorageContainerManager scm; + private final StorageContainerManager scm; + private final OzoneConfiguration ozoneConf; + private final SecurityConfig secConf; private final SCMContext scmContext; - private OzoneConfiguration ozoneConf; - private SecurityConfig secConf; - private ScheduledExecutorService executorService; - private Duration checkInterval; - private Duration renewalGracePeriod; - private Date timeOfDay; - private CertificateClient scmCertClient; - private AtomicBoolean isRunning = new AtomicBoolean(false); - private AtomicBoolean isScheduled = new AtomicBoolean(false); - private String threadName = this.getClass().getSimpleName(); + private final ScheduledExecutorService executorService; + private final Duration checkInterval; + private final Duration renewalGracePeriod; + private final Date timeOfDay; + private final Duration ackTimeout; + private final SCMCertificateClient scmCertClient; + private final AtomicBoolean isRunning = new AtomicBoolean(false); + private final AtomicBoolean isProcessing = new AtomicBoolean(false); + private final AtomicReference processStartTime = + new AtomicReference<>(); + 
private final String threadName = this.getClass().getSimpleName(); + private final String newCAComponent = SCM_ROOT_CA_COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + + HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; + + private RootCARotationHandler handler; + private final SequenceIdGenerator sequenceIdGen; + private ScheduledFuture rotationTask; + private ScheduledFuture waitAckTask; + private ScheduledFuture waitAckTimeoutTask; + private ScheduledFuture releaseLockOnTimeoutTask; + private final RootCARotationMetrics metrics; /** * Constructs RootCARotationManager with the specified arguments. * * @param scm the storage container manager + * + * (1) (3)(4) + * ---------------------------> + * (2) scm2(Follower) + * (1) (3)(4) <--------------------------- + * ------- | + * | \ | + * -----> scm1(Leader) + * \ (2) | \ + * -------> \ (1) (3)(4) + * ---------------------------> + * (2) scm3(Follower) + * <--------------------------- + * + * + * (1) Rotation Prepare + * (2) Rotation Prepare Ack + * (3) Rotation Commit + * (4) Rotation Committed */ public RootCARotationManager(StorageContainerManager scm) { this.scm = scm; @@ -72,16 +139,24 @@ public RootCARotationManager(StorageContainerManager scm) { this.scmContext = scm.getScmContext(); checkInterval = secConf.getCaCheckInterval(); + ackTimeout = secConf.getCaAckTimeout(); + renewalGracePeriod = secConf.getRenewalGracePeriod(); timeOfDay = Date.from(LocalDateTime.parse(secConf.getCaRotationTimeOfDay()) .atZone(ZoneId.systemDefault()).toInstant()); - renewalGracePeriod = secConf.getRenewalGracePeriod(); executorService = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder().setNameFormat(threadName) .setDaemon(true).build()); - scmCertClient = scm.getScmCertificateClient(); + scmCertClient = (SCMCertificateClient) scm.getScmCertificateClient(); + sequenceIdGen = scm.getSequenceIdGen(); + handler = new RootCARotationHandlerImpl.Builder() + .setRatisServer(scm.getScmHAManager().getRatisServer()) + 
.setStorageContainerManager(scm) + .setRootCARotationManager(this) + .build(); scm.getSCMServiceManager().register(this); + metrics = RootCARotationMetrics.create(); } /** @@ -96,6 +171,22 @@ public void notifyStatusChanged() { if (!scmContext.isLeader() || scmContext.isInSafeMode()) { if (isRunning.compareAndSet(true, false)) { LOG.info("notifyStatusChanged: disable monitor task."); + if (rotationTask != null) { + rotationTask.cancel(true); + } + if (waitAckTask != null) { + waitAckTask.cancel(true); + } + if (waitAckTimeoutTask != null) { + waitAckTimeoutTask.cancel(true); + } + if (releaseLockOnTimeoutTask != null) { + if (releaseLockOnTimeoutTask.cancel(true)) { + releaseLock(); + } + } + isProcessing.set(false); + processStartTime.set(null); } return; } @@ -130,8 +221,8 @@ public String getServiceName() { @Override public void start() throws SCMServiceException { executorService.scheduleAtFixedRate( - new MonitorTask(scmCertClient), 0, checkInterval.toMillis(), - TimeUnit.MILLISECONDS); + new MonitorTask(scmCertClient, scm.getScmStorageConfig()), + 0, checkInterval.toMillis(), TimeUnit.MILLISECONDS); LOG.info("Monitor task for root certificate {} is started with " + "interval {}.", scmCertClient.getCACertificate().getSerialNumber(), checkInterval); @@ -141,56 +232,98 @@ public boolean isRunning() { return isRunning.get(); } + public void scheduleSubCaRotationPrepareTask(String rootCertId) { + executorService.schedule(new SubCARotationPrepareTask(rootCertId), 0, + TimeUnit.MILLISECONDS); + } + + public void acquireLock() throws InterruptedException { + DefaultCertificateClient.acquirePermit(); + } + + public void releaseLock() { + DefaultCertificateClient.releasePermit(); + } + + public void checkAndReleaseLock() { + if (releaseLockOnTimeoutTask != null) { + if (releaseLockOnTimeoutTask.cancel(true)) { + releaseLock(); + } + } else { + releaseLock(); + } + } + + public boolean isRotationInProgress() { + return isProcessing.get(); + } + /** * Task to monitor 
certificate lifetime and start rotation if needed. */ public class MonitorTask implements Runnable { - private CertificateClient certClient; + private SCMCertificateClient certClient; + private SCMStorageConfig scmStorageConfig; - public MonitorTask(CertificateClient client) { + public MonitorTask(SCMCertificateClient client, + SCMStorageConfig storageConfig) { this.certClient = client; + this.scmStorageConfig = storageConfig; } @Override public void run() { Thread.currentThread().setName(threadName + (isRunning() ? "-Active" : "-Inactive")); - if (!isRunning.get() || isScheduled.get()) { + if (!isRunning.get()) { return; } // Lock to protect the root CA certificate rotation process, // to make sure there is only one task is ongoing at one time. synchronized (RootCARotationManager.class) { - X509Certificate rootCACert = certClient.getCACertificate(); - Duration timeLeft = timeBefore2ExpiryGracePeriod(rootCACert); - if (timeLeft.isZero()) { - LOG.info("Root certificate {} has entered the 2 * expiry" + - " grace period({}).", rootCACert.getSerialNumber().toString(), - renewalGracePeriod); - // schedule root CA rotation task - LocalDateTime now = LocalDateTime.now(); - LocalDateTime timeToSchedule = LocalDateTime.of( - now.getYear(), now.getMonthValue(), now.getDayOfMonth(), - timeOfDay.getHours(), timeOfDay.getMinutes(), - timeOfDay.getSeconds()); - if (timeToSchedule.isBefore(now)) { - timeToSchedule = timeToSchedule.plusDays(1); - } - long delay = Duration.between(now, timeToSchedule).toMillis(); - if (timeToSchedule.isAfter(rootCACert.getNotAfter().toInstant() - .atZone(ZoneId.systemDefault()).toLocalDateTime())) { - LOG.info("Configured rotation time {} is after root" + - " certificate {} end time {}. 
Start the rotation immediately.", - timeToSchedule, rootCACert.getSerialNumber().toString(), - rootCACert.getNotAfter()); - delay = 0; - } + if (isProcessing.get()) { + LOG.info("Root certificate rotation task is already running."); + return; + } + try { + X509Certificate rootCACert = certClient.getCACertificate(); + Duration timeLeft = timeBefore2ExpiryGracePeriod(rootCACert); + if (timeLeft.isZero()) { + LOG.info("Root certificate {} has entered the 2 * expiry" + + " grace period({}).", + rootCACert.getSerialNumber().toString(), renewalGracePeriod); + // schedule root CA rotation task + LocalDateTime now = LocalDateTime.now(); + LocalDateTime timeToSchedule = LocalDateTime.of( + now.getYear(), now.getMonthValue(), now.getDayOfMonth(), + timeOfDay.getHours(), timeOfDay.getMinutes(), + timeOfDay.getSeconds()); + if (timeToSchedule.isBefore(now)) { + timeToSchedule = timeToSchedule.plusDays(1); + } + long delay = Duration.between(now, timeToSchedule).toMillis(); + if (timeToSchedule.isAfter(rootCACert.getNotAfter().toInstant() + .atZone(ZoneId.systemDefault()).toLocalDateTime())) { + LOG.info("Configured rotation time {} is after root" + + " certificate {} end time {}. 
Start the rotation " + + "immediately.", timeToSchedule, + rootCACert.getSerialNumber().toString(), + rootCACert.getNotAfter()); + delay = 0; + } - executorService.schedule(new RotationTask(certClient), delay, - TimeUnit.MILLISECONDS); - isScheduled.set(true); - LOG.info("Root certificate {} rotation task is scheduled with {} ms " - + "delay", rootCACert.getSerialNumber().toString(), delay); + rotationTask = executorService.schedule( + new RotationTask(certClient, scmStorageConfig), delay, + TimeUnit.MILLISECONDS); + isProcessing.set(true); + metrics.incrTotalRotationNum(); + LOG.info("Root certificate {} rotation task is scheduled with {} ms" + + " delay", rootCACert.getSerialNumber().toString(), delay); + } + } catch (Throwable e) { + LOG.error("Error while scheduling root CA rotation task", e); + scm.shutDown("Error while scheduling root CA rotation task"); } } } @@ -200,16 +333,20 @@ public void run() { * Task to rotate root certificate. */ public class RotationTask implements Runnable { - private CertificateClient certClient; + private SCMCertificateClient certClient; + private SCMStorageConfig scmStorageConfig; - public RotationTask(CertificateClient client) { + public RotationTask(SCMCertificateClient client, + SCMStorageConfig storageConfig) { this.certClient = client; + this.scmStorageConfig = storageConfig; } @Override public void run() { if (!isRunning.get()) { - isScheduled.set(false); + isProcessing.set(false); + processStartTime.set(null); return; } // Lock to protect the root CA certificate rotation process, @@ -218,25 +355,118 @@ public void run() { // 1. generate new root CA keys and certificate, persist to disk // 2. start new Root CA server // 3. send scm Sub-CA rotation preparation request through RATIS - // 4. send scm Sub-CA rotation commit request through RATIS - // 5. send scm Sub-CA rotation finish request through RATIS + // 4. wait for all SCM to ack + // 5. send scm Sub-CA rotation commit request through RATIS + // 6. 
send scm Sub-CA rotation finish request through RATIS synchronized (RootCARotationManager.class) { X509Certificate rootCACert = certClient.getCACertificate(); Duration timeLeft = timeBefore2ExpiryGracePeriod(rootCACert); if (timeLeft.isZero()) { LOG.info("Root certificate {} rotation is started.", rootCACert.getSerialNumber().toString()); - // TODO: start the root CA rotation process + processStartTime.set(System.nanoTime()); + // generate new root key pair and persist new root certificate + CertificateServer newRootCAServer = null; + BigInteger newId = BigInteger.ONE; + try { + newId = new BigInteger(String.valueOf( + sequenceIdGen.getNextId(ROOT_CERTIFICATE_ID))); + newRootCAServer = + HASecurityUtils.initializeRootCertificateServer(secConf, + scm.getCertificateStore(), scmStorageConfig, newId, + new DefaultCAProfile(), newCAComponent); + } catch (Throwable e) { + LOG.error("Error while generating new root CA certificate " + + "under {}", newCAComponent, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new root CA certificate " + + "under " + newCAComponent; + scm.shutDown(message); + } + + String newRootCertId = ""; + X509CertificateHolder newRootCertificate; + try { + newRootCertificate = newRootCAServer.getCACertificate(); + newRootCertId = newRootCertificate.getSerialNumber().toString(); + Preconditions.checkState(newRootCertId.equals(newId.toString()), + "Root certificate doesn't match, " + + "expected:" + newId + ", fetched:" + newRootCertId); + scm.getSecurityProtocolServer() + .setRootCertificateServer(newRootCAServer); + if (isRunning()) { + handler.rotationPrepare(newRootCertId); + LOG.info("Send out sub CA rotation prepare request for new " + + "root certificate {}", newRootCertId); + } else { + LOG.info("SCM is not leader anymore. 
Delete the in-progress " + + "root CA directory"); + cleanupAndStop("SCM is not leader anymore"); + return; + } + } catch (Exception e) { + LOG.error("Error while sending rotation prepare request", e); + cleanupAndStop("Error while sending rotation prepare request"); + return; + } + + // save root certificate to certStore + try { + if (scm.getCertificateStore().getCertificateByID( + newRootCertificate.getSerialNumber(), VALID_CERTS) == null) { + LOG.info("Persist root certificate {} to cert store", + newRootCertId); + scm.getCertificateStore().storeValidCertificate( + newRootCertificate.getSerialNumber(), + CertificateCodec.getX509Certificate(newRootCertificate), + HddsProtos.NodeType.SCM); + } + } catch (CertificateException | IOException | TimeoutException e) { + LOG.error("Failed to save root certificate {} to cert store", + newRootCertId, e); + scm.shutDown("Failed to save root certificate to cert store"); + } + + // schedule task to wait for prepare acks + waitAckTask = executorService.scheduleAtFixedRate( + new WaitSubCARotationPrepareAckTask(newRootCertificate), + 1, 1, TimeUnit.SECONDS); + waitAckTimeoutTask = executorService.schedule(() -> { + // Not enough acks were received before the timeout + waitAckTask.cancel(false); + String msg = "Failed to receive all acks of rotation prepare" + + " after " + ackTimeout + ", received " + + handler.rotationPrepareAcks() + " acks"; + cleanupAndStop(msg); + }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS); } else { LOG.warn("Root certificate {} hasn't entered the 2 * expiry" + " grace period {}. 
Skip root certificate rotation this time.", rootCACert.getSerialNumber().toString(), renewalGracePeriod); + isProcessing.set(false); + processStartTime.set(null); } } - isScheduled.set(false); } } + private void cleanupAndStop(String reason) { + scm.getSecurityProtocolServer() + .setRootCertificateServer(null); + try { + FileUtils.deleteDirectory(new File(scmCertClient.getSecurityConfig() + .getLocation(newCAComponent).toString())); + LOG.info("In-progress root CA directory {} is deleted for '{}'", + scmCertClient.getSecurityConfig().getLocation(newCAComponent), + reason); + } catch (IOException ex) { + LOG.error("Error when deleting in-progress root CA directory {} for {}", + scmCertClient.getSecurityConfig().getLocation(newCAComponent), reason, + ex); + } + isProcessing.set(false); + processStartTime.set(null); + } /** * Calculate time before root certificate will enter 2 * expiry grace period. * @return Duration, time before certificate enters the 2 * grace @@ -255,11 +485,205 @@ public Duration timeBefore2ExpiryGracePeriod(X509Certificate certificate) { } } + /** + * Task to generate sub-ca key and certificate. + */ + public class SubCARotationPrepareTask implements Runnable { + private String rootCACertId; + + public SubCARotationPrepareTask(String newRootCertId) { + this.rootCACertId = newRootCertId; + } + + @Override + public void run() { + // Lock to protect the sub CA certificate rotation preparation process, + // to make sure there is only one task is ongoing at one time. + // Sub CA rotation preparation steps: + // 1. generate new sub CA keys + // 2. send CSR to leader SCM + // 3. 
wait CSR response and persist the certificate to disk + try { + acquireLock(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } + try { + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - started.", + rootCACertId); + + SecurityConfig securityConfig = + scmCertClient.getSecurityConfig(); + String progressComponent = SCMCertificateClient.COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + + HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; + final String newSubCAProgressPath = + securityConfig.getLocation(progressComponent).toString(); + final String newSubCAPath = securityConfig.getLocation( + SCMCertificateClient.COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX).toString(); + + File newProgressDir = new File(newSubCAProgressPath); + File newDir = new File(newSubCAPath); + try { + FileUtils.deleteDirectory(newProgressDir); + FileUtils.deleteDirectory(newDir); + Files.createDirectories(newProgressDir.toPath()); + } catch (IOException e) { + LOG.error("Failed to delete and create {}, or delete {}", + newProgressDir, newDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when deleting and create directory"; + scm.shutDown(message); + } + + // Generate key + Path keyDir = securityConfig.getKeyLocation(progressComponent); + KeyCodec keyCodec = new KeyCodec(securityConfig, keyDir); + KeyPair newKeyPair = null; + try { + HDDSKeyGenerator keyGenerator = new HDDSKeyGenerator(securityConfig); + newKeyPair = keyGenerator.generateKey(); + keyCodec.writePublicKey(newKeyPair.getPublic()); + keyCodec.writePrivateKey(newKeyPair.getPrivate()); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "scm key generated.", rootCACertId); + } catch (Exception e) { + LOG.error("Failed to generate key under {}", newProgressDir, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new key under " + + newProgressDir; + scm.shutDown(message); + } + + // 
Get certificate signed + String newCertSerialId = ""; + try { + CertificateSignRequest.Builder csrBuilder = + scmCertClient.getCSRBuilder(); + csrBuilder.setKey(newKeyPair); + newCertSerialId = scmCertClient.signAndStoreCertificate( + csrBuilder.build(), + Paths.get(newSubCAProgressPath, HDDS_X509_DIR_NAME_DEFAULT), + true); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "scm certificate signed.", rootCACertId); + } catch (Exception e) { + LOG.error("Failed to generate certificate under {}", + newProgressDir, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new certificate " + + newProgressDir; + scm.shutDown(message); + } + + // move dir from *-next-progress to *-next + try { + Files.move(newProgressDir.toPath(), newDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", + newSubCAProgressPath, newSubCAPath, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when moving " + newSubCAProgressPath + + " to " + newSubCAPath; + scm.shutDown(message); + } + + // Send ack to rotationPrepare request + try { + handler.rotationPrepareAck(rootCACertId, newCertSerialId, + scm.getScmId()); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "rotation prepare ack sent out, new scm certificate {}", + rootCACertId, newCertSerialId); + } catch (Exception e) { + LOG.error("Failed to send ack to rotationPrepare request", e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when sending out rotationPrepare ack"; + scm.shutDown(message); + } + + handler.setSubCACertId(newCertSerialId); + + releaseLockOnTimeoutTask = executorService.schedule(() -> { + // If no rotation commit request received after rotation prepare + LOG.warn("Failed to have enough rotation acks from SCM. This " + + " time root rotation {} is failed. 
Release the lock.", + rootCACertId); + releaseLock(); + }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS); + + } catch (Throwable e) { + LOG.error("Unexpected error happen", e); + scm.shutDown("Unexpected error happen, " + e.getMessage()); + } + } + } + + /** + * Task to wait the all acks of prepare request. + */ + public class WaitSubCARotationPrepareAckTask implements Runnable { + private String rootCACertId; + + public WaitSubCARotationPrepareAckTask( + X509CertificateHolder rootCertHolder) { + this.rootCACertId = rootCertHolder.getSerialNumber().toString(); + } + + @Override + public void run() { + if (!isRunning()) { + LOG.info("SCM is not leader anymore. Delete the in-progress " + + "root CA directory"); + cleanupAndStop("SCM is not leader anymore"); + return; + } + synchronized (RootCARotationManager.class) { + if (handler.rotationPrepareAcks() == + (scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1)) { + // all acks are received. + try { + waitAckTimeoutTask.cancel(false); + handler.rotationCommit(rootCACertId); + handler.rotationCommitted(rootCACertId); + + metrics.incrSuccessRotationNum(); + long timeTaken = System.nanoTime() - processStartTime.get(); + metrics.setSuccessTimeInNs(timeTaken); + processStartTime.set(null); + + // reset state + handler.resetRotationPrepareAcks(); + String msg = "Root certificate " + rootCACertId + + " rotation is finished successfully after " + timeTaken + " ns"; + cleanupAndStop(msg); + } catch (Throwable e) { + LOG.error("Execution error", e); + handler.resetRotationPrepareAcks(); + cleanupAndStop("Execution error, " + e.getMessage()); + } finally { + // stop this task to re-execute again in next cycle + throw new RuntimeException("Exit the this " + + "WaitSubCARotationPrepareAckTask for root certificate " + + rootCACertId + " since the rotation is finished execution"); + } + } + } + } + } + /** * Stops scheduled monitor task. 
*/ @Override public void stop() { + if (metrics != null) { + metrics.unRegister(); + } try { executorService.shutdown(); if (!executorService.awaitTermination(3, TimeUnit.SECONDS)) { @@ -270,4 +694,9 @@ public void stop() { Thread.currentThread().interrupt(); } } + + @VisibleForTesting + public void setRootCARotationHandler(RootCARotationHandler newHandler) { + handler = newHandler; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java new file mode 100644 index 000000000000..659adc616857 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.hdds.scm.security; + +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; + +/** + * Metrics related to Root CA rotation in SCM. + */ +@Metrics(name = "Root CA Rotation Metrics", about = "Metrics related to " + + "Root CA rotation in SCM", context = "SCM") +public final class RootCARotationMetrics { + public static final String NAME = + RootCARotationMetrics.class.getSimpleName(); + + private final MetricsSystem ms; + + @Metric(about = "Number of total tries, both successes and failures.") + private MutableCounterLong numTotalRotation; + + @Metric(about = "Number of successful rotations") + private MutableCounterLong numSuccessRotation; + + @Metric(about = "Time(nano second) spent on last successful rotation") + private MutableGaugeLong successTimeInNs; + + /** + * Create and register metrics named {@link RootCARotationMetrics#NAME} + * for {@link RootCARotationManager}. 
+ * + * @return {@link RootCARotationMetrics} + */ + public static RootCARotationMetrics create() { + MetricsSystem metricsSystem = DefaultMetricsSystem.instance(); + return metricsSystem.register(NAME, "Root CA Rotation Metrics", + new RootCARotationMetrics(metricsSystem)); + } + + public void unRegister() { + MetricsSystem ms = DefaultMetricsSystem.instance(); + ms.unregisterSource(NAME); + } + + private RootCARotationMetrics(MetricsSystem ms) { + this.ms = ms; + } + + public MutableGaugeLong getSuccessTimeInNs() { + return successTimeInNs; + } + + public void setSuccessTimeInNs(long time) { + this.successTimeInNs.set(time); + } + + public void incrSuccessRotationNum() { + this.numSuccessRotation.incr(); + } + + public void incrTotalRotationNum() { + this.numTotalRotation.incr(); + } + + public long getSuccessRotationNum() { + return this.numSuccessRotation.value(); + } + + public long getTotalRotationNum() { + return this.numTotalRotation.value(); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java index f7fca6022555..b3dc7522b823 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java @@ -118,6 +118,8 @@ public void storeValidScmCertificate(BigInteger serialID, scmMetadataStore.getValidCertsTable().putWithBatch(batchOperation, serialID, certificate); scmMetadataStore.getStore().commitBatchOperation(batchOperation); + LOG.info("Scm certificate {} for {} is stored", serialID, + certificate.getSubjectDN()); } finally { lock.unlock(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 2c12656b1eb5..f3fa69f529c2 
100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -802,6 +802,14 @@ public void transferLeadership(String newLeaderId) if (!SCMHAUtils.isSCMHAEnabled(getScm().getConfiguration())) { throw new SCMException("SCM HA not enabled.", ResultCodes.INTERNAL_ERROR); } + + if (scm.getRootCARotationManager() != null && + scm.getRootCARotationManager().isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + " Please try the operation later again."), + ResultCodes.CA_ROTATION_IN_PROGRESS); + } + boolean auditSuccess = true; final Map auditMap = Maps.newHashMap(); auditMap.put("newLeaderId", newLeaderId); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java index c576875603a2..c62e84edd785 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.symmetric.ManagedSecretKey; import org.apache.hadoop.hdds.security.symmetric.SecretKeyManager; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.scm.ScmConfig; @@ -100,7 +101,7 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, private static final Logger LOGGER = LoggerFactory .getLogger(SCMSecurityProtocolServer.class); - private final CertificateServer rootCertificateServer; + 
private CertificateServer rootCertificateServer; private final CertificateServer scmCertificateServer; private final List rootCACertificateList; private final RPC.Server rpcServer; // HADOOP RPC SERVER @@ -115,7 +116,7 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, private final SecretKeyManager secretKeyManager; SCMSecurityProtocolServer(OzoneConfiguration conf, - CertificateServer rootCertificateServer, + @Nullable CertificateServer rootCertificateServer, CertificateServer scmCertificateServer, List rootCACertList, StorageContainerManager scm, @Nullable SecretKeyManager secretKeyManager) @@ -187,6 +188,12 @@ public String getDataNodeCertificate( LOGGER.info("Processing CSR for dn {}, UUID: {}", dnDetails.getHostName(), dnDetails.getUuid()); Objects.requireNonNull(dnDetails); + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } return getEncodedCertToString(certSignReq, NodeType.DATANODE); } @@ -198,6 +205,12 @@ public String getCertificate( nodeDetails.getNodeType(), nodeDetails.getHostName(), nodeDetails.getUuid()); Objects.requireNonNull(nodeDetails); + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is inprogress." 
+ + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } return getEncodedCertToString(certSignReq, nodeDetails.getNodeType()); } @@ -257,6 +270,12 @@ public String getOMCertificate(OzoneManagerDetailsProto omDetails, LOGGER.info("Processing CSR for om {}, UUID: {}", omDetails.getHostName(), omDetails.getUuid()); Objects.requireNonNull(omDetails); + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } return getEncodedCertToString(certSignReq, NodeType.OM); } @@ -517,6 +536,10 @@ public CertificateServer getRootCertificateServer() { return rootCertificateServer; } + public void setRootCertificateServer( + CertificateServer newServer) { + this.rootCertificateServer = newServer; + } public CertificateServer getScmCertificateServer() { return scmCertificateServer; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index b3985c854ff5..a4610e0824e1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -572,8 +572,10 @@ private void initializeCertificateClient() throws IOException { SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient = getScmSecurityClientWithMaxRetry(configuration, getCurrentUser()); scmCertificateClient = new SCMCertificateClient( - securityConfig, scmSecurityClient, - scmStorageConfig.getScmCertSerialId()); + securityConfig, scmSecurityClient, scmStorageConfig.getScmId(), + scmStorageConfig.getClusterID(), + scmStorageConfig.getScmCertSerialId(), + 
getScmAddress(scmHANodeDetails, configuration).getHostName()); } } @@ -854,14 +856,14 @@ private void initializeCAnSecurityProtocol(OzoneConfiguration conf, final CertificateServer rootCertificateServer; // Start specific instance SCM CA server. - String subject = SCM_SUB_CA_PREFIX + + String subject = String.format(SCM_SUB_CA_PREFIX, System.nanoTime()) + InetAddress.getLocalHost().getHostName(); if (configurator.getCertificateServer() != null) { scmCertificateServer = configurator.getCertificateServer(); } else { scmCertificateServer = new DefaultCAServer(subject, scmStorageConfig.getClusterID(), scmStorageConfig.getScmId(), - certificateStore, new DefaultCAProfile(), + certificateStore, null, new DefaultCAProfile(), scmCertificateClient.getComponentName()); // INTERMEDIARY_CA which issues certs to DN and OM. scmCertificateServer.init(new SecurityConfig(configuration), @@ -1160,8 +1162,7 @@ public static boolean scmBootstrap(OzoneConfiguration conf) scmStorageConfig.getScmId()); // Initialize security if security is enabled later. - initializeSecurityIfNeeded( - conf, scmhaNodeDetails, scmStorageConfig, false); + initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, false); return true; } @@ -1185,9 +1186,7 @@ public static boolean scmBootstrap(OzoneConfiguration conf) } // Initialize security if security is enabled later. 
- initializeSecurityIfNeeded( - conf, scmhaNodeDetails, scmStorageConfig, false); - + initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, false); } else { try { scmStorageConfig.setClusterId(fetchedId); @@ -1200,7 +1199,7 @@ public static boolean scmBootstrap(OzoneConfiguration conf) if (OzoneSecurityUtil.isSecurityEnabled(conf)) { HASecurityUtils.initializeSecurity(scmStorageConfig, config, - getScmAddress(scmhaNodeDetails, conf), false); + selfHostName, false); } scmStorageConfig.setPrimaryScmNodeId(scmInfo.getScmId()); scmStorageConfig.setSCMHAFlag(true); @@ -1222,14 +1221,13 @@ public static boolean scmBootstrap(OzoneConfiguration conf) * ScmStorageConfig does not have certificate serial id. */ private static void initializeSecurityIfNeeded( - OzoneConfiguration conf, SCMHANodeDetails scmhaNodeDetails, - SCMStorageConfig scmStorageConfig, boolean isPrimordial) - throws IOException { + OzoneConfiguration conf, SCMStorageConfig scmStorageConfig, + String scmHostname, boolean isPrimordial) throws IOException { // Initialize security if security is enabled later. if (OzoneSecurityUtil.isSecurityEnabled(conf) && scmStorageConfig.getScmCertSerialId() == null) { HASecurityUtils.initializeSecurity(scmStorageConfig, conf, - getScmAddress(scmhaNodeDetails, conf), isPrimordial); + scmHostname, isPrimordial); scmStorageConfig.forceInitialize(); LOG.info("SCM unsecure cluster is converted to secure cluster. 
" + "Persisted SCM Certificate SerialID {}", @@ -1272,7 +1270,7 @@ public static boolean scmInit(OzoneConfiguration conf, if (OzoneSecurityUtil.isSecurityEnabled(conf)) { HASecurityUtils.initializeSecurity(scmStorageConfig, conf, - getScmAddress(haDetails, conf), true); + getScmAddress(haDetails, conf).getHostName(), true); } // Ensure scmRatisServer#initialize() is called post scm storage @@ -1319,7 +1317,7 @@ public static boolean scmInit(OzoneConfiguration conf, final boolean isSCMHAEnabled = scmStorageConfig.isSCMHAEnabled(); // Initialize security if security is enabled later. - initializeSecurityIfNeeded(conf, haDetails, scmStorageConfig, true); + initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, true); if (SCMHAUtils.isSCMHAEnabled(conf) && !isSCMHAEnabled) { SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(), @@ -1484,6 +1482,10 @@ public String getDatanodeRpcPort() { return addr == null ? "0" : Integer.toString(addr.getPort()); } + public CertificateStore getCertificateStore() { + return certificateStore; + } + /** * Start service. 
*/ diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java index 636641999acc..f661827f8afb 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java @@ -22,10 +22,15 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.container.TestContainerManagerImpl; import org.apache.hadoop.hdds.scm.ha.SCMContext; +import org.apache.hadoop.hdds.scm.ha.SCMHAManager; +import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.ha.SCMServiceManager; +import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; +import org.apache.hadoop.hdds.scm.server.SCMSecurityProtocolServer; +import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.SecurityConfig; -import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.SelfSignedCertificate; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.ozone.test.GenericTestUtils; @@ -38,6 +43,7 @@ import java.io.File; import java.io.IOException; +import java.math.BigInteger; import java.security.KeyPair; import java.security.cert.X509Certificate; import java.time.Duration; @@ -63,12 +69,21 @@ public class TestRootCARotationManager { private OzoneConfiguration ozoneConfig; + private SecurityConfig securityConfig; private RootCARotationManager rootCARotationManager; private StorageContainerManager scm; - private CertificateClient scmCertClient; + private 
SCMCertificateClient scmCertClient; private SCMServiceManager scmServiceManager; + private SCMHAManager scmhaManager; private SCMContext scmContext; + private SequenceIdGenerator sequenceIdGenerator; + private SCMStorageConfig scmStorageConfig; + private SCMSecurityProtocolServer scmSecurityProtocolServer; + private RootCARotationHandlerImpl handler; private File testDir; + private String cID = UUID.randomUUID().toString(); + private String scmID = UUID.randomUUID().toString(); + private BigInteger certID = new BigInteger("1"); @BeforeEach public void init() throws IOException, TimeoutException { @@ -80,14 +95,33 @@ public void init() throws IOException, TimeoutException { ozoneConfig .setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); scm = Mockito.mock(StorageContainerManager.class); - scmCertClient = Mockito.mock(CertificateClient.class); + securityConfig = new SecurityConfig(ozoneConfig); + scmCertClient = new SCMCertificateClient(securityConfig, null, scmID, cID, + certID.toString(), "localhost"); scmServiceManager = new SCMServiceManager(); scmContext = Mockito.mock(SCMContext.class); + scmhaManager = Mockito.mock(SCMHAManager.class); + sequenceIdGenerator = Mockito.mock(SequenceIdGenerator.class); + scmStorageConfig = new SCMStorageConfig(ozoneConfig); + scmStorageConfig.setScmId(scmID); + scmStorageConfig.setClusterId(cID); + scmSecurityProtocolServer = Mockito.mock(SCMSecurityProtocolServer.class); + handler = Mockito.mock(RootCARotationHandlerImpl.class); when(scmContext.isLeader()).thenReturn(true); when(scm.getConfiguration()).thenReturn(ozoneConfig); when(scm.getScmCertificateClient()).thenReturn(scmCertClient); when(scm.getScmContext()).thenReturn(scmContext); when(scm.getSCMServiceManager()).thenReturn(scmServiceManager); + when(scm.getScmHAManager()).thenReturn(scmhaManager); + when(scmhaManager.getRatisServer()) + .thenReturn(Mockito.mock(SCMRatisServerImpl.class)); + when(scm.getSequenceIdGen()).thenReturn(sequenceIdGenerator); + 
when(sequenceIdGenerator.getNextId(Mockito.anyString())).thenReturn(2L); + when(scm.getScmStorageConfig()).thenReturn(scmStorageConfig); + when(scm.getSecurityProtocolServer()).thenReturn(scmSecurityProtocolServer); + Mockito.doNothing().when(scmSecurityProtocolServer) + .setRootCertificateServer(Mockito.anyObject()); + Mockito.doNothing().when(handler).rotationPrepare(Mockito.anyString()); } @AfterEach @@ -156,9 +190,10 @@ public void testRotationOnSchedule() throws Exception { X509Certificate cert = generateX509Cert(ozoneConfig, LocalDateTime.now(), Duration.ofSeconds(35)); - when(scmCertClient.getCACertificate()).thenReturn(cert); + scmCertClient.setCACertificate(cert); rootCARotationManager = new RootCARotationManager(scm); + rootCARotationManager.setRootCARotationHandler(handler); GenericTestUtils.LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(RootCARotationManager.LOG); GenericTestUtils.setLogLevel(RootCARotationManager.LOG, INFO); @@ -187,9 +222,10 @@ public void testRotationImmediately() throws Exception { X509Certificate cert = generateX509Cert(ozoneConfig, LocalDateTime.now(), Duration.ofSeconds(35)); - when(scmCertClient.getCACertificate()).thenReturn(cert); + scmCertClient.setCACertificate(cert); rootCARotationManager = new RootCARotationManager(scm); + rootCARotationManager.setRootCARotationHandler(handler); GenericTestUtils.LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(RootCARotationManager.LOG); GenericTestUtils.setLogLevel(RootCARotationManager.LOG, INFO); @@ -213,11 +249,12 @@ private X509Certificate generateX509Cert( SelfSignedCertificate.newBuilder() .setBeginDate(start) .setEndDate(end) - .setClusterID("cluster") - .setKey(keyPair) + .setScmID(scmID) + .setClusterID(cID) .setSubject("localhost") .setConfiguration(new SecurityConfig(conf)) - .setScmID("test") + .setKey(keyPair) + .makeCA(certID) .build()); } } diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env 
b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env new file mode 100644 index 000000000000..1ddd45333e1a --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +HDDS_VERSION=${hdds.version} +HADOOP_VERSION=3 +OZONE_RUNNER_VERSION=${docker.ozone-runner.version} +OZONE_RUNNER_IMAGE=apache/ozone-runner +OZONE_TESTKRB5_IMAGE=${docker.ozone-testkr5b.image} +OZONE_OPTS= diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml new file mode 100644 index 000000000000..a13c0b2f50cf --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml @@ -0,0 +1,386 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3" +services: + kdc: + image: ${OZONE_TESTKRB5_IMAGE} + hostname: kdc + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + command: ["krb5kdc","-n"] + networks: + ozone_net: + ipv4_address: 172.25.0.100 + kms: + image: apache/hadoop:${HADOOP_VERSION} + ports: + - 9600:9600 + env_file: + - ./docker-config + volumes: + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + - ../../libexec/transformation.py:/opt/transformation.py + environment: + HADOOP_CONF_DIR: /opt/hadoop/etc/hadoop + command: ["hadoop", "kms"] + networks: + ozone_net: + ipv4_address: 172.25.0.101 + datanode1: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9864:9999 + command: ["/opt/hadoop/bin/ozone","datanode"] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + - "recon: 172.25.0.115" + env_file: + - docker-config + environment: + WAITFOR: scm3.org:9894 + OZONE_OPTS: + networks: + ozone_net: + ipv4_address: 172.25.0.102 + datanode2: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9866:9999 + command: ["/opt/hadoop/bin/ozone","datanode"] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + - "recon: 172.25.0.115" + env_file: + - docker-config + environment: + WAITFOR: scm3.org:9894 + OZONE_OPTS: + 
networks: + ozone_net: + ipv4_address: 172.25.0.103 + datanode3: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9868:9999 + command: ["/opt/hadoop/bin/ozone","datanode"] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + - "recon: 172.25.0.115" + env_file: + - docker-config + environment: + WAITFOR: scm3.org:9894 + OZONE_OPTS: + networks: + ozone_net: + ipv4_address: 172.25.0.104 + om1: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: om1 + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9880:9874 + - 9890:9872 + #- 18001:18001 + environment: + WAITFOR: scm3.org:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + OZONE_OPTS: + env_file: + - ./docker-config + command: ["/opt/hadoop/bin/ozone","om"] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.111 + om2: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: om2 + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9882:9874 + - 9892:9872 + #- 18002:18002 + environment: + WAITFOR: scm3.org:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + OZONE_OPTS: + env_file: + - ./docker-config + command: ["/opt/hadoop/bin/ozone","om"] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.112 + om3: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: om3 + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9884:9874 + - 9894:9872 + #- 18003:18003 + environment: + WAITFOR: scm3.org:9894 + 
ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + OZONE_OPTS: + env_file: + - ./docker-config + command: ["/opt/hadoop/bin/ozone","om"] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.113 + httpfs: + image: apache/ozone-runner:${OZONE_RUNNER_VERSION} + hostname: httpfs + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 14000:14000 + env_file: + - ./docker-config + command: [ "/opt/hadoop/bin/ozone","httpfs" ] + environment: + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} + OZONE_OPTS: + networks: + ozone_net: + ipv4_address: 172.25.0.119 + s3g: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: s3g + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9878:9878 + env_file: + - ./docker-config + command: ["/opt/hadoop/bin/ozone","s3g"] + environment: + OZONE_OPTS: + networks: + ozone_net: + ipv4_address: 172.25.0.114 + scm1.org: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: scm1.org + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9990:9876 + - 9992:9860 + env_file: + - docker-config + environment: + ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" + OZONE_OPTS: + command: ["/opt/hadoop/bin/ozone","scm"] + extra_hosts: + - "om1: 172.25.0.111" + - "om2: 172.25.0.112" + - "om3: 172.25.0.113" + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.116 + scm2.org: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: scm2.org + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - 
./krb5.conf:/etc/krb5.conf + ports: + - 9994:9876 + - 9996:9860 + env_file: + - docker-config + environment: + WAITFOR: scm1.org:9894 + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" + OZONE_OPTS: + command: ["/opt/hadoop/bin/ozone","scm"] + extra_hosts: + - "om1: 172.25.0.111" + - "om2: 172.25.0.112" + - "om3: 172.25.0.113" + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.117 + scm3.org: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: scm3.org + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9998:9876 + - 10002:9860 + env_file: + - docker-config + environment: + WAITFOR: scm2.org:9894 + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" + OZONE_OPTS: + command: ["/opt/hadoop/bin/ozone","scm"] + extra_hosts: + - "om1: 172.25.0.111" + - "om2: 172.25.0.112" + - "om3: 172.25.0.113" + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.118 + scm4.org: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: scm4.org + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 10004:9876 + - 10006:9860 + env_file: + - docker-config + - docker-config-scm4 + environment: + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE_OPTS: + command: [ "/opt/hadoop/bin/ozone","scm" ] + extra_hosts: + - "om1: 172.25.0.111" + - "om2: 172.25.0.112" + - "om3: 172.25.0.113" + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.120 + profiles: ["scm4.org"] + datanode4: + image: 
${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 10008:9999 + command: [ "/opt/hadoop/bin/ozone","datanode" ] + extra_hosts: + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + - "scm4.org: 172.25.0.120" + - "recon: 172.25.0.115" + env_file: + - docker-config + - docker-config-scm4 + environment: + WAITFOR: scm4.org:9894 + OZONE_OPTS: + networks: + ozone_net: + ipv4_address: 172.25.0.121 + profiles: [ "datanode4" ] + recon: + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + hostname: recon + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + ports: + - 9888:9888 + env_file: + - ./docker-config + environment: + OZONE_OPTS: + command: ["/opt/hadoop/bin/ozone","recon"] + extra_hosts: + - "om1: 172.25.0.111" + - "om2: 172.25.0.112" + - "om3: 172.25.0.113" + - "scm1.org: 172.25.0.116" + - "scm2.org: 172.25.0.117" + - "scm3.org: 172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.115 +networks: + ozone_net: + ipam: + driver: default + config: + - subnet: "172.25.0.0/24" diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config new file mode 100644 index 000000000000..0e0cafa886c2 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config @@ -0,0 +1,176 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# For HttpFS service it is required to enable proxying users. +CORE-SITE.XML_hadoop.proxyuser.httpfs.hosts=* +CORE-SITE.XML_hadoop.proxyuser.httpfs.groups=* + +CORE-SITE.XML_fs.defaultFS=ofs://omservice +CORE-SITE.XML_fs.trash.interval=1 + +OZONE-SITE.XML_ozone.om.service.ids=omservice +OZONE-SITE.XML_ozone.om.internal.service.id=omservice +OZONE-SITE.XML_ozone.om.nodes.omservice=om1,om2,om3 +OZONE-SITE.XML_ozone.om.address.omservice.om1=om1 +OZONE-SITE.XML_ozone.om.address.omservice.om2=om2 +OZONE-SITE.XML_ozone.om.address.omservice.om3=om3 +OZONE-SITE.XML_ozone.om.http-address.omservice.om1=om1 +OZONE-SITE.XML_ozone.om.http-address.omservice.om2=om2 +OZONE-SITE.XML_ozone.om.http-address.omservice.om3=om3 +OZONE-SITE.XML_ozone.om.ratis.enable=true + +OZONE-SITE.XML_ozone.scm.service.ids=scmservice +OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3 +OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1.org +OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2.org +OZONE-SITE.XML_ozone.scm.address.scmservice.scm3=scm3.org +OZONE-SITE.XML_ozone.scm.ratis.enable=true +OZONE-SITE.XML_ozone.scm.close.container.wait.duration=5s + +OZONE-SITE.XML_ozone.om.volume.listall.allowed=false + +OZONE-SITE.XML_ozone.scm.container.size=1GB +OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB +OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=30s +OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 +OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data +OZONE-SITE.XML_ozone.scm.block.client.address=scm 
+OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata +OZONE-SITE.XML_ozone.handler.type=distributed +OZONE-SITE.XML_ozone.scm.client.address=scm +OZONE-SITE.XML_hdds.block.token.enabled=true +OZONE-SITE.XML_hdds.container.token.enabled=true +OZONE-SITE.XML_hdds.grpc.tls.enabled=true +OZONE-SITE.XML_ozone.replication=3 +OZONE-SITE.XML_hdds.container.report.interval=60s +OZONE-SITE.XML_dfs.container.ratis.datastream.enabled=true + +OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m +OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon +OZONE-SITE.XML_ozone.recon.om.snapshot.task.initial.delay=20s +OZONE-SITE.XML_ozone.recon.address=recon:9891 + +OZONE-SITE.XML_ozone.security.enabled=true +OZONE-SITE.XML_ozone.acl.enabled=true +OZONE-SITE.XML_ozone.acl.authorizer.class=org.apache.hadoop.ozone.security.acl.OzoneNativeAuthorizer +OZONE-SITE.XML_ozone.administrators="testuser,recon,om" + +OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019 +HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012 +CORE-SITE.XML_dfs.data.transfer.protection=authentication +CORE-SITE.XML_hadoop.security.authentication=kerberos +CORE-SITE.XML_hadoop.security.auth_to_local="DEFAULT" +CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms + + +OZONE-SITE.XML_hdds.scm.kerberos.principal=scm/scm@EXAMPLE.COM +OZONE-SITE.XML_hdds.scm.kerberos.keytab.file=/etc/security/keytabs/scm.keytab +OZONE-SITE.XML_ozone.om.kerberos.principal=om/om@EXAMPLE.COM +OZONE-SITE.XML_ozone.om.kerberos.keytab.file=/etc/security/keytabs/om.keytab +OZONE-SITE.XML_ozone.recon.kerberos.keytab.file=/etc/security/keytabs/recon.keytab +OZONE-SITE.XML_ozone.recon.kerberos.principal=recon/recon@EXAMPLE.COM + +OZONE-SITE.XML_ozone.s3g.kerberos.keytab.file=/etc/security/keytabs/s3g.keytab +OZONE-SITE.XML_ozone.s3g.kerberos.principal=s3g/s3g@EXAMPLE.COM + +OZONE-SITE.XML_ozone.httpfs.kerberos.keytab.file=/etc/security/keytabs/httpfs.keytab 
+OZONE-SITE.XML_ozone.httpfs.kerberos.principal=httpfs/httpfs@EXAMPLE.COM + +HDFS-SITE.XML_dfs.datanode.kerberos.principal=dn/dn@EXAMPLE.COM +HDFS-SITE.XML_dfs.datanode.kerberos.keytab.file=/etc/security/keytabs/dn.keytab +HDFS-SITE.XML_dfs.web.authentication.kerberos.principal=HTTP/ozone@EXAMPLE.COM +HDFS-SITE.XML_dfs.web.authentication.kerberos.keytab=/etc/security/keytabs/HTTP.keytab + + +OZONE-SITE.XML_ozone.security.http.kerberos.enabled=true +OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.HttpCrossOriginFilterInitializer + +OZONE-SITE.XML_ozone.om.http.auth.type=kerberos +OZONE-SITE.XML_hdds.scm.http.auth.type=kerberos +OZONE-SITE.XML_hdds.datanode.http.auth.type=kerberos +OZONE-SITE.XML_ozone.s3g.http.auth.type=kerberos +OZONE-SITE.XML_ozone.httpfs.http.auth.type=kerberos +OZONE-SITE.XML_ozone.recon.http.auth.type=kerberos + +OZONE-SITE.XML_hdds.scm.http.auth.kerberos.principal=HTTP/scm@EXAMPLE.COM +OZONE-SITE.XML_hdds.scm.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_ozone.om.http.auth.kerberos.principal=HTTP/om@EXAMPLE.COM +OZONE-SITE.XML_ozone.om.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.principal=HTTP/db@EXAMPLE.COM +OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.principal=HTTP/s3g@EXAMPLE.COM +OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.keytab=/etc/security/keytabs/httpfs.keytab +OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.principal=HTTP/httpfs@EXAMPLE.COM +OZONE-SITE.XML_ozone.recon.http.auth.kerberos.principal=HTTP/recon@EXAMPLE.COM +OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab +OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab + +OZONE-SITE.XML_hdds.x509.max.duration=PT180S 
+OZONE-SITE.XML_hdds.x509.default.duration=PT60S +OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S +OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S +OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S +OZONE-SITE.XML_hdds.block.token.expiry.time=15s +OZONE-SITE.XML_ozone.manager.delegation.token.max-lifetime=15s +OZONE-SITE.XML_ozone.manager.delegation.token.renew-interval=15s +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=60s +OZONE-SITE.XML_hdds.scmclient.failover.retry.interval=1s +OZONE-SITE.XML_hdds.scmclient.failover.max.retry=60 +OZONE-SITE.XML_ozone.scm.info.wait.duration=60s +OZONE-SITE.XML_ozone.scm.ha.ratis.request.timeout=2s + +CORE-SITE.XML_hadoop.http.authentication.simple.anonymous.allowed=false +CORE-SITE.XML_hadoop.http.authentication.signature.secret.file=/etc/security/http_secret +CORE-SITE.XML_hadoop.http.authentication.type=kerberos +CORE-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/ozone@EXAMPLE.COM +CORE-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/HTTP.keytab + +CORE-SITE.XML_hadoop.security.authorization=true +HADOOP-POLICY.XML_ozone.om.security.client.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.datanode.container.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.scm.container.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.scm.block.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.scm.certificate.protocol.acl=* + +HDFS-SITE.XML_rpc.metrics.quantile.enable=true +HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 + +HTTPFS-SITE.XML_hadoop.http.authentication.type=kerberos +HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab +HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/httpfs@EXAMPLE.COM +HTTPFS-SITE.XML_httpfs.hadoop.authentication.type=kerberos +HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab 
+HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.principal=httpfs/httpfs@EXAMPLE.COM +KMS-SITE.XML_hadoop.kms.proxyuser.s3g.users=* +KMS-SITE.XML_hadoop.kms.proxyuser.s3g.groups=* +KMS-SITE.XML_hadoop.kms.proxyuser.s3g.hosts=* + +#Enable this variable to print out all hadoop rpc traffic to the stdout. See http://byteman.jboss.org/ to define your own instrumentation. +#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm + +OZONE_DATANODE_SECURE_USER=root +JAVA_HOME=/usr/lib/jvm/jre +JSVC_HOME=/usr/bin + +OZONE_CONF_DIR=/etc/hadoop +OZONE_LOG_DIR=/var/log/hadoop + +no_proxy=om,scm,recon,s3g,kdc,localhost,127.0.0.1 + +# Explicitly enable filesystem snapshot feature for this Docker compose cluster +OZONE-SITE.XML_ozone.filesystem.snapshot.enabled=true diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 new file mode 100644 index 000000000000..39d7e8b583a3 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3,scm4 +OZONE-SITE.XML_ozone.scm.address.scmservice.scm4=scm4.org diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf new file mode 100644 index 000000000000..eefc5b9c6858 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[logging] +default = FILE:/var/log/krb5libs.log +kdc = FILE:/var/log/krb5kdc.log +admin_server = FILE:/var/log/kadmind.log + +[libdefaults] + dns_canonicalize_hostname = false + dns_lookup_realm = false + ticket_lifetime = 24h + renew_lifetime = 7d + forwardable = true + rdns = false + default_realm = EXAMPLE.COM + +[realms] + EXAMPLE.COM = { + kdc = kdc + admin_server = kdc + max_renewable_life = 7d + } + +[domain_realm] + .example.com = EXAMPLE.COM + example.com = EXAMPLE.COM + diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh new file mode 100755 index 000000000000..1cdc7a8d3828 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#suite:HA-secure + +COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export COMPOSE_DIR + +export SECURITY_ENABLED=true +export OM_SERVICE_ID="omservice" +export SCM=scm1.org + +: ${OZONE_BUCKET_KEY_NAME:=key1} + +# shellcheck source=/dev/null +source "$COMPOSE_DIR/../testlib.sh" + +start_docker_env + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +execute_robot_test s3g kinit.robot + +# verify root CA rotation monitor task is active on leader +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Active'" + +# wait and verify root CA is rotated +wait_for_execute_command scm1.org 90 "ozone admin cert info 2" + +# verify scm operations +execute_robot_test s3g admincli/pipeline.robot + +# transfer leader to another SCM +execute_robot_test s3g scmha/scm-leader-transfer.robot +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Inactive'" + +# wait for second root CA rotation +wait_for_execute_command scm1.org 90 "ozone admin cert info 3" + +# verify om operations +wait_for_execute_command scm1.org 10 "ozone sh volume create rotation-vol" +wait_for_execute_command scm1.org 10 "ozone sh bucket create rotation-vol/rotation-bucket" + +# verify data read write +wait_for_execute_command scm1.org 10 "ozone sh key put /opt/hadoop/README.md /rotation-vol/rotation-bucket/README.md" +wait_for_execute_command scm1.org 10 "ozone sh key get /opt/hadoop/README.md.1 /rotation-vol/rotation-bucket/README.md" + +# bootstrap new SCM4 and verify certificate +docker-compose up -d scm4.org +wait_for_port scm4.org 9894 120 +execute_robot_test scm4.org kinit.robot +wait_for_execute_command scm4.org 120 "ozone admin scm roles | grep scm4.org" +wait_for_execute_command scm1.org 30 "ozone admin 
cert list --role=scm | grep scm4.org" + +# add new datanode4 and verify certificate +docker-compose up -d datanode4 +wait_for_port datanode4 9856 60 +wait_for_execute_command scm4.org 60 "ozone admin datanode list | grep datanode4" + +# check the metrics +execute_robot_test scm1.org scmha/root-ca-rotation.robot + +stop_docker_env + +generate_report diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml index 4262e63620d4..85586a184445 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml @@ -23,6 +23,7 @@ x-cert-rotation-config: - OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT30s - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false + - OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S services: datanode: <<: *cert-rotation-config diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index 9cc2c1d51c06..e3d1b05faba2 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -265,6 +265,7 @@ execute_command_in_container(){ ## @description Stop a list of named containers ## @param List of container names, eg datanode_1 datanode_2 stop_containers() { + set -e docker-compose --ansi never stop $@ } diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot new file mode 100644 index 000000000000..50b6d64713f5 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Smoketest ozone cluster startup +Library OperatingSystem +Library BuiltIn +Resource ../commonlib.robot +Test Timeout 5 minutes + +*** Variables *** + +*** Test Cases *** +Verify root CA rotation metrics + # example "NumSuccessRotation" : 5, + ${successRotationLine} = Execute curl -sS 'http://localhost:9876/jmx' | grep NumSuccessRotation + LOG ${successRotationLine} + ${temp_1} = Split String ${successRotationLine} : + ${temp_2} = Strip String ${temp_1[1]} + ${temp_3} = Split String ${temp_2} , + ${successRotation} = Strip String ${temp_3[0]} + ${successRotation} = Convert To Number ${successRotation} + Should be true ${successRotation} >= 1 diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index ca29b34f09e3..100f4400bc26 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -83,7 +83,6 @@ import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.minikdc.MiniKdc; -import 
org.apache.hadoop.net.NetUtils; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.common.Storage; @@ -115,6 +114,7 @@ import org.apache.commons.lang3.StringUtils; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; @@ -140,6 +140,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_SUB_CA; import static org.apache.hadoop.ozone.om.OMConfigKeys.DELEGATION_TOKEN_MAX_LIFETIME_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_KEYTAB_FILE; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_PRINCIPAL_KEY; @@ -253,6 +254,10 @@ public void init() { conf.set(HDDS_X509_RENEW_GRACE_DURATION, Duration.ofMillis(certGraceTime).toString()); conf.setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); + conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, + Duration.ofMillis(certGraceTime - 1000).toString()); + conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, + Duration.ofMillis(certGraceTime - 1000).toString()); conf.setLong(OMConfigKeys.DELEGATION_TOKEN_MAX_LIFETIME_KEY, delegationTokenMaxTime); @@ -452,8 +457,7 @@ private void initSCM() throws IOException { scmStore.setClusterId(clusterId); 
scmStore.setScmId(scmId); HASecurityUtils.initializeSecurity(scmStore, conf, - NetUtils.createSocketAddr(InetAddress.getLocalHost().getHostName(), - OZONE_SCM_CLIENT_PORT_DEFAULT), true); + InetAddress.getLocalHost().getHostName(), true); scmStore.setPrimaryScmNodeId(scmId); // writes the version file properties scmStore.initialize(); @@ -1329,12 +1333,11 @@ public void validateCertificate(X509Certificate cert) throws Exception { X500Name x500Issuer = new JcaX509CertificateHolder(cert).getIssuer(); RDN cn = x500Issuer.getRDNs(BCStyle.CN)[0]; String hostName = InetAddress.getLocalHost().getHostName(); - String scmUser = OzoneConsts.SCM_SUB_CA_PREFIX + hostName; - assertEquals(scmUser, cn.getFirst().getValue().toString()); // Subject name should be om login user in real world but in this test // UGI has scm user context. - assertEquals(scmUser, cn.getFirst().getValue().toString()); + assertTrue(cn.getFirst().getValue().toString().contains(SCM_SUB_CA)); + assertTrue(cn.getFirst().getValue().toString().contains(hostName)); LocalDate today = LocalDateTime.now().toLocalDate(); Date invalidDate; @@ -1349,7 +1352,8 @@ public void validateCertificate(X509Certificate cert) throws Exception { assertTrue(cert.getSubjectDN().toString().contains(scmId)); assertTrue(cert.getSubjectDN().toString().contains(clusterId)); - assertTrue(cert.getIssuerDN().toString().contains(scmUser)); + assertTrue(cn.getFirst().getValue().toString().contains(SCM_SUB_CA)); + assertTrue(cn.getFirst().getValue().toString().contains(hostName)); assertTrue(cert.getIssuerDN().toString().contains(scmId)); assertTrue(cert.getIssuerDN().toString().contains(clusterId)); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java index c94e7eb10d07..21fc91f556d5 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java @@ -74,6 +74,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_DIR_NAME; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_DIR_NAME_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_LEN; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; @@ -144,6 +145,7 @@ public void setup() throws Exception { Duration.ofMillis(certLifetime).toString()); conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT2S"); conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT1S"); // 1s + conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s long expiryTime = conf.getTimeDuration( HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME, "1s", diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java index 88312cacf403..1a94d16521af 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java @@ -122,7 +122,7 @@ public CertificateSignRequest.Builder getCSRBuilder() @Override public String signAndStoreCertificate(PKCS10CertificationRequest request, - Path certificatePath) throws CertificateException { + Path certificatePath, boolean renew) throws CertificateException { try { SCMGetCertResponseProto response = 
getScmSecureClient() .getOMCertChain(omInfo, getEncodedString(request)); @@ -135,14 +135,13 @@ public String signAndStoreCertificate(PKCS10CertificationRequest request, if (response.hasX509CACertificate()) { String pemEncodedRootCert = response.getX509CACertificate(); storeCertificate(pemEncodedRootCert, - CAType.SUBORDINATE, certCodec, false); - storeCertificate(pemEncodedCert, CAType.NONE, certCodec, - false); + CAType.SUBORDINATE, certCodec, false, !renew); + storeCertificate(pemEncodedCert, CAType.NONE, certCodec, false, !renew); // Store Root CA certificate if available. if (response.hasX509RootCACertificate()) { storeCertificate(response.getX509RootCACertificate(), - CAType.ROOT, certCodec, false); + CAType.ROOT, certCodec, false, !renew); } return CertificateCodec.getX509Certificate(pemEncodedCert) .getSerialNumber().toString(); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java index 2a78ddae23b3..5381a6159546 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java @@ -90,7 +90,7 @@ public CertificateSignRequest.Builder getCSRBuilder() @Override public String signAndStoreCertificate(PKCS10CertificationRequest csr, - Path certificatePath) throws CertificateException { + Path certificatePath, boolean renew) throws CertificateException { try { SCMSecurityProtocolProtos.SCMGetCertResponseProto response; HddsProtos.NodeDetailsProto.Builder reconDetailsProtoBuilder = @@ -108,17 +108,14 @@ public String signAndStoreCertificate(PKCS10CertificationRequest csr, String pemEncodedCert = response.getX509Certificate(); CertificateCodec certCodec = new CertificateCodec( getSecurityConfig(), certificatePath); - storeCertificate(pemEncodedCert, 
CAType.NONE, - certCodec, - false); + storeCertificate(pemEncodedCert, CAType.NONE, certCodec, false, !renew); storeCertificate(response.getX509CACertificate(), - CAType.SUBORDINATE, - certCodec, false); + CAType.SUBORDINATE, certCodec, false, !renew); // Store Root CA certificate. if (response.hasX509RootCACertificate()) { storeCertificate(response.getX509RootCACertificate(), - CAType.ROOT, certCodec, false); + CAType.ROOT, certCodec, false, !renew); } return getX509Certificate(pemEncodedCert).getSerialNumber().toString(); } else { From 8da1191c148d7021dd98e50aa0dcbab461a7b5e1 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Wed, 21 Jun 2023 12:49:57 +0800 Subject: [PATCH 02/15] fix checkstyle and refactor robot test --- .../client/DefaultCertificateClient.java | 4 + .../client/SCMCertificateClient.java | 14 + .../scm/security/RootCARotationMetrics.java | 4 +- .../main/compose/ozonesecure-carotation/.env | 22 - .../docker-compose.yaml | 386 ------------------ .../ozonesecure-carotation/docker-config | 176 -------- .../ozonesecure-carotation/docker-config-scm4 | 18 - .../compose/ozonesecure-carotation/krb5.conf | 41 -- .../ozonesecure-ha/root-ca-rotation.yaml | 65 +++ .../test-root-ca-rotation.sh} | 5 +- .../compose/ozonesecure/root-ca-rotation.yaml | 49 +++ .../ozonesecure/test-root-ca-rotation.sh | 64 +++ .../smoketest/scmha/root-ca-rotation.robot | 2 +- 13 files changed, 202 insertions(+), 648 deletions(-) delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml rename 
hadoop-ozone/dist/src/main/compose/{ozonesecure-carotation/test.sh => ozonesecure-ha/test-root-ca-rotation.sh} (95%) create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml create mode 100755 hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index 2e4b599ab971..5ee5d4ca8b67 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -124,7 +124,11 @@ public abstract class DefaultCertificateClient implements CertificateClient { private ScheduledExecutorService executorService; private Consumer certIdSaveCallback; private Runnable shutdownCallback; +<<<<<<< HEAD private SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient; +======= + private SCMSecurityProtocolClientSideTranslatorPB scmSecurityProtocolClient; +>>>>>>> 898ec9b4d (fix checkstyle and refactor robot test) private final Set notificationReceivers; private static Semaphore semaphore = new Semaphore(1); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index ea2924d930bd..7c6a5a9073ae 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -61,6 +61,7 @@ public class SCMCertificateClient extends 
DefaultCertificateClient { private String scmId; private String cId; private String scmHostname; + private SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient; public SCMCertificateClient(SecurityConfig securityConfig, SCMSecurityProtocolClientSideTranslatorPB scmClient, @@ -180,6 +181,19 @@ public Logger getLogger() { } @Override +<<<<<<< HEAD +======= + public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() + throws IOException { + if (scmSecurityClient == null) { + scmSecurityClient = + HddsServerUtil.getScmSecurityClientWithFixedDuration(getConfig()); + } + return scmSecurityClient; + } + + @Override +>>>>>>> 898ec9b4d (fix checkstyle and refactor robot test) public String signAndStoreCertificate(PKCS10CertificationRequest request, Path certPath, boolean renew) throws CertificateException { try { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java index 659adc616857..fcd52d0ebd76 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java @@ -59,8 +59,8 @@ public static RootCARotationMetrics create() { } public void unRegister() { - MetricsSystem ms = DefaultMetricsSystem.instance(); - ms.unregisterSource(NAME); + MetricsSystem metricsSystem = DefaultMetricsSystem.instance(); + metricsSystem.unregisterSource(NAME); } private RootCARotationMetrics(MetricsSystem ms) { diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env deleted file mode 100644 index 1ddd45333e1a..000000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/.env +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# 
or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -HDDS_VERSION=${hdds.version} -HADOOP_VERSION=3 -OZONE_RUNNER_VERSION=${docker.ozone-runner.version} -OZONE_RUNNER_IMAGE=apache/ozone-runner -OZONE_TESTKRB5_IMAGE=${docker.ozone-testkr5b.image} -OZONE_OPTS= diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml deleted file mode 100644 index a13c0b2f50cf..000000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-compose.yaml +++ /dev/null @@ -1,386 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -version: "3" -services: - kdc: - image: ${OZONE_TESTKRB5_IMAGE} - hostname: kdc - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - command: ["krb5kdc","-n"] - networks: - ozone_net: - ipv4_address: 172.25.0.100 - kms: - image: apache/hadoop:${HADOOP_VERSION} - ports: - - 9600:9600 - env_file: - - ./docker-config - volumes: - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - - ../../libexec/transformation.py:/opt/transformation.py - environment: - HADOOP_CONF_DIR: /opt/hadoop/etc/hadoop - command: ["hadoop", "kms"] - networks: - ozone_net: - ipv4_address: 172.25.0.101 - datanode1: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9864:9999 - command: ["/opt/hadoop/bin/ozone","datanode"] - extra_hosts: - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - - "recon: 172.25.0.115" - env_file: - - docker-config - environment: - WAITFOR: scm3.org:9894 - OZONE_OPTS: - networks: - ozone_net: - ipv4_address: 172.25.0.102 - datanode2: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9866:9999 - command: ["/opt/hadoop/bin/ozone","datanode"] - extra_hosts: - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - - "recon: 172.25.0.115" - env_file: - - docker-config - environment: - WAITFOR: scm3.org:9894 - OZONE_OPTS: - networks: - ozone_net: - ipv4_address: 172.25.0.103 - datanode3: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9868:9999 - command: ["/opt/hadoop/bin/ozone","datanode"] - extra_hosts: - - 
"scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - - "recon: 172.25.0.115" - env_file: - - docker-config - environment: - WAITFOR: scm3.org:9894 - OZONE_OPTS: - networks: - ozone_net: - ipv4_address: 172.25.0.104 - om1: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: om1 - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9880:9874 - - 9890:9872 - #- 18001:18001 - environment: - WAITFOR: scm3.org:9894 - ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION - OZONE_OPTS: - env_file: - - ./docker-config - command: ["/opt/hadoop/bin/ozone","om"] - extra_hosts: - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.111 - om2: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: om2 - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9882:9874 - - 9892:9872 - #- 18002:18002 - environment: - WAITFOR: scm3.org:9894 - ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION - OZONE_OPTS: - env_file: - - ./docker-config - command: ["/opt/hadoop/bin/ozone","om"] - extra_hosts: - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.112 - om3: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: om3 - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9884:9874 - - 9894:9872 - #- 18003:18003 - environment: - WAITFOR: scm3.org:9894 - ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION - OZONE_OPTS: - env_file: - - ./docker-config - command: ["/opt/hadoop/bin/ozone","om"] - extra_hosts: - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.113 - httpfs: - image: 
apache/ozone-runner:${OZONE_RUNNER_VERSION} - hostname: httpfs - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 14000:14000 - env_file: - - ./docker-config - command: [ "/opt/hadoop/bin/ozone","httpfs" ] - environment: - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} - OZONE_OPTS: - networks: - ozone_net: - ipv4_address: 172.25.0.119 - s3g: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: s3g - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9878:9878 - env_file: - - ./docker-config - command: ["/opt/hadoop/bin/ozone","s3g"] - environment: - OZONE_OPTS: - networks: - ozone_net: - ipv4_address: 172.25.0.114 - scm1.org: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: scm1.org - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9990:9876 - - 9992:9860 - env_file: - - docker-config - environment: - ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" - OZONE_OPTS: - command: ["/opt/hadoop/bin/ozone","scm"] - extra_hosts: - - "om1: 172.25.0.111" - - "om2: 172.25.0.112" - - "om3: 172.25.0.113" - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.116 - scm2.org: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: scm2.org - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9994:9876 - - 9996:9860 - env_file: - - docker-config - environment: - WAITFOR: scm1.org:9894 - ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" - OZONE_OPTS: - command: 
["/opt/hadoop/bin/ozone","scm"] - extra_hosts: - - "om1: 172.25.0.111" - - "om2: 172.25.0.112" - - "om3: 172.25.0.113" - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.117 - scm3.org: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: scm3.org - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9998:9876 - - 10002:9860 - env_file: - - docker-config - environment: - WAITFOR: scm2.org:9894 - ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" - OZONE_OPTS: - command: ["/opt/hadoop/bin/ozone","scm"] - extra_hosts: - - "om1: 172.25.0.111" - - "om2: 172.25.0.112" - - "om3: 172.25.0.113" - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.118 - scm4.org: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: scm4.org - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 10004:9876 - - 10006:9860 - env_file: - - docker-config - - docker-config-scm4 - environment: - ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION - OZONE_OPTS: - command: [ "/opt/hadoop/bin/ozone","scm" ] - extra_hosts: - - "om1: 172.25.0.111" - - "om2: 172.25.0.112" - - "om3: 172.25.0.113" - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.120 - profiles: ["scm4.org"] - datanode4: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 10008:9999 - command: [ "/opt/hadoop/bin/ozone","datanode" ] - extra_hosts: - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 
172.25.0.118" - - "scm4.org: 172.25.0.120" - - "recon: 172.25.0.115" - env_file: - - docker-config - - docker-config-scm4 - environment: - WAITFOR: scm4.org:9894 - OZONE_OPTS: - networks: - ozone_net: - ipv4_address: 172.25.0.121 - profiles: [ "datanode4" ] - recon: - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - hostname: recon - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - ports: - - 9888:9888 - env_file: - - ./docker-config - environment: - OZONE_OPTS: - command: ["/opt/hadoop/bin/ozone","recon"] - extra_hosts: - - "om1: 172.25.0.111" - - "om2: 172.25.0.112" - - "om3: 172.25.0.113" - - "scm1.org: 172.25.0.116" - - "scm2.org: 172.25.0.117" - - "scm3.org: 172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.115 -networks: - ozone_net: - ipam: - driver: default - config: - - subnet: "172.25.0.0/24" diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config deleted file mode 100644 index 0e0cafa886c2..000000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config +++ /dev/null @@ -1,176 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# For HttpFS service it is required to enable proxying users. -CORE-SITE.XML_hadoop.proxyuser.httpfs.hosts=* -CORE-SITE.XML_hadoop.proxyuser.httpfs.groups=* - -CORE-SITE.XML_fs.defaultFS=ofs://omservice -CORE-SITE.XML_fs.trash.interval=1 - -OZONE-SITE.XML_ozone.om.service.ids=omservice -OZONE-SITE.XML_ozone.om.internal.service.id=omservice -OZONE-SITE.XML_ozone.om.nodes.omservice=om1,om2,om3 -OZONE-SITE.XML_ozone.om.address.omservice.om1=om1 -OZONE-SITE.XML_ozone.om.address.omservice.om2=om2 -OZONE-SITE.XML_ozone.om.address.omservice.om3=om3 -OZONE-SITE.XML_ozone.om.http-address.omservice.om1=om1 -OZONE-SITE.XML_ozone.om.http-address.omservice.om2=om2 -OZONE-SITE.XML_ozone.om.http-address.omservice.om3=om3 -OZONE-SITE.XML_ozone.om.ratis.enable=true - -OZONE-SITE.XML_ozone.scm.service.ids=scmservice -OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3 -OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1.org -OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2.org -OZONE-SITE.XML_ozone.scm.address.scmservice.scm3=scm3.org -OZONE-SITE.XML_ozone.scm.ratis.enable=true -OZONE-SITE.XML_ozone.scm.close.container.wait.duration=5s - -OZONE-SITE.XML_ozone.om.volume.listall.allowed=false - -OZONE-SITE.XML_ozone.scm.container.size=1GB -OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB -OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=30s -OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 -OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data -OZONE-SITE.XML_ozone.scm.block.client.address=scm -OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata -OZONE-SITE.XML_ozone.handler.type=distributed -OZONE-SITE.XML_ozone.scm.client.address=scm -OZONE-SITE.XML_hdds.block.token.enabled=true -OZONE-SITE.XML_hdds.container.token.enabled=true -OZONE-SITE.XML_hdds.grpc.tls.enabled=true -OZONE-SITE.XML_ozone.replication=3 
-OZONE-SITE.XML_hdds.container.report.interval=60s -OZONE-SITE.XML_dfs.container.ratis.datastream.enabled=true - -OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m -OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon -OZONE-SITE.XML_ozone.recon.om.snapshot.task.initial.delay=20s -OZONE-SITE.XML_ozone.recon.address=recon:9891 - -OZONE-SITE.XML_ozone.security.enabled=true -OZONE-SITE.XML_ozone.acl.enabled=true -OZONE-SITE.XML_ozone.acl.authorizer.class=org.apache.hadoop.ozone.security.acl.OzoneNativeAuthorizer -OZONE-SITE.XML_ozone.administrators="testuser,recon,om" - -OZONE-SITE.XML_hdds.datanode.dir=/data/hdds -HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019 -HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012 -CORE-SITE.XML_dfs.data.transfer.protection=authentication -CORE-SITE.XML_hadoop.security.authentication=kerberos -CORE-SITE.XML_hadoop.security.auth_to_local="DEFAULT" -CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms - - -OZONE-SITE.XML_hdds.scm.kerberos.principal=scm/scm@EXAMPLE.COM -OZONE-SITE.XML_hdds.scm.kerberos.keytab.file=/etc/security/keytabs/scm.keytab -OZONE-SITE.XML_ozone.om.kerberos.principal=om/om@EXAMPLE.COM -OZONE-SITE.XML_ozone.om.kerberos.keytab.file=/etc/security/keytabs/om.keytab -OZONE-SITE.XML_ozone.recon.kerberos.keytab.file=/etc/security/keytabs/recon.keytab -OZONE-SITE.XML_ozone.recon.kerberos.principal=recon/recon@EXAMPLE.COM - -OZONE-SITE.XML_ozone.s3g.kerberos.keytab.file=/etc/security/keytabs/s3g.keytab -OZONE-SITE.XML_ozone.s3g.kerberos.principal=s3g/s3g@EXAMPLE.COM - -OZONE-SITE.XML_ozone.httpfs.kerberos.keytab.file=/etc/security/keytabs/httpfs.keytab -OZONE-SITE.XML_ozone.httpfs.kerberos.principal=httpfs/httpfs@EXAMPLE.COM - -HDFS-SITE.XML_dfs.datanode.kerberos.principal=dn/dn@EXAMPLE.COM -HDFS-SITE.XML_dfs.datanode.kerberos.keytab.file=/etc/security/keytabs/dn.keytab -HDFS-SITE.XML_dfs.web.authentication.kerberos.principal=HTTP/ozone@EXAMPLE.COM 
-HDFS-SITE.XML_dfs.web.authentication.kerberos.keytab=/etc/security/keytabs/HTTP.keytab - - -OZONE-SITE.XML_ozone.security.http.kerberos.enabled=true -OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.HttpCrossOriginFilterInitializer - -OZONE-SITE.XML_ozone.om.http.auth.type=kerberos -OZONE-SITE.XML_hdds.scm.http.auth.type=kerberos -OZONE-SITE.XML_hdds.datanode.http.auth.type=kerberos -OZONE-SITE.XML_ozone.s3g.http.auth.type=kerberos -OZONE-SITE.XML_ozone.httpfs.http.auth.type=kerberos -OZONE-SITE.XML_ozone.recon.http.auth.type=kerberos - -OZONE-SITE.XML_hdds.scm.http.auth.kerberos.principal=HTTP/scm@EXAMPLE.COM -OZONE-SITE.XML_hdds.scm.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_ozone.om.http.auth.kerberos.principal=HTTP/om@EXAMPLE.COM -OZONE-SITE.XML_ozone.om.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.principal=HTTP/db@EXAMPLE.COM -OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.principal=HTTP/s3g@EXAMPLE.COM -OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.keytab=/etc/security/keytabs/httpfs.keytab -OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.principal=HTTP/httpfs@EXAMPLE.COM -OZONE-SITE.XML_ozone.recon.http.auth.kerberos.principal=HTTP/recon@EXAMPLE.COM -OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab -OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab - -OZONE-SITE.XML_hdds.x509.max.duration=PT180S -OZONE-SITE.XML_hdds.x509.default.duration=PT60S -OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S -OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S -OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S -OZONE-SITE.XML_hdds.block.token.expiry.time=15s 
-OZONE-SITE.XML_ozone.manager.delegation.token.max-lifetime=15s -OZONE-SITE.XML_ozone.manager.delegation.token.renew-interval=15s -OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=60s -OZONE-SITE.XML_hdds.scmclient.failover.retry.interval=1s -OZONE-SITE.XML_hdds.scmclient.failover.max.retry=60 -OZONE-SITE.XML_ozone.scm.info.wait.duration=60s -OZONE-SITE.XML_ozone.scm.ha.ratis.request.timeout=2s - -CORE-SITE.XML_hadoop.http.authentication.simple.anonymous.allowed=false -CORE-SITE.XML_hadoop.http.authentication.signature.secret.file=/etc/security/http_secret -CORE-SITE.XML_hadoop.http.authentication.type=kerberos -CORE-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/ozone@EXAMPLE.COM -CORE-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/HTTP.keytab - -CORE-SITE.XML_hadoop.security.authorization=true -HADOOP-POLICY.XML_ozone.om.security.client.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.datanode.container.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.scm.container.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.scm.block.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.scm.certificate.protocol.acl=* - -HDFS-SITE.XML_rpc.metrics.quantile.enable=true -HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 - -HTTPFS-SITE.XML_hadoop.http.authentication.type=kerberos -HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab -HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/httpfs@EXAMPLE.COM -HTTPFS-SITE.XML_httpfs.hadoop.authentication.type=kerberos -HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab -HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.principal=httpfs/httpfs@EXAMPLE.COM -KMS-SITE.XML_hadoop.kms.proxyuser.s3g.users=* -KMS-SITE.XML_hadoop.kms.proxyuser.s3g.groups=* -KMS-SITE.XML_hadoop.kms.proxyuser.s3g.hosts=* - -#Enable this variable to print out all hadoop rpc traffic to the stdout. 
See http://byteman.jboss.org/ to define your own instrumentation. -#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm - -OZONE_DATANODE_SECURE_USER=root -JAVA_HOME=/usr/lib/jvm/jre -JSVC_HOME=/usr/bin - -OZONE_CONF_DIR=/etc/hadoop -OZONE_LOG_DIR=/var/log/hadoop - -no_proxy=om,scm,recon,s3g,kdc,localhost,127.0.0.1 - -# Explicitly enable filesystem snapshot feature for this Docker compose cluster -OZONE-SITE.XML_ozone.filesystem.snapshot.enabled=true diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 deleted file mode 100644 index 39d7e8b583a3..000000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/docker-config-scm4 +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3,scm4 -OZONE-SITE.XML_ozone.scm.address.scmservice.scm4=scm4.org diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf b/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf deleted file mode 100644 index eefc5b9c6858..000000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/krb5.conf +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -[logging] -default = FILE:/var/log/krb5libs.log -kdc = FILE:/var/log/krb5kdc.log -admin_server = FILE:/var/log/kadmind.log - -[libdefaults] - dns_canonicalize_hostname = false - dns_lookup_realm = false - ticket_lifetime = 24h - renew_lifetime = 7d - forwardable = true - rdns = false - default_realm = EXAMPLE.COM - -[realms] - EXAMPLE.COM = { - kdc = kdc - admin_server = kdc - max_renewable_life = 7d - } - -[domain_realm] - .example.com = EXAMPLE.COM - example.com = EXAMPLE.COM - diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml new file mode 100644 index 000000000000..4e88c49e3133 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "3.4" + +x-root-cert-rotation-config: + &root-cert-rotation-config + environment: + - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false + - OZONE-SITE.XML_hdds.x509.max.duration=PT180S + - OZONE-SITE.XML_hdds.x509.default.duration=PT60S + - OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S + - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S + - OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S + - OZONE-SITE.XML_hdds.block.token.expiry.time=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.max-lifetime=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.renew-interval=15s + - OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=60s + - OZONE-SITE.XML_hdds.scmclient.failover.retry.interval=1s + - OZONE-SITE.XML_hdds.scmclient.failover.max.retry=60 + - OZONE-SITE.XML_ozone.scm.info.wait.duration=60s + - OZONE-SITE.XML_ozone.scm.ha.ratis.request.timeout=2s + - OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.HttpCrossOriginFilterInitializer +services: + datanode1: + <<: *root-cert-rotation-config + datanode2: + <<: *root-cert-rotation-config + datanode3: + <<: *root-cert-rotation-config + datanode4: + <<: *root-cert-rotation-config + om1: + <<: *root-cert-rotation-config + om2: + <<: *root-cert-rotation-config + om3: + <<: *root-cert-rotation-config + scm1.org: + <<: *root-cert-rotation-config + scm2.org: + <<: *root-cert-rotation-config + scm3.org: + <<: *root-cert-rotation-config + scm4.org: + <<: *root-cert-rotation-config + s3g: + <<: *root-cert-rotation-config + httpfs: + <<: *root-cert-rotation-config + recon: + <<: *root-cert-rotation-config diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh similarity index 95% rename from hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh rename to hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh index 
1cdc7a8d3828..1f7ca48c50ca 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-carotation/test.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -23,6 +23,7 @@ export COMPOSE_DIR export SECURITY_ENABLED=true export OM_SERVICE_ID="omservice" export SCM=scm1.org +export COMPOSE_FILE=docker-compose.yaml:root-ca-rotation.yaml : ${OZONE_BUCKET_KEY_NAME:=key1} @@ -36,7 +37,7 @@ execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} execute_robot_test s3g kinit.robot # verify root CA rotation monitor task is active on leader -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Active'" +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" # wait and verify root CA is rotated wait_for_execute_command scm1.org 90 "ozone admin cert info 2" @@ -46,7 +47,7 @@ execute_robot_test s3g admincli/pipeline.robot # transfer leader to another SCM execute_robot_test s3g scmha/scm-leader-transfer.robot -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Inactive'" +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-Inactive'" # wait for second root CA rotation wait_for_execute_command scm1.org 90 "ozone admin cert info 3" diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml new file mode 100644 index 000000000000..8f7b944b0fb8 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software 
Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3.4" + +x-root-cert-rotation-config: + &root-cert-rotation-config + environment: + - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false + - OZONE-SITE.XML_hdds.x509.max.duration=PT180S + - OZONE-SITE.XML_hdds.x509.default.duration=PT60S + - OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S + - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S + - OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S + - OZONE-SITE.XML_hdds.block.token.expiry.time=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.max-lifetime=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.renew-interval=15s + - OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=60s + - OZONE-SITE.XML_hdds.scmclient.failover.retry.interval=1s + - OZONE-SITE.XML_hdds.scmclient.failover.max.retry=60 + - OZONE-SITE.XML_ozone.scm.info.wait.duration=60s + - OZONE-SITE.XML_ozone.scm.ha.ratis.request.timeout=2s + - OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.HttpCrossOriginFilterInitializer +services: + datanode: + <<: *root-cert-rotation-config + om: + <<: *root-cert-rotation-config + scm: + <<: *root-cert-rotation-config + s3g: + <<: *root-cert-rotation-config + httpfs: + <<: *root-cert-rotation-config + 
recon: + <<: *root-cert-rotation-config diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh new file mode 100755 index 000000000000..e2f43670ba8e --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#suite:HA-secure + +COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export COMPOSE_DIR + +export SECURITY_ENABLED=true +export OM_SERVICE_ID="omservice" +export SCM=scm1.org +export COMPOSE_FILE=docker-compose.yaml:root-ca-rotation.yaml + +: ${OZONE_BUCKET_KEY_NAME:=key1} + +# shellcheck source=/dev/null +source "$COMPOSE_DIR/../testlib.sh" + +start_docker_env + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +execute_robot_test s3g kinit.robot + +# verify root CA rotation monitor task is active on leader +wait_for_execute_command scm 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" + +# wait and verify root CA is rotated +wait_for_execute_command scm 90 "ozone admin cert info 2" + +# verify scm operations +execute_robot_test s3g admincli/pipeline.robot + +# wait for second root CA rotation +wait_for_execute_command scm 90 "ozone admin cert info 3" + +# verify om operations +wait_for_execute_command scm 10 "ozone sh volume create rotation-vol" +wait_for_execute_command scm 10 "ozone sh bucket create rotation-vol/rotation-bucket" + +# verify data read write +wait_for_execute_command scm 10 "ozone sh key put /opt/hadoop/README.md /rotation-vol/rotation-bucket/README.md" +wait_for_execute_command scm 10 "ozone sh key get /opt/hadoop/README.md.1 /rotation-vol/rotation-bucket/README.md" + +# check the metrics +execute_robot_test scm scmha/root-ca-rotation.robot + +stop_docker_env + +generate_report diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot index 50b6d64713f5..f625abaa1401 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot @@ -32,4 +32,4 @@ Verify root CA rotation metrics ${temp_3} = Split String ${temp_2} , 
${successRotation} = Strip String ${temp_3[0]} ${successRotation} = Convert To Number ${successRotation} - Should be true ${successRotation} >= 1 + Should be true ${RootCARotationMetrics} >= 1 From 382f9fc1cc6b033aa67b827e2112aad12d2a97d5 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Wed, 21 Jun 2023 18:03:31 +0800 Subject: [PATCH 03/15] fix findbugs --- .../ozone/TestHddsSecureDatanodeInit.java | 5 +++ .../hdds/protocol/SCMSecurityProtocol.java | 12 +++++++ ...ecurityProtocolClientSideTranslatorPB.java | 20 ++++++++++-- .../client/DefaultCertificateClient.java | 5 ++- .../client/SCMCertificateClient.java | 2 +- .../proto/ScmServerSecurityProtocol.proto | 1 + .../hadoop/hdds/scm/ha/HASecurityUtils.java | 2 +- ...ecurityProtocolServerSideTranslatorPB.java | 2 +- .../scm/security/RootCARotationManager.java | 4 +++ .../scm/server/SCMClientProtocolServer.java | 2 +- .../scm/server/SCMSecurityProtocolServer.java | 32 +++++++++++++++---- .../scm/server/StorageContainerManager.java | 7 ++++ .../server/TestSCMSecurityProtocolServer.java | 13 ++++++++ .../smoketest/scmha/root-ca-rotation.robot | 2 +- 14 files changed, 93 insertions(+), 16 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java index eea522438fab..f96f6e77d333 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java @@ -47,6 +47,7 @@ import org.apache.commons.io.FileUtils; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_RENEW_GRACE_DURATION; @@ -104,8 +105,12 @@ public static void setUp() throws Exception { ServicePlugin.class); conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT5S"); // 5s conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT1S"); // 1s +<<<<<<< HEAD conf.setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); +======= + conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s +>>>>>>> 2a37dcefd (fix findbugs) securityConfig = new SecurityConfig(conf); service = new HddsDatanodeService(args) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java index ed4906e3b7ff..1b88cee107b3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java @@ -77,6 +77,18 @@ String getOMCertificate(OzoneManagerDetailsProto omDetails, String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, String certSignReq) throws IOException; + /** + * Get signed certificate for SCM. + * + * @param scmNodeDetails - SCM Node Details. + * @param certSignReq - Certificate signing request. + * @param isRenew - if SCM is renewing certificate or not. + * @return String - pem encoded SCM signed + * certificate. + */ + String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + String certSignReq, boolean isRenew) throws IOException; + /** * Get SCM signed certificate for given certificate serial id if it exists. * Throws exception if it's not found. 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java index 73e7ede0790d..4aa32f04cfe0 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java @@ -194,9 +194,24 @@ public String getCertificate(NodeDetailsProto nodeDetails, @Override public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, String certSignReq) throws IOException { - return getSCMCertChain(scmNodeDetails, certSignReq).getX509Certificate(); + return getSCMCertChain(scmNodeDetails, certSignReq, false) + .getX509Certificate(); } + /** + * Get signed certificate for SCM node. + * + * @param scmNodeDetails - SCM Node Details. + * @param certSignReq - Certificate signing request. + * @param renew - Whether SCM is trying to renew its certificate + * @return String - pem encoded SCM signed + * certificate. + */ + public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + String certSignReq, boolean renew) throws IOException { + return getSCMCertChain(scmNodeDetails, certSignReq, renew) + .getX509Certificate(); + } /** * Get signed certificate for SCM node and root CA certificate. @@ -207,12 +222,13 @@ public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, * signed certificate and root CA certificate. 
*/ public SCMGetCertResponseProto getSCMCertChain( - ScmNodeDetailsProto scmNodeDetails, String certSignReq) + ScmNodeDetailsProto scmNodeDetails, String certSignReq, boolean isRenew) throws IOException { SCMGetSCMCertRequestProto request = SCMGetSCMCertRequestProto.newBuilder() .setCSR(certSignReq) .setScmDetails(scmNodeDetails) + .setRenew(isRenew) .build(); return submitRequest(Type.GetSCMCertificate, builder -> builder.setGetSCMCertificateRequest(request)) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index 5ee5d4ca8b67..a4155793eb32 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -195,7 +195,7 @@ private synchronized void readCertificateFile(Path filePath) { if (readCertSerialId.equals(certSerialId)) { this.certPath = allCertificates; } - certificateMap.putIfAbsent(readCertSerialId, allCertificates); + certificateMap.put(readCertSerialId, allCertificates); addCertsToSubCaMapIfNeeded(fileName, allCertificates); addCertToRootCaMapIfNeeded(fileName, allCertificates); @@ -583,8 +583,7 @@ public synchronized void storeCertificate(String pemEncodedCert, codec.writeCertificate(certName, pemEncodedCert); if (addToCertMap) { - certificateMap.putIfAbsent( - cert.getSerialNumber().toString(), certificatePath); + certificateMap.put(cert.getSerialNumber().toString(), certificatePath); } } catch (IOException | java.security.cert.CertificateException e) { throw new CertificateException("Error while storing certificate.", e, diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index 7c6a5a9073ae..c739e81cfd36 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -206,7 +206,7 @@ public String signAndStoreCertificate(PKCS10CertificationRequest request, // Get SCM sub CA cert. SCMSecurityProtocolProtos.SCMGetCertResponseProto response = getScmSecureClient().getSCMCertChain(scmNodeDetailsProto, - getEncodedString(request)); + getEncodedString(request), true); CertificateCodec certCodec = new CertificateCodec( getSecurityConfig(), certPath); diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto index 85ae39379fd3..dd3ef42308c4 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto @@ -147,6 +147,7 @@ message SCMGetCertRequestProto { message SCMGetSCMCertRequestProto { required ScmNodeDetailsProto scmDetails = 1; required string CSR = 2; + optional bool renew = 3[default = false]; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java index 370b5fd09497..4a362b1cab0c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java @@ -161,7 +161,7 @@ private static void getRootCASignedSCMCert( // Get SCM sub CA cert. SCMGetCertResponseProto response = secureScmClient. 
- getSCMCertChain(scmNodeDetailsProto, getEncodedString(csr)); + getSCMCertChain(scmNodeDetailsProto, getEncodedString(csr), false); String pemEncodedCert = response.getX509Certificate(); // Store SCM sub CA and root CA certificate. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java index d65eacc84c76..a986aaf27fb3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java @@ -253,7 +253,7 @@ public SCMGetCertResponseProto getSCMCertificate( throw createNotHAException(); } String certificate = impl.getSCMCertificate(request.getScmDetails(), - request.getCSR()); + request.getCSR(), request.hasRenew() ? 
request.getRenew() : false); SCMGetCertResponseProto.Builder builder = SCMGetCertResponseProto .newBuilder() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index 883928ecfada..e0b6c0f93a52 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -387,6 +387,10 @@ public void run() { String newRootCertId = ""; X509CertificateHolder newRootCertificate; try { + // prevent findbugs false alert + if (newRootCAServer == null) { + throw new Exception("New root CA server should not be null"); + } newRootCertificate = newRootCAServer.getCACertificate(); newRootCertId = newRootCertificate.getSerialNumber().toString(); Preconditions.checkState(newRootCertId.equals(newId.toString()), diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index f3fa69f529c2..a344ad56fd1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -805,7 +805,7 @@ public void transferLeadership(String newLeaderId) if (scm.getRootCARotationManager() != null && scm.getRootCARotationManager().isRotationInProgress()) { - throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + throw new SCMException(("Root CA and Sub CA rotation is in-progress." 
+ " Please try the operation later again."), ResultCodes.CA_ROTATION_IN_PROGRESS); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java index c62e84edd785..8b05d3013f32 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java @@ -190,7 +190,7 @@ public String getDataNodeCertificate( Objects.requireNonNull(dnDetails); if (storageContainerManager.getRootCARotationManager() .isRotationInProgress()) { - throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + " Please try the operation later again."), SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); } @@ -207,7 +207,7 @@ public String getCertificate( Objects.requireNonNull(nodeDetails); if (storageContainerManager.getRootCARotationManager() .isRotationInProgress()) { - throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + " Please try the operation later again."), SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); } @@ -272,24 +272,37 @@ public String getOMCertificate(OzoneManagerDetailsProto omDetails, Objects.requireNonNull(omDetails); if (storageContainerManager.getRootCARotationManager() .isRotationInProgress()) { - throw new SCMException(("Root CA and Sub CA rotation is inprogress." + + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + " Please try the operation later again."), SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); } return getEncodedCertToString(certSignReq, NodeType.OM); } - /** * Get signed certificate for SCM Node. * * @param scmNodeDetails - SCM Node Details. 
- * @param certSignReq - Certificate signing request. - * @return String - SCM signed pem encoded certificate. + * @param certSignReq - Certificate signing request. + * @return String - SCM signed pem encoded certificate. */ @Override public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, String certSignReq) throws IOException { + return getSCMCertificate(scmNodeDetails, certSignReq, false); + } + + /** + * Get signed certificate for SCM Node. + * + * @param scmNodeDetails - SCM Node Details. + * @param certSignReq - Certificate signing request. + * @param isRenew - if SCM is renewing certificate or not. + * @return String - SCM signed pem encoded certificate. + */ + @Override + public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + String certSignReq, boolean isRenew) throws IOException { Objects.requireNonNull(scmNodeDetails); // Check clusterID if (!storageContainerManager.getClusterId().equals( @@ -299,6 +312,13 @@ public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + storageContainerManager.getClusterId()); } + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress() && !isRenew) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." 
+ + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } + LOGGER.info("Processing CSR for scm {}, nodeId: {}", scmNodeDetails.getHostName(), scmNodeDetails.getScmNodeId()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index a4610e0824e1..edbc9e2e5097 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -2150,6 +2150,13 @@ public boolean removePeerFromHARing(String scmId) throw new IOException("Cannot remove current leader."); } + if (rootCARotationManager != null && + rootCARotationManager.isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + + " Please try the operation later again."), + ResultCodes.CA_ROTATION_IN_PROGRESS); + } + Preconditions.checkNotNull(getScmHAManager().getRatisServer() .getDivision().getGroup()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java index 7803025d9d98..5ed1f287c269 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java @@ -20,16 +20,24 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +<<<<<<< HEAD import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; +======= +import 
org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; +>>>>>>> 2a37dcefd (fix findbugs) import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +<<<<<<< HEAD import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; +======= +import org.mockito.Mockito; +>>>>>>> 2a37dcefd (fix findbugs) import java.io.IOException; import java.security.KeyPair; @@ -54,8 +62,13 @@ public void setUp() throws Exception { config = new OzoneConfiguration(); config.set(OZONE_SCM_SECURITY_SERVICE_ADDRESS_KEY, OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT + ":0"); +<<<<<<< HEAD securityProtocolServer = new SCMSecurityProtocolServer(config, null, null, new ArrayList<>(), mockScm, null); +======= + securityProtocolServer = new SCMSecurityProtocolServer(config, + Mockito.mock(CertificateServer.class), null, null, null, null); +>>>>>>> 2a37dcefd (fix findbugs) } @AfterEach diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot index f625abaa1401..50b6d64713f5 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot @@ -32,4 +32,4 @@ Verify root CA rotation metrics ${temp_3} = Split String ${temp_2} , ${successRotation} = Strip String ${temp_3[0]} ${successRotation} = Convert To Number ${successRotation} - Should be true ${RootCARotationMetrics} >= 1 + Should be true ${successRotation} >= 1 From 35a6a44d4ddee8b7691cb79ff1d5bcb4da659aad Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Thu, 22 Jun 2023 17:42:41 +0800 Subject: [PATCH 04/15] rebase fix --- .../hadoop/ozone/TestHddsSecureDatanodeInit.java | 4 ---- .../client/DefaultCertificateClient.java | 4 ---- .../certificate/client/SCMCertificateClient.java | 13 
------------- .../scm/security/TestRootCARotationManager.java | 3 +++ .../scm/server/TestSCMSecurityProtocolServer.java | 14 +------------- 5 files changed, 4 insertions(+), 34 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java index f96f6e77d333..67c95ce11152 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java @@ -105,12 +105,8 @@ public static void setUp() throws Exception { ServicePlugin.class); conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT5S"); // 5s conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT1S"); // 1s -<<<<<<< HEAD conf.setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); - -======= conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s ->>>>>>> 2a37dcefd (fix findbugs) securityConfig = new SecurityConfig(conf); service = new HddsDatanodeService(args) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index a4155793eb32..631ea30b6925 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -124,11 +124,7 @@ public abstract class DefaultCertificateClient implements CertificateClient { private ScheduledExecutorService executorService; private Consumer certIdSaveCallback; private Runnable shutdownCallback; -<<<<<<< HEAD private SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient; 
-======= - private SCMSecurityProtocolClientSideTranslatorPB scmSecurityProtocolClient; ->>>>>>> 898ec9b4d (fix checkstyle and refactor robot test) private final Set notificationReceivers; private static Semaphore semaphore = new Semaphore(1); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index c739e81cfd36..35365e52918b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -181,19 +181,6 @@ public Logger getLogger() { } @Override -<<<<<<< HEAD -======= - public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() - throws IOException { - if (scmSecurityClient == null) { - scmSecurityClient = - HddsServerUtil.getScmSecurityClientWithFixedDuration(getConfig()); - } - return scmSecurityClient; - } - - @Override ->>>>>>> 898ec9b4d (fix checkstyle and refactor robot test) public String signAndStoreCertificate(PKCS10CertificationRequest request, Path certPath, boolean renew) throws CertificateException { try { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java index f661827f8afb..8b90dc52f618 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java @@ -54,6 +54,7 @@ import java.util.UUID; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; 
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_TIME_OF_DAY; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; @@ -181,6 +182,7 @@ public void testProperties() { public void testRotationOnSchedule() throws Exception { ozoneConfig.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT2S"); ozoneConfig.set(HDDS_X509_RENEW_GRACE_DURATION, "PT15S"); + ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT15S"); Date date = Calendar.getInstance().getTime(); date.setSeconds(date.getSeconds() + 10); ozoneConfig.set(HDDS_X509_CA_ROTATION_TIME_OF_DAY, @@ -213,6 +215,7 @@ public void testRotationOnSchedule() throws Exception { public void testRotationImmediately() throws Exception { ozoneConfig.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT2S"); ozoneConfig.set(HDDS_X509_RENEW_GRACE_DURATION, "PT15S"); + ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT15S"); Date date = Calendar.getInstance().getTime(); date.setMinutes(date.getMinutes() + 5); ozoneConfig.set(HDDS_X509_CA_ROTATION_TIME_OF_DAY, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java index 5ed1f287c269..cc70e052cbec 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java @@ -20,24 +20,17 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -<<<<<<< HEAD import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; -======= import 
org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; ->>>>>>> 2a37dcefd (fix findbugs) import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; -<<<<<<< HEAD import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; -======= -import org.mockito.Mockito; ->>>>>>> 2a37dcefd (fix findbugs) import java.io.IOException; import java.security.KeyPair; @@ -62,13 +55,8 @@ public void setUp() throws Exception { config = new OzoneConfiguration(); config.set(OZONE_SCM_SECURITY_SERVICE_ADDRESS_KEY, OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT + ":0"); -<<<<<<< HEAD - securityProtocolServer = new SCMSecurityProtocolServer(config, null, - null, new ArrayList<>(), mockScm, null); -======= securityProtocolServer = new SCMSecurityProtocolServer(config, - Mockito.mock(CertificateServer.class), null, null, null, null); ->>>>>>> 2a37dcefd (fix findbugs) + null, null, new ArrayList<>(), mockScm, null); } @AfterEach From cd2dd6e86f9938017373f8dc24a8a5ae856d2b7c Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Sun, 25 Jun 2023 23:11:42 +0800 Subject: [PATCH 05/15] fix test --- .../security/ssl/ReloadingX509KeyManager.java | 45 +++++++- .../org/apache/hadoop/ozone/OzoneConsts.java | 3 +- .../client/DefaultCertificateClient.java | 7 +- .../client/SCMCertificateClient.java | 2 - .../hadoop/hdds/scm/ha/HASecurityUtils.java | 3 +- .../scm/security/RootCARotationManager.java | 14 ++- .../scm/server/SCMSecurityProtocolServer.java | 53 +++++----- .../scm/server/StorageContainerManager.java | 10 +- .../security/TestRootCARotationManager.java | 8 +- .../server/TestSCMSecurityProtocolServer.java | 100 ------------------ .../ozonesecure-ha/test-root-ca-rotation.sh | 15 +++ hadoop-ozone/dist/src/main/compose/testlib.sh | 1 + .../smoketest/scmha/scm-leader-transfer.robot | 18 +++- 13 files changed, 123 
insertions(+), 156 deletions(-) delete mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index e3aba805463b..d87344775163 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -39,6 +39,8 @@ import java.util.List; import java.util.Locale; import java.util.concurrent.atomic.AtomicReference; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -73,6 +75,9 @@ public class ReloadingX509KeyManager extends X509ExtendedKeyManager { */ private PrivateKey currentPrivateKey; private List currentCertIdsList = new ArrayList<>(); + private final Pattern dnPattern = + Pattern.compile("^.+, CN=scm-\\d{1}@.+"); + private final String alias = "scm/sub-ca_key"; /** * Construct a Reloading509KeystoreManager. @@ -110,6 +115,42 @@ public String chooseEngineClientAlias(String[] strings, principals == null ? "" : Arrays.stream(principals) .map(Object::toString).collect(Collectors.joining(", ")), sslEngine == null ? "" : sslEngine.toString()); + /* + Workaround to address that netty tc-native doesn't handle the dynamic + KeyManager re-loading well. + + TODO: If this issue is fixed in tc-native in future version, following + logic can be removed. 
+ + Example error message: + Engine client aliases for RSA, DH_RSA, EC, EC_RSA, EC_EC, + O=CID-f9f2b2cf-a784-49d7-8577-5d3b13bf0b46, + OU=9f52487c-f8f9-45ee-bb56-aca60b56327f, + CN=scm-1@scm1.org, + org.apache.ratis.thirdparty.io.netty.handler.ssl.OpenSslEngine@5eec0d10 + is null + + Example success message: + Engine client aliases for RSA, DH_RSA, EC, EC_RSA, EC_EC, + O=CID-f9f2b2cf-a784-49d7-8577-5d3b13bf0b46, + OU=9f52487c-f8f9-45ee-bb56-aca60b56327f, + CN=scm-1@scm1.org, + org.apache.ratis.thirdparty.io.netty.handler.ssl.OpenSslEngine@5eec0d10 + is scm/sub-ca_key + */ + if (principals != null) { + String dn = principals[0].toString(); + Matcher matcher = dnPattern.matcher(dn); + if (matcher.matches()) { + ret = alias; + } + LOG.warn("Engine client aliases for {}, {}, {} is changed to {}", + strings == null ? "" : Arrays.stream(strings).map(Object::toString) + .collect(Collectors.joining(",")), + principals == null ? "" : Arrays.stream(principals) + .map(Object::toString).collect(Collectors.joining(",")), + sslEngine == null ? "" : sslEngine.toString(), ret); + } } return ret; } @@ -127,8 +168,8 @@ public String chooseEngineServerAlias(String s, Principal[] principals, } } } - if (ret == null) { - LOG.info("Engine server aliases for {}, {}, {} is null", s, + if (ret == null && LOG.isDebugEnabled()) { + LOG.debug("Engine server aliases for {}, {}, {} is null", s, principals == null ? "" : Arrays.stream(principals) .map(Object::toString).collect(Collectors.joining(", ")), sslEngine == null ? 
"" : sslEngine.toString()); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index c417062cf197..fd519bfd4393 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -483,7 +483,8 @@ private OzoneConsts() { // %s to distinguish different certificates public static final String SCM_SUB_CA = "scm-sub"; public static final String SCM_SUB_CA_PREFIX = SCM_SUB_CA + "-%s@"; - public static final String SCM_ROOT_CA_PREFIX = "scm-%s@"; + public static final String SCM_ROOT_CA = "scm"; + public static final String SCM_ROOT_CA_PREFIX = SCM_ROOT_CA + "-%s@"; // Layout Version written into Meta Table ONLY during finalization. public static final String LAYOUT_VERSION_KEY = "#LAYOUTVERSION"; diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index 631ea30b6925..17586a4841cb 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -157,8 +157,11 @@ protected DefaultCertificateClient( * Load all certificates from configured location. 
* */ private synchronized void loadAllCertificates() { - try (Stream certFiles = - Files.list(securityConfig.getCertificateLocation(component))) { + Path path = securityConfig.getCertificateLocation(component); + if (!path.toFile().exists() && certSerialId == null) { + return; + } + try (Stream certFiles = Files.list(path)) { certFiles .filter(Files::isRegularFile) .forEach(this::readCertificateFile); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index 35365e52918b..acba77cf8cbe 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -26,13 +26,11 @@ import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; -import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.OzoneConsts; import org.bouncycastle.pkcs.PKCS10CertificationRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.security.KeyPair; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java index 4a362b1cab0c..2f86de150ee8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java @@ -198,6 +198,7 @@ private static void 
getPrimarySCMSelfSignedCert(CertificateClient client, String scmHostname) { try { + CertificateServer rootCAServer = initializeRootCertificateServer(config, null, scmStorageConfig, new DefaultCAProfile()); @@ -383,7 +384,7 @@ public static SCMRatisResponse submitScmRequestToRatis(RaftGroup raftGroup, } private static SCMSecurityProtocolClientSideTranslatorPB - getScmSecurityClientWithFixedDuration(OzoneConfiguration conf) + getScmSecurityClientWithFixedDuration(OzoneConfiguration conf) throws IOException { // As for OM during init, we need to wait for specific duration so that // we can give response to user performed operation init in a definite diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index e0b6c0f93a52..9554caba6025 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -455,9 +455,9 @@ public void run() { } private void cleanupAndStop(String reason) { - scm.getSecurityProtocolServer() - .setRootCertificateServer(null); try { + scm.getSecurityProtocolServer().setRootCertificateServer(null); + FileUtils.deleteDirectory(new File(scmCertClient.getSecurityConfig() .getLocation(newCAComponent).toString())); LOG.info("In-progress root CA directory {} is deleted for '{}'", @@ -572,7 +572,7 @@ public void run() { Paths.get(newSubCAProgressPath, HDDS_X509_DIR_NAME_DEFAULT), true); LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + - "scm certificate signed.", rootCACertId); + "scm certificate {} signed.", rootCACertId, newCertSerialId); } catch (Exception e) { LOG.error("Failed to generate certificate under {}", newProgressDir, e); @@ -647,8 +647,14 @@ public void run() { return; } synchronized (RootCARotationManager.class) { + 
int numFromHADetails = + scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1; + int numFromRatisServer = scm.getScmHAManager().getRatisServer() + .getDivision().getGroup().getPeers().size(); + LOG.info("numFromHADetails {}, numFromRatisServer {}", + numFromHADetails, numFromRatisServer); if (handler.rotationPrepareAcks() == - (scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1)) { + (Math.max(numFromHADetails, numFromRatisServer))) { // all acks are received. try { waitAckTimeoutTask.cancel(false); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java index 8b05d3013f32..6fd44a4c0f76 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -61,7 +62,7 @@ import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.symmetric.ManagedSecretKey; import org.apache.hadoop.hdds.security.symmetric.SecretKeyManager; -import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdds.scm.ScmConfig; @@ -103,13 +104,13 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, .getLogger(SCMSecurityProtocolServer.class); private CertificateServer rootCertificateServer; private final CertificateServer 
scmCertificateServer; - private final List rootCACertificateList; private final RPC.Server rpcServer; // HADOOP RPC SERVER private final SCMUpdateServiceGrpcServer grpcUpdateServer; // gRPC SERVER private final InetSocketAddress rpcAddress; private final ProtocolMessageMetrics metrics; private final ProtocolMessageMetrics secretKeyMetrics; private final StorageContainerManager storageContainerManager; + private final CertificateClient scmCertificateClient; // SecretKey may not be enabled when neither block token nor container // token is enabled. @@ -118,13 +119,14 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, SCMSecurityProtocolServer(OzoneConfiguration conf, @Nullable CertificateServer rootCertificateServer, CertificateServer scmCertificateServer, - List rootCACertList, StorageContainerManager scm, + CertificateClient scmCertClient, + StorageContainerManager scm, @Nullable SecretKeyManager secretKeyManager) throws IOException { this.storageContainerManager = scm; this.rootCertificateServer = rootCertificateServer; this.scmCertificateServer = scmCertificateServer; - this.rootCACertificateList = rootCACertList; + this.scmCertificateClient = scmCertClient; this.secretKeyManager = secretKeyManager; final int handlerCount = conf.getInt(ScmConfigKeys.OZONE_SCM_SECURITY_HANDLER_COUNT_KEY, @@ -249,9 +251,12 @@ private void validateSecretKeyStatus() throws SCMSecretKeyException { @Override public synchronized List getAllRootCaCertificates() throws IOException { - List pemEncodedList = - new ArrayList<>(rootCACertificateList.size()); - for (X509Certificate cert : rootCACertificateList) { + List pemEncodedList = new ArrayList<>(); + Set certList = + scmCertificateClient.getAllRootCaCerts().size() == 0 ? 
+ scmCertificateClient.getAllCaCerts() : + scmCertificateClient.getAllRootCaCerts(); + for (X509Certificate cert : certList) { pemEncodedList.add(getPEMEncodedString(cert)); } return pemEncodedList; @@ -332,8 +337,8 @@ public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, * @return String - SCM signed pem encoded certificate. * @throws IOException */ - private String getEncodedCertToString(String certSignReq, NodeType nodeType) - throws IOException { + private synchronized String getEncodedCertToString(String certSignReq, + NodeType nodeType) throws IOException { Future future; if (nodeType == NodeType.SCM && rootCertificateServer != null) { future = rootCertificateServer.requestCertificate(certSignReq, @@ -456,25 +461,18 @@ public List listCACertificate() throws IOException { @Override public synchronized String getRootCACertificate() throws IOException { LOGGER.debug("Getting Root CA certificate."); - X509Certificate lastExpiringRootCa = null; - if (storageContainerManager.getScmStorageConfig() - .checkPrimarySCMIdInitialized()) { - Date lastCertDate = new Date(0); - for (X509Certificate cert : rootCACertificateList) { - if (cert.getNotAfter().after(lastCertDate)) { - lastCertDate = cert.getNotAfter(); - lastExpiringRootCa = cert; - } + if (rootCertificateServer != null) { + try { + return CertificateCodec.getPEMEncodedString( + rootCertificateServer.getCACertificate()); + } catch (CertificateException e) { + LOGGER.error("Failed to get root CA certificate", e); + throw new IOException("Failed to get root CA certificate", e); } } - if (lastExpiringRootCa == null) { - return null; - } - return CertificateCodec.getPEMEncodedString(lastExpiringRootCa); - } - public synchronized void addNewRootCa(X509Certificate rootCaCertToAdd) { - rootCACertificateList.add(rootCaCertToAdd); + return CertificateCodec.getPEMEncodedString( + scmCertificateClient.getCACertificate()); } @Override @@ -549,14 +547,13 @@ public void join() throws InterruptedException { 
getRpcServer().join(); LOGGER.info("Join gRPC server for SCMSecurityProtocolServer."); getGrpcUpdateServer().join(); - } - public CertificateServer getRootCertificateServer() { + public synchronized CertificateServer getRootCertificateServer() { return rootCertificateServer; } - public void setRootCertificateServer( + public synchronized void setRootCertificateServer( CertificateServer newServer) { this.rootCertificateServer = newServer; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index edbc9e2e5097..cbf4f9618115 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -898,20 +898,12 @@ certificateStore, null, new DefaultCAProfile(), SecretKeyManager secretKeyManager = secretKeyManagerService != null ? secretKeyManagerService.getSecretKeyManager() : null; - X509Certificate rootCaCert = scmCertificateClient == null ? null : - scmCertificateClient.getRootCACertificate() != null ? - scmCertificateClient.getRootCACertificate() : - scmCertificateClient.getCACertificate(); - List rootCaList = new ArrayList<>(); - if (rootCaCert != null) { - rootCaList.add(rootCaCert); - } // We need to pass getCACertificate as rootCA certificate, // as for SCM CA is root-CA. 
securityProtocolServer = new SCMSecurityProtocolServer(conf, rootCertificateServer, scmCertificateServer, - rootCaList, + scmCertificateClient, this, secretKeyManager); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java index 8b90dc52f618..ed3ce75874c3 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java @@ -45,6 +45,7 @@ import java.io.IOException; import java.math.BigInteger; import java.security.KeyPair; +import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.Duration; import java.time.LocalDateTime; @@ -87,7 +88,8 @@ public class TestRootCARotationManager { private BigInteger certID = new BigInteger("1"); @BeforeEach - public void init() throws IOException, TimeoutException { + public void init() throws IOException, TimeoutException, + CertificateException { ozoneConfig = new OzoneConfiguration(); testDir = GenericTestUtils.getTestDir( TestContainerManagerImpl.class.getSimpleName() + UUID.randomUUID()); @@ -182,7 +184,7 @@ public void testProperties() { public void testRotationOnSchedule() throws Exception { ozoneConfig.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT2S"); ozoneConfig.set(HDDS_X509_RENEW_GRACE_DURATION, "PT15S"); - ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT15S"); + ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT2S"); Date date = Calendar.getInstance().getTime(); date.setSeconds(date.getSeconds() + 10); ozoneConfig.set(HDDS_X509_CA_ROTATION_TIME_OF_DAY, @@ -215,7 +217,7 @@ public void testRotationOnSchedule() throws Exception { public void testRotationImmediately() throws Exception { ozoneConfig.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, 
"PT2S"); ozoneConfig.set(HDDS_X509_RENEW_GRACE_DURATION, "PT15S"); - ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT15S"); + ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT2S"); Date date = Calendar.getInstance().getTime(); date.setMinutes(date.getMinutes() + 5); ozoneConfig.set(HDDS_X509_CA_ROTATION_TIME_OF_DAY, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java deleted file mode 100644 index cc70e052cbec..000000000000 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package org.apache.hadoop.hdds.scm.server; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_ADDRESS_KEY; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT; - -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; -import org.apache.hadoop.security.ssl.KeyStoreTestUtil; -import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.MockitoAnnotations; - -import java.io.IOException; -import java.security.KeyPair; -import java.security.cert.X509Certificate; -import java.util.ArrayList; - -/** - * Test class for {@link SCMSecurityProtocolServer}. 
- */ -@Timeout(20) -public class TestSCMSecurityProtocolServer { - private SCMSecurityProtocolServer securityProtocolServer; - private OzoneConfiguration config; - @Mock - private StorageContainerManager mockScm; - @Mock - private SCMStorageConfig storageConfigMock; - - @BeforeEach - public void setUp() throws Exception { - MockitoAnnotations.openMocks(this); - config = new OzoneConfiguration(); - config.set(OZONE_SCM_SECURITY_SERVICE_ADDRESS_KEY, - OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT + ":0"); - securityProtocolServer = new SCMSecurityProtocolServer(config, - null, null, new ArrayList<>(), mockScm, null); - } - - @AfterEach - public void tearDown() { - if (securityProtocolServer != null) { - securityProtocolServer.stop(); - securityProtocolServer = null; - } - config = null; - } - - @Test - public void testStart() throws IOException { - securityProtocolServer.start(); - } - - @Test - public void testStop() { - securityProtocolServer.stop(); - } - - @Test - public void testReturnLastRootCa() throws Exception { - KeyPair keyPair = KeyStoreTestUtil.generateKeyPair("RSA"); - X509Certificate oldRootCa = KeyStoreTestUtil.generateCertificate("CN=dn", - keyPair, 15, "SHA256withRSA"); - X509Certificate latestRootCa = KeyStoreTestUtil.generateCertificate("CN=dn", - keyPair, 30, "SHA256withRSA"); - Assertions.assertTrue(oldRootCa.getNotAfter().toInstant() - .isBefore(latestRootCa.getNotAfter().toInstant())); - securityProtocolServer.addNewRootCa(oldRootCa); - securityProtocolServer.addNewRootCa(latestRootCa); - String pemEncodedLatestRootCa = - CertificateCodec.getPEMEncodedString(latestRootCa); - Mockito.when(mockScm.getScmStorageConfig()).thenReturn(storageConfigMock); - Mockito.when( - storageConfigMock.checkPrimarySCMIdInitialized()).thenReturn(true); - Assertions.assertEquals(securityProtocolServer.getRootCACertificate(), - pemEncodedLatestRootCa); - } -} diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh 
b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh index 1f7ca48c50ca..dcfab4fae53b 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -67,11 +67,26 @@ execute_robot_test scm4.org kinit.robot wait_for_execute_command scm4.org 120 "ozone admin scm roles | grep scm4.org" wait_for_execute_command scm1.org 30 "ozone admin cert list --role=scm | grep scm4.org" +#transfer leader to scm4.org +export TARGET_SCM=scm4.org +execute_robot_test scm4.org scmha/scm-leader-transfer.robot + +# verify data read write +wait_for_execute_command scm4.org 10 "ozone sh key put /opt/hadoop/LICENSE.txt /rotation-vol/rotation-bucket/LICENSE.txt" +wait_for_execute_command scm4.org 10 "ozone sh key get /opt/hadoop/LICENSE.txt.1 /rotation-vol/rotation-bucket/LICENSE.txt" + # add new datanode4 and verify certificate docker-compose up -d datanode4 wait_for_port datanode4 9856 60 wait_for_execute_command scm4.org 60 "ozone admin datanode list | grep datanode4" +#decommission scm1.org +execute_robot_test scm4.org scmha/scm-decommission.robot + +# verify data read write +wait_for_execute_command scm4.org 10 "ozone sh key put /opt/hadoop/NOTICE.txt /rotation-vol/rotation-bucket/NOTICE.txt" +wait_for_execute_command scm4.org 10 "ozone sh key get /opt/hadoop/NOTICE.txt.1 /rotation-vol/rotation-bucket/NOTICE.txt" + # check the metrics execute_robot_test scm1.org scmha/root-ca-rotation.robot diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index e3d1b05faba2..cace9c6153c2 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -203,6 +203,7 @@ execute_robot_test(){ -v OZONE_DIR:"${OZONE_DIR}" \ -v SECURITY_ENABLED:"${SECURITY_ENABLED}" \ -v SCM:"${SCM}" \ + -v TARGET_SCM:"${TARGET_SCM:-scm2.org}" \ ${ARGUMENTS[@]} --log NONE --report NONE
"${OZONE_ROBOT_OPTS[@]}" --output "$OUTPUT_PATH" \ "$SMOKETEST_DIR_INSIDE/$TEST" local -i rc=$? diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot index cf38de159702..791803c6e98c 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot @@ -21,6 +21,7 @@ Resource ../commonlib.robot Test Timeout 5 minutes *** Variables *** +${TARGET_SCM}= %{TARGET_SCM=scm2.org} ** Keywords *** Get SCM Leader Node @@ -34,13 +35,22 @@ Get SCM Leader Node LOG Leader SCM: ${leaderSCM} [return] ${leaderSCM} +Get SCM UUID + ${result} = Execute ozone admin scm roles --service-id=scmservice + LOG ${result} + ${scm_line} = Get Lines Containing String ${result} ${TARGET_SCM} + ${scm_split} = Split String ${scm_line} : + ${scm_uuid} = Strip String ${scm_split[3]} + [return] ${scm_uuid} + *** Test Cases *** -Transfer Leadership randomly - # Find Leader SCM and one Follower SCM +Transfer Leadership + # Find Leader SCM ${leaderSCM} = Get SCM Leader Node LOG Leader SCM: ${leaderSCM} - # Transfer leadership to the Follower SCM - ${result} = Execute ozone admin scm transfer --service-id=scmservice -r + ${target_scm_uuid} = Get SCM UUID + # Transfer leadership to target SCM + ${result} = Execute ozone admin scm transfer --service-id=scmservice -n ${target_scm_uuid} LOG ${result} Should Contain ${result} Transfer leadership successfully From 7f3a4a4f91d9c01576c2c218e3ecc3f89c096111 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Mon, 26 Jun 2023 07:10:25 +0800 Subject: [PATCH 06/15] fix findbugs --- .../hadoop/hdds/security/ssl/ReloadingX509KeyManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index d87344775163..32aadf3ab9f5 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -147,7 +147,7 @@ public String chooseEngineClientAlias(String[] strings, LOG.warn("Engine client aliases for {}, {}, {} is changed to {}", strings == null ? "" : Arrays.stream(strings).map(Object::toString) .collect(Collectors.joining(",")), - principals == null ? "" : Arrays.stream(principals) + Arrays.stream(principals) .map(Object::toString).collect(Collectors.joining(",")), sslEngine == null ? "" : sslEngine.toString(), ret); } From 11c9429715294ab9aed8ff009568a1c23f2f9881 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Mon, 26 Jun 2023 19:22:26 +0800 Subject: [PATCH 07/15] change the way to fetch HA group member count --- .../hadoop/hdds/scm/security/RootCARotationManager.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index 9554caba6025..51185790bb7a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -650,11 +650,10 @@ public void run() { int numFromHADetails = scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1; int numFromRatisServer = scm.getScmHAManager().getRatisServer() - .getDivision().getGroup().getPeers().size(); + .getDivision().getRaftConf().getCurrentPeers().size(); LOG.info("numFromHADetails {}, numFromRatisServer {}", numFromHADetails, numFromRatisServer); - if (handler.rotationPrepareAcks() == - 
(Math.max(numFromHADetails, numFromRatisServer))) { + if (handler.rotationPrepareAcks() == numFromRatisServer) { // all acks are received. try { waitAckTimeoutTask.cancel(false); From 05670248434d4448ee641525b7b860b2dce59774 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Mon, 26 Jun 2023 22:29:52 +0800 Subject: [PATCH 08/15] check if rotation can be skipped after get the lock resource --- .../scm/security/RootCARotationHandler.java | 2 - .../security/RootCARotationHandlerImpl.java | 31 +------- .../scm/security/RootCARotationManager.java | 74 +++++++++++++------ 3 files changed, 55 insertions(+), 52 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java index a27424f46149..7e9114018783 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java @@ -55,6 +55,4 @@ void rotationCommitted(String rootCertId) void resetRotationPrepareAcks(); void setSubCACertId(String subCACertId); - - String getSubCACertId(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java index f8ffea5a1eed..bcf82687468b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdds.scm.security; -import com.google.common.base.Preconditions; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.scm.ha.SCMHAInvocationHandler; import org.apache.hadoop.hdds.scm.ha.SCMRatisServer; 
@@ -31,12 +30,9 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.Proxy; -import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.StandardCopyOption; -import java.security.cert.X509Certificate; import java.util.HashSet; -import java.util.List; import java.util.Set; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; @@ -89,7 +85,7 @@ public void rotationPrepare(String rootCertId) throws IOException, TimeoutException { LOG.info("Received rotation prepare command of root certificate {}", rootCertId); - if (shouldSkipRootCert(rootCertId)) { + if (rotationManager.shouldSkipRootCert(rootCertId)) { return; } @@ -107,7 +103,7 @@ public void rotationPrepareAck(String rootCertId, // Only leader count the acks if (rotationManager.isRunning()) { - if (shouldSkipRootCert(rootCertId)) { + if (rotationManager.shouldSkipRootCert(rootCertId)) { return; } if (rootCertId.equals(newRootCACertId.get())) { @@ -121,7 +117,7 @@ public void rotationCommit(String rootCertId) throws IOException, TimeoutException { LOG.info("Received rotation commit command of root certificate {}", rootCertId); - if (shouldSkipRootCert(rootCertId)) { + if (rotationManager.shouldSkipRootCert(rootCertId)) { return; } @@ -175,7 +171,7 @@ public void rotationCommitted(String rootCertId) throws IOException, TimeoutException { LOG.info("Received rotation committed command of root certificate {}", rootCertId); - if (shouldSkipRootCert(rootCertId)) { + if (rotationManager.shouldSkipRootCert(rootCertId)) { return; } @@ -215,25 +211,6 @@ public void setSubCACertId(String subCACertId) { LOG.info("Scm sub CA new certificate is {}", subCACertId); } - public String getSubCACertId() { - return newSubCACertId.get(); - } - - private boolean shouldSkipRootCert(String newRootCertId) throws IOException { - List scmCertChain = scmCertClient.getTrustChain(); - Preconditions.checkArgument(scmCertChain.size() > 1); - X509Certificate 
rootCert = scmCertChain.get(scmCertChain.size() - 1); - if (rootCert.getSerialNumber().compareTo(new BigInteger(newRootCertId)) - >= 0) { - // usually this will happen when reapply RAFT log during SCM start - LOG.info("Sub CA certificate {} is already signed by root " + - "certificate {} or a newer root certificate.", - scmCertChain.get(0).getSerialNumber().toString(), newRootCertId); - return true; - } - return false; - } - /** * Builder for RootCARotationHandlerImpl. */ diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index 51185790bb7a..d34885d2412c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -58,6 +58,7 @@ import java.time.LocalDateTime; import java.time.ZoneId; import java.util.Date; +import java.util.List; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; @@ -517,6 +518,13 @@ public void run() { LOG.info("SubCARotationPrepareTask[rootCertId = {}] - started.", rootCACertId); + if (shouldSkipRootCert(rootCACertId)) { + // Send ack to rotationPrepare request + sendRotationPrepareAck(rootCACertId, + scmCertClient.getCertificate().getSerialNumber().toString()); + return; + } + SecurityConfig securityConfig = scmCertClient.getSecurityConfig(); String progressComponent = SCMCertificateClient.COMPONENT_NAME + @@ -597,29 +605,7 @@ public void run() { } // Send ack to rotationPrepare request - try { - handler.rotationPrepareAck(rootCACertId, newCertSerialId, - scm.getScmId()); - LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + - "rotation prepare ack sent out, new scm certificate {}", - rootCACertId, newCertSerialId); - } catch 
(Exception e) { - LOG.error("Failed to send ack to rotationPrepare request", e); - String message = "Terminate SCM, encounter exception(" + - e.getMessage() + ") when sending out rotationPrepare ack"; - scm.shutDown(message); - } - - handler.setSubCACertId(newCertSerialId); - - releaseLockOnTimeoutTask = executorService.schedule(() -> { - // If no rotation commit request received after rotation prepare - LOG.warn("Failed to have enough rotation acks from SCM. This " + - " time root rotation {} is failed. Release the lock.", - rootCACertId); - releaseLock(); - }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS); - + sendRotationPrepareAck(rootCACertId, newCertSerialId); } catch (Throwable e) { LOG.error("Unexpected error happen", e); scm.shutDown("Unexpected error happen, " + e.getMessage()); @@ -627,6 +613,33 @@ public void run() { } } + private void sendRotationPrepareAck(String newRootCACertId, + String newSubCACertId) { + // Send ack to rotationPrepare request + try { + handler.rotationPrepareAck(newRootCACertId, newSubCACertId, + scm.getScmId()); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "rotation prepare ack sent out, new scm certificate {}", + newRootCACertId, newSubCACertId); + } catch (Exception e) { + LOG.error("Failed to send ack to rotationPrepare request", e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when sending out rotationPrepare ack"; + scm.shutDown(message); + } + + handler.setSubCACertId(newSubCACertId); + + releaseLockOnTimeoutTask = executorService.schedule(() -> { + // If no rotation commit request received after rotation prepare + LOG.warn("Failed to have enough rotation acks from SCM. This " + + " time root rotation {} is failed. Release the lock.", + newRootCACertId); + releaseLock(); + }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS); + } + /** * Task to wait the all acks of prepare request. 
*/ @@ -708,4 +721,19 @@ public void stop() { public void setRootCARotationHandler(RootCARotationHandler newHandler) { handler = newHandler; } + + public boolean shouldSkipRootCert(String newRootCertId) throws IOException { + List scmCertChain = scmCertClient.getTrustChain(); + Preconditions.checkArgument(scmCertChain.size() > 1); + X509Certificate rootCert = scmCertChain.get(scmCertChain.size() - 1); + if (rootCert.getSerialNumber().compareTo(new BigInteger(newRootCertId)) + >= 0) { + // usually this will happen when reapply RAFT log during SCM start + LOG.info("Sub CA certificate {} is already signed by root " + + "certificate {} or a newer root certificate.", + scmCertChain.get(0).getSerialNumber().toString(), newRootCertId); + return true; + } + return false; + } } From 05097b06801270903f42980fa9fb12318f47ba11 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Fri, 30 Jun 2023 17:07:50 +0800 Subject: [PATCH 09/15] address comments and improve robot test --- .github/workflows/ci.yml | 19 ++-- .../security/ssl/ReloadingX509KeyManager.java | 89 +++++++------------ .../ssl/ReloadingX509TrustManager.java | 39 ++++++-- .../hadoop/hdds/scm/ha/HASecurityUtils.java | 10 +-- .../security/RootCARotationHandlerImpl.java | 5 -- .../scm/security/RootCARotationManager.java | 42 +++++---- .../ozonesecure-ha/test-root-ca-rotation.sh | 37 +++----- .../src/main/compose/ozonesecure-ha/test.sh | 5 -- .../ozonesecure/test-root-ca-rotation.sh | 14 +-- hadoop-ozone/dist/src/main/compose/testlib.sh | 27 +++++- 10 files changed, 132 insertions(+), 155 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe5ed82697f8..0add173fd7bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,14 +25,14 @@ jobs: env: GITHUB_CONTEXT: ${{ toJson(github) }} outputs: - basic-checks: ${{ steps.selective-checks.outputs.basic-checks }} - needs-basic-checks: ${{ steps.selective-checks.outputs.needs-basic-checks }} + basic-checks: false + needs-basic-checks: 
false needs-build: ${{ steps.selective-checks.outputs.needs-build }} - needs-compile: ${{ steps.selective-checks.outputs.needs-compile }} + needs-compile: false needs-compose-tests: ${{ steps.selective-checks.outputs.needs-compose-tests }} - needs-dependency-check: ${{ steps.selective-checks.outputs.needs-dependency-check }} - needs-integration-tests: ${{ steps.selective-checks.outputs.needs-integration-tests }} - needs-kubernetes-tests: ${{ steps.selective-checks.outputs.needs-kubernetes-tests }} + needs-dependency-check: false + needs-integration-tests: false + needs-kubernetes-tests: false steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v3 @@ -248,14 +248,7 @@ jobs: strategy: matrix: suite: - - secure - - unsecure - - compat - - EC - HA-secure - - HA-unsecure - - MR - - misc fail-fast: false steps: - name: Checkout project diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index 32aadf3ab9f5..265676dd4587 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -39,8 +39,6 @@ import java.util.List; import java.util.Locale; import java.util.concurrent.atomic.AtomicReference; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -64,10 +62,6 @@ public class ReloadingX509KeyManager extends X509ExtendedKeyManager { */ static final char[] EMPTY_PASSWORD = new char[0]; private final AtomicReference keyManagerRef; - // Keep the old key managers, for currently we find that the netty - // tc-native component always query the first root certificate through - // chooseEngineClientAlias after the key manager is reloaded with a new one. 
- private final List oldKeyManagerRef; /** * Current private key and cert used in keyManager. Used to detect if these @@ -75,9 +69,7 @@ public class ReloadingX509KeyManager extends X509ExtendedKeyManager { */ private PrivateKey currentPrivateKey; private List currentCertIdsList = new ArrayList<>(); - private final Pattern dnPattern = - Pattern.compile("^.+, CN=scm-\\d{1}@.+"); - private final String alias = "scm/sub-ca_key"; + private String alias; /** * Construct a Reloading509KeystoreManager. @@ -92,7 +84,6 @@ public ReloadingX509KeyManager(String type, CertificateClient caClient) this.type = type; keyManagerRef = new AtomicReference<>(); keyManagerRef.set(loadKeyManager(caClient)); - oldKeyManagerRef = new ArrayList<>(); } @Override @@ -100,27 +91,23 @@ public String chooseEngineClientAlias(String[] strings, Principal[] principals, SSLEngine sslEngine) { String ret = keyManagerRef.get() .chooseEngineClientAlias(strings, principals, sslEngine); - if (ret == null && oldKeyManagerRef.size() != 0) { - for (X509ExtendedKeyManager manager: oldKeyManagerRef) { - ret = manager.chooseEngineClientAlias(strings, principals, sslEngine); - if (ret != null) { - break; - } - } - } + if (ret == null) { - LOG.info("Engine client aliases for {}, {}, {} is null", - strings == null ? "" : Arrays.stream(strings).map(Object::toString) - .collect(Collectors.joining(", ")), - principals == null ? "" : Arrays.stream(principals) - .map(Object::toString).collect(Collectors.joining(", ")), - sslEngine == null ? "" : sslEngine.toString()); /* - Workaround to address that netty tc-native doesn't handle the dynamic - KeyManager re-loading well. - - TODO: If this issue is fixed in tc-native in future version, following - logic can be removed. + Workaround to address that netty tc-native cannot handle the dynamic + key and certificate refresh well. 
What happens is during the setup of + the grpc channel, an SSLContext is created, which is + ReferenceCountedOpenSslServerContext in the native tc-native case. + This class uses the TrustManager's getAcceptedIssuers() as the trusted + CA certificate list. The list is not updated after channel is built. + With the list being used to present the Principals during the mTLS + authentication via the Netty channel under Ratis implementation, + the counterpart(client) KeyManager's + chooseEngineClientAlias(String, Principal[], SSLEngine) method is + called with this old root certificate subject principal, which is now + not available in the new Key Manager after refreshed, so the method + will return null, which cause the mutual TLS connection establish + failure. Example error message: Engine client aliases for RSA, DH_RSA, EC, EC_RSA, EC_EC, @@ -138,19 +125,13 @@ public String chooseEngineClientAlias(String[] strings, org.apache.ratis.thirdparty.io.netty.handler.ssl.OpenSslEngine@5eec0d10 is scm/sub-ca_key */ - if (principals != null) { - String dn = principals[0].toString(); - Matcher matcher = dnPattern.matcher(dn); - if (matcher.matches()) { - ret = alias; - } - LOG.warn("Engine client aliases for {}, {}, {} is changed to {}", - strings == null ? "" : Arrays.stream(strings).map(Object::toString) - .collect(Collectors.joining(",")), - Arrays.stream(principals) - .map(Object::toString).collect(Collectors.joining(",")), - sslEngine == null ? "" : sslEngine.toString(), ret); - } + ret = alias; + LOG.info("Engine client aliases for {}, {}, {} is returned as {}", + strings == null ? "" : Arrays.stream(strings).map(Object::toString) + .collect(Collectors.joining(", ")), + principals == null ? "" : Arrays.stream(principals) + .map(Object::toString).collect(Collectors.joining(", ")), + sslEngine == null ? 
"" : sslEngine.toString(), ret); } return ret; } @@ -160,14 +141,6 @@ public String chooseEngineServerAlias(String s, Principal[] principals, SSLEngine sslEngine) { String ret = keyManagerRef.get() .chooseEngineServerAlias(s, principals, sslEngine); - if (ret == null && oldKeyManagerRef.size() != 0) { - for (X509ExtendedKeyManager manager: oldKeyManagerRef) { - ret = manager.chooseEngineServerAlias(s, principals, sslEngine); - if (ret != null) { - break; - } - } - } if (ret == null && LOG.isDebugEnabled()) { LOG.debug("Engine server aliases for {}, {}, {} is null", s, principals == null ? "" : Arrays.stream(principals) @@ -219,7 +192,6 @@ public ReloadingX509KeyManager loadFrom(CertificateClient caClient) { try { X509ExtendedKeyManager manager = loadKeyManager(caClient); if (manager != null) { - oldKeyManagerRef.add(keyManagerRef.get()); keyManagerRef.set(manager); LOG.info("ReloadingX509KeyManager is reloaded"); } @@ -237,9 +209,8 @@ private X509ExtendedKeyManager loadKeyManager(CertificateClient caClient) if (currentPrivateKey != null && currentPrivateKey.equals(privateKey) && currentCertIdsList.size() > 0 && newCertList.size() == currentCertIdsList.size() && - !newCertList.stream().filter( - c -> !currentCertIdsList.contains(c.getSerialNumber().toString())) - .findAny().isPresent()) { + newCertList.stream().allMatch(c -> + currentCertIdsList.contains(c.getSerialNumber().toString()))) { // Security materials(key and certificates) keep the same. 
return null; } @@ -248,13 +219,13 @@ private X509ExtendedKeyManager loadKeyManager(CertificateClient caClient) KeyStore keystore = KeyStore.getInstance(type); keystore.load(null, null); - keystore.setKeyEntry(caClient.getComponentName() + "_key", - privateKey, EMPTY_PASSWORD, + alias = caClient.getComponentName() + "_key"; + keystore.setKeyEntry(alias, privateKey, EMPTY_PASSWORD, newCertList.toArray(new X509Certificate[0])); - LOG.info("New key manager is loaded with certificate chain"); - for (int i = 0; i < newCertList.size(); i++) { - LOG.info(newCertList.get(i).toString()); + LOG.info("Key manager is loaded with certificate chain"); + for (X509Certificate x509Certificate : newCertList) { + LOG.info(x509Certificate.toString()); } KeyManagerFactory keyMgrFactory = KeyManagerFactory.getInstance( diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java index c64d09fcfe11..bfc3939cd0a2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java @@ -26,6 +26,7 @@ import javax.net.ssl.TrustManager; import javax.net.ssl.TrustManagerFactory; import javax.net.ssl.X509TrustManager; +import javax.security.auth.x500.X500Principal; import java.io.IOException; import java.security.GeneralSecurityException; import java.security.KeyStore; @@ -33,9 +34,11 @@ import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; /** * A {@link TrustManager} implementation that exposes a method, @@ -82,7 +85,16 @@ public void checkClientTrusted(X509Certificate[] 
chain, String authType) throws CertificateException { X509TrustManager tm = trustManagerRef.get(); if (tm != null) { - tm.checkClientTrusted(chain, authType); + try { + tm.checkClientTrusted(chain, authType); + } catch (CertificateException e) { + LOG.info("Client certificate chain {} for authType {} is not trusted", + chain == null ? "" : Arrays.stream(chain) + .map(X509Certificate::getSubjectX500Principal) + .map(X500Principal::toString) + .collect(Collectors.joining(",")), authType); + throw e; + } } else { throw new CertificateException("Unknown client chain certificate: " + chain[0].toString()); @@ -94,7 +106,16 @@ public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException { X509TrustManager tm = trustManagerRef.get(); if (tm != null) { - tm.checkServerTrusted(chain, authType); + try { + tm.checkServerTrusted(chain, authType); + } catch (CertificateException e) { + LOG.info("Client certificate chain {} for authType {} is not trusted", + chain == null ? "" : Arrays.stream(chain) + .map(X509Certificate::getSubjectX500Principal) + .map(X500Principal::toString) + .collect(Collectors.joining(",")), authType); + throw e; + } } else { throw new CertificateException("Unknown server chain certificate: " + chain[0].toString()); @@ -129,15 +150,15 @@ public ReloadingX509TrustManager loadFrom(CertificateClient caClient) { X509TrustManager loadTrustManager(CertificateClient caClient) throws GeneralSecurityException, IOException { // SCM certificate client sets root CA as CA cert instead of root CA cert - Set rootCACerts = caClient.getAllRootCaCerts().isEmpty() ? - caClient.getAllCaCerts() : caClient.getAllRootCaCerts(); + Set certList = caClient.getAllRootCaCerts(); + Set rootCACerts = certList.isEmpty() ? + caClient.getAllCaCerts() : certList; // Certificate keeps the same. 
if (rootCACerts.size() > 0 && currentRootCACertIds.size() == rootCACerts.size() && - !rootCACerts.stream().filter( - c -> !currentRootCACertIds.contains(c.getSerialNumber().toString())) - .findAny().isPresent()) { + rootCACerts.stream().allMatch(c -> + currentRootCACertIds.contains(c.getSerialNumber().toString()))) { return null; } @@ -157,14 +178,14 @@ X509TrustManager loadTrustManager(CertificateClient caClient) } } currentRootCACertIds.clear(); - rootCACerts.stream().forEach( + rootCACerts.forEach( c -> currentRootCACertIds.add(c.getSerialNumber().toString())); return trustManager; } private void insertCertsToKeystore(Iterable certs, KeyStore ks) throws KeyStoreException { - LOG.info("New trust manager is loaded with certificates"); + LOG.info("Trust manager is loaded with certificates"); for (X509Certificate certToInsert : certs) { String certId = certToInsert.getSerialNumber().toString(); ks.setCertificateEntry(certId, certToInsert); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java index 2f86de150ee8..f307a4030828 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java @@ -409,16 +409,10 @@ public static SCMRatisResponse submitScmRequestToRatis(RaftGroup raftGroup, } public static boolean isSelfSignedCertificate(X509Certificate cert) { - if (cert.getIssuerX500Principal().equals(cert.getSubjectX500Principal())) { - return true; - } - return false; + return cert.getIssuerX500Principal().equals(cert.getSubjectX500Principal()); } public static boolean isCACertificate(X509Certificate cert) { - if (cert.getBasicConstraints() != -1) { - return true; - } - return false; + return cert.getBasicConstraints() != -1; } } diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java index bcf82687468b..86e0933c161f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java @@ -38,10 +38,8 @@ import java.util.concurrent.atomic.AtomicReference; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX; -import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; import static org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.CERT_ROTATE; -import static org.apache.hadoop.ozone.OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME; /** * Root CA Rotation Handler for ratis SCM statemachine. 
@@ -55,9 +53,6 @@ public class RootCARotationHandlerImpl implements RootCARotationHandler { private final SCMCertificateClient scmCertClient; private final SecurityConfig secConfig; private Set newScmCertIdSet = new HashSet<>(); - private String newCAComponent = SCM_ROOT_CA_COMPONENT_NAME + - HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + - HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; private final String newSubCAPath; private final RootCARotationManager rotationManager; private AtomicReference newSubCACertId = new AtomicReference(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index d34885d2412c..bb4e253b4cbb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -63,7 +63,6 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -382,6 +381,7 @@ public void run() { String message = "Terminate SCM, encounter exception(" + e.getMessage() + ") when generating new root CA certificate " + "under " + newCAComponent; + cleanupAndStop(message); scm.shutDown(message); } @@ -399,7 +399,10 @@ public void run() { "expected:" + newId + ", fetched:" + newRootCertId); scm.getSecurityProtocolServer() .setRootCertificateServer(newRootCAServer); + + if (isRunning()) { + checkInterruptState(); handler.rotationPrepare(newRootCertId); LOG.info("Send out sub CA rotation prepare request for new " + "root certificate {}", newRootCertId); @@ -415,6 +418,7 @@ public void run() { return; } + checkInterruptState(); // save root certificate 
to certStore try { if (scm.getCertificateStore().getCertificateByID( @@ -426,7 +430,7 @@ public void run() { CertificateCodec.getX509Certificate(newRootCertificate), HddsProtos.NodeType.SCM); } - } catch (CertificateException | IOException | TimeoutException e) { + } catch (CertificateException | IOException e) { LOG.error("Failed to save root certificate {} to cert store", newRootCertId); scm.shutDown("Failed to save root certificate to cert store"); @@ -438,7 +442,7 @@ public void run() { 1, 1, TimeUnit.SECONDS); waitAckTimeoutTask = executorService.schedule(() -> { // No enough acks are received - waitAckTask.cancel(false); + waitAckTask.cancel(true); String msg = "Failed to receive all acks of rotation prepare" + " after " + ackTimeout + ", received " + handler.rotationPrepareAcks() + " acks"; @@ -455,6 +459,16 @@ public void run() { } } + private void checkInterruptState() { + // check whether thread is interrupted(cancelled) before + // time-consuming ratis request + if (Thread.currentThread().isInterrupted()) { + cleanupAndStop(this.getClass().getSimpleName() + + " is interrupted"); + return; + } + } + private void cleanupAndStop(String reason) { try { scm.getSecurityProtocolServer().setRootCertificateServer(null); @@ -569,6 +583,7 @@ public void run() { scm.shutDown(message); } + checkInterruptState(); // Get certificate signed String newCertSerialId = ""; try { @@ -605,6 +620,7 @@ public void run() { } // Send ack to rotationPrepare request + checkInterruptState(); sendRotationPrepareAck(rootCACertId, newCertSerialId); } catch (Throwable e) { LOG.error("Unexpected error happen", e); @@ -653,12 +669,14 @@ public WaitSubCARotationPrepareAckTask( @Override public void run() { + checkInterruptState(); if (!isRunning()) { LOG.info("SCM is not leader anymore. 
Delete the in-progress " + "root CA directory"); cleanupAndStop("SCM is not leader anymore"); return; } + synchronized (RootCARotationManager.class) { int numFromHADetails = scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1; @@ -669,7 +687,7 @@ public void run() { if (handler.rotationPrepareAcks() == numFromRatisServer) { // all acks are received. try { - waitAckTimeoutTask.cancel(false); + waitAckTimeoutTask.cancel(true); handler.rotationCommit(rootCACertId); handler.rotationCommitted(rootCACertId); @@ -688,10 +706,7 @@ public void run() { handler.resetRotationPrepareAcks(); cleanupAndStop("Execution error, " + e.getMessage()); } finally { - // stop this task to re-execute again in next cycle - throw new RuntimeException("Exit the this " + - "WaitSubCARotationPrepareAckTask for root certificate " + - rootCACertId + " since the rotation is finished execution"); + waitAckTask.cancel(true); } } } @@ -706,14 +721,9 @@ public void stop() { if (metrics != null) { metrics.unRegister(); } - try { - executorService.shutdown(); - if (!executorService.awaitTermination(3, TimeUnit.SECONDS)) { - executorService.shutdownNow(); - } - } catch (InterruptedException ie) { - // Ignore, we don't really care about the failure. 
- Thread.currentThread().interrupt(); + + if (executorService != null) { + executorService.shutdownNow(); } } diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh index dcfab4fae53b..206fbd13595a 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -34,47 +34,38 @@ start_docker_env execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} -execute_robot_test s3g kinit.robot +execute_robot_test scm1.org kinit.robot # verify root CA rotation monitor task is active on leader -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" # wait and verify root CA is rotated wait_for_execute_command scm1.org 90 "ozone admin cert info 2" -# verify scm operations -execute_robot_test s3g admincli/pipeline.robot +# transfer leader to scm2.org +execute_robot_test scm1.org scmha/scm-leader-transfer.robot +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Inactive'" -# transfer leader to another SCM -execute_robot_test s3g scmha/scm-leader-transfer.robot -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-Inactive'" +# verify om operations +execute_commands_in_container scm1.org "ozone sh volume create /r-v1 && ozone sh bucket create /r-v1/r-b1" + +# verify scm operations +execute_robot_test 
scm1.org admincli/pipeline.robot # wait for second root CA rotation wait_for_execute_command scm1.org 90 "ozone admin cert info 3" -# verify om operations -wait_for_execute_command scm1.org 10 "ozone sh volume create rotation-vol" -wait_for_execute_command scm1.org 10 "ozone sh bucket create rotation-vol/rotation-bucket" - -# verify data read write -wait_for_execute_command scm1.org 10 "ozone sh key put /opt/hadoop/README.md /rotation-vol/rotation-bucket/README.md" -wait_for_execute_command scm1.org 10 "ozone sh key get /opt/hadoop/README.md.1 /rotation-vol/rotation-bucket/README.md" - # bootstrap new SCM4 and verify certificate docker-compose up -d scm4.org wait_for_port scm4.org 9894 120 execute_robot_test scm4.org kinit.robot wait_for_execute_command scm4.org 120 "ozone admin scm roles | grep scm4.org" -wait_for_execute_command scm1.org 30 "ozone admin cert list --role=scm | grep scm4.org" +wait_for_execute_command scm4.org 30 "ozone admin cert list --role=scm | grep scm4.org" #transfer leader to scm4.org export TARGET_SCM=scm4.org execute_robot_test scm4.org scmha/scm-leader-transfer.robot -# verify data read write -wait_for_execute_command scm4.org 10 "ozone sh key put /opt/hadoop/LICENSE.txt /rotation-vol/rotation-bucket/LICENSE.txt" -wait_for_execute_command scm4.org 10 "ozone sh key get /opt/hadoop/LICENSE.txt.1 /rotation-vol/rotation-bucket/LICENSE.txt" - # add new datanode4 and verify certificate docker-compose up -d datanode4 wait_for_port datanode4 9856 60 @@ -83,12 +74,8 @@ wait_for_execute_command scm4.org 60 "ozone admin datanode list | grep datanode4 #decomission scm1.org execute_robot_test scm4.org scmha/scm-decommission.robot -# verify data read write -wait_for_execute_command scm4.org 10 "ozone sh key put /opt/hadoop/NOTICE.txt /rotation-vol/rotation-bucket/NOTICE.txt" -wait_for_execute_command scm4.org 10 "ozone sh key get /opt/hadoop/NOTICE.txt.1 /rotation-vol/rotation-bucket/NOTICE.txt" - # check the metrics -execute_robot_test scm1.org 
scmha/root-ca-rotation.robot +execute_robot_test scm2.org scmha/root-ca-rotation.robot stop_docker_env diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh index f1f3593ec293..41dcbb09d5b2 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh @@ -52,11 +52,6 @@ execute_robot_test s3g admincli execute_robot_test s3g omha/om-leader-transfer.robot -# verify root CA rotation monitor task -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Active'" -execute_robot_test s3g scmha/scm-leader-transfer.robot -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Inactive'" - execute_robot_test s3g httpfs export SCM=scm2.org diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh index e2f43670ba8e..4045f81ffaa0 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh @@ -37,25 +37,17 @@ execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} execute_robot_test s3g kinit.robot # verify root CA rotation monitor task is active on leader -wait_for_execute_command scm 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" +wait_for_execute_command scm 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" # wait and verify root CA is rotated wait_for_execute_command scm 90 "ozone admin 
cert info 2" -# verify scm operations -execute_robot_test s3g admincli/pipeline.robot +# verify om operations and data operations +execute_commands_in_container scm1.org "ozone sh volume create /r-v1 && ozone sh bucket create /r-v1/r-b1" # wait for second root CA rotation wait_for_execute_command scm 90 "ozone admin cert info 3" -# verify om operations -wait_for_execute_command scm 10 "ozone sh volume create rotation-vol" -wait_for_execute_command scm 10 "ozone sh bucket create rotation-vol/rotation-bucket" - -# verify data read write -wait_for_execute_command scm 10 "ozone sh key put /opt/hadoop/README.md /rotation-vol/rotation-bucket/README.md" -wait_for_execute_command scm 10 "ozone sh key get /opt/hadoop/README.md.1 /rotation-vol/rotation-bucket/README.md" - # check the metrics execute_robot_test scm scmha/root-ca-rotation.robot diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index cace9c6153c2..b13048f10201 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -263,6 +263,20 @@ execute_command_in_container(){ docker-compose exec -T "$@" } +## @description Execute specific commands in docker container +## @param container name +## @param specific commands to execute +execute_commands_in_container(){ + local container=$1 + shift 1 + local command=$@ + + set -e + # shellcheck disable=SC2068 + docker-compose exec -T $container /bin/bash -c "$command" + set +e +} + ## @description Stop a list of named containers ## @param List of container names, eg datanode_1 datanode_2 stop_containers() { @@ -325,15 +339,20 @@ wait_for_port(){ wait_for_execute_command(){ local container=$1 local timeout=$2 - local command=$3 + shift 2 + local command=$@ #Reset the timer SECONDS=0 while [[ $SECONDS -lt $timeout ]]; do - if docker-compose exec -T $container bash -c '$command'; then - echo "$command succeed" - return + set +e + docker-compose exec -T $container 
/bin/bash -c "$command" + status=$? + set -e + if [ $status -eq 0 ] ; then + echo "$command succeed" + return; fi echo "$command hasn't succeed yet" sleep 1 From e5d76d638521b6cf2ddc80a5dc1599a499fd1a98 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Fri, 30 Jun 2023 20:03:21 +0800 Subject: [PATCH 10/15] rollback the changes in ci.yml for previous debug --- .github/workflows/ci.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0add173fd7bc..fe5ed82697f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,14 +25,14 @@ jobs: env: GITHUB_CONTEXT: ${{ toJson(github) }} outputs: - basic-checks: false - needs-basic-checks: false + basic-checks: ${{ steps.selective-checks.outputs.basic-checks }} + needs-basic-checks: ${{ steps.selective-checks.outputs.needs-basic-checks }} needs-build: ${{ steps.selective-checks.outputs.needs-build }} - needs-compile: false + needs-compile: ${{ steps.selective-checks.outputs.needs-compile }} needs-compose-tests: ${{ steps.selective-checks.outputs.needs-compose-tests }} - needs-dependency-check: false - needs-integration-tests: false - needs-kubernetes-tests: false + needs-dependency-check: ${{ steps.selective-checks.outputs.needs-dependency-check }} + needs-integration-tests: ${{ steps.selective-checks.outputs.needs-integration-tests }} + needs-kubernetes-tests: ${{ steps.selective-checks.outputs.needs-kubernetes-tests }} steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v3 @@ -248,7 +248,14 @@ jobs: strategy: matrix: suite: + - secure + - unsecure + - compat + - EC - HA-secure + - HA-unsecure + - MR + - misc fail-fast: false steps: - name: Checkout project From 862c21b0fe4af41c2d9a4c6e1aead519f17fd0c8 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Fri, 30 Jun 2023 22:04:41 +0800 Subject: [PATCH 11/15] disable monitor task in SCMCertificateClient --- 
.../client/DefaultCertificateClient.java | 37 +-- .../security/RootCARotationHandlerImpl.java | 3 - .../scm/security/RootCARotationManager.java | 224 ++++++++---------- 3 files changed, 106 insertions(+), 158 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index 17586a4841cb..9944480acf5c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -54,7 +54,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.stream.Stream; @@ -126,7 +125,6 @@ public abstract class DefaultCertificateClient implements CertificateClient { private Runnable shutdownCallback; private SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient; private final Set notificationReceivers; - private static Semaphore semaphore = new Semaphore(1); protected DefaultCertificateClient( SecurityConfig securityConfig, @@ -135,8 +133,7 @@ protected DefaultCertificateClient( String certSerialId, String component, Consumer saveCertId, - Runnable shutdown - ) { + Runnable shutdown) { Objects.requireNonNull(securityConfig); this.securityConfig = securityConfig; this.scmSecurityClient = scmSecurityClient; @@ -170,13 +167,15 @@ private synchronized void loadAllCertificates() { return; } - if (certPath != null && executorService == null) { - startCertificateMonitor(); - } else { - if (executorService != null) { - getLogger().debug("CertificateLifetimeMonitor is 
already started."); + if (shouldStartCertificateMonitor()) { + if (certPath != null && executorService == null) { + startCertificateMonitor(); } else { - getLogger().warn("Component certificate was not loaded."); + if (executorService != null) { + getLogger().debug("CertificateLifetimeMonitor is already started."); + } else { + getLogger().warn("Component certificate was not loaded."); + } } } } @@ -1245,12 +1244,8 @@ public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() return scmSecurityClient; } - public static void acquirePermit() throws InterruptedException { - semaphore.acquire(); - } - - public static void releasePermit() { - semaphore.release(); + protected boolean shouldStartCertificateMonitor() { + return true; } public synchronized void startCertificateMonitor() { @@ -1294,13 +1289,7 @@ public void run() { // 2. switch on disk new keys and certificate with current ones // 3. save new certificate ID into service VERSION file // 4. refresh in memory certificate ID and reload all new certificates - try { - acquirePermit(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - return; - } - try { + synchronized (DefaultCertificateClient.class) { X509Certificate currentCert = getCertificate(); Duration timeLeft = timeBeforeExpiryGracePeriod(currentCert); if (timeLeft.isZero()) { @@ -1335,8 +1324,6 @@ public void run() { // cleanup backup directory cleanBackupDir(); } - } finally { - releasePermit(); } } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java index 86e0933c161f..17429293f4cf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java @@ -183,9 +183,6 @@ public void 
rotationCommitted(String rootCertId) LOG.error("Failed to delete backup dir {}", backupSubCaDir, e); } - // release lock - rotationManager.checkAndReleaseLock(); - // reset state newSubCACertId.set(null); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index bb4e253b4cbb..92c3916d97d6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -34,7 +34,6 @@ import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; import org.apache.hadoop.hdds.security.x509.certificate.authority.profile.DefaultCAProfile; -import org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; @@ -105,7 +104,6 @@ public class RootCARotationManager implements SCMService { private ScheduledFuture rotationTask; private ScheduledFuture waitAckTask; private ScheduledFuture waitAckTimeoutTask; - private ScheduledFuture releaseLockOnTimeoutTask; private final RootCARotationMetrics metrics; /** @@ -180,11 +178,6 @@ public void notifyStatusChanged() { if (waitAckTimeoutTask != null) { waitAckTask.cancel(true); } - if (releaseLockOnTimeoutTask != null) { - if (releaseLockOnTimeoutTask.cancel(true)) { - releaseLock(); - } - } isProcessing.set(false); processStartTime.set(null); } @@ -237,24 +230,6 @@ public void scheduleSubCaRotationPrepareTask(String rootCertId) { TimeUnit.MILLISECONDS); } - public void acquireLock() throws 
InterruptedException { - DefaultCertificateClient.acquirePermit(); - } - - public void releaseLock() { - DefaultCertificateClient.releasePermit(); - } - - public void checkAndReleaseLock() { - if (releaseLockOnTimeoutTask != null) { - if (releaseLockOnTimeoutTask.cancel(true)) { - releaseLock(); - } - } else { - releaseLock(); - } - } - public boolean isRotationInProgress() { return isProcessing.get(); } @@ -522,109 +497,106 @@ public void run() { // 1. generate new sub CA keys // 2. send CSR to leader SCM // 3. wait CSR response and persist the certificate to disk - try { - acquireLock(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - return; - } - try { - LOG.info("SubCARotationPrepareTask[rootCertId = {}] - started.", - rootCACertId); + synchronized (RootCARotationManager.class) { + try { + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - started.", + rootCACertId); - if (shouldSkipRootCert(rootCACertId)) { - // Send ack to rotationPrepare request - sendRotationPrepareAck(rootCACertId, - scmCertClient.getCertificate().getSerialNumber().toString()); - return; - } + if (shouldSkipRootCert(rootCACertId)) { + // Send ack to rotationPrepare request + sendRotationPrepareAck(rootCACertId, + scmCertClient.getCertificate().getSerialNumber().toString()); + return; + } - SecurityConfig securityConfig = - scmCertClient.getSecurityConfig(); - String progressComponent = SCMCertificateClient.COMPONENT_NAME + - HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + - HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; - final String newSubCAProgressPath = - securityConfig.getLocation(progressComponent).toString(); - final String newSubCAPath = securityConfig.getLocation( - SCMCertificateClient.COMPONENT_NAME + - HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX).toString(); - - File newProgressDir = new File(newSubCAProgressPath); - File newDir = new File(newSubCAPath); - try { - FileUtils.deleteDirectory(newProgressDir); - FileUtils.deleteDirectory(newDir); - 
Files.createDirectories(newProgressDir.toPath()); - } catch (IOException e) { - LOG.error("Failed to delete and create {}, or delete {}", - newProgressDir, newDir, e); - String message = "Terminate SCM, encounter IO exception(" + - e.getMessage() + ") when deleting and create directory"; - scm.shutDown(message); - } + SecurityConfig securityConfig = + scmCertClient.getSecurityConfig(); + String progressComponent = SCMCertificateClient.COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + + HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; + final String newSubCAProgressPath = + securityConfig.getLocation(progressComponent).toString(); + final String newSubCAPath = securityConfig.getLocation( + SCMCertificateClient.COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX).toString(); + + File newProgressDir = new File(newSubCAProgressPath); + File newDir = new File(newSubCAPath); + try { + FileUtils.deleteDirectory(newProgressDir); + FileUtils.deleteDirectory(newDir); + Files.createDirectories(newProgressDir.toPath()); + } catch (IOException e) { + LOG.error("Failed to delete and create {}, or delete {}", + newProgressDir, newDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when deleting and create directory"; + scm.shutDown(message); + } - // Generate key - Path keyDir = securityConfig.getKeyLocation(progressComponent); - KeyCodec keyCodec = new KeyCodec(securityConfig, keyDir); - KeyPair newKeyPair = null; - try { - HDDSKeyGenerator keyGenerator = new HDDSKeyGenerator(securityConfig); - newKeyPair = keyGenerator.generateKey(); - keyCodec.writePublicKey(newKeyPair.getPublic()); - keyCodec.writePrivateKey(newKeyPair.getPrivate()); - LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + - "scm key generated.", rootCACertId); - } catch (Exception e) { - LOG.error("Failed to generate key under {}", newProgressDir, e); - String message = "Terminate SCM, encounter exception(" + - e.getMessage() + ") when generating new key under " + 
- newProgressDir; - scm.shutDown(message); - } + // Generate key + Path keyDir = securityConfig.getKeyLocation(progressComponent); + KeyCodec keyCodec = new KeyCodec(securityConfig, keyDir); + KeyPair newKeyPair = null; + try { + HDDSKeyGenerator keyGenerator = + new HDDSKeyGenerator(securityConfig); + newKeyPair = keyGenerator.generateKey(); + keyCodec.writePublicKey(newKeyPair.getPublic()); + keyCodec.writePrivateKey(newKeyPair.getPrivate()); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "scm key generated.", rootCACertId); + } catch (Exception e) { + LOG.error("Failed to generate key under {}", newProgressDir, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new key under " + + newProgressDir; + scm.shutDown(message); + } - checkInterruptState(); - // Get certificate signed - String newCertSerialId = ""; - try { - CertificateSignRequest.Builder csrBuilder = - scmCertClient.getCSRBuilder(); - csrBuilder.setKey(newKeyPair); - newCertSerialId = scmCertClient.signAndStoreCertificate( - csrBuilder.build(), - Paths.get(newSubCAProgressPath, HDDS_X509_DIR_NAME_DEFAULT), - true); - LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + - "scm certificate {} signed.", rootCACertId, newCertSerialId); - } catch (Exception e) { - LOG.error("Failed to generate certificate under {}", - newProgressDir, e); - String message = "Terminate SCM, encounter exception(" + - e.getMessage() + ") when generating new certificate " + - newProgressDir; - scm.shutDown(message); - } + checkInterruptState(); + // Get certificate signed + String newCertSerialId = ""; + try { + CertificateSignRequest.Builder csrBuilder = + scmCertClient.getCSRBuilder(); + csrBuilder.setKey(newKeyPair); + newCertSerialId = scmCertClient.signAndStoreCertificate( + csrBuilder.build(), + Paths.get(newSubCAProgressPath, HDDS_X509_DIR_NAME_DEFAULT), + true); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "scm certificate {} signed.", 
rootCACertId, newCertSerialId); + } catch (Exception e) { + LOG.error("Failed to generate certificate under {}", + newProgressDir, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new certificate " + + newProgressDir; + scm.shutDown(message); + } - // move dir from *-next-progress to *-next - try { - Files.move(newProgressDir.toPath(), newDir.toPath(), - StandardCopyOption.ATOMIC_MOVE, - StandardCopyOption.REPLACE_EXISTING); - } catch (IOException e) { - LOG.error("Failed to move {} to {}", - newSubCAProgressPath, newSubCAPath, e); - String message = "Terminate SCM, encounter exception(" + - e.getMessage() + ") when moving " + newSubCAProgressPath + - " to " + newSubCAPath; - scm.shutDown(message); - } + // move dir from *-next-progress to *-next + try { + Files.move(newProgressDir.toPath(), newDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", + newSubCAProgressPath, newSubCAPath, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when moving " + newSubCAProgressPath + + " to " + newSubCAPath; + scm.shutDown(message); + } - // Send ack to rotationPrepare request - checkInterruptState(); - sendRotationPrepareAck(rootCACertId, newCertSerialId); - } catch (Throwable e) { - LOG.error("Unexpected error happen", e); - scm.shutDown("Unexpected error happen, " + e.getMessage()); + // Send ack to rotationPrepare request + checkInterruptState(); + sendRotationPrepareAck(rootCACertId, newCertSerialId); + } catch (Throwable e) { + LOG.error("Unexpected error happen", e); + scm.shutDown("Unexpected error happen, " + e.getMessage()); + } } } } @@ -646,14 +618,6 @@ private void sendRotationPrepareAck(String newRootCACertId, } handler.setSubCACertId(newSubCACertId); - - releaseLockOnTimeoutTask = executorService.schedule(() -> { - // If no rotation commit request received after rotation 
prepare - LOG.warn("Failed to have enough rotation acks from SCM. This " + - " time root rotation {} is failed. Release the lock.", - newRootCACertId); - releaseLock(); - }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS); } /** From 7cb7937774647e214e2af0bdbd0e092d53d5a8bf Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Sun, 2 Jul 2023 14:49:18 +0800 Subject: [PATCH 12/15] improve robot test and move saving root certificate into rocksdb to process end --- .../scm/security/RootCARotationManager.java | 37 ++++++++++--------- .../ozonesecure-ha/test-root-ca-rotation.sh | 19 ++++++++-- .../ozonesecure/test-root-ca-rotation.sh | 13 +++---- hadoop-ozone/dist/src/main/compose/testlib.sh | 1 - 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index 92c3916d97d6..61e032ea0b0e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -393,24 +393,6 @@ public void run() { return; } - checkInterruptState(); - // save root certificate to certStore - try { - if (scm.getCertificateStore().getCertificateByID( - newRootCertificate.getSerialNumber(), VALID_CERTS) == null) { - LOG.info("Persist root certificate {} to cert store", - newRootCertId); - scm.getCertificateStore().storeValidCertificate( - newRootCertificate.getSerialNumber(), - CertificateCodec.getX509Certificate(newRootCertificate), - HddsProtos.NodeType.SCM); - } - } catch (CertificateException | IOException e) { - LOG.error("Failed to save root certificate {} to cert store", - newRootCertId); - scm.shutDown("Failed to save root certificate to cert store"); - } - // schedule task to wait for prepare acks waitAckTask = 
executorService.scheduleAtFixedRate( new WaitSubCARotationPrepareAckTask(newRootCertificate), @@ -625,9 +607,11 @@ private void sendRotationPrepareAck(String newRootCACertId, */ public class WaitSubCARotationPrepareAckTask implements Runnable { private String rootCACertId; + private X509CertificateHolder rootCACertHolder; public WaitSubCARotationPrepareAckTask( X509CertificateHolder rootCertHolder) { + this.rootCACertHolder = rootCertHolder; this.rootCACertId = rootCertHolder.getSerialNumber().toString(); } @@ -660,6 +644,23 @@ public void run() { metrics.setSuccessTimeInNs(timeTaken); processStartTime.set(null); + // save root certificate to certStore + try { + if (scm.getCertificateStore().getCertificateByID( + rootCACertHolder.getSerialNumber(), VALID_CERTS) == null) { + LOG.info("Persist root certificate {} to cert store", + rootCACertId); + scm.getCertificateStore().storeValidCertificate( + rootCACertHolder.getSerialNumber(), + CertificateCodec.getX509Certificate(rootCACertHolder), + HddsProtos.NodeType.SCM); + } + } catch (CertificateException | IOException e) { + LOG.error("Failed to save root certificate {} to cert store", + rootCACertId); + scm.shutDown("Failed to save root certificate to cert store"); + } + // reset state handler.resetRotationPrepareAcks(); String msg = "Root certificate " + rootCACertId + diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh index 206fbd13595a..eabfa640c91e 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -40,7 +40,7 @@ execute_robot_test scm1.org kinit.robot wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" # wait and verify root CA is rotated 
-wait_for_execute_command scm1.org 90 "ozone admin cert info 2" +wait_for_execute_command scm1.org 180 "ozone admin cert info 2" # transfer leader to scm2.org execute_robot_test scm1.org scmha/scm-leader-transfer.robot @@ -52,8 +52,8 @@ execute_commands_in_container scm1.org "ozone sh volume create /r-v1 && ozone sh # verify scm operations execute_robot_test scm1.org admincli/pipeline.robot -# wait for second root CA rotation -wait_for_execute_command scm1.org 90 "ozone admin cert info 3" +# wait for next root CA rotation +wait_for_execute_command scm1.org 180 "ozone admin cert info 3" # bootstrap new SCM4 and verify certificate docker-compose up -d scm4.org @@ -62,6 +62,9 @@ execute_robot_test scm4.org kinit.robot wait_for_execute_command scm4.org 120 "ozone admin scm roles | grep scm4.org" wait_for_execute_command scm4.org 30 "ozone admin cert list --role=scm | grep scm4.org" +# wait for next root CA rotation +wait_for_execute_command scm4.org 180 "ozone admin cert info 4" + #transfer leader to scm4.org export TARGET_SCM=scm4.org execute_robot_test scm4.org scmha/scm-leader-transfer.robot @@ -71,8 +74,16 @@ docker-compose up -d datanode4 wait_for_port datanode4 9856 60 wait_for_execute_command scm4.org 60 "ozone admin datanode list | grep datanode4" +#transfer leader to scm3.org +execute_robot_test scm3.org kinit.robot +export TARGET_SCM=scm3.org +execute_robot_test scm4.org scmha/scm-leader-transfer.robot + +# wait for next root CA rotation +wait_for_execute_command scm3.org 180 "ozone admin cert info 5" + #decomission scm1.org -execute_robot_test scm4.org scmha/scm-decommission.robot +execute_robot_test scm3.org scmha/scm-decommission.robot # check the metrics execute_robot_test scm2.org scmha/root-ca-rotation.robot diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh index 4045f81ffaa0..66f1a6d01aec 100755 --- 
a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh @@ -15,14 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -#suite:HA-secure +#suite:secure COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" export COMPOSE_DIR export SECURITY_ENABLED=true -export OM_SERVICE_ID="omservice" -export SCM=scm1.org +export SCM=scm export COMPOSE_FILE=docker-compose.yaml:root-ca-rotation.yaml : ${OZONE_BUCKET_KEY_NAME:=key1} @@ -34,19 +33,19 @@ start_docker_env execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} -execute_robot_test s3g kinit.robot +execute_robot_test scm kinit.robot # verify root CA rotation monitor task is active on leader wait_for_execute_command scm 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" # wait and verify root CA is rotated -wait_for_execute_command scm 90 "ozone admin cert info 2" +wait_for_execute_command scm 180 "ozone admin cert info 2" # verify om operations and data operations -execute_commands_in_container scm1.org "ozone sh volume create /r-v1 && ozone sh bucket create /r-v1/r-b1" +execute_commands_in_container scm "ozone sh volume create /r-v1 && ozone sh bucket create /r-v1/r-b1" # wait for second root CA rotation -wait_for_execute_command scm 90 "ozone admin cert info 3" +wait_for_execute_command scm 180 "ozone admin cert info 3" # check the metrics execute_robot_test scm scmha/root-ca-rotation.robot diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index b13048f10201..c6b857e60db6 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -280,7 +280,6 @@ execute_commands_in_container(){ ## @description Stop a 
list of named containers ## @param List of container names, eg datanode_1 datanode_2 stop_containers() { - set -e docker-compose --ansi never stop $@ } From 494ecc9d4919e51571f729bc03412215c04b0c42 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 4 Jul 2023 14:48:49 +0800 Subject: [PATCH 13/15] address comments --- .../security/ssl/ReloadingX509KeyManager.java | 14 +++++--------- ...MSecurityProtocolServerSideTranslatorPB.java | 2 +- .../scm/security/RootCARotationHandler.java | 9 ++++----- .../scm/security/RootCARotationHandlerImpl.java | 9 ++++----- .../ozonesecure-ha/test-root-ca-rotation.sh | 6 ++---- hadoop-ozone/dist/src/main/compose/testlib.sh | 17 +++++------------ .../smoketest/scmha/scm-leader-transfer.robot | 2 +- 7 files changed, 22 insertions(+), 37 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index 265676dd4587..d88f40b4be25 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -39,7 +39,6 @@ import java.util.List; import java.util.Locale; import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; /** * An implementation of X509KeyManager that exposes a method, @@ -127,11 +126,9 @@ the counterpart(client) KeyManager's */ ret = alias; LOG.info("Engine client aliases for {}, {}, {} is returned as {}", - strings == null ? "" : Arrays.stream(strings).map(Object::toString) - .collect(Collectors.joining(", ")), - principals == null ? "" : Arrays.stream(principals) - .map(Object::toString).collect(Collectors.joining(", ")), - sslEngine == null ? "" : sslEngine.toString(), ret); + strings == null ? "" : Arrays.toString(strings), + principals == null ? 
"" : Arrays.toString(principals), + sslEngine == null ? "" : sslEngine, ret); } return ret; } @@ -143,9 +140,8 @@ public String chooseEngineServerAlias(String s, Principal[] principals, .chooseEngineServerAlias(s, principals, sslEngine); if (ret == null && LOG.isDebugEnabled()) { LOG.debug("Engine server aliases for {}, {}, {} is null", s, - principals == null ? "" : Arrays.stream(principals) - .map(Object::toString).collect(Collectors.joining(", ")), - sslEngine == null ? "" : sslEngine.toString()); + principals == null ? "" : Arrays.toString(principals), + sslEngine == null ? "" : sslEngine); } return ret; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java index a986aaf27fb3..aeb1fb6ea6bc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java @@ -253,7 +253,7 @@ public SCMGetCertResponseProto getSCMCertificate( throw createNotHAException(); } String certificate = impl.getSCMCertificate(request.getScmDetails(), - request.getCSR(), request.hasRenew() ? 
request.getRenew() : false); + request.getCSR(), request.hasRenew() && request.getRenew()); SCMGetCertResponseProto.Builder builder = SCMGetCertResponseProto .newBuilder() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java index 7e9114018783..a91c90e3df12 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hdds.scm.metadata.Replicate; import java.io.IOException; -import java.util.concurrent.TimeoutException; /** * This interface defines APIs for sub-ca rotation instructions. @@ -36,19 +35,19 @@ public interface RootCARotationHandler { */ @Replicate void rotationPrepare(String rootCertId) - throws IOException, TimeoutException; + throws IOException; @Replicate void rotationPrepareAck(String rootCertId, String scmCertId, String scmId) - throws IOException, TimeoutException; + throws IOException; @Replicate void rotationCommit(String rootCertId) - throws IOException, TimeoutException; + throws IOException; @Replicate void rotationCommitted(String rootCertId) - throws IOException, TimeoutException; + throws IOException; int rotationPrepareAcks(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java index 17429293f4cf..cdaf2d34c2bc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java @@ -34,7 +34,6 @@ import java.nio.file.StandardCopyOption; import 
java.util.HashSet; import java.util.Set; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX; @@ -77,7 +76,7 @@ public RootCARotationHandlerImpl(StorageContainerManager scm, @Override public void rotationPrepare(String rootCertId) - throws IOException, TimeoutException { + throws IOException { LOG.info("Received rotation prepare command of root certificate {}", rootCertId); if (rotationManager.shouldSkipRootCert(rootCertId)) { @@ -92,7 +91,7 @@ public void rotationPrepare(String rootCertId) @Override public void rotationPrepareAck(String rootCertId, - String scmCertId, String scmId) throws IOException, TimeoutException { + String scmCertId, String scmId) throws IOException { LOG.info("Received rotation prepare ack of root certificate {} from scm {}", rootCertId, scmId); @@ -109,7 +108,7 @@ public void rotationPrepareAck(String rootCertId, @Override public void rotationCommit(String rootCertId) - throws IOException, TimeoutException { + throws IOException { LOG.info("Received rotation commit command of root certificate {}", rootCertId); if (rotationManager.shouldSkipRootCert(rootCertId)) { @@ -163,7 +162,7 @@ public void rotationCommit(String rootCertId) @Override public void rotationCommitted(String rootCertId) - throws IOException, TimeoutException { + throws IOException { LOG.info("Received rotation committed command of root certificate {}", rootCertId); if (rotationManager.shouldSkipRootCert(rootCertId)) { diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh index eabfa640c91e..f7efbf579e88 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -66,8 +66,7 @@ wait_for_execute_command scm4.org 30 "ozone 
admin cert list --role=scm | grep sc wait_for_execute_command scm4.org 180 "ozone admin cert info 4" #transfer leader to scm4.org -export TARGET_SCM=scm4.org -execute_robot_test scm4.org scmha/scm-leader-transfer.robot +execute_robot_test scm4.org -v "TARGET_SCM:scm4.org" scmha/scm-leader-transfer.robot # add new datanode4 and verify certificate docker-compose up -d datanode4 @@ -76,8 +75,7 @@ wait_for_execute_command scm4.org 60 "ozone admin datanode list | grep datanode4 #transfer leader to scm3.org execute_robot_test scm3.org kinit.robot -export TARGET_SCM=scm3.org -execute_robot_test scm4.org scmha/scm-leader-transfer.robot +execute_robot_test scm4.org -v "TARGET_SCM:scm3.org" scmha/scm-leader-transfer.robot # wait for next root CA rotation wait_for_execute_command scm3.org 180 "ozone admin cert info 5" diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index c6b857e60db6..085f448dea00 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -203,7 +203,6 @@ execute_robot_test(){ -v OZONE_DIR:"${OZONE_DIR}" \ -v SECURITY_ENABLED:"${SECURITY_ENABLED}" \ -v SCM:"${SCM}" \ - -v TARGET_SCM:"${TARGET_SCM:-scm2.org}" \ ${ARGUMENTS[@]} --log NONE --report NONE "${OZONE_ROBOT_OPTS[@]}" --output "$OUTPUT_PATH" \ "$SMOKETEST_DIR_INSIDE/$TEST" local -i rc=$? @@ -271,10 +270,8 @@ execute_commands_in_container(){ shift 1 local command=$@ - set -e # shellcheck disable=SC2068 docker-compose exec -T $container /bin/bash -c "$command" - set +e } ## @description Stop a list of named containers @@ -345,16 +342,12 @@ wait_for_execute_command(){ SECONDS=0 while [[ $SECONDS -lt $timeout ]]; do - set +e - docker-compose exec -T $container /bin/bash -c "$command" - status=$? 
- set -e - if [ $status -eq 0 ] ; then - echo "$command succeed" - return; + if docker-compose exec -T $container /bin/bash -c "$command"; then + echo "$command succeed" + return fi - echo "$command hasn't succeed yet" - sleep 1 + echo "$command hasn't succeed yet" + sleep 1 done echo "Timed out waiting on $command to be successful" return 1 diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot index 791803c6e98c..4c8796d41b79 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot @@ -21,7 +21,7 @@ Resource ../commonlib.robot Test Timeout 5 minutes *** Variables *** -${TARGET_SCM}= %{TARGET_SCM=scm2.org} +${TARGET_SCM}= scm2.org ** Keywords *** Get SCM Leader Node From 4287822cee7b4a57087c566e6ad61cb25cb86d4b Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 4 Jul 2023 17:23:19 +0800 Subject: [PATCH 14/15] override shouldStartCertificateMonitor in SCMCertificateClient --- .../x509/certificate/client/DefaultCertificateClient.java | 3 +++ .../x509/certificate/client/SCMCertificateClient.java | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index 9944480acf5c..abd2beec506c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -177,6 +177,9 @@ private synchronized void loadAllCertificates() { getLogger().warn("Component certificate was not loaded."); } } + } else { + getLogger().info("CertificateLifetimeMonitor is 
disabled for {}", + component); } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index acba77cf8cbe..26305624b490 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -172,6 +172,10 @@ public CertificateSignRequest.Builder getCSRBuilder() .setKey(new KeyPair(getPublicKey(), getPrivateKey())); } + @Override + protected boolean shouldStartCertificateMonitor() { + return false; + } @Override public Logger getLogger() { From 0e0b8e45db40ed05afe0a68c4a9bab9548b9bdb9 Mon Sep 17 00:00:00 2001 From: Sammi Chen Date: Tue, 4 Jul 2023 22:35:16 +0800 Subject: [PATCH 15/15] increase max certificate lifetime to reduce the possibility of random failure because rotation is fast and some daemons start slowly --- .../src/main/compose/ozonesecure-ha/root-ca-rotation.yaml | 2 +- .../main/compose/ozonesecure-ha/test-root-ca-rotation.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml index 4e88c49e3133..bedf6de3c3bf 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml @@ -20,7 +20,7 @@ x-root-cert-rotation-config: &root-cert-rotation-config environment: - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false - - OZONE-SITE.XML_hdds.x509.max.duration=PT180S + - OZONE-SITE.XML_hdds.x509.max.duration=PT240S - OZONE-SITE.XML_hdds.x509.default.duration=PT60S -
OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh index f7efbf579e88..c7ab83670866 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -40,7 +40,7 @@ execute_robot_test scm1.org kinit.robot wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" # wait and verify root CA is rotated -wait_for_execute_command scm1.org 180 "ozone admin cert info 2" +wait_for_execute_command scm1.org 240 "ozone admin cert info 2" # transfer leader to scm2.org execute_robot_test scm1.org scmha/scm-leader-transfer.robot @@ -53,7 +53,7 @@ execute_commands_in_container scm1.org "ozone sh volume create /r-v1 && ozone sh execute_robot_test scm1.org admincli/pipeline.robot # wait for next root CA rotation -wait_for_execute_command scm1.org 180 "ozone admin cert info 3" +wait_for_execute_command scm1.org 240 "ozone admin cert info 3" # bootstrap new SCM4 and verify certificate docker-compose up -d scm4.org @@ -63,7 +63,7 @@ wait_for_execute_command scm4.org 120 "ozone admin scm roles | grep scm4.org" wait_for_execute_command scm4.org 30 "ozone admin cert list --role=scm | grep scm4.org" # wait for next root CA rotation -wait_for_execute_command scm4.org 180 "ozone admin cert info 4" +wait_for_execute_command scm4.org 240 "ozone admin cert info 4" #transfer leader to scm4.org execute_robot_test scm4.org -v "TARGET_SCM:scm4.org" scmha/scm-leader-transfer.robot @@ -78,7 +78,7 @@ execute_robot_test scm3.org kinit.robot execute_robot_test scm4.org -v "TARGET_SCM:scm3.org" scmha/scm-leader-transfer.robot # wait for next root 
CA rotation -wait_for_execute_command scm3.org 180 "ozone admin cert info 5" +wait_for_execute_command scm3.org 240 "ozone admin cert info 5" #decomission scm1.org execute_robot_test scm3.org scmha/scm-decommission.robot