diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 1cd0a6a47391..ac6c08867be3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -207,6 +207,8 @@ public final class HddsConfigKeys { HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED_DEFAULT = true; public static final String HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX = "-next"; public static final String HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX = "-previous"; + public static final String HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX = + "-progress"; public static final String HDDS_X509_CA_ROTATION_CHECK_INTERNAL = "hdds.x509.ca.rotation.check.interval"; public static final String HDDS_X509_CA_ROTATION_CHECK_INTERNAL_DEFAULT = @@ -216,6 +218,10 @@ public final class HddsConfigKeys { // format hh:mm:ss, representing hour, minute, and second public static final String HDDS_X509_CA_ROTATION_TIME_OF_DAY_DEFAULT = "02:00:00"; + public static final String HDDS_X509_CA_ROTATION_ACK_TIMEOUT = + "hdds.x509.ca.rotation.ack.timeout"; + public static final String HDDS_X509_CA_ROTATION_ACK_TIMEOUT_DEFAULT = + "PT15M"; public static final String HDDS_CONTAINER_REPLICATION_COMPRESSION = "hdds.container.replication.compression"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java index 40b484988d55..1cfc28827488 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java @@ -136,5 +136,6 @@ public enum ResultCodes { INVALID_PIPELINE_STATE, DUPLICATED_PIPELINE_ID, TIMEOUT, + CA_ROTATION_IN_PROGRESS } } diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java index 543d59348c3e..f3e747de63b7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/SecurityConfig.java @@ -44,6 +44,8 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_KEY_ALGORITHM; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_KEY_LEN; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_SECURITY_PROVIDER; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_TIME_OF_DAY; @@ -127,6 +129,7 @@ public class SecurityConfig { private final String caRotationTimeOfDay; private final Pattern caRotationTimeOfDayPattern = Pattern.compile("\\d{2}:\\d{2}:\\d{2}"); + private final Duration caAckTimeout; private final SslProvider grpcSSLProvider; /** @@ -218,6 +221,11 @@ public SecurityConfig(ConfigurationSource configuration) { } caRotationTimeOfDay = "1970-01-01T" + timeOfDayString; + String ackTimeString = configuration.get( + HDDS_X509_CA_ROTATION_ACK_TIMEOUT, + HDDS_X509_CA_ROTATION_ACK_TIMEOUT_DEFAULT); + caAckTimeout = Duration.parse(ackTimeString); + validateCertificateValidityConfig(); this.externalRootCaCert = configuration.get( @@ -287,12 +295,32 @@ private void validateCertificateValidityConfig() { throw new IllegalArgumentException(msg); } + if (caCheckInterval.isNegative() || caCheckInterval.isZero()) { + String msg = "Property " + 
HDDS_X509_CA_ROTATION_CHECK_INTERNAL + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + if (caCheckInterval.compareTo(renewalGracePeriod) >= 0) { throw new IllegalArgumentException("Property value of " + HDDS_X509_CA_ROTATION_CHECK_INTERNAL + " should be smaller than " + HDDS_X509_RENEW_GRACE_DURATION); } + if (caAckTimeout.isNegative() || caAckTimeout.isZero()) { + String msg = "Property " + HDDS_X509_CA_ROTATION_ACK_TIMEOUT + + " should not be zero or negative"; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + + if (caAckTimeout.compareTo(renewalGracePeriod) >= 0) { + throw new IllegalArgumentException("Property value of " + + HDDS_X509_CA_ROTATION_ACK_TIMEOUT + + " should be smaller than " + HDDS_X509_RENEW_GRACE_DURATION); + } + if (tokenSanityChecksEnabled && blockTokenExpiryDurationMs > renewalGracePeriod.toMillis()) { throw new IllegalArgumentException(" Certificate grace period " + @@ -396,6 +424,18 @@ public Path getCertificateLocation(String component) { return Paths.get(metadataDir, component, certificateDir); } + /** + * Returns the File path to where this component store key and certificates. + * + * @param component - Component Name - String. + * @return Path location. + */ + public Path getLocation(String component) { + Preconditions.checkNotNull(this.metadataDir, "Metadata directory can't be" + + " null. Please check configs."); + return Paths.get(metadataDir, component); + } + /** * Gets the Key Size, The default key size is 2048, since the default * algorithm used is RSA. User can change this by setting the "hdds.key.len" @@ -508,6 +548,10 @@ public String getCaRotationTimeOfDay() { return caRotationTimeOfDay; } + public Duration getCaAckTimeout() { + return caAckTimeout; + } + /** * Return true if using test certificates with authority as localhost. 
This * should be used only for unit test where certificates are generated by diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java index 103bc462b89a..d88f40b4be25 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509KeyManager.java @@ -35,6 +35,7 @@ import java.security.PrivateKey; import java.security.cert.X509Certificate; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.concurrent.atomic.AtomicReference; @@ -60,12 +61,14 @@ public class ReloadingX509KeyManager extends X509ExtendedKeyManager { */ static final char[] EMPTY_PASSWORD = new char[0]; private final AtomicReference keyManagerRef; + /** * Current private key and cert used in keyManager. Used to detect if these * materials are changed. */ private PrivateKey currentPrivateKey; private List currentCertIdsList = new ArrayList<>(); + private String alias; /** * Construct a Reloading509KeystoreManager. @@ -85,15 +88,62 @@ public ReloadingX509KeyManager(String type, CertificateClient caClient) @Override public String chooseEngineClientAlias(String[] strings, Principal[] principals, SSLEngine sslEngine) { - return keyManagerRef.get() + String ret = keyManagerRef.get() .chooseEngineClientAlias(strings, principals, sslEngine); + + if (ret == null) { + /* + Workaround to address that netty tc-native cannot handle the dynamic + key and certificate refresh well. What happens is during the setup of + the grpc channel, an SSLContext is created, which is + ReferenceCountedOpenSslServerContext in the native tc-native case. + This class uses the TrustManager's getAcceptedIssuers() as the trusted + CA certificate list. 
The list is not updated after channel is built. + With the list being used to present the Principals during the mTLS + authentication via the Netty channel under Ratis implementation, + the counterpart(client) KeyManager's + chooseEngineClientAlias(String, Principal[], SSLEngine) method is + called with this old root certificate subject principal, which is now + not available in the new Key Manager after refreshed, so the method + will return null, which cause the mutual TLS connection establish + failure. + + Example error message: + Engine client aliases for RSA, DH_RSA, EC, EC_RSA, EC_EC, + O=CID-f9f2b2cf-a784-49d7-8577-5d3b13bf0b46, + OU=9f52487c-f8f9-45ee-bb56-aca60b56327f, + CN=scm-1@scm1.org, + org.apache.ratis.thirdparty.io.netty.handler.ssl.OpenSslEngine@5eec0d10 + is null + + Example success message: + Engine client aliases for RSA, DH_RSA, EC, EC_RSA, EC_EC, + O=CID-f9f2b2cf-a784-49d7-8577-5d3b13bf0b46, + OU=9f52487c-f8f9-45ee-bb56-aca60b56327f, + CN=scm-1@scm1.org, + org.apache.ratis.thirdparty.io.netty.handler.ssl.OpenSslEngine@5eec0d10 + is scm/sub-ca_key + */ + ret = alias; + LOG.info("Engine client aliases for {}, {}, {} is returned as {}", + strings == null ? "" : Arrays.toString(strings), + principals == null ? "" : Arrays.toString(principals), + sslEngine == null ? "" : sslEngine, ret); + } + return ret; } @Override public String chooseEngineServerAlias(String s, Principal[] principals, SSLEngine sslEngine) { - return keyManagerRef.get() + String ret = keyManagerRef.get() .chooseEngineServerAlias(s, principals, sslEngine); + if (ret == null && LOG.isDebugEnabled()) { + LOG.debug("Engine server aliases for {}, {}, {} is null", s, + principals == null ? "" : Arrays.toString(principals), + sslEngine == null ? 
"" : sslEngine); + } + return ret; } @Override @@ -138,7 +188,7 @@ public ReloadingX509KeyManager loadFrom(CertificateClient caClient) { try { X509ExtendedKeyManager manager = loadKeyManager(caClient); if (manager != null) { - this.keyManagerRef.set(manager); + keyManagerRef.set(manager); LOG.info("ReloadingX509KeyManager is reloaded"); } } catch (Exception ex) { @@ -155,9 +205,8 @@ private X509ExtendedKeyManager loadKeyManager(CertificateClient caClient) if (currentPrivateKey != null && currentPrivateKey.equals(privateKey) && currentCertIdsList.size() > 0 && newCertList.size() == currentCertIdsList.size() && - !newCertList.stream().filter( - c -> !currentCertIdsList.contains(c.getSerialNumber().toString())) - .findAny().isPresent()) { + newCertList.stream().allMatch(c -> + currentCertIdsList.contains(c.getSerialNumber().toString()))) { // Security materials(key and certificates) keep the same. return null; } @@ -166,10 +215,15 @@ private X509ExtendedKeyManager loadKeyManager(CertificateClient caClient) KeyStore keystore = KeyStore.getInstance(type); keystore.load(null, null); - keystore.setKeyEntry(caClient.getComponentName() + "_key", - privateKey, EMPTY_PASSWORD, + alias = caClient.getComponentName() + "_key"; + keystore.setKeyEntry(alias, privateKey, EMPTY_PASSWORD, newCertList.toArray(new X509Certificate[0])); + LOG.info("Key manager is loaded with certificate chain"); + for (X509Certificate x509Certificate : newCertList) { + LOG.info(x509Certificate.toString()); + } + KeyManagerFactory keyMgrFactory = KeyManagerFactory.getInstance( KeyManagerFactory.getDefaultAlgorithm()); keyMgrFactory.init(keystore, EMPTY_PASSWORD); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java index d2351b3e96ae..bfc3939cd0a2 100644 --- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/ssl/ReloadingX509TrustManager.java @@ -26,14 +26,19 @@ import javax.net.ssl.TrustManager; import javax.net.ssl.TrustManagerFactory; import javax.net.ssl.X509TrustManager; +import javax.security.auth.x500.X500Principal; import java.io.IOException; import java.security.GeneralSecurityException; import java.security.KeyStore; import java.security.KeyStoreException; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; /** * A {@link TrustManager} implementation that exposes a method, @@ -55,7 +60,7 @@ public final class ReloadingX509TrustManager implements X509TrustManager { /** * Current Root CA cert in trustManager, to detect if certificate is changed. */ - private String currentRootCACertId = null; + private List currentRootCACertIds = new ArrayList<>(); /** * Creates a reloadable trustmanager. The trustmanager reloads itself @@ -80,7 +85,16 @@ public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException { X509TrustManager tm = trustManagerRef.get(); if (tm != null) { - tm.checkClientTrusted(chain, authType); + try { + tm.checkClientTrusted(chain, authType); + } catch (CertificateException e) { + LOG.info("Client certificate chain {} for authType {} is not trusted", + chain == null ? 
"" : Arrays.stream(chain) + .map(X509Certificate::getSubjectX500Principal) + .map(X500Principal::toString) + .collect(Collectors.joining(",")), authType); + throw e; + } } else { throw new CertificateException("Unknown client chain certificate: " + chain[0].toString()); @@ -92,7 +106,16 @@ public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException { X509TrustManager tm = trustManagerRef.get(); if (tm != null) { - tm.checkServerTrusted(chain, authType); + try { + tm.checkServerTrusted(chain, authType); + } catch (CertificateException e) { + LOG.info("Server certificate chain {} for authType {} is not trusted", + chain == null ? "" : Arrays.stream(chain) + .map(X509Certificate::getSubjectX500Principal) + .map(X500Principal::toString) + .collect(Collectors.joining(",")), authType); + throw e; + } } else { throw new CertificateException("Unknown server chain certificate: " + chain[0].toString()); @@ -127,23 +150,22 @@ public ReloadingX509TrustManager loadFrom(CertificateClient caClient) { X509TrustManager loadTrustManager(CertificateClient caClient) throws GeneralSecurityException, IOException { // SCM certificate client sets root CA as CA cert instead of root CA cert - X509Certificate rootCACert = caClient.getRootCACertificate() == null ? - caClient.getCACertificate() : caClient.getRootCACertificate(); + Set certList = caClient.getAllRootCaCerts(); + Set rootCACerts = certList.isEmpty() ? + caClient.getAllCaCerts() : certList; - String rootCACertId = rootCACert.getSerialNumber().toString(); // Certificate keeps the same.
- if (currentRootCACertId != null && - currentRootCACertId.equals(rootCACertId)) { + if (rootCACerts.size() > 0 && + currentRootCACertIds.size() == rootCACerts.size() && + rootCACerts.stream().allMatch(c -> + currentRootCACertIds.contains(c.getSerialNumber().toString()))) { return null; } X509TrustManager trustManager = null; KeyStore ks = KeyStore.getInstance(type); ks.load(null, null); - Set caCertsToInsert = - caClient.getRootCACertificate() == null ? caClient.getAllCaCerts() : - caClient.getAllRootCaCerts(); - insertCertsToKeystore(caCertsToInsert, ks); + insertCertsToKeystore(rootCACerts, ks); TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance( TrustManagerFactory.getDefaultAlgorithm()); @@ -155,15 +177,19 @@ X509TrustManager loadTrustManager(CertificateClient caClient) break; } } - currentRootCACertId = rootCACertId; + currentRootCACertIds.clear(); + rootCACerts.forEach( + c -> currentRootCACertIds.add(c.getSerialNumber().toString())); return trustManager; } private void insertCertsToKeystore(Iterable certs, KeyStore ks) throws KeyStoreException { + LOG.info("Trust manager is loaded with certificates"); for (X509Certificate certToInsert : certs) { String certId = certToInsert.getSerialNumber().toString(); ks.setCertificateEntry(certId, certToInsert); + LOG.info(certToInsert.toString()); } } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java index 6c7bb5389adf..c6d15ab2219f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/utils/CertificateCodec.java @@ -296,7 +296,8 @@ public synchronized void writeCertificate(Path basePath, String fileName, try (FileOutputStream file = new 
FileOutputStream(certificateFile)) { file.write(pemEncodedCertificate.getBytes(DEFAULT_CHARSET)); } - + LOG.info("Save certificate to {}", certificateFile.getAbsolutePath()); + LOG.info("Certificate {}", pemEncodedCertificate); Files.setPosixFilePermissions(certificateFile.toPath(), permissionSet); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index f6bfcfd23705..fd519bfd4393 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -480,8 +480,11 @@ private OzoneConsts() { public static final String SCM_ROOT_CA_COMPONENT_NAME = Paths.get(SCM_CA_CERT_STORAGE_DIR, SCM_CA_PATH).toString(); - public static final String SCM_SUB_CA_PREFIX = "scm-sub@"; - public static final String SCM_ROOT_CA_PREFIX = "scm@"; + // %s to distinguish different certificates + public static final String SCM_SUB_CA = "scm-sub"; + public static final String SCM_SUB_CA_PREFIX = SCM_SUB_CA + "-%s@"; + public static final String SCM_ROOT_CA = "scm"; + public static final String SCM_ROOT_CA_PREFIX = SCM_ROOT_CA + "-%s@"; // Layout Version written into Meta Table ONLY during finalization. public static final String LAYOUT_VERSION_KEY = "#LAYOUTVERSION"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index a9a07371a1b8..484e5bfd3cd0 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -2260,6 +2260,14 @@ and second. + + hdds.x509.ca.rotation.ack.timeout + PT15M + OZONE, HDDS, SECURITY + Max time that SCM leader will wait for the rotation preparation acks before it believes the rotation + is failed. Default is 15 minutes. 
+ + ozone.scm.security.handler.count.key 2 diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java index eea522438fab..67c95ce11152 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/TestHddsSecureDatanodeInit.java @@ -47,6 +47,7 @@ import org.apache.commons.io.FileUtils; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_RENEW_GRACE_DURATION; @@ -105,7 +106,7 @@ public static void setUp() throws Exception { conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT5S"); // 5s conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT1S"); // 1s conf.setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); - + conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s securityConfig = new SecurityConfig(conf); service = new HddsDatanodeService(args) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java index ed4906e3b7ff..1b88cee107b3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java @@ -77,6 +77,18 @@ String getOMCertificate(OzoneManagerDetailsProto omDetails, String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, String certSignReq) throws IOException; + /** + * Get signed certificate for SCM. 
+ * + * @param scmNodeDetails - SCM Node Details. + * @param certSignReq - Certificate signing request. + * @param isRenew - if SCM is renewing certificate or not. + * @return String - pem encoded SCM signed + * certificate. + */ + String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + String certSignReq, boolean isRenew) throws IOException; + /** * Get SCM signed certificate for given certificate serial id if it exists. * Throws exception if it's not found. diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java index 73e7ede0790d..4aa32f04cfe0 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java @@ -194,9 +194,24 @@ public String getCertificate(NodeDetailsProto nodeDetails, @Override public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, String certSignReq) throws IOException { - return getSCMCertChain(scmNodeDetails, certSignReq).getX509Certificate(); + return getSCMCertChain(scmNodeDetails, certSignReq, false) + .getX509Certificate(); } + /** + * Get signed certificate for SCM node. + * + * @param scmNodeDetails - SCM Node Details. + * @param certSignReq - Certificate signing request. + * @param renew - Whether SCM is trying to renew its certificate + * @return String - pem encoded SCM signed + * certificate. + */ + public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + String certSignReq, boolean renew) throws IOException { + return getSCMCertChain(scmNodeDetails, certSignReq, renew) + .getX509Certificate(); + } /** * Get signed certificate for SCM node and root CA certificate. 
@@ -207,12 +222,13 @@ public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, * signed certificate and root CA certificate. */ public SCMGetCertResponseProto getSCMCertChain( - ScmNodeDetailsProto scmNodeDetails, String certSignReq) + ScmNodeDetailsProto scmNodeDetails, String certSignReq, boolean isRenew) throws IOException { SCMGetSCMCertRequestProto request = SCMGetSCMCertRequestProto.newBuilder() .setCSR(certSignReq) .setScmDetails(scmNodeDetails) + .setRenew(isRenew) .build(); return submitRequest(Type.GetSCMCertificate, builder -> builder.setGetSCMCertificateRequest(request)) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java index c94408f08ee6..0187405f936b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java @@ -132,6 +132,7 @@ public class DefaultCAServer implements CertificateServer { private CertificateStore store; private Lock lock; private static boolean testSecureFlag; + private BigInteger rootCertificateId; /** * Create an Instance of DefaultCAServer. @@ -141,17 +142,25 @@ public class DefaultCAServer implements CertificateServer { * @param certificateStore - A store used to persist Certificates. 
*/ public DefaultCAServer(String subject, String clusterID, String scmID, - CertificateStore certificateStore, + CertificateStore certificateStore, BigInteger rootCertId, PKIProfile pkiProfile, String componentName) { this.subject = subject; this.clusterID = clusterID; this.scmID = scmID; this.store = certificateStore; + this.rootCertificateId = rootCertId; this.profile = pkiProfile; this.componentName = componentName; lock = new ReentrantLock(); } + public DefaultCAServer(String subject, String clusterID, String scmID, + CertificateStore certificateStore, PKIProfile pkiProfile, + String componentName) { + this(subject, clusterID, scmID, certificateStore, BigInteger.ONE, + pkiProfile, componentName); + } + @Override public void init(SecurityConfig securityConfig, CAType type) throws IOException { @@ -568,7 +577,7 @@ private void generateRootCertificate( .setClusterID(this.clusterID) .setBeginDate(beginDate) .setEndDate(endDate) - .makeCA() + .makeCA(rootCertificateId) .setConfiguration(securityConfig) .setKey(key); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java index 8c5c91320338..60853273bd37 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DNCertificateClient.java @@ -99,7 +99,7 @@ public CertificateSignRequest.Builder getCSRBuilder() @Override public String signAndStoreCertificate(PKCS10CertificationRequest csr, - Path certificatePath) throws CertificateException { + Path certificatePath, boolean renew) throws CertificateException { try { // TODO: For SCM CA we should fetch certificate from multiple SCMs. 
SCMSecurityProtocolProtos.SCMGetCertResponseProto response = @@ -113,16 +113,14 @@ public String signAndStoreCertificate(PKCS10CertificationRequest csr, getSecurityConfig(), certificatePath); // Certs will be added to cert map after reloadAllCertificate called storeCertificate(pemEncodedCert, CAType.NONE, - certCodec, - false); + certCodec, false, !renew); storeCertificate(response.getX509CACertificate(), - CAType.SUBORDINATE, - certCodec, false); + CAType.SUBORDINATE, certCodec, false, !renew); // Store Root CA certificate. if (response.hasX509RootCACertificate()) { storeCertificate(response.getX509RootCACertificate(), - CAType.ROOT, certCodec, false); + CAType.ROOT, certCodec, false, !renew); } // Return the default certificate ID return CertificateCodec.getX509Certificate(pemEncodedCert) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java index d64cabf5c92a..abd2beec506c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/DefaultCertificateClient.java @@ -59,6 +59,7 @@ import java.util.stream.Stream; import java.util.stream.Collectors; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; @@ -132,8 +133,7 @@ protected DefaultCertificateClient( String certSerialId, String component, Consumer saveCertId, - Runnable shutdown - ) { + Runnable shutdown) { Objects.requireNonNull(securityConfig); this.securityConfig = securityConfig; this.scmSecurityClient = scmSecurityClient; @@ -154,8 +154,11 @@ 
protected DefaultCertificateClient( * Load all certificates from configured location. * */ private synchronized void loadAllCertificates() { - try (Stream certFiles = - Files.list(securityConfig.getCertificateLocation(component))) { + Path path = securityConfig.getCertificateLocation(component); + if (!path.toFile().exists() && certSerialId == null) { + return; + } + try (Stream certFiles = Files.list(path)) { certFiles .filter(Files::isRegularFile) .forEach(this::readCertificateFile); @@ -164,14 +167,19 @@ private synchronized void loadAllCertificates() { return; } - if (certPath != null && executorService == null) { - startCertificateMonitor(); - } else { - if (executorService != null) { - getLogger().debug("CertificateLifetimeMonitor is already started."); + if (shouldStartCertificateMonitor()) { + if (certPath != null && executorService == null) { + startCertificateMonitor(); } else { - getLogger().warn("Component certificate was not loaded."); + if (executorService != null) { + getLogger().debug("CertificateLifetimeMonitor is already started."); + } else { + getLogger().warn("Component certificate was not loaded."); + } } + } else { + getLogger().info("CertificateLifetimeMonitor is disabled for {}", + component); } } @@ -188,7 +196,7 @@ private synchronized void readCertificateFile(Path filePath) { if (readCertSerialId.equals(certSerialId)) { this.certPath = allCertificates; } - certificateMap.putIfAbsent(readCertSerialId, allCertificates); + certificateMap.put(readCertSerialId, allCertificates); addCertsToSubCaMapIfNeeded(fileName, allCertificates); addCertToRootCaMapIfNeeded(fileName, allCertificates); @@ -550,12 +558,12 @@ public void storeCertificate(String pemEncodedCert, CertificateCodec certificateCodec = new CertificateCodec(securityConfig, component); storeCertificate(pemEncodedCert, caType, - certificateCodec, true); + certificateCodec, true, false); } public synchronized void storeCertificate(String pemEncodedCert, - CAType caType, CertificateCodec 
codec, boolean addToCertMap) - throws CertificateException { + CAType caType, CertificateCodec codec, boolean addToCertMap, + boolean updateCA) throws CertificateException { try { CertPath certificatePath = CertificateCodec.getCertPathFromPemEncodedString(pemEncodedCert); @@ -564,18 +572,19 @@ public synchronized void storeCertificate(String pemEncodedCert, String certName = String.format(CERT_FILE_NAME_FORMAT, caType.getFileNamePrefix() + cert.getSerialNumber().toString()); - if (caType == CAType.SUBORDINATE) { - caCertId = cert.getSerialNumber().toString(); - } - if (caType == CAType.ROOT) { - rootCaCertId = cert.getSerialNumber().toString(); + if (updateCA) { + if (caType == CAType.SUBORDINATE) { + caCertId = cert.getSerialNumber().toString(); + } + if (caType == CAType.ROOT) { + rootCaCertId = cert.getSerialNumber().toString(); + } } codec.writeCertificate(certName, pemEncodedCert); if (addToCertMap) { - certificateMap.putIfAbsent( - cert.getSerialNumber().toString(), certificatePath); + certificateMap.put(cert.getSerialNumber().toString(), certificatePath); } } catch (IOException | java.security.cert.CertificateException e) { throw new CertificateException("Error while storing certificate.", e, @@ -894,12 +903,19 @@ public synchronized X509Certificate getRootCACertificate() { @Override public Set getAllRootCaCerts() { - return Collections.unmodifiableSet(rootCaCertificates); + Set certs = + Collections.unmodifiableSet(rootCaCertificates); + getLogger().info("{} has {} Root CA certificates", this.component, + certs.size()); + return certs; } @Override public Set getAllCaCerts() { - return Collections.unmodifiableSet(caCertificates); + Set certs = Collections.unmodifiableSet(caCertificates); + getLogger().info("{} has {} CA certificates", this.component, + certs.size()); + return certs; } @Override @@ -1043,7 +1059,7 @@ public String renewAndStoreKeyAndCertificate(boolean force) CertificateSignRequest.Builder csrBuilder = getCSRBuilder(); 
csrBuilder.setKey(newKeyPair); newCertSerialId = signAndStoreCertificate(csrBuilder.build(), - Paths.get(newCertPath)); + Paths.get(newCertPath), true); } catch (Exception e) { throw new CertificateException("Error while signing and storing new" + " certificates.", e, RENEW_ERROR); @@ -1183,16 +1199,20 @@ public void cleanBackupDir() { } } - synchronized void reloadKeyAndCertificate(String newCertId) { - // reset current value + public synchronized void reloadKeyAndCertificate(String newCertId) { privateKey = null; publicKey = null; certPath = null; caCertId = null; rootCaCertId = null; - updateCertSerialId(newCertId); - getLogger().info("Reset and reload key and all certificates."); + String oldCertId = certSerialId; + updateCertSerialId(newCertId); + getLogger().info("Reset and reloaded key and all certificates for new " + + "certificate {}.", newCertId); + // notify receivers with the replaced (old) and the new certificate ID + notificationReceivers.forEach(r -> r.notifyCertificateRenewed( + this, oldCertId, newCertId)); } public SecurityConfig getSecurityConfig() { @@ -1201,12 +1221,19 @@ public SecurityConfig getSecurityConfig() { private synchronized String updateCertSerialId(String newCertSerialId) { certSerialId = newCertSerialId; + getLogger().info("Certificate serial ID set to {}", certSerialId); loadAllCertificates(); return certSerialId; } - protected abstract String signAndStoreCertificate( + protected String signAndStoreCertificate( PKCS10CertificationRequest request, Path certificatePath) + throws CertificateException { + return signAndStoreCertificate(request, certificatePath, false); + } + + protected abstract String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certificatePath, boolean renew) throws CertificateException; public String signAndStoreCertificate( @@ -1220,6 +1247,10 @@ public SCMSecurityProtocolClientSideTranslatorPB getScmSecureClient() return scmSecurityClient; } + protected boolean shouldStartCertificateMonitor() { + return true; + } + public synchronized
void startCertificateMonitor() { Preconditions.checkNotNull(getCertificate(), "Component certificate should not be empty"); @@ -1237,8 +1268,7 @@ public synchronized void startCertificateMonitor() { getComponentName() + "-CertificateLifetimeMonitor") .setDaemon(true).build()); } - this.executorService.scheduleAtFixedRate( - new CertificateLifetimeMonitor(this), + this.executorService.scheduleAtFixedRate(new CertificateLifetimeMonitor(), timeBeforeGracePeriod, interval, TimeUnit.MILLISECONDS); getLogger().info("CertificateLifetimeMonitor for {} is started with " + "first delay {} ms and interval {} ms.", component, @@ -1249,10 +1279,8 @@ public synchronized void startCertificateMonitor() { * Task to monitor certificate lifetime and renew the certificate if needed. */ public class CertificateLifetimeMonitor implements Runnable { - private CertificateClient certClient; - public CertificateLifetimeMonitor(CertificateClient client) { - this.certClient = client; + public CertificateLifetimeMonitor() { } @Override @@ -1270,8 +1298,9 @@ public void run() { if (timeLeft.isZero()) { String newCertId; try { - getLogger().info("Current certificate has entered the expiry" + + getLogger().info("Current certificate {} has entered the expiry" + " grace period {}. Starting renew key and certs.", + currentCert.getSerialNumber().toString(), timeLeft, securityConfig.getRenewalGracePeriod()); newCertId = renewAndStoreKeyAndCertificate(false); } catch (CertificateException e) { @@ -1297,11 +1326,20 @@ public void run() { reloadKeyAndCertificate(newCertId); // cleanup backup directory cleanBackupDir(); - // notify notification receivers - notificationReceivers.forEach(r -> r.notifyCertificateRenewed( - certClient, currentCert.getSerialNumber().toString(), newCertId)); } } } } + + /** + * Set the CA certificate. For TEST only. 
+ */ + @VisibleForTesting + public synchronized void setCACertificate(X509Certificate cert) + throws Exception { + caCertId = cert.getSerialNumber().toString(); + certificateMap.put(caCertId, + CertificateCodec.getCertPathFromPemEncodedString( + CertificateCodec.getPEMEncodedString(cert))); + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java index 51eb2959a6e9..26305624b490 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/client/SCMCertificateClient.java @@ -20,6 +20,10 @@ import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos; +import org.apache.hadoop.hdds.security.x509.certificate.authority.CAType; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; import org.apache.hadoop.hdds.security.x509.exception.CertificateException; import org.apache.hadoop.ozone.OzoneConsts; @@ -30,11 +34,14 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.security.KeyPair; +import java.security.cert.X509Certificate; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.FAILURE; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.GETCERT; import static org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.RECOVER; import static 
org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient.InitResponse.SUCCESS; +import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest.getEncodedString; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_SUB_CA_PREFIX; /** * SCM Certificate Client which is used for generating public/private Key pair, @@ -49,12 +56,25 @@ public class SCMCertificateClient extends DefaultCertificateClient { public static final String COMPONENT_NAME = Paths.get(OzoneConsts.SCM_CA_CERT_STORAGE_DIR, OzoneConsts.SCM_SUB_CA_PATH).toString(); + private String scmId; + private String cId; + private String scmHostname; + private SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient; + + public SCMCertificateClient(SecurityConfig securityConfig, + SCMSecurityProtocolClientSideTranslatorPB scmClient, + String scmId, String clusterId, String scmCertId, String hostname) { + super(securityConfig, scmClient, LOG, scmCertId, + COMPONENT_NAME, null, null); + this.scmId = scmId; + this.cId = clusterId; + this.scmHostname = hostname; + } public SCMCertificateClient( SecurityConfig securityConfig, SCMSecurityProtocolClientSideTranslatorPB scmClient, - String certSerialId - ) { + String certSerialId) { super(securityConfig, scmClient, LOG, certSerialId, COMPONENT_NAME, null, null); } @@ -135,7 +155,16 @@ protected InitResponse handleCase(InitCase init) @Override public CertificateSignRequest.Builder getCSRBuilder() throws CertificateException { + String subject = String.format(SCM_SUB_CA_PREFIX, System.nanoTime()) + + scmHostname; + + LOG.info("Creating csr for SCM->hostName:{},scmId:{},clusterId:{}," + + "subject:{}", scmHostname, scmId, cId, subject); + return super.getCSRBuilder() + .setSubject(subject) + .setScmID(scmId) + .setClusterID(cId) .setDigitalEncryption(true) .setDigitalSignature(true) // Set CA to true, as this will be used to sign certs for OM/DN. 
@@ -143,6 +172,10 @@ public CertificateSignRequest.Builder getCSRBuilder() .setKey(new KeyPair(getPublicKey(), getPrivateKey())); } + @Override + protected boolean shouldStartCertificateMonitor() { + return false; + } @Override public Logger getLogger() { @@ -151,8 +184,44 @@ public Logger getLogger() { @Override public String signAndStoreCertificate(PKCS10CertificationRequest request, - Path certPath) throws CertificateException { - throw new UnsupportedOperationException("signAndStoreCertificate of " + - " SCMCertificateClient is not supported currently"); + Path certPath, boolean renew) throws CertificateException { + try { + HddsProtos.ScmNodeDetailsProto scmNodeDetailsProto = + HddsProtos.ScmNodeDetailsProto.newBuilder() + .setClusterId(cId) + .setHostName(scmHostname) + .setScmNodeId(scmId).build(); + + // Get SCM sub CA cert. + SCMSecurityProtocolProtos.SCMGetCertResponseProto response = + getScmSecureClient().getSCMCertChain(scmNodeDetailsProto, + getEncodedString(request), true); + + CertificateCodec certCodec = new CertificateCodec( + getSecurityConfig(), certPath); + String pemEncodedCert = response.getX509Certificate(); + + // Store SCM sub CA and root CA certificate. + if (response.hasX509CACertificate()) { + String pemEncodedRootCert = response.getX509CACertificate(); + storeCertificate(pemEncodedRootCert, + CAType.SUBORDINATE, certCodec, false, !renew); + storeCertificate(pemEncodedCert, CAType.NONE, certCodec, + false, !renew); + //note: this does exactly the same as store certificate + certCodec.writeCertificate(certCodec.getLocation().toAbsolutePath(), + getSecurityConfig().getCertificateFileName(), pemEncodedCert); + + X509Certificate certificate = + CertificateCodec.getX509Certificate(pemEncodedCert); + // return new scm cert serial ID. 
+ return certificate.getSerialNumber().toString(); + } else { + throw new RuntimeException("Unable to retrieve SCM certificate chain"); + } + } catch (Throwable e) { + LOG.error("Error while fetching/storing SCM signed certificate.", e); + throw new RuntimeException(e); + } } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java index c4e24783c3b8..1a3ef2d7f0d9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/keys/KeyCodec.java @@ -99,6 +99,11 @@ public KeyCodec(SecurityConfig config, Path keyDir) { this.securityConfig = config; isPosixFileSystem = KeyCodec::isPosix; this.location = keyDir; + if (!location.toFile().exists()) { + if (!location.toFile().mkdirs()) { + throw new RuntimeException("Failed to create directory " + location); + } + } } /** @@ -193,7 +198,7 @@ public void writePublicKey(PublicKey key) throws IOException { securityConfig.getPublicKeyFileName()).toFile(); if (Files.exists(publicKeyFile.toPath())) { - throw new IOException("Private key already exist."); + throw new IOException("Public key already exist."); } try (PemWriter keyWriter = new PemWriter(new diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java index 2fe0bf84ea24..c0af10a3da65 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java @@ -488,6 +488,13 @@ public String 
signAndStoreCertificate( PKCS10CertificationRequest request, Path certificatePath) { return null; } + + @Override + public String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certificatePath, + boolean renew) throws CertificateException { + return null; + } }) { InitResponse resp = client.init(); @@ -581,7 +588,7 @@ public void testRenewAndStoreKeyAndCertificate() throws Exception { newCertDir.toPath()); dnCertClient.storeCertificate(getPEMEncodedString(cert), CAType.NONE, - certCodec, false); + certCodec, false, false); // a success renew after auto cleanup new key and cert dir dnCertClient.renewAndStoreKeyAndCertificate(true); } @@ -623,6 +630,13 @@ protected String signAndStoreCertificate( PKCS10CertificationRequest request, Path certificatePath) { return ""; } + + @Override + protected String signAndStoreCertificate( + PKCS10CertificationRequest request, Path certificatePath, + boolean renew) throws CertificateException { + return null; + } }; Thread[] threads = new Thread[Thread.activeCount()]; diff --git a/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto index 4fb0737b3925..3ae9879f9404 100644 --- a/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/SCMRatisProtocol.proto @@ -30,6 +30,7 @@ enum RequestType { STATEFUL_SERVICE_CONFIG = 7; FINALIZE = 8; SECRET_KEY = 9; + CERT_ROTATE = 10; } message Method { diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index 098700642172..0d468ed0ab14 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -133,6 +133,7 @@ enum Status { INVALID_PIPELINE_STATE = 40; DUPLICATED_PIPELINE_ID = 41; TIMEOUT = 42; + CA_ROTATION_IN_PROGRESS = 43; } /** diff 
--git a/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto index 85ae39379fd3..dd3ef42308c4 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto @@ -147,6 +147,7 @@ message SCMGetCertRequestProto { message SCMGetSCMCertRequestProto { required ScmNodeDetailsProto scmDetails = 1; required string CSR = 2; + optional bool renew = 3[default = false]; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java index a8cb1880ee06..f307a4030828 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java @@ -53,8 +53,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.math.BigInteger; import java.net.InetAddress; -import java.net.InetSocketAddress; import java.security.cert.CertPath; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; @@ -89,12 +89,11 @@ private HASecurityUtils() { * signed certificate and persist to local disk. 
* @param scmStorageConfig * @param conf - * @param scmAddress + * @param scmHostname * @throws IOException */ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, - OzoneConfiguration conf, - InetSocketAddress scmAddress, boolean primaryscm) + OzoneConfiguration conf, String scmHostname, boolean primaryscm) throws IOException { LOG.info("Initializing secure StorageContainerManager."); @@ -102,8 +101,9 @@ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient = getScmSecurityClientWithMaxRetry(conf, getCurrentUser()); try (CertificateClient certClient = - new SCMCertificateClient( - securityConfig, scmSecurityClient, scmStorageConfig.getScmId())) { + new SCMCertificateClient(securityConfig, scmSecurityClient, + scmStorageConfig.getScmId(), scmStorageConfig.getClusterID(), + scmStorageConfig.getScmCertSerialId(), scmHostname)) { InitResponse response = certClient.init(); LOG.info("Init response: {}", response); switch (response) { @@ -113,10 +113,10 @@ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, case GETCERT: if (!primaryscm) { getRootCASignedSCMCert(conf, certClient, securityConfig, - scmStorageConfig, scmAddress); + scmStorageConfig, scmHostname); } else { getPrimarySCMSelfSignedCert(certClient, securityConfig, - scmStorageConfig, scmAddress); + scmStorageConfig, scmHostname); } LOG.info("Successfully stored SCM signed certificate."); break; @@ -141,21 +141,18 @@ public static void initializeSecurity(SCMStorageConfig scmStorageConfig, * client. */ private static void getRootCASignedSCMCert( - OzoneConfiguration configuration, - CertificateClient client, + OzoneConfiguration configuration, CertificateClient client, SecurityConfig securityConfig, - SCMStorageConfig scmStorageConfig, - InetSocketAddress scmAddress - ) { + SCMStorageConfig scmStorageConfig, String scmHostname) { try { // Generate CSR. 
PKCS10CertificationRequest csr = generateCSR(client, scmStorageConfig, - securityConfig, scmAddress); + securityConfig, scmHostname); ScmNodeDetailsProto scmNodeDetailsProto = ScmNodeDetailsProto.newBuilder() .setClusterId(scmStorageConfig.getClusterID()) - .setHostName(scmAddress.getHostName()) + .setHostName(scmHostname) .setScmNodeId(scmStorageConfig.getScmId()).build(); // Create SCM security client. @@ -164,7 +161,7 @@ private static void getRootCASignedSCMCert( // Get SCM sub CA cert. SCMGetCertResponseProto response = secureScmClient. - getSCMCertChain(scmNodeDetailsProto, getEncodedString(csr)); + getSCMCertChain(scmNodeDetailsProto, getEncodedString(csr), false); String pemEncodedCert = response.getX509Certificate(); // Store SCM sub CA and root CA certificate. @@ -198,7 +195,7 @@ private static void getRootCASignedSCMCert( */ private static void getPrimarySCMSelfSignedCert(CertificateClient client, SecurityConfig config, SCMStorageConfig scmStorageConfig, - InetSocketAddress scmAddress) { + String scmHostname) { try { @@ -207,7 +204,7 @@ private static void getPrimarySCMSelfSignedCert(CertificateClient client, new DefaultCAProfile()); PKCS10CertificationRequest csr = generateCSR(client, scmStorageConfig, - config, scmAddress); + config, scmHostname); CertPath subSCMCertHolderList = rootCAServer. 
requestCertificate(csr, KERBEROS_TRUSTED, SCM).get(); @@ -251,47 +248,65 @@ private static void getPrimarySCMSelfSignedCert(CertificateClient client, * @param config * @param scmCertStore * @param scmStorageConfig + * @param pkiProfile + * @param component */ public static CertificateServer initializeRootCertificateServer( SecurityConfig config, CertificateStore scmCertStore, - SCMStorageConfig scmStorageConfig, PKIProfile pkiProfile) - throws IOException { - String subject = SCM_ROOT_CA_PREFIX + + SCMStorageConfig scmStorageConfig, BigInteger rootCertId, + PKIProfile pkiProfile, String component) throws IOException { + String subject = String.format(SCM_ROOT_CA_PREFIX, rootCertId) + InetAddress.getLocalHost().getHostName(); DefaultCAServer rootCAServer = new DefaultCAServer(subject, scmStorageConfig.getClusterID(), - scmStorageConfig.getScmId(), scmCertStore, pkiProfile, - SCM_ROOT_CA_COMPONENT_NAME); + scmStorageConfig.getScmId(), scmCertStore, rootCertId, pkiProfile, + component); rootCAServer.init(config, CAType.ROOT); return rootCAServer; } + /** + * This function creates/initializes a certificate server as needed. + * This function is idempotent, so calling this again and again after the + * server is initialized is not a problem. + * + * @param config + * @param scmCertStore + * @param scmStorageConfig + * @param pkiProfile + */ + public static CertificateServer initializeRootCertificateServer( + SecurityConfig config, CertificateStore scmCertStore, + SCMStorageConfig scmStorageConfig, PKIProfile pkiProfile) + throws IOException { + return initializeRootCertificateServer(config, scmCertStore, + scmStorageConfig, BigInteger.ONE, pkiProfile, + SCM_ROOT_CA_COMPONENT_NAME); + } + /** * Generate CSR to obtain SCM sub CA certificate. 
*/ private static PKCS10CertificationRequest generateCSR( CertificateClient client, SCMStorageConfig scmStorageConfig, - SecurityConfig config, InetSocketAddress scmAddress) + SecurityConfig config, String scmHostname) throws IOException { CertificateSignRequest.Builder builder = client.getCSRBuilder(); // Get host name. - String hostname = scmAddress.getHostName(); - - String subject = SCM_SUB_CA_PREFIX + hostname; + String subject = String.format(SCM_SUB_CA_PREFIX, System.nanoTime()) + + scmHostname; - builder - .setConfiguration(config) + builder.setConfiguration(config) .setScmID(scmStorageConfig.getScmId()) .setClusterID(scmStorageConfig.getClusterID()) .setSubject(subject); - LOG.info("Creating csr for SCM->hostName:{},scmId:{},clusterId:{}," + - "subject:{}", hostname, scmStorageConfig.getScmId(), + "subject:{}", scmHostname, scmStorageConfig.getScmId(), scmStorageConfig.getClusterID(), subject); return builder.build(); @@ -336,24 +351,23 @@ public static GrpcTlsConfig createSCMRatisTLSConfig(SecurityConfig conf, } /** - * Submit SCM certs request to ratis using RaftClient. + * Submit SCM request to ratis using RaftClient. * @param raftGroup * @param tlsConfig * @param message * @return SCMRatisResponse. * @throws Exception */ - public static SCMRatisResponse submitScmCertsToRatis(RaftGroup raftGroup, + public static SCMRatisResponse submitScmRequestToRatis(RaftGroup raftGroup, GrpcTlsConfig tlsConfig, Message message) throws Exception { // TODO: GRPC TLS only for now, netty/hadoop RPC TLS support later. final SupportedRpcType rpc = SupportedRpcType.GRPC; final RaftProperties properties = RatisHelper.newRaftProperties(rpc); - // For now not making anything configurable, RaftClient is only used // in SCM for DB updates of sub-ca certs go via Ratis. 
- RaftClient.Builder builder = RaftClient.newBuilder() + RaftClient.Builder builder = RaftClient.newBuilder() .setRaftGroup(raftGroup) .setLeaderId(null) .setProperties(properties) @@ -391,5 +405,14 @@ public static SCMRatisResponse submitScmCertsToRatis(RaftGroup raftGroup, return new SCMSecurityProtocolClientSideTranslatorPB( new SCMSecurityProtocolFailoverProxyProvider(conf, UserGroupInformation.getCurrentUser())); + + } + + public static boolean isSelfSignedCertificate(X509Certificate cert) { + return cert.getIssuerX500Principal().equals(cert.getSubjectX500Principal()); + } + + public static boolean isCACertificate(X509Certificate cert) { + return cert.getBasicConstraints() != -1; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java index f87758676d89..32ad2c2adffe 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAInvocationHandler.java @@ -120,14 +120,17 @@ private Object invokeRatisImpl(Method method, Object[] args) method.getName(), method.getParameterTypes(), args); // Scm Cert DB updates should use RaftClient. - // As rootCA which is primary SCM only can issues certificates to sub-CA. + // As rootCA which is primary SCM only can issue certificates to sub-CA. // In case primary is not leader SCM, still sub-ca cert DB updates should go // via ratis. So, in this special scenario we use RaftClient. + // Or rotationPrepareAck which every SCM will send out to confirm that + // sub CA rotation preparation is done. 
final SCMRatisResponse response; - if (method.getName().equals("storeValidCertificate") && - args[args.length - 1].equals(HddsProtos.NodeType.SCM)) { + if ((method.getName().equals("storeValidCertificate") && + args[args.length - 1].equals(HddsProtos.NodeType.SCM)) || + method.getName().equals("rotationPrepareAck")) { response = - HASecurityUtils.submitScmCertsToRatis( + HASecurityUtils.submitScmRequestToRatis( ratisHandler.getDivision().getGroup(), ratisHandler.getGrpcTlsConfig(), scmRatisRequest.encode()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java index fba776471ace..94cbbc9297e0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.metadata.Replicate; import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore; +import org.apache.hadoop.hdds.security.x509.certificate.CertInfo; import org.apache.hadoop.hdds.utils.UniqueId; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.Table.KeyValue; @@ -34,6 +35,8 @@ import java.io.IOException; import java.lang.reflect.Proxy; +import java.math.BigInteger; +import java.security.cert.X509Certificate; import java.time.LocalDate; import java.util.HashMap; import java.util.Map; @@ -66,6 +69,7 @@ public class SequenceIdGenerator { public static final String LOCAL_ID = "localId"; public static final String DEL_TXN_ID = "delTxnId"; public static final String CONTAINER_ID = "containerId"; + public static final String ROOT_CERTIFICATE_ID = "rootCertificateId"; private static final long INVALID_SEQUENCE_ID = 0; @@ -121,7 +125,8 @@ public long getNextId(String sequenceIdName) throws 
SCMException { batch.nextId = prevLastId + 1; Preconditions.checkArgument(Long.MAX_VALUE - batch.lastId >= batchSize); - batch.lastId += batchSize; + batch.lastId += sequenceIdName.equals(ROOT_CERTIFICATE_ID) ? + 1 : batchSize; if (stateManager.allocateBatch(sequenceIdName, prevLastId, batch.lastId)) { @@ -364,7 +369,7 @@ public static void upgradeToSequenceId(SCMMetadataStore scmMetadataStore) long largestContainerId = 0; try (TableIterator> iterator = - scmMetadataStore.getContainerTable().iterator()) { + scmMetadataStore.getContainerTable().iterator()) { while (iterator.hasNext()) { ContainerInfo containerInfo = iterator.next().getValue(); largestContainerId = @@ -376,5 +381,41 @@ public static void upgradeToSequenceId(SCMMetadataStore scmMetadataStore) LOG.info("upgrade {} to {}", CONTAINER_ID, sequenceIdTable.get(CONTAINER_ID)); } + + // upgrade root certificate ID + if (sequenceIdTable.get(ROOT_CERTIFICATE_ID) == null) { + long largestRootCertId = BigInteger.ONE.longValueExact(); + try (TableIterator> iterator = + scmMetadataStore.getValidSCMCertsTable().iterator()) { + while (iterator.hasNext()) { + X509Certificate cert = iterator.next().getValue(); + if (HASecurityUtils.isSelfSignedCertificate(cert) && + HASecurityUtils.isCACertificate(cert)) { + largestRootCertId = + Long.max(cert.getSerialNumber().longValueExact(), + largestRootCertId); + } + } + } + + try (TableIterator> iterator = + scmMetadataStore.getRevokedCertsV2Table().iterator()) { + while (iterator.hasNext()) { + X509Certificate cert = + iterator.next().getValue().getX509Certificate(); + if (HASecurityUtils.isSelfSignedCertificate(cert) && + HASecurityUtils.isCACertificate(cert)) { + largestRootCertId = + Long.max(cert.getSerialNumber().longValueExact(), + largestRootCertId); + } + } + } + sequenceIdTable.put(ROOT_CERTIFICATE_ID, largestRootCertId); + LOG.info("upgrade {} to {}", + ROOT_CERTIFICATE_ID, sequenceIdTable.get(ROOT_CERTIFICATE_ID)); + } } } diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java index d65eacc84c76..aeb1fb6ea6bc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java @@ -253,7 +253,7 @@ public SCMGetCertResponseProto getSCMCertificate( throw createNotHAException(); } String certificate = impl.getSCMCertificate(request.getScmDetails(), - request.getCSR()); + request.getCSR(), request.hasRenew() && request.getRenew()); SCMGetCertResponseProto.Builder builder = SCMGetCertResponseProto .newBuilder() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java new file mode 100644 index 000000000000..a91c90e3df12 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandler.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.security; + +import org.apache.hadoop.hdds.scm.metadata.Replicate; + +import java.io.IOException; + +/** + * This interface defines APIs for sub-ca rotation instructions. + */ +public interface RootCARotationHandler { + + /** + * Notify SCM peers to do sub-ca rotation preparation and replicate + * this operation through RATIS. + * @param rootCertId the new root certificate serial ID + * @throws IOException on failure to persist configuration + */ + @Replicate + void rotationPrepare(String rootCertId) + throws IOException; + + @Replicate + void rotationPrepareAck(String rootCertId, String scmCertId, String scmId) + throws IOException; + + @Replicate + void rotationCommit(String rootCertId) + throws IOException; + + @Replicate + void rotationCommitted(String rootCertId) + throws IOException; + + int rotationPrepareAcks(); + + void resetRotationPrepareAcks(); + + void setSubCACertId(String subCACertId); +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java new file mode 100644 index 000000000000..cdaf2d34c2bc --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationHandlerImpl.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.security; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.scm.ha.SCMHAInvocationHandler; +import org.apache.hadoop.hdds.scm.ha.SCMRatisServer; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Proxy; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; +import static org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType.CERT_ROTATE; + +/** + * Root CA Rotation Handler for ratis SCM statemachine. 
+ */ +public class RootCARotationHandlerImpl implements RootCARotationHandler { + + public static final Logger LOG = + LoggerFactory.getLogger(RootCARotationHandlerImpl.class); + + private final StorageContainerManager scm; + private final SCMCertificateClient scmCertClient; + private final SecurityConfig secConfig; + private Set newScmCertIdSet = new HashSet<>(); + private final String newSubCAPath; + private final RootCARotationManager rotationManager; + private AtomicReference newSubCACertId = new AtomicReference(); + private AtomicReference newRootCACertId = new AtomicReference(); + + /** + * Constructs RootCARotationHandlerImpl with the specified arguments. + * + * @param scm the storage container manager + */ + public RootCARotationHandlerImpl(StorageContainerManager scm, + RootCARotationManager manager) { + this.scm = scm; + this.rotationManager = manager; + this.scmCertClient = (SCMCertificateClient) scm.getScmCertificateClient(); + this.secConfig = scmCertClient.getSecurityConfig(); + + this.newSubCAPath = secConfig.getLocation( + scmCertClient.getComponentName()).toString() + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; + } + + @Override + public void rotationPrepare(String rootCertId) + throws IOException { + LOG.info("Received rotation prepare command of root certificate {}", + rootCertId); + if (rotationManager.shouldSkipRootCert(rootCertId)) { + return; + } + + newRootCACertId.set(rootCertId); + newScmCertIdSet.clear(); + newSubCACertId.set(null); + rotationManager.scheduleSubCaRotationPrepareTask(rootCertId); + } + + @Override + public void rotationPrepareAck(String rootCertId, + String scmCertId, String scmId) throws IOException { + LOG.info("Received rotation prepare ack of root certificate {} from scm {}", + rootCertId, scmId); + + // Only leader count the acks + if (rotationManager.isRunning()) { + if (rotationManager.shouldSkipRootCert(rootCertId)) { + return; + } + if (rootCertId.equals(newRootCACertId.get())) { + newScmCertIdSet.add(scmCertId); + } + 
} + } + + @Override + public void rotationCommit(String rootCertId) + throws IOException { + LOG.info("Received rotation commit command of root certificate {}", + rootCertId); + if (rotationManager.shouldSkipRootCert(rootCertId)) { + return; + } + + // switch sub CA key and certs directory on disk + File currentSubCaDir = new File(secConfig.getLocation( + scmCertClient.getComponentName()).toString()); + File backupSubCaDir = new File(secConfig.getLocation( + scmCertClient.getComponentName() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX).toString()); + File newSubCaDir = new File(newSubCAPath); + + try { + // move current -> backup + Files.move(currentSubCaDir.toPath(), backupSubCaDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", currentSubCaDir, backupSubCaDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when move " + currentSubCaDir + " to " + + backupSubCaDir; + scm.shutDown(message); + } + + try { + // move new -> current + Files.move(newSubCaDir.toPath(), currentSubCaDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", newSubCaDir, currentSubCaDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when move " + newSubCaDir + " to " + + currentSubCaDir; + scm.shutDown(message); + } + + try { + String certId = newSubCACertId.get(); + LOG.info("Persistent new scm certificate {}", certId); + scm.getScmStorageConfig().setScmCertSerialId(certId); + scm.getScmStorageConfig().persistCurrentState(); + } catch (IOException e) { + LOG.error("Failed to persist new SCM certificate ID", e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when persist new SCM certificate ID"; + scm.shutDown(message); + } + } + + @Override + public void rotationCommitted(String 
rootCertId) + throws IOException { + LOG.info("Received rotation committed command of root certificate {}", + rootCertId); + if (rotationManager.shouldSkipRootCert(rootCertId)) { + return; + } + + // turn on new root CA certificate and sub CA certificate + scmCertClient.reloadKeyAndCertificate(newSubCACertId.get()); + + // cleanup backup directory + File backupSubCaDir = new File(secConfig.getLocation( + scmCertClient.getComponentName() + + HDDS_BACKUP_KEY_CERT_DIR_NAME_SUFFIX).toString()); + try { + FileUtils.deleteDirectory(backupSubCaDir); + } catch (IOException e) { + LOG.error("Failed to delete backup dir {}", backupSubCaDir, e); + } + + // reset state + newSubCACertId.set(null); + } + + @Override + public int rotationPrepareAcks() { + return newScmCertIdSet.size(); + } + + @Override + public void resetRotationPrepareAcks() { + newScmCertIdSet.clear(); + } + + @Override + public void setSubCACertId(String subCACertId) { + newSubCACertId.set(subCACertId); + LOG.info("Scm sub CA new certificate is {}", subCACertId); + } + + /** + * Builder for RootCARotationHandlerImpl. 
+ */ + public static class Builder { + private StorageContainerManager scm; + private SCMRatisServer ratisServer; + private RootCARotationManager rootCARotationManager; + + public Builder setRatisServer( + final SCMRatisServer scmRatisServer) { + this.ratisServer = scmRatisServer; + return this; + } + + public Builder setStorageContainerManager( + final StorageContainerManager storageContainerManager) { + scm = storageContainerManager; + return this; + } + + public Builder setRootCARotationManager( + final RootCARotationManager manager) { + rootCARotationManager = manager; + return this; + } + + public RootCARotationHandler build() { + final RootCARotationHandler impl = + new RootCARotationHandlerImpl(scm, rootCARotationManager); + + final SCMHAInvocationHandler invocationHandler + = new SCMHAInvocationHandler(CERT_ROTATE, impl, ratisServer); + + return (RootCARotationHandler) Proxy.newProxyInstance( + SCMHAInvocationHandler.class.getClassLoader(), + new Class[]{RootCARotationHandler.class}, + invocationHandler); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java index bbd883cb6053..61e032ea0b0e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java @@ -18,52 +18,117 @@ package org.apache.hadoop.hdds.scm.security; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ha.HASecurityUtils; import org.apache.hadoop.hdds.scm.ha.SCMContext; 
import org.apache.hadoop.hdds.scm.ha.SCMService; import org.apache.hadoop.hdds.scm.ha.SCMServiceException; +import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; +import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.SecurityConfig; -import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer; +import org.apache.hadoop.hdds.security.x509.certificate.authority.profile.DefaultCAProfile; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; +import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateSignRequest; +import org.apache.hadoop.hdds.security.x509.keys.HDDSKeyGenerator; +import org.apache.hadoop.hdds.security.x509.keys.KeyCodec; +import org.bouncycastle.cert.X509CertificateHolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.io.IOException; +import java.math.BigInteger; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.security.KeyPair; +import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.Duration; import java.time.LocalDateTime; import java.time.ZoneId; import java.util.Date; +import java.util.List; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; +import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DIR_NAME_DEFAULT; +import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.ROOT_CERTIFICATE_ID; +import static org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateStore.CertType.VALID_CERTS; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_ROOT_CA_COMPONENT_NAME; /** - * Root CA Rotation Manager is a service in SCM to control the CA rotation. + * Root CA Rotation Service is a service in SCM to control the CA rotation. */ public class RootCARotationManager implements SCMService { public static final Logger LOG = LoggerFactory.getLogger(RootCARotationManager.class); - private StorageContainerManager scm; + private final StorageContainerManager scm; + private final OzoneConfiguration ozoneConf; + private final SecurityConfig secConf; private final SCMContext scmContext; - private OzoneConfiguration ozoneConf; - private SecurityConfig secConf; - private ScheduledExecutorService executorService; - private Duration checkInterval; - private Duration renewalGracePeriod; - private Date timeOfDay; - private CertificateClient scmCertClient; - private AtomicBoolean isRunning = new AtomicBoolean(false); - private AtomicBoolean isScheduled = new AtomicBoolean(false); - private String threadName = this.getClass().getSimpleName(); + private final ScheduledExecutorService executorService; + private final Duration checkInterval; + private final Duration renewalGracePeriod; + private final Date timeOfDay; + private final Duration ackTimeout; + private final SCMCertificateClient scmCertClient; + private final AtomicBoolean isRunning = new AtomicBoolean(false); + private final AtomicBoolean isProcessing = new AtomicBoolean(false); + private final AtomicReference processStartTime = + new AtomicReference<>(); + private final String threadName = this.getClass().getSimpleName(); + private final String newCAComponent = 
SCM_ROOT_CA_COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + + HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; + + private RootCARotationHandler handler; + private final SequenceIdGenerator sequenceIdGen; + private ScheduledFuture rotationTask; + private ScheduledFuture waitAckTask; + private ScheduledFuture waitAckTimeoutTask; + private final RootCARotationMetrics metrics; /** * Constructs RootCARotationManager with the specified arguments. * * @param scm the storage container manager + * + * (1) (3)(4) + * ---------------------------> + * (2) scm2(Follower) + * (1) (3)(4) <--------------------------- + * ------- | + * | \ | + * -----> scm1(Leader) + * \ (2) | \ + * -------> \ (1) (3)(4) + * ---------------------------> + * (2) scm3(Follower) + * <--------------------------- + * + * + * (1) Rotation Prepare + * (2) Rotation Prepare Ack + * (3) Rotation Commit + * (4) Rotation Committed */ public RootCARotationManager(StorageContainerManager scm) { this.scm = scm; @@ -72,16 +137,24 @@ public RootCARotationManager(StorageContainerManager scm) { this.scmContext = scm.getScmContext(); checkInterval = secConf.getCaCheckInterval(); + ackTimeout = secConf.getCaAckTimeout(); + renewalGracePeriod = secConf.getRenewalGracePeriod(); timeOfDay = Date.from(LocalDateTime.parse(secConf.getCaRotationTimeOfDay()) .atZone(ZoneId.systemDefault()).toInstant()); - renewalGracePeriod = secConf.getRenewalGracePeriod(); executorService = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder().setNameFormat(threadName) .setDaemon(true).build()); - scmCertClient = scm.getScmCertificateClient(); + scmCertClient = (SCMCertificateClient) scm.getScmCertificateClient(); + sequenceIdGen = scm.getSequenceIdGen(); + handler = new RootCARotationHandlerImpl.Builder() + .setRatisServer(scm.getScmHAManager().getRatisServer()) + .setStorageContainerManager(scm) + .setRootCARotationManager(this) + .build(); scm.getSCMServiceManager().register(this); + metrics = RootCARotationMetrics.create(); } 
/** @@ -96,6 +169,17 @@ public void notifyStatusChanged() { if (!scmContext.isLeader() || scmContext.isInSafeMode()) { if (isRunning.compareAndSet(true, false)) { LOG.info("notifyStatusChanged: disable monitor task."); + if (rotationTask != null) { + rotationTask.cancel(true); + } + if (waitAckTask != null) { + waitAckTask.cancel(true); + } + if (waitAckTimeoutTask != null) { + waitAckTimeoutTask.cancel(true); + } + isProcessing.set(false); + processStartTime.set(null); } return; } @@ -130,8 +214,8 @@ public String getServiceName() { @Override public void start() throws SCMServiceException { - executorService.scheduleAtFixedRate( - new MonitorTask(scmCertClient), 0, checkInterval.toMillis(), - TimeUnit.MILLISECONDS); + executorService.scheduleAtFixedRate( + new MonitorTask(scmCertClient, scm.getScmStorageConfig()), + 0, checkInterval.toMillis(), TimeUnit.MILLISECONDS); LOG.info("Monitor task for root certificate {} is started with " + "interval {}.", scmCertClient.getCACertificate().getSerialNumber(), checkInterval); @@ -141,56 +225,80 @@ public boolean isRunning() { return isRunning.get(); } + public void scheduleSubCaRotationPrepareTask(String rootCertId) { + executorService.schedule(new SubCARotationPrepareTask(rootCertId), 0, + TimeUnit.MILLISECONDS); + } + + public boolean isRotationInProgress() { + return isProcessing.get(); + } + /** * Task to monitor certificate lifetime and start rotation if needed. */ public class MonitorTask implements Runnable { - private CertificateClient certClient; + private SCMCertificateClient certClient; + private SCMStorageConfig scmStorageConfig; - public MonitorTask(CertificateClient client) { + public MonitorTask(SCMCertificateClient client, + SCMStorageConfig storageConfig) { this.certClient = client; + this.scmStorageConfig = storageConfig; } @Override public void run() { Thread.currentThread().setName(threadName + (isRunning() ?
"-Active" : "-Inactive")); - if (!isRunning.get() || isScheduled.get()) { + if (!isRunning.get()) { return; } // Lock to protect the root CA certificate rotation process, // to make sure there is only one task is ongoing at one time. synchronized (RootCARotationManager.class) { - X509Certificate rootCACert = certClient.getCACertificate(); - Duration timeLeft = timeBefore2ExpiryGracePeriod(rootCACert); - if (timeLeft.isZero()) { - LOG.info("Root certificate {} has entered the 2 * expiry" + - " grace period({}).", rootCACert.getSerialNumber().toString(), - renewalGracePeriod); - // schedule root CA rotation task - LocalDateTime now = LocalDateTime.now(); - LocalDateTime timeToSchedule = LocalDateTime.of( - now.getYear(), now.getMonthValue(), now.getDayOfMonth(), - timeOfDay.getHours(), timeOfDay.getMinutes(), - timeOfDay.getSeconds()); - if (timeToSchedule.isBefore(now)) { - timeToSchedule = timeToSchedule.plusDays(1); - } - long delay = Duration.between(now, timeToSchedule).toMillis(); - if (timeToSchedule.isAfter(rootCACert.getNotAfter().toInstant() - .atZone(ZoneId.systemDefault()).toLocalDateTime())) { - LOG.info("Configured rotation time {} is after root" + - " certificate {} end time {}. 
Start the rotation immediately.", - timeToSchedule, rootCACert.getSerialNumber().toString(), - rootCACert.getNotAfter()); - delay = 0; - } + if (isProcessing.get()) { + LOG.info("Root certificate rotation task is already running."); + return; + } + try { + X509Certificate rootCACert = certClient.getCACertificate(); + Duration timeLeft = timeBefore2ExpiryGracePeriod(rootCACert); + if (timeLeft.isZero()) { + LOG.info("Root certificate {} has entered the 2 * expiry" + + " grace period({}).", + rootCACert.getSerialNumber().toString(), renewalGracePeriod); + // schedule root CA rotation task + LocalDateTime now = LocalDateTime.now(); + LocalDateTime timeToSchedule = LocalDateTime.of( + now.getYear(), now.getMonthValue(), now.getDayOfMonth(), + timeOfDay.getHours(), timeOfDay.getMinutes(), + timeOfDay.getSeconds()); + if (timeToSchedule.isBefore(now)) { + timeToSchedule = timeToSchedule.plusDays(1); + } + long delay = Duration.between(now, timeToSchedule).toMillis(); + if (timeToSchedule.isAfter(rootCACert.getNotAfter().toInstant() + .atZone(ZoneId.systemDefault()).toLocalDateTime())) { + LOG.info("Configured rotation time {} is after root" + + " certificate {} end time {}. 
Start the rotation " + + "immediately.", timeToSchedule, + rootCACert.getSerialNumber().toString(), + rootCACert.getNotAfter()); + delay = 0; + } - executorService.schedule(new RotationTask(certClient), delay, - TimeUnit.MILLISECONDS); - isScheduled.set(true); - LOG.info("Root certificate {} rotation task is scheduled with {} ms " - + "delay", rootCACert.getSerialNumber().toString(), delay); + rotationTask = executorService.schedule( + new RotationTask(certClient, scmStorageConfig), delay, + TimeUnit.MILLISECONDS); + isProcessing.set(true); + metrics.incrTotalRotationNum(); + LOG.info("Root certificate {} rotation task is scheduled with {} ms" + + " delay", rootCACert.getSerialNumber().toString(), delay); + } + } catch (Throwable e) { + LOG.error("Error while scheduling root CA rotation task", e); + scm.shutDown("Error while scheduling root CA rotation task"); } } } @@ -200,16 +308,20 @@ public void run() { * Task to rotate root certificate. */ public class RotationTask implements Runnable { - private CertificateClient certClient; + private SCMCertificateClient certClient; + private SCMStorageConfig scmStorageConfig; - public RotationTask(CertificateClient client) { + public RotationTask(SCMCertificateClient client, + SCMStorageConfig storageConfig) { this.certClient = client; + this.scmStorageConfig = storageConfig; } @Override public void run() { if (!isRunning.get()) { - isScheduled.set(false); + isProcessing.set(false); + processStartTime.set(null); return; } // Lock to protect the root CA certificate rotation process, @@ -218,25 +330,119 @@ public void run() { // 1. generate new root CA keys and certificate, persist to disk // 2. start new Root CA server // 3. send scm Sub-CA rotation preparation request through RATIS - // 4. send scm Sub-CA rotation commit request through RATIS - // 5. send scm Sub-CA rotation finish request through RATIS + // 4. wait for all SCM to ack + // 5. send scm Sub-CA rotation commit request through RATIS + // 6. 
send scm Sub-CA rotation finish request through RATIS synchronized (RootCARotationManager.class) { X509Certificate rootCACert = certClient.getCACertificate(); Duration timeLeft = timeBefore2ExpiryGracePeriod(rootCACert); if (timeLeft.isZero()) { LOG.info("Root certificate {} rotation is started.", rootCACert.getSerialNumber().toString()); - // TODO: start the root CA rotation process + processStartTime.set(System.nanoTime()); + // generate new root key pair and persist new root certificate + CertificateServer newRootCAServer = null; + BigInteger newId = BigInteger.ONE; + try { + newId = new BigInteger(String.valueOf( + sequenceIdGen.getNextId(ROOT_CERTIFICATE_ID))); + newRootCAServer = + HASecurityUtils.initializeRootCertificateServer(secConf, + scm.getCertificateStore(), scmStorageConfig, newId, + new DefaultCAProfile(), newCAComponent); + } catch (Throwable e) { + LOG.error("Error while generating new root CA certificate " + + "under {}", newCAComponent, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new root CA certificate " + + "under " + newCAComponent; + cleanupAndStop(message); + scm.shutDown(message); + } + + String newRootCertId = ""; + X509CertificateHolder newRootCertificate; + try { + // prevent findbugs false alert + if (newRootCAServer == null) { + throw new Exception("New root CA server should not be null"); + } + newRootCertificate = newRootCAServer.getCACertificate(); + newRootCertId = newRootCertificate.getSerialNumber().toString(); + Preconditions.checkState(newRootCertId.equals(newId.toString()), + "Root certificate doesn't match, " + + "expected:" + newId + ", fetched:" + newRootCertId); + scm.getSecurityProtocolServer() + .setRootCertificateServer(newRootCAServer); + + + if (isRunning()) { + checkInterruptState(); + handler.rotationPrepare(newRootCertId); + LOG.info("Send out sub CA rotation prepare request for new " + + "root certificate {}", newRootCertId); + } else { + LOG.info("SCM is 
not leader anymore. Delete the in-progress " + + "root CA directory"); + cleanupAndStop("SCM is not leader anymore"); + return; + } + } catch (Exception e) { + LOG.error("Error while sending rotation prepare request", e); + cleanupAndStop("Error while sending rotation prepare request"); + return; + } + + // schedule task to wait for prepare acks + waitAckTask = executorService.scheduleAtFixedRate( + new WaitSubCARotationPrepareAckTask(newRootCertificate), + 1, 1, TimeUnit.SECONDS); + waitAckTimeoutTask = executorService.schedule(() -> { + // No enough acks are received + waitAckTask.cancel(true); + String msg = "Failed to receive all acks of rotation prepare" + + " after " + ackTimeout + ", received " + + handler.rotationPrepareAcks() + " acks"; + cleanupAndStop(msg); + }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS); } else { LOG.warn("Root certificate {} hasn't entered the 2 * expiry" + " grace period {}. Skip root certificate rotation this time.", rootCACert.getSerialNumber().toString(), renewalGracePeriod); + isProcessing.set(false); + processStartTime.set(null); } } - isScheduled.set(false); } } + private void checkInterruptState() { + // check whether thread is interrupted(cancelled) before + // time-consuming ratis request + if (Thread.currentThread().isInterrupted()) { + cleanupAndStop(this.getClass().getSimpleName() + + " is interrupted"); + return; + } + } + + private void cleanupAndStop(String reason) { + try { + scm.getSecurityProtocolServer().setRootCertificateServer(null); + + FileUtils.deleteDirectory(new File(scmCertClient.getSecurityConfig() + .getLocation(newCAComponent).toString())); + LOG.info("In-progress root CA directory {} is deleted for '{}'", + scmCertClient.getSecurityConfig().getLocation(newCAComponent), + reason); + } catch (IOException ex) { + LOG.error("Error when deleting in-progress root CA directory {} for {}", + scmCertClient.getSecurityConfig().getLocation(newCAComponent), reason, + ex); + } + isProcessing.set(false); + 
processStartTime.set(null); + } /** * Calculate time before root certificate will enter 2 * expiry grace period. * @return Duration, time before certificate enters the 2 * grace @@ -255,19 +461,254 @@ public Duration timeBefore2ExpiryGracePeriod(X509Certificate certificate) { } } + /** + * Task to generate sub-ca key and certificate. + */ + public class SubCARotationPrepareTask implements Runnable { + private String rootCACertId; + + public SubCARotationPrepareTask(String newRootCertId) { + this.rootCACertId = newRootCertId; + } + + @Override + public void run() { + // Lock to protect the sub CA certificate rotation preparation process, + // to make sure there is only one task is ongoing at one time. + // Sub CA rotation preparation steps: + // 1. generate new sub CA keys + // 2. send CSR to leader SCM + // 3. wait CSR response and persist the certificate to disk + synchronized (RootCARotationManager.class) { + try { + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - started.", + rootCACertId); + + if (shouldSkipRootCert(rootCACertId)) { + // Send ack to rotationPrepare request + sendRotationPrepareAck(rootCACertId, + scmCertClient.getCertificate().getSerialNumber().toString()); + return; + } + + SecurityConfig securityConfig = + scmCertClient.getSecurityConfig(); + String progressComponent = SCMCertificateClient.COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX + + HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX; + final String newSubCAProgressPath = + securityConfig.getLocation(progressComponent).toString(); + final String newSubCAPath = securityConfig.getLocation( + SCMCertificateClient.COMPONENT_NAME + + HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX).toString(); + + File newProgressDir = new File(newSubCAProgressPath); + File newDir = new File(newSubCAPath); + try { + FileUtils.deleteDirectory(newProgressDir); + FileUtils.deleteDirectory(newDir); + Files.createDirectories(newProgressDir.toPath()); + } catch (IOException e) { + LOG.error("Failed to delete and create {}, 
or delete {}", + newProgressDir, newDir, e); + String message = "Terminate SCM, encounter IO exception(" + + e.getMessage() + ") when deleting and create directory"; + scm.shutDown(message); + } + + // Generate key + Path keyDir = securityConfig.getKeyLocation(progressComponent); + KeyCodec keyCodec = new KeyCodec(securityConfig, keyDir); + KeyPair newKeyPair = null; + try { + HDDSKeyGenerator keyGenerator = + new HDDSKeyGenerator(securityConfig); + newKeyPair = keyGenerator.generateKey(); + keyCodec.writePublicKey(newKeyPair.getPublic()); + keyCodec.writePrivateKey(newKeyPair.getPrivate()); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "scm key generated.", rootCACertId); + } catch (Exception e) { + LOG.error("Failed to generate key under {}", newProgressDir, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new key under " + + newProgressDir; + scm.shutDown(message); + } + + checkInterruptState(); + // Get certificate signed + String newCertSerialId = ""; + try { + CertificateSignRequest.Builder csrBuilder = + scmCertClient.getCSRBuilder(); + csrBuilder.setKey(newKeyPair); + newCertSerialId = scmCertClient.signAndStoreCertificate( + csrBuilder.build(), + Paths.get(newSubCAProgressPath, HDDS_X509_DIR_NAME_DEFAULT), + true); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "scm certificate {} signed.", rootCACertId, newCertSerialId); + } catch (Exception e) { + LOG.error("Failed to generate certificate under {}", + newProgressDir, e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when generating new certificate " + + newProgressDir; + scm.shutDown(message); + } + + // move dir from *-next-progress to *-next + try { + Files.move(newProgressDir.toPath(), newDir.toPath(), + StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + LOG.error("Failed to move {} to {}", + newSubCAProgressPath, newSubCAPath, e); + 
String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when moving " + newSubCAProgressPath + + " to " + newSubCAPath; + scm.shutDown(message); + } + + // Send ack to rotationPrepare request + checkInterruptState(); + sendRotationPrepareAck(rootCACertId, newCertSerialId); + } catch (Throwable e) { + LOG.error("Unexpected error happen", e); + scm.shutDown("Unexpected error happen, " + e.getMessage()); + } + } + } + } + + private void sendRotationPrepareAck(String newRootCACertId, + String newSubCACertId) { + // Send ack to rotationPrepare request + try { + handler.rotationPrepareAck(newRootCACertId, newSubCACertId, + scm.getScmId()); + LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " + + "rotation prepare ack sent out, new scm certificate {}", + newRootCACertId, newSubCACertId); + } catch (Exception e) { + LOG.error("Failed to send ack to rotationPrepare request", e); + String message = "Terminate SCM, encounter exception(" + + e.getMessage() + ") when sending out rotationPrepare ack"; + scm.shutDown(message); + } + + handler.setSubCACertId(newSubCACertId); + } + + /** + * Task to wait the all acks of prepare request. + */ + public class WaitSubCARotationPrepareAckTask implements Runnable { + private String rootCACertId; + private X509CertificateHolder rootCACertHolder; + + public WaitSubCARotationPrepareAckTask( + X509CertificateHolder rootCertHolder) { + this.rootCACertHolder = rootCertHolder; + this.rootCACertId = rootCertHolder.getSerialNumber().toString(); + } + + @Override + public void run() { + checkInterruptState(); + if (!isRunning()) { + LOG.info("SCM is not leader anymore. 
Delete the in-progress " + + "root CA directory"); + cleanupAndStop("SCM is not leader anymore"); + return; + } + + synchronized (RootCARotationManager.class) { + int numFromHADetails = + scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1; + int numFromRatisServer = scm.getScmHAManager().getRatisServer() + .getDivision().getRaftConf().getCurrentPeers().size(); + LOG.info("numFromHADetails {}, numFromRatisServer {}", + numFromHADetails, numFromRatisServer); + if (handler.rotationPrepareAcks() == numFromRatisServer) { + // all acks are received. + try { + waitAckTimeoutTask.cancel(true); + handler.rotationCommit(rootCACertId); + handler.rotationCommitted(rootCACertId); + + metrics.incrSuccessRotationNum(); + long timeTaken = System.nanoTime() - processStartTime.get(); + metrics.setSuccessTimeInNs(timeTaken); + processStartTime.set(null); + + // save root certificate to certStore + try { + if (scm.getCertificateStore().getCertificateByID( + rootCACertHolder.getSerialNumber(), VALID_CERTS) == null) { + LOG.info("Persist root certificate {} to cert store", + rootCACertId); + scm.getCertificateStore().storeValidCertificate( + rootCACertHolder.getSerialNumber(), + CertificateCodec.getX509Certificate(rootCACertHolder), + HddsProtos.NodeType.SCM); + } + } catch (CertificateException | IOException e) { + LOG.error("Failed to save root certificate {} to cert store", + rootCACertId); + scm.shutDown("Failed to save root certificate to cert store"); + } + + // reset state + handler.resetRotationPrepareAcks(); + String msg = "Root certificate " + rootCACertId + + " rotation is finished successfully after " + timeTaken + " ns"; + cleanupAndStop(msg); + } catch (Throwable e) { + LOG.error("Execution error", e); + handler.resetRotationPrepareAcks(); + cleanupAndStop("Execution error, " + e.getMessage()); + } finally { + waitAckTask.cancel(true); + } + } + } + } + } + /** * Stops scheduled monitor task. 
*/ @Override public void stop() { - try { - executorService.shutdown(); - if (!executorService.awaitTermination(3, TimeUnit.SECONDS)) { - executorService.shutdownNow(); - } - } catch (InterruptedException ie) { - // Ignore, we don't really care about the failure. - Thread.currentThread().interrupt(); + if (metrics != null) { + metrics.unRegister(); + } + + if (executorService != null) { + executorService.shutdownNow(); + } + } + + @VisibleForTesting + public void setRootCARotationHandler(RootCARotationHandler newHandler) { + handler = newHandler; + } + + public boolean shouldSkipRootCert(String newRootCertId) throws IOException { + List<X509Certificate> scmCertChain = scmCertClient.getTrustChain(); + Preconditions.checkArgument(scmCertChain.size() > 1); + X509Certificate rootCert = scmCertChain.get(scmCertChain.size() - 1); + if (rootCert.getSerialNumber().compareTo(new BigInteger(newRootCertId)) + >= 0) { + // usually this will happen when reapply RAFT log during SCM start + LOG.info("Sub CA certificate {} is already signed by root " + + "certificate {} or a newer root certificate.", + scmCertChain.get(0).getSerialNumber().toString(), newRootCertId); + return true; } + return false; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java new file mode 100644 index 000000000000..fcd52d0ebd76 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationMetrics.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.hdds.scm.security; + +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; + +/** + * Metrics related to Root CA rotation in SCM. + */ +@Metrics(name = "Root CA Rotation Metrics", about = "Metrics related to " + + "Root CA rotation in SCM", context = "SCM") +public final class RootCARotationMetrics { + public static final String NAME = + RootCARotationMetrics.class.getSimpleName(); + + private final MetricsSystem ms; + + @Metric(about = "Number of total tries, both successes and failures.") + private MutableCounterLong numTotalRotation; + + @Metric(about = "Number of successful rotations") + private MutableCounterLong numSuccessRotation; + + @Metric(about = "Time(nano second) spent on last successful rotation") + private MutableGaugeLong successTimeInNs; + + /** + * Create and register metrics named {@link RootCARotationMetrics#NAME} + * for {@link RootCARotationManager}. 
+ * + * @return {@link RootCARotationMetrics} + */ + public static RootCARotationMetrics create() { + MetricsSystem metricsSystem = DefaultMetricsSystem.instance(); + return metricsSystem.register(NAME, "Root CA Rotation Metrics", + new RootCARotationMetrics(metricsSystem)); + } + + public void unRegister() { + MetricsSystem metricsSystem = DefaultMetricsSystem.instance(); + metricsSystem.unregisterSource(NAME); + } + + private RootCARotationMetrics(MetricsSystem ms) { + this.ms = ms; + } + + public MutableGaugeLong getSuccessTimeInNs() { + return successTimeInNs; + } + + public void setSuccessTimeInNs(long time) { + this.successTimeInNs.set(time); + } + + public void incrSuccessRotationNum() { + this.numSuccessRotation.incr(); + } + + public void incrTotalRotationNum() { + this.numTotalRotation.incr(); + } + + public long getSuccessRotationNum() { + return this.numSuccessRotation.value(); + } + + public long getTotalRotationNum() { + return this.numTotalRotation.value(); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java index f7fca6022555..b3dc7522b823 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMCertStore.java @@ -118,6 +118,8 @@ public void storeValidScmCertificate(BigInteger serialID, scmMetadataStore.getValidCertsTable().putWithBatch(batchOperation, serialID, certificate); scmMetadataStore.getStore().commitBatchOperation(batchOperation); + LOG.info("Scm certificate {} for {} is stored", serialID, + certificate.getSubjectDN()); } finally { lock.unlock(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 
2c12656b1eb5..a344ad56fd1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -802,6 +802,14 @@ public void transferLeadership(String newLeaderId) if (!SCMHAUtils.isSCMHAEnabled(getScm().getConfiguration())) { throw new SCMException("SCM HA not enabled.", ResultCodes.INTERNAL_ERROR); } + + if (scm.getRootCARotationManager() != null && + scm.getRootCARotationManager().isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + + " Please try the operation later again."), + ResultCodes.CA_ROTATION_IN_PROGRESS); + } + boolean auditSuccess = true; final Map auditMap = Maps.newHashMap(); auditMap.put("newLeaderId", newLeaderId); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java index c576875603a2..6fd44a4c0f76 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMSecurityProtocolServer.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -61,6 +62,7 @@ import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.symmetric.ManagedSecretKey; import org.apache.hadoop.hdds.security.symmetric.SecretKeyManager; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; import org.apache.hadoop.hdds.utils.HddsServerUtil; import 
org.apache.hadoop.hdds.scm.ScmConfig; @@ -100,30 +102,31 @@ public class SCMSecurityProtocolServer implements SCMSecurityProtocol, private static final Logger LOGGER = LoggerFactory .getLogger(SCMSecurityProtocolServer.class); - private final CertificateServer rootCertificateServer; + private CertificateServer rootCertificateServer; private final CertificateServer scmCertificateServer; - private final List rootCACertificateList; private final RPC.Server rpcServer; // HADOOP RPC SERVER private final SCMUpdateServiceGrpcServer grpcUpdateServer; // gRPC SERVER private final InetSocketAddress rpcAddress; private final ProtocolMessageMetrics metrics; private final ProtocolMessageMetrics secretKeyMetrics; private final StorageContainerManager storageContainerManager; + private final CertificateClient scmCertificateClient; // SecretKey may not be enabled when neither block token nor container // token is enabled. private final SecretKeyManager secretKeyManager; SCMSecurityProtocolServer(OzoneConfiguration conf, - CertificateServer rootCertificateServer, + @Nullable CertificateServer rootCertificateServer, CertificateServer scmCertificateServer, - List rootCACertList, StorageContainerManager scm, + CertificateClient scmCertClient, + StorageContainerManager scm, @Nullable SecretKeyManager secretKeyManager) throws IOException { this.storageContainerManager = scm; this.rootCertificateServer = rootCertificateServer; this.scmCertificateServer = scmCertificateServer; - this.rootCACertificateList = rootCACertList; + this.scmCertificateClient = scmCertClient; this.secretKeyManager = secretKeyManager; final int handlerCount = conf.getInt(ScmConfigKeys.OZONE_SCM_SECURITY_HANDLER_COUNT_KEY, @@ -187,6 +190,12 @@ public String getDataNodeCertificate( LOGGER.info("Processing CSR for dn {}, UUID: {}", dnDetails.getHostName(), dnDetails.getUuid()); Objects.requireNonNull(dnDetails); + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress()) { + throw new 
SCMException(("Root CA and Sub CA rotation is in-progress." + + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } return getEncodedCertToString(certSignReq, NodeType.DATANODE); } @@ -198,6 +207,12 @@ public String getCertificate( nodeDetails.getNodeType(), nodeDetails.getHostName(), nodeDetails.getUuid()); Objects.requireNonNull(nodeDetails); + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } return getEncodedCertToString(certSignReq, nodeDetails.getNodeType()); } @@ -236,9 +251,12 @@ private void validateSecretKeyStatus() throws SCMSecretKeyException { @Override public synchronized List getAllRootCaCertificates() throws IOException { - List pemEncodedList = - new ArrayList<>(rootCACertificateList.size()); - for (X509Certificate cert : rootCACertificateList) { + List pemEncodedList = new ArrayList<>(); + Set certList = + scmCertificateClient.getAllRootCaCerts().size() == 0 ? + scmCertificateClient.getAllCaCerts() : + scmCertificateClient.getAllRootCaCerts(); + for (X509Certificate cert : certList) { pemEncodedList.add(getPEMEncodedString(cert)); } return pemEncodedList; @@ -257,20 +275,39 @@ public String getOMCertificate(OzoneManagerDetailsProto omDetails, LOGGER.info("Processing CSR for om {}, UUID: {}", omDetails.getHostName(), omDetails.getUuid()); Objects.requireNonNull(omDetails); + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } return getEncodedCertToString(certSignReq, NodeType.OM); } - /** * Get signed certificate for SCM Node. * * @param scmNodeDetails - SCM Node Details. 
- * @param certSignReq - Certificate signing request. - * @return String - SCM signed pem encoded certificate. + * @param certSignReq - Certificate signing request. + * @return String - SCM signed pem encoded certificate. */ @Override public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, String certSignReq) throws IOException { + return getSCMCertificate(scmNodeDetails, certSignReq, false); + } + + /** + * Get signed certificate for SCM Node. + * + * @param scmNodeDetails - SCM Node Details. + * @param certSignReq - Certificate signing request. + * @param isRenew - if SCM is renewing certificate or not. + * @return String - SCM signed pem encoded certificate. + */ + @Override + public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + String certSignReq, boolean isRenew) throws IOException { Objects.requireNonNull(scmNodeDetails); // Check clusterID if (!storageContainerManager.getClusterId().equals( @@ -280,6 +317,13 @@ public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, + storageContainerManager.getClusterId()); } + if (storageContainerManager.getRootCARotationManager() + .isRotationInProgress() && !isRenew) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." + + " Please try the operation later again."), + SCMException.ResultCodes.CA_ROTATION_IN_PROGRESS); + } + LOGGER.info("Processing CSR for scm {}, nodeId: {}", scmNodeDetails.getHostName(), scmNodeDetails.getScmNodeId()); @@ -293,8 +337,8 @@ public String getSCMCertificate(ScmNodeDetailsProto scmNodeDetails, * @return String - SCM signed pem encoded certificate. 
* @throws IOException */ - private String getEncodedCertToString(String certSignReq, NodeType nodeType) - throws IOException { + private synchronized String getEncodedCertToString(String certSignReq, + NodeType nodeType) throws IOException { Future future; if (nodeType == NodeType.SCM && rootCertificateServer != null) { future = rootCertificateServer.requestCertificate(certSignReq, @@ -417,25 +461,18 @@ public List listCACertificate() throws IOException { @Override public synchronized String getRootCACertificate() throws IOException { LOGGER.debug("Getting Root CA certificate."); - X509Certificate lastExpiringRootCa = null; - if (storageContainerManager.getScmStorageConfig() - .checkPrimarySCMIdInitialized()) { - Date lastCertDate = new Date(0); - for (X509Certificate cert : rootCACertificateList) { - if (cert.getNotAfter().after(lastCertDate)) { - lastCertDate = cert.getNotAfter(); - lastExpiringRootCa = cert; - } + if (rootCertificateServer != null) { + try { + return CertificateCodec.getPEMEncodedString( + rootCertificateServer.getCACertificate()); + } catch (CertificateException e) { + LOGGER.error("Failed to get root CA certificate", e); + throw new IOException("Failed to get root CA certificate", e); } } - if (lastExpiringRootCa == null) { - return null; - } - return CertificateCodec.getPEMEncodedString(lastExpiringRootCa); - } - public synchronized void addNewRootCa(X509Certificate rootCaCertToAdd) { - rootCACertificateList.add(rootCaCertToAdd); + return CertificateCodec.getPEMEncodedString( + scmCertificateClient.getCACertificate()); } @Override @@ -510,13 +547,16 @@ public void join() throws InterruptedException { getRpcServer().join(); LOGGER.info("Join gRPC server for SCMSecurityProtocolServer."); getGrpcUpdateServer().join(); - } - public CertificateServer getRootCertificateServer() { + public synchronized CertificateServer getRootCertificateServer() { return rootCertificateServer; } + public synchronized void setRootCertificateServer( + 
CertificateServer newServer) { + this.rootCertificateServer = newServer; + } public CertificateServer getScmCertificateServer() { return scmCertificateServer; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index b3985c854ff5..cbf4f9618115 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -572,8 +572,10 @@ private void initializeCertificateClient() throws IOException { SCMSecurityProtocolClientSideTranslatorPB scmSecurityClient = getScmSecurityClientWithMaxRetry(configuration, getCurrentUser()); scmCertificateClient = new SCMCertificateClient( - securityConfig, scmSecurityClient, - scmStorageConfig.getScmCertSerialId()); + securityConfig, scmSecurityClient, scmStorageConfig.getScmId(), + scmStorageConfig.getClusterID(), + scmStorageConfig.getScmCertSerialId(), + getScmAddress(scmHANodeDetails, configuration).getHostName()); } } @@ -854,14 +856,14 @@ private void initializeCAnSecurityProtocol(OzoneConfiguration conf, final CertificateServer rootCertificateServer; // Start specific instance SCM CA server. - String subject = SCM_SUB_CA_PREFIX + + String subject = String.format(SCM_SUB_CA_PREFIX, System.nanoTime()) + InetAddress.getLocalHost().getHostName(); if (configurator.getCertificateServer() != null) { scmCertificateServer = configurator.getCertificateServer(); } else { scmCertificateServer = new DefaultCAServer(subject, scmStorageConfig.getClusterID(), scmStorageConfig.getScmId(), - certificateStore, new DefaultCAProfile(), + certificateStore, null, new DefaultCAProfile(), scmCertificateClient.getComponentName()); // INTERMEDIARY_CA which issues certs to DN and OM. 
scmCertificateServer.init(new SecurityConfig(configuration), @@ -896,20 +898,12 @@ certificateStore, new DefaultCAProfile(), SecretKeyManager secretKeyManager = secretKeyManagerService != null ? secretKeyManagerService.getSecretKeyManager() : null; - X509Certificate rootCaCert = scmCertificateClient == null ? null : - scmCertificateClient.getRootCACertificate() != null ? - scmCertificateClient.getRootCACertificate() : - scmCertificateClient.getCACertificate(); - List rootCaList = new ArrayList<>(); - if (rootCaCert != null) { - rootCaList.add(rootCaCert); - } // We need to pass getCACertificate as rootCA certificate, // as for SCM CA is root-CA. securityProtocolServer = new SCMSecurityProtocolServer(conf, rootCertificateServer, scmCertificateServer, - rootCaList, + scmCertificateClient, this, secretKeyManager); @@ -1160,8 +1154,7 @@ public static boolean scmBootstrap(OzoneConfiguration conf) scmStorageConfig.getScmId()); // Initialize security if security is enabled later. - initializeSecurityIfNeeded( - conf, scmhaNodeDetails, scmStorageConfig, false); + initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, false); return true; } @@ -1185,9 +1178,7 @@ public static boolean scmBootstrap(OzoneConfiguration conf) } // Initialize security if security is enabled later. 
- initializeSecurityIfNeeded( - conf, scmhaNodeDetails, scmStorageConfig, false); - + initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, false); } else { try { scmStorageConfig.setClusterId(fetchedId); @@ -1200,7 +1191,7 @@ public static boolean scmBootstrap(OzoneConfiguration conf) if (OzoneSecurityUtil.isSecurityEnabled(conf)) { HASecurityUtils.initializeSecurity(scmStorageConfig, config, - getScmAddress(scmhaNodeDetails, conf), false); + selfHostName, false); } scmStorageConfig.setPrimaryScmNodeId(scmInfo.getScmId()); scmStorageConfig.setSCMHAFlag(true); @@ -1222,14 +1213,13 @@ public static boolean scmBootstrap(OzoneConfiguration conf) * ScmStorageConfig does not have certificate serial id. */ private static void initializeSecurityIfNeeded( - OzoneConfiguration conf, SCMHANodeDetails scmhaNodeDetails, - SCMStorageConfig scmStorageConfig, boolean isPrimordial) - throws IOException { + OzoneConfiguration conf, SCMStorageConfig scmStorageConfig, + String scmHostname, boolean isPrimordial) throws IOException { // Initialize security if security is enabled later. if (OzoneSecurityUtil.isSecurityEnabled(conf) && scmStorageConfig.getScmCertSerialId() == null) { HASecurityUtils.initializeSecurity(scmStorageConfig, conf, - getScmAddress(scmhaNodeDetails, conf), isPrimordial); + scmHostname, isPrimordial); scmStorageConfig.forceInitialize(); LOG.info("SCM unsecure cluster is converted to secure cluster. 
" + "Persisted SCM Certificate SerialID {}", @@ -1272,7 +1262,7 @@ public static boolean scmInit(OzoneConfiguration conf, if (OzoneSecurityUtil.isSecurityEnabled(conf)) { HASecurityUtils.initializeSecurity(scmStorageConfig, conf, - getScmAddress(haDetails, conf), true); + getScmAddress(haDetails, conf).getHostName(), true); } // Ensure scmRatisServer#initialize() is called post scm storage @@ -1319,7 +1309,7 @@ public static boolean scmInit(OzoneConfiguration conf, final boolean isSCMHAEnabled = scmStorageConfig.isSCMHAEnabled(); // Initialize security if security is enabled later. - initializeSecurityIfNeeded(conf, haDetails, scmStorageConfig, true); + initializeSecurityIfNeeded(conf, scmStorageConfig, selfHostName, true); if (SCMHAUtils.isSCMHAEnabled(conf) && !isSCMHAEnabled) { SCMRatisServerImpl.initialize(scmStorageConfig.getClusterID(), @@ -1484,6 +1474,10 @@ public String getDatanodeRpcPort() { return addr == null ? "0" : Integer.toString(addr.getPort()); } + public CertificateStore getCertificateStore() { + return certificateStore; + } + /** * Start service. */ @@ -2148,6 +2142,13 @@ public boolean removePeerFromHARing(String scmId) throw new IOException("Cannot remove current leader."); } + if (rootCARotationManager != null && + rootCARotationManager.isRotationInProgress()) { + throw new SCMException(("Root CA and Sub CA rotation is in-progress." 
+ + " Please try the operation later again."), + ResultCodes.CA_ROTATION_IN_PROGRESS); + } + Preconditions.checkNotNull(getScmHAManager().getRatisServer() .getDivision().getGroup()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java index 636641999acc..ed3ce75874c3 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/security/TestRootCARotationManager.java @@ -22,10 +22,15 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.container.TestContainerManagerImpl; import org.apache.hadoop.hdds.scm.ha.SCMContext; +import org.apache.hadoop.hdds.scm.ha.SCMHAManager; +import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.ha.SCMServiceManager; +import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; +import org.apache.hadoop.hdds.scm.server.SCMSecurityProtocolServer; +import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.SecurityConfig; -import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.SCMCertificateClient; import org.apache.hadoop.hdds.security.x509.certificate.utils.SelfSignedCertificate; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.ozone.test.GenericTestUtils; @@ -38,7 +43,9 @@ import java.io.File; import java.io.IOException; +import java.math.BigInteger; import java.security.KeyPair; +import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.Duration; import java.time.LocalDateTime; @@ -48,6 +55,7 @@ import 
java.util.UUID; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_TIME_OF_DAY; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; @@ -63,15 +71,25 @@ public class TestRootCARotationManager { private OzoneConfiguration ozoneConfig; + private SecurityConfig securityConfig; private RootCARotationManager rootCARotationManager; private StorageContainerManager scm; - private CertificateClient scmCertClient; + private SCMCertificateClient scmCertClient; private SCMServiceManager scmServiceManager; + private SCMHAManager scmhaManager; private SCMContext scmContext; + private SequenceIdGenerator sequenceIdGenerator; + private SCMStorageConfig scmStorageConfig; + private SCMSecurityProtocolServer scmSecurityProtocolServer; + private RootCARotationHandlerImpl handler; private File testDir; + private String cID = UUID.randomUUID().toString(); + private String scmID = UUID.randomUUID().toString(); + private BigInteger certID = new BigInteger("1"); @BeforeEach - public void init() throws IOException, TimeoutException { + public void init() throws IOException, TimeoutException, + CertificateException { ozoneConfig = new OzoneConfiguration(); testDir = GenericTestUtils.getTestDir( TestContainerManagerImpl.class.getSimpleName() + UUID.randomUUID()); @@ -80,14 +98,33 @@ public void init() throws IOException, TimeoutException { ozoneConfig .setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); scm = Mockito.mock(StorageContainerManager.class); - scmCertClient = Mockito.mock(CertificateClient.class); + securityConfig = new SecurityConfig(ozoneConfig); + scmCertClient = new SCMCertificateClient(securityConfig, null, scmID, cID, + certID.toString(), "localhost"); scmServiceManager = 
new SCMServiceManager(); scmContext = Mockito.mock(SCMContext.class); + scmhaManager = Mockito.mock(SCMHAManager.class); + sequenceIdGenerator = Mockito.mock(SequenceIdGenerator.class); + scmStorageConfig = new SCMStorageConfig(ozoneConfig); + scmStorageConfig.setScmId(scmID); + scmStorageConfig.setClusterId(cID); + scmSecurityProtocolServer = Mockito.mock(SCMSecurityProtocolServer.class); + handler = Mockito.mock(RootCARotationHandlerImpl.class); when(scmContext.isLeader()).thenReturn(true); when(scm.getConfiguration()).thenReturn(ozoneConfig); when(scm.getScmCertificateClient()).thenReturn(scmCertClient); when(scm.getScmContext()).thenReturn(scmContext); when(scm.getSCMServiceManager()).thenReturn(scmServiceManager); + when(scm.getScmHAManager()).thenReturn(scmhaManager); + when(scmhaManager.getRatisServer()) + .thenReturn(Mockito.mock(SCMRatisServerImpl.class)); + when(scm.getSequenceIdGen()).thenReturn(sequenceIdGenerator); + when(sequenceIdGenerator.getNextId(Mockito.anyString())).thenReturn(2L); + when(scm.getScmStorageConfig()).thenReturn(scmStorageConfig); + when(scm.getSecurityProtocolServer()).thenReturn(scmSecurityProtocolServer); + Mockito.doNothing().when(scmSecurityProtocolServer) + .setRootCertificateServer(Mockito.anyObject()); + Mockito.doNothing().when(handler).rotationPrepare(Mockito.anyString()); } @AfterEach @@ -147,6 +184,7 @@ public void testProperties() { public void testRotationOnSchedule() throws Exception { ozoneConfig.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT2S"); ozoneConfig.set(HDDS_X509_RENEW_GRACE_DURATION, "PT15S"); + ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT2S"); Date date = Calendar.getInstance().getTime(); date.setSeconds(date.getSeconds() + 10); ozoneConfig.set(HDDS_X509_CA_ROTATION_TIME_OF_DAY, @@ -156,9 +194,10 @@ public void testRotationOnSchedule() throws Exception { X509Certificate cert = generateX509Cert(ozoneConfig, LocalDateTime.now(), Duration.ofSeconds(35)); - 
when(scmCertClient.getCACertificate()).thenReturn(cert); + scmCertClient.setCACertificate(cert); rootCARotationManager = new RootCARotationManager(scm); + rootCARotationManager.setRootCARotationHandler(handler); GenericTestUtils.LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(RootCARotationManager.LOG); GenericTestUtils.setLogLevel(RootCARotationManager.LOG, INFO); @@ -178,6 +217,7 @@ public void testRotationOnSchedule() throws Exception { public void testRotationImmediately() throws Exception { ozoneConfig.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT2S"); ozoneConfig.set(HDDS_X509_RENEW_GRACE_DURATION, "PT15S"); + ozoneConfig.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT2S"); Date date = Calendar.getInstance().getTime(); date.setMinutes(date.getMinutes() + 5); ozoneConfig.set(HDDS_X509_CA_ROTATION_TIME_OF_DAY, @@ -187,9 +227,10 @@ public void testRotationImmediately() throws Exception { X509Certificate cert = generateX509Cert(ozoneConfig, LocalDateTime.now(), Duration.ofSeconds(35)); - when(scmCertClient.getCACertificate()).thenReturn(cert); + scmCertClient.setCACertificate(cert); rootCARotationManager = new RootCARotationManager(scm); + rootCARotationManager.setRootCARotationHandler(handler); GenericTestUtils.LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(RootCARotationManager.LOG); GenericTestUtils.setLogLevel(RootCARotationManager.LOG, INFO); @@ -213,11 +254,12 @@ private X509Certificate generateX509Cert( SelfSignedCertificate.newBuilder() .setBeginDate(start) .setEndDate(end) - .setClusterID("cluster") - .setKey(keyPair) + .setScmID(scmID) + .setClusterID(cID) .setSubject("localhost") .setConfiguration(new SecurityConfig(conf)) - .setScmID("test") + .setKey(keyPair) + .makeCA(certID) .build()); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java deleted file 
mode 100644 index 7803025d9d98..000000000000 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMSecurityProtocolServer.java +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.hadoop.hdds.scm.server; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_ADDRESS_KEY; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT; - -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec; -import org.apache.hadoop.security.ssl.KeyStoreTestUtil; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.MockitoAnnotations; - -import java.io.IOException; -import java.security.KeyPair; -import java.security.cert.X509Certificate; -import java.util.ArrayList; - -/** - * Test class for {@link SCMSecurityProtocolServer}. 
- */ -@Timeout(20) -public class TestSCMSecurityProtocolServer { - private SCMSecurityProtocolServer securityProtocolServer; - private OzoneConfiguration config; - @Mock - private StorageContainerManager mockScm; - @Mock - private SCMStorageConfig storageConfigMock; - - @BeforeEach - public void setUp() throws Exception { - MockitoAnnotations.openMocks(this); - config = new OzoneConfiguration(); - config.set(OZONE_SCM_SECURITY_SERVICE_ADDRESS_KEY, - OZONE_SCM_SECURITY_SERVICE_BIND_HOST_DEFAULT + ":0"); - securityProtocolServer = new SCMSecurityProtocolServer(config, null, - null, new ArrayList<>(), mockScm, null); - } - - @AfterEach - public void tearDown() { - if (securityProtocolServer != null) { - securityProtocolServer.stop(); - securityProtocolServer = null; - } - config = null; - } - - @Test - public void testStart() throws IOException { - securityProtocolServer.start(); - } - - @Test - public void testStop() { - securityProtocolServer.stop(); - } - - @Test - public void testReturnLastRootCa() throws Exception { - KeyPair keyPair = KeyStoreTestUtil.generateKeyPair("RSA"); - X509Certificate oldRootCa = KeyStoreTestUtil.generateCertificate("CN=dn", - keyPair, 15, "SHA256withRSA"); - X509Certificate latestRootCa = KeyStoreTestUtil.generateCertificate("CN=dn", - keyPair, 30, "SHA256withRSA"); - Assertions.assertTrue(oldRootCa.getNotAfter().toInstant() - .isBefore(latestRootCa.getNotAfter().toInstant())); - securityProtocolServer.addNewRootCa(oldRootCa); - securityProtocolServer.addNewRootCa(latestRootCa); - String pemEncodedLatestRootCa = - CertificateCodec.getPEMEncodedString(latestRootCa); - Mockito.when(mockScm.getScmStorageConfig()).thenReturn(storageConfigMock); - Mockito.when( - storageConfigMock.checkPrimarySCMIdInitialized()).thenReturn(true); - Assertions.assertEquals(securityProtocolServer.getRootCACertificate(), - pemEncodedLatestRootCa); - } -} diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml 
b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml new file mode 100644 index 000000000000..bedf6de3c3bf --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/root-ca-rotation.yaml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "3.4" + +x-root-cert-rotation-config: + &root-cert-rotation-config + environment: + - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false + - OZONE-SITE.XML_hdds.x509.max.duration=PT240S + - OZONE-SITE.XML_hdds.x509.default.duration=PT60S + - OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S + - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S + - OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S + - OZONE-SITE.XML_hdds.block.token.expiry.time=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.max-lifetime=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.renew-interval=15s + - OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=60s + - OZONE-SITE.XML_hdds.scmclient.failover.retry.interval=1s + - OZONE-SITE.XML_hdds.scmclient.failover.max.retry=60 + - OZONE-SITE.XML_ozone.scm.info.wait.duration=60s + - OZONE-SITE.XML_ozone.scm.ha.ratis.request.timeout=2s + - OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.HttpCrossOriginFilterInitializer +services: + datanode1: + <<: *root-cert-rotation-config + datanode2: + <<: *root-cert-rotation-config + datanode3: + <<: *root-cert-rotation-config + datanode4: + <<: *root-cert-rotation-config + om1: + <<: *root-cert-rotation-config + om2: + <<: *root-cert-rotation-config + om3: + <<: *root-cert-rotation-config + scm1.org: + <<: *root-cert-rotation-config + scm2.org: + <<: *root-cert-rotation-config + scm3.org: + <<: *root-cert-rotation-config + scm4.org: + <<: *root-cert-rotation-config + s3g: + <<: *root-cert-rotation-config + httpfs: + <<: *root-cert-rotation-config + recon: + <<: *root-cert-rotation-config diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh new file mode 100755 index 000000000000..c7ab83670866 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-root-ca-rotation.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# 
Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#suite:HA-secure + +COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export COMPOSE_DIR + +export SECURITY_ENABLED=true +export OM_SERVICE_ID="omservice" +export SCM=scm1.org +export COMPOSE_FILE=docker-compose.yaml:root-ca-rotation.yaml + +: ${OZONE_BUCKET_KEY_NAME:=key1} + +# shellcheck source=/dev/null +source "$COMPOSE_DIR/../testlib.sh" + +start_docker_env + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +execute_robot_test scm1.org kinit.robot + +# verify root CA rotation monitor task is active on leader +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" + +# wait and verify root CA is rotated +wait_for_execute_command scm1.org 240 "ozone admin cert info 2" + +# transfer leader to scm2.org +execute_robot_test scm1.org scmha/scm-leader-transfer.robot +wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Inactive'" + +# verify om operations 
+execute_commands_in_container scm1.org "ozone sh volume create /r-v1 && ozone sh bucket create /r-v1/r-b1" + +# verify scm operations +execute_robot_test scm1.org admincli/pipeline.robot + +# wait for next root CA rotation +wait_for_execute_command scm1.org 240 "ozone admin cert info 3" + +# bootstrap new SCM4 and verify certificate +docker-compose up -d scm4.org +wait_for_port scm4.org 9894 120 +execute_robot_test scm4.org kinit.robot +wait_for_execute_command scm4.org 120 "ozone admin scm roles | grep scm4.org" +wait_for_execute_command scm4.org 30 "ozone admin cert list --role=scm | grep scm4.org" + +# wait for next root CA rotation +wait_for_execute_command scm4.org 240 "ozone admin cert info 4" + +#transfer leader to scm4.org +execute_robot_test scm4.org -v "TARGET_SCM:scm4.org" scmha/scm-leader-transfer.robot + +# add new datanode4 and verify certificate +docker-compose up -d datanode4 +wait_for_port datanode4 9856 60 +wait_for_execute_command scm4.org 60 "ozone admin datanode list | grep datanode4" + +#transfer leader to scm3.org +execute_robot_test scm3.org kinit.robot +execute_robot_test scm4.org -v "TARGET_SCM:scm3.org" scmha/scm-leader-transfer.robot + +# wait for next root CA rotation +wait_for_execute_command scm3.org 240 "ozone admin cert info 5" + +# decommission scm1.org +execute_robot_test scm3.org scmha/scm-decommission.robot + +# check the metrics +execute_robot_test scm2.org scmha/root-ca-rotation.robot + +stop_docker_env + +generate_report diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh index f1f3593ec293..41dcbb09d5b2 100755 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test.sh @@ -52,11 +52,6 @@ execute_robot_test s3g admincli execute_robot_test s3g omha/om-leader-transfer.robot -# verify root CA rotation monitor task -wait_for_execute_command scm1.org 30 "jps | grep 
StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Active'" -execute_robot_test s3g scmha/scm-leader-transfer.robot -wait_for_execute_command scm1.org 30 "jps | grep StorageContainerManagerStarter | awk -F' ' '{print $1}' | xargs -I {} jstack {} | grep 'RootCARotationManager-MonitorTask-Inactive'" - execute_robot_test s3g httpfs export SCM=scm2.org diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml index 4262e63620d4..85586a184445 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/certificate-rotation.yaml @@ -23,6 +23,7 @@ x-cert-rotation-config: - OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT30s - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false + - OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S services: datanode: <<: *cert-rotation-config diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml new file mode 100644 index 000000000000..8f7b944b0fb8 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/root-ca-rotation.yaml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3.4" + +x-root-cert-rotation-config: + &root-cert-rotation-config + environment: + - OZONE-SITE.XML_hdds.x509.grace.duration.token.checks.enabled=false + - OZONE-SITE.XML_hdds.x509.max.duration=PT180S + - OZONE-SITE.XML_hdds.x509.default.duration=PT60S + - OZONE-SITE.XML_hdds.x509.renew.grace.duration=PT45S + - OZONE-SITE.XML_hdds.x509.ca.rotation.check.interval=PT1S + - OZONE-SITE.XML_hdds.x509.ca.rotation.ack.timeout=PT20S + - OZONE-SITE.XML_hdds.block.token.expiry.time=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.max-lifetime=15s + - OZONE-SITE.XML_ozone.manager.delegation.token.renew-interval=15s + - OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=60s + - OZONE-SITE.XML_hdds.scmclient.failover.retry.interval=1s + - OZONE-SITE.XML_hdds.scmclient.failover.max.retry=60 + - OZONE-SITE.XML_ozone.scm.info.wait.duration=60s + - OZONE-SITE.XML_ozone.scm.ha.ratis.request.timeout=2s + - OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.HttpCrossOriginFilterInitializer +services: + datanode: + <<: *root-cert-rotation-config + om: + <<: *root-cert-rotation-config + scm: + <<: *root-cert-rotation-config + s3g: + <<: *root-cert-rotation-config + httpfs: + <<: *root-cert-rotation-config + recon: + <<: *root-cert-rotation-config diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh new file mode 100755 index 000000000000..66f1a6d01aec --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-root-ca-rotation.sh 
@@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#suite:secure + +COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export COMPOSE_DIR + +export SECURITY_ENABLED=true +export SCM=scm +export COMPOSE_FILE=docker-compose.yaml:root-ca-rotation.yaml + +: ${OZONE_BUCKET_KEY_NAME:=key1} + +# shellcheck source=/dev/null +source "$COMPOSE_DIR/../testlib.sh" + +start_docker_env + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +execute_robot_test scm kinit.robot + +# verify root CA rotation monitor task is active on leader +wait_for_execute_command scm 30 "jps | grep StorageContainerManagerStarter | sed 's/StorageContainerManagerStarter//' | xargs | xargs -I {} jstack {} | grep 'RootCARotationManager-Active'" + +# wait and verify root CA is rotated +wait_for_execute_command scm 180 "ozone admin cert info 2" + +# verify om operations and data operations +execute_commands_in_container scm "ozone sh volume create /r-v1 && ozone sh bucket create /r-v1/r-b1" + +# wait for second root CA rotation +wait_for_execute_command scm 180 "ozone admin cert info 3" + +# check the metrics +execute_robot_test scm scmha/root-ca-rotation.robot + +stop_docker_env + 
+generate_report diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index 9cc2c1d51c06..085f448dea00 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -262,6 +262,18 @@ execute_command_in_container(){ docker-compose exec -T "$@" } +## @description Execute specific commands in docker container +## @param container name +## @param specific commands to execute +execute_commands_in_container(){ + local container=$1 + shift 1 + local command=$@ + + # shellcheck disable=SC2068 + docker-compose exec -T $container /bin/bash -c "$command" +} + ## @description Stop a list of named containers ## @param List of container names, eg datanode_1 datanode_2 stop_containers() { @@ -323,18 +335,19 @@ wait_for_port(){ wait_for_execute_command(){ local container=$1 local timeout=$2 - local command=$3 + shift 2 + local command=$@ #Reset the timer SECONDS=0 while [[ $SECONDS -lt $timeout ]]; do - if docker-compose exec -T $container bash -c '$command'; then - echo "$command succeed" - return + if docker-compose exec -T $container /bin/bash -c "$command"; then + echo "$command succeed" + return fi - echo "$command hasn't succeed yet" - sleep 1 + echo "$command hasn't succeed yet" + sleep 1 done echo "Timed out waiting on $command to be successful" return 1 diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot new file mode 100644 index 000000000000..50b6d64713f5 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/root-ca-rotation.robot @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Smoketest ozone root CA rotation metrics +Library OperatingSystem +Library BuiltIn +Resource ../commonlib.robot +Test Timeout 5 minutes + +*** Variables *** + +*** Test Cases *** +Verify root CA rotation metrics + # example "NumSuccessRotation" : 5, + ${successRotationLine} = Execute curl -sS 'http://localhost:9876/jmx' | grep NumSuccessRotation + LOG ${successRotationLine} + ${temp_1} = Split String ${successRotationLine} : + ${temp_2} = Strip String ${temp_1[1]} + ${temp_3} = Split String ${temp_2} , + ${successRotation} = Strip String ${temp_3[0]} + ${successRotation} = Convert To Number ${successRotation} + Should be true ${successRotation} >= 1 diff --git a/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot b/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot index cf38de159702..4c8796d41b79 100644 --- a/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/scmha/scm-leader-transfer.robot @@ -21,6 +21,7 @@ Resource ../commonlib.robot Test Timeout 5 minutes *** Variables *** +${TARGET_SCM}= scm2.org ** Keywords *** Get SCM Leader Node @@ -34,13 +35,22 @@ Get SCM Leader Node LOG Leader SCM: ${leaderSCM} [return] ${leaderSCM} +Get SCM UUID + ${result} = Execute ozone admin scm roles --service-id=scmservice + LOG ${result} + ${scm_line} = Get Lines Containing String ${result} 
${TARGET_SCM} + ${scm_split} = Split String ${scm_line} : + ${scm_uuid} = Strip String ${scm_split[3]} + [return] ${scm_uuid} + *** Test Cases *** -Transfer Leadership randomly - # Find Leader SCM and one Follower SCM +Transfer Leadership + # Find Leader SCM ${leaderSCM} = Get SCM Leader Node LOG Leader SCM: ${leaderSCM} - # Transfer leadership to the Follower SCM - ${result} = Execute ozone admin scm transfer --service-id=scmservice -r + ${target_scm_uuid} = Get SCM UUID + # Transfer leadership to target SCM + ${result} = Execute ozone admin scm transfer --service-id=scmservice -n ${target_scm_uuid} LOG ${result} Should Contain ${result} Transfer leadership successfully diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index ca29b34f09e3..100f4400bc26 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -83,7 +83,6 @@ import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.minikdc.MiniKdc; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.common.Storage; @@ -115,6 +114,7 @@ import org.apache.commons.lang3.StringUtils; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION; import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; @@ -140,6 +140,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; +import static org.apache.hadoop.ozone.OzoneConsts.SCM_SUB_CA; import static org.apache.hadoop.ozone.om.OMConfigKeys.DELEGATION_TOKEN_MAX_LIFETIME_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_KEYTAB_FILE; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_PRINCIPAL_KEY; @@ -253,6 +254,10 @@ public void init() { conf.set(HDDS_X509_RENEW_GRACE_DURATION, Duration.ofMillis(certGraceTime).toString()); conf.setBoolean(HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED, false); + conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, + Duration.ofMillis(certGraceTime - 1000).toString()); + conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, + Duration.ofMillis(certGraceTime - 1000).toString()); conf.setLong(OMConfigKeys.DELEGATION_TOKEN_MAX_LIFETIME_KEY, delegationTokenMaxTime); @@ -452,8 +457,7 @@ private void initSCM() throws IOException { scmStore.setClusterId(clusterId); scmStore.setScmId(scmId); HASecurityUtils.initializeSecurity(scmStore, conf, - NetUtils.createSocketAddr(InetAddress.getLocalHost().getHostName(), - OZONE_SCM_CLIENT_PORT_DEFAULT), true); + InetAddress.getLocalHost().getHostName(), true); scmStore.setPrimaryScmNodeId(scmId); // writes the version file properties scmStore.initialize(); @@ -1329,12 +1333,11 @@ public void validateCertificate(X509Certificate cert) throws Exception { X500Name x500Issuer = new JcaX509CertificateHolder(cert).getIssuer(); RDN cn = x500Issuer.getRDNs(BCStyle.CN)[0]; String hostName = InetAddress.getLocalHost().getHostName(); - String scmUser = OzoneConsts.SCM_SUB_CA_PREFIX + hostName; - assertEquals(scmUser, 
cn.getFirst().getValue().toString()); // Subject name should be om login user in real world but in this test // UGI has scm user context. - assertEquals(scmUser, cn.getFirst().getValue().toString()); + assertTrue(cn.getFirst().getValue().toString().contains(SCM_SUB_CA)); + assertTrue(cn.getFirst().getValue().toString().contains(hostName)); LocalDate today = LocalDateTime.now().toLocalDate(); Date invalidDate; @@ -1349,7 +1352,8 @@ public void validateCertificate(X509Certificate cert) throws Exception { assertTrue(cert.getSubjectDN().toString().contains(scmId)); assertTrue(cert.getSubjectDN().toString().contains(clusterId)); - assertTrue(cert.getIssuerDN().toString().contains(scmUser)); + assertTrue(cn.getFirst().getValue().toString().contains(SCM_SUB_CA)); + assertTrue(cn.getFirst().getValue().toString().contains(hostName)); assertTrue(cert.getIssuerDN().toString().contains(scmId)); assertTrue(cert.getIssuerDN().toString().contains(clusterId)); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java index c94e7eb10d07..21fc91f556d5 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java @@ -74,6 +74,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_DIR_NAME; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_DIR_NAME_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_KEY_LEN; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_ACK_TIMEOUT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_CA_ROTATION_CHECK_INTERNAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_DEFAULT_DURATION; 
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_X509_GRACE_DURATION_TOKEN_CHECKS_ENABLED; @@ -144,6 +145,7 @@ public void setup() throws Exception { Duration.ofMillis(certLifetime).toString()); conf.set(HDDS_X509_RENEW_GRACE_DURATION, "PT2S"); conf.set(HDDS_X509_CA_ROTATION_CHECK_INTERNAL, "PT1S"); // 1s + conf.set(HDDS_X509_CA_ROTATION_ACK_TIMEOUT, "PT1S"); // 1s long expiryTime = conf.getTimeDuration( HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME, "1s", diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java index 88312cacf403..1a94d16521af 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/security/OMCertificateClient.java @@ -122,7 +122,7 @@ public CertificateSignRequest.Builder getCSRBuilder() @Override public String signAndStoreCertificate(PKCS10CertificationRequest request, - Path certificatePath) throws CertificateException { + Path certificatePath, boolean renew) throws CertificateException { try { SCMGetCertResponseProto response = getScmSecureClient() .getOMCertChain(omInfo, getEncodedString(request)); @@ -135,14 +135,13 @@ public String signAndStoreCertificate(PKCS10CertificationRequest request, if (response.hasX509CACertificate()) { String pemEncodedRootCert = response.getX509CACertificate(); storeCertificate(pemEncodedRootCert, - CAType.SUBORDINATE, certCodec, false); - storeCertificate(pemEncodedCert, CAType.NONE, certCodec, - false); + CAType.SUBORDINATE, certCodec, false, !renew); + storeCertificate(pemEncodedCert, CAType.NONE, certCodec, false, !renew); // Store Root CA certificate if available. 
if (response.hasX509RootCACertificate()) { storeCertificate(response.getX509RootCACertificate(), - CAType.ROOT, certCodec, false); + CAType.ROOT, certCodec, false, !renew); } return CertificateCodec.getX509Certificate(pemEncodedCert) .getSerialNumber().toString(); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java index 2a78ddae23b3..5381a6159546 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/security/ReconCertificateClient.java @@ -90,7 +90,7 @@ public CertificateSignRequest.Builder getCSRBuilder() @Override public String signAndStoreCertificate(PKCS10CertificationRequest csr, - Path certificatePath) throws CertificateException { + Path certificatePath, boolean renew) throws CertificateException { try { SCMSecurityProtocolProtos.SCMGetCertResponseProto response; HddsProtos.NodeDetailsProto.Builder reconDetailsProtoBuilder = @@ -108,17 +108,14 @@ public String signAndStoreCertificate(PKCS10CertificationRequest csr, String pemEncodedCert = response.getX509Certificate(); CertificateCodec certCodec = new CertificateCodec( getSecurityConfig(), certificatePath); - storeCertificate(pemEncodedCert, CAType.NONE, - certCodec, - false); + storeCertificate(pemEncodedCert, CAType.NONE, certCodec, false, !renew); storeCertificate(response.getX509CACertificate(), - CAType.SUBORDINATE, - certCodec, false); + CAType.SUBORDINATE, certCodec, false, !renew); // Store Root CA certificate. if (response.hasX509RootCACertificate()) { storeCertificate(response.getX509RootCACertificate(), - CAType.ROOT, certCodec, false); + CAType.ROOT, certCodec, false, !renew); } return getX509Certificate(pemEncodedCert).getSerialNumber().toString(); } else {