diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index f20d606d4365..bc44120ce2ab 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -4674,4 +4674,30 @@ default value of DEFAULT_RACK is returned for all node names. + + + ozone.om.edekcacheloader.interval.ms + 1000 + When KeyProvider is configured, the interval time of warming + up edek cache on OM starts up. All edeks will be loaded + from KMS into provider cache. The edek cache loader will try to warm up the + cache until succeed or OM leaves active state. + + + + + ozone.om.edekcacheloader.initial.delay.ms + 3000 + When KeyProvider is configured, the time delayed until the first + attempt to warm up edek cache on OM start up. + + + + + ozone.om.edekcacheloader.max-retries + 10 + When KeyProvider is configured, the max retries allowed to attempt + warm up edek cache if none of key successful on OM start up. + + diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 7e80766c7fe5..141676622e4b 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -622,4 +622,17 @@ private OMConfigKeys() { public static final String OZONE_OM_MAX_BUCKET = "ozone.om.max.buckets"; public static final int OZONE_OM_MAX_BUCKET_DEFAULT = 100000; + + public static final String OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_KEY = + "ozone.om.edekcacheloader.initial.delay.ms"; + + public static final int OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_DEFAULT = 3000; + + public static final String OZONE_OM_EDEKCACHELOADER_INTERVAL_MS_KEY = "ozone.om.edekcacheloader.interval.ms"; + + public static final int OZONE_OM_EDEKCACHELOADER_INTERVAL_MS_DEFAULT = 1000; + + public static final String OZONE_OM_EDEKCACHELOADER_MAX_RETRIES_KEY = + "ozone.om.edekcacheloader.max-retries"; + public static final int OZONE_OM_EDEKCACHELOADER_MAX_RETRIES_DEFAULT = 10; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java index 709f3f515739..0bd501b7d786 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestOzoneAtRestEncryption.java @@ -59,10 +59,12 @@ import java.util.Random; import java.util.TreeMap; import java.util.UUID; +import java.util.function.BooleanSupplier; import javax.xml.bind.DatatypeConverter; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.kms.KMSClientProvider; +import org.apache.hadoop.crypto.key.kms.LoadBalancingKMSClientProvider; import org.apache.hadoop.crypto.key.kms.server.MiniKMS; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataOutputStream; @@ -95,6 +97,7 @@ import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -106,6 +109,7 @@ import org.apache.hadoop.ozone.om.helpers.OmMultipartInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartUploadCompleteInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.test.Whitebox; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.apache.ozone.test.tag.Unhealthy; @@ -211,6 +215,34 @@ static void reInitClient() throws IOException { store = ozClient.getObjectStore(); } + @Test + public void testWarmupEDEKCacheOnStartup() throws Exception { + + createVolumeAndBucket("vol", "buck", BucketLayout.OBJECT_STORE); + + @SuppressWarnings("unchecked") KMSClientProvider spy = getKMSClientProvider(); + assertTrue(spy.getEncKeyQueueSize(TEST_KEY) > 0); + + conf.setInt(OMConfigKeys.OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_KEY, 0); + cluster.restartOzoneManager(); + + GenericTestUtils.waitFor(new BooleanSupplier() { + @Override + public boolean getAsBoolean() { + final KMSClientProvider kspy = getKMSClientProvider(); + return kspy.getEncKeyQueueSize(TEST_KEY) > 0; + } + }, 1000, 60000); + } + + private KMSClientProvider getKMSClientProvider() { + LoadBalancingKMSClientProvider lbkmscp = + (LoadBalancingKMSClientProvider) Whitebox.getInternalState( + cluster.getOzoneManager().getKmsProvider(), "extension"); + assert lbkmscp.getProviders().length == 1; + return lbkmscp.getProviders()[0]; + } + @ParameterizedTest @EnumSource diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 2d955e7cea66..22c3c5c318dd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -54,6 +54,12 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_EDEKCACHELOADER_INTERVAL_MS_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_EDEKCACHELOADER_INTERVAL_MS_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_EDEKCACHELOADER_MAX_RETRIES_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_EDEKCACHELOADER_MAX_RETRIES_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HANDLER_COUNT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HANDLER_COUNT_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE; @@ -97,6 +103,7 @@ import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrepareStatusResponse.PrepareStatus; import static org.apache.hadoop.security.UserGroupInformation.getCurrentUser; import static org.apache.hadoop.util.ExitUtil.terminate; +import static org.apache.hadoop.util.Time.monotonicNow; import static org.apache.ozone.graph.PrintableGraph.GraphType.FILE_NAME; import com.fasterxml.jackson.databind.ObjectMapper; @@ -105,6 +112,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.BlockingService; import com.google.protobuf.ProtocolMessageEnum; import java.io.BufferedWriter; @@ -137,6 +145,8 @@ import java.util.TimerTask; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -386,6 +396,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private KeyManager keyManager; private PrefixManagerImpl prefixManager; private final UpgradeFinalizer upgradeFinalizer; + private ExecutorService edekCacheLoader = null; /** * OM super user / admin list. @@ -727,6 +738,110 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption) omHostName = HddsUtils.getHostName(conf); } + public void initializeEdekCache(OzoneConfiguration conf) { + int edekCacheLoaderDelay = + conf.getInt(OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_KEY, OZONE_OM_EDEKCACHELOADER_INITIAL_DELAY_MS_DEFAULT); + int edekCacheLoaderInterval = + conf.getInt(OZONE_OM_EDEKCACHELOADER_INTERVAL_MS_KEY, OZONE_OM_EDEKCACHELOADER_INTERVAL_MS_DEFAULT); + int edekCacheLoaderMaxRetries = + conf.getInt(OZONE_OM_EDEKCACHELOADER_MAX_RETRIES_KEY, OZONE_OM_EDEKCACHELOADER_MAX_RETRIES_DEFAULT); + if (kmsProvider != null) { + edekCacheLoader = Executors.newSingleThreadExecutor( + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("Warm Up EDEK Cache Thread #%d") + .build()); + warmUpEdekCache(edekCacheLoader, edekCacheLoaderDelay, edekCacheLoaderInterval, edekCacheLoaderMaxRetries); + } + } + + static class EDEKCacheLoader implements Runnable { + private final String[] keyNames; + private final KeyProviderCryptoExtension kp; + private int initialDelay; + private int retryInterval; + private int maxRetries; + + EDEKCacheLoader(final String[] names, final KeyProviderCryptoExtension kp, + final int delay, final int interval, final int maxRetries) { + this.keyNames = names; + this.kp = kp; + this.initialDelay = delay; + this.retryInterval = interval; + this.maxRetries = maxRetries; + } + + @Override + public void run() { + LOG.info("Warming up {} EDEKs... (initialDelay={}, " + + "retryInterval={}, maxRetries={})", keyNames.length, initialDelay, retryInterval, + maxRetries); + try { + Thread.sleep(initialDelay); + } catch (InterruptedException ie) { + LOG.info("EDEKCacheLoader interrupted before warming up."); + return; + } + + boolean success = false; + int retryCount = 0; + IOException lastSeenIOE = null; + long warmUpEDEKStartTime = monotonicNow(); + + while (!success && retryCount < maxRetries) { + try { + kp.warmUpEncryptedKeys(keyNames); + LOG.info("Successfully warmed up {} EDEKs.", keyNames.length); + success = true; + } catch (IOException ioe) { + lastSeenIOE = ioe; + LOG.info("Failed to warm up EDEKs.", ioe); + } catch (Exception e) { + LOG.error("Cannot warm up EDEKs.", e); + throw e; + } + + if (!success) { + try { + Thread.sleep(retryInterval); + } catch (InterruptedException ie) { + LOG.info("EDEKCacheLoader interrupted during retry."); + break; + } + retryCount++; + } + } + + long warmUpEDEKTime = monotonicNow() - warmUpEDEKStartTime; + LOG.debug("Time taken to load EDEK keys to the cache: {}", warmUpEDEKTime); + if (!success) { + LOG.warn("Max retry {} reached, unable to warm up EDEKs.", maxRetries); + if (lastSeenIOE != null) { + LOG.warn("Last seen exception:", lastSeenIOE); + } + } + } + } + + public void warmUpEdekCache(final ExecutorService executor, final int delay, final int interval, int maxRetries) { + Set keys = new HashSet<>(); + try ( + TableIterator> iterator = + metadataManager.getBucketTable().iterator()) { + while (iterator.hasNext()) { + Table.KeyValue entry = iterator.next(); + if (entry.getValue().getEncryptionKeyInfo() != null) { + String encKey = entry.getValue().getEncryptionKeyInfo().getKeyName(); + keys.add(encKey); + } + } + } catch (IOException ex) { + LOG.error("Error while retrieving encryption keys for warming up EDEK cache", ex); + } + String[] edeks = new String[keys.size()]; + edeks = keys.toArray(edeks); + executor.execute(new EDEKCacheLoader(edeks, getKmsProvider(), delay, interval, maxRetries)); + } + public boolean isStopped() { return omState == State.STOPPED; } @@ -2299,6 +2414,10 @@ public boolean stop() { if (versionManager != null) { versionManager.close(); } + + if (edekCacheLoader != null) { + edekCacheLoader.shutdown(); + } return true; } catch (Exception e) { LOG.error("OzoneManager stop failed.", e); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index 0394ee08c77f..dce2951b9556 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -159,6 +159,11 @@ public SnapshotInfo getLatestSnapshot() { @Override public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, RaftPeerId newLeaderId) { + RaftPeerId currentPeerId = groupMemberId.getPeerId(); + if (newLeaderId.equals(currentPeerId)) { + // warmup cache + ozoneManager.initializeEdekCache(ozoneManager.getConfiguration()); + } // Initialize OMHAMetrics ozoneManager.omHAMetricsInit(newLeaderId.toString()); LOG.info("{}: leader changed to {}", groupMemberId, newLeaderId);