diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index 482ac88f366c..21157baa99ef 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -343,6 +343,11 @@ public final class OzoneConfigKeys {
public static final double
HDDS_DATANODE_STORAGE_UTILIZATION_CRITICAL_THRESHOLD_DEFAULT = 0.95;
+ public static final String HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE =
+ "hdds.datanode.metadata.rocksdb.cache.size";
+ public static final String
+ HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE_DEFAULT = "64MB";
+
public static final String OZONE_SECURITY_ENABLED_KEY =
"ozone.security.enabled";
public static final boolean OZONE_SECURITY_ENABLED_DEFAULT = false;
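For context (not part of the patch): the new key holds a storage-size string, so callers read it back through ConfigurationSource#getStorageSize in whatever unit they need, exactly as the datanode store does below. A minimal sketch, assuming an OzoneConfiguration instance; the class name is illustrative:

    // Illustrative sketch -- not part of this patch.
    import org.apache.hadoop.hdds.conf.OzoneConfiguration;
    import org.apache.hadoop.hdds.conf.StorageUnit;

    import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE;
    import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE_DEFAULT;

    public final class CacheSizeReadSketch {
      public static void main(String[] args) {
        OzoneConfiguration conf = new OzoneConfiguration();
        // getStorageSize parses values such as "64MB" and converts them to
        // the requested unit; the datanode store asks for plain bytes.
        long cacheSizeBytes = (long) conf.getStorageSize(
            HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE,
            HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE_DEFAULT,
            StorageUnit.BYTES);
        System.out.println("RocksDB block cache size: " + cacheSizeBytes + " bytes");
      }
    }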
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 485397819fcd..0f7c94913322 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1278,6 +1278,16 @@
+ <property>
+   <name>hdds.datanode.metadata.rocksdb.cache.size</name>
+   <value>64MB</value>
+   <tag>OZONE, DATANODE, MANAGEMENT</tag>
+   <description>
+     Size of the block metadata cache shared among RocksDB instances on each
+     datanode. All containers on a datanode will share this cache.
+   </description>
+ </property>
+
<property>
  <name>hdds.command.status.report.interval</name>
  <value>30s</value>
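Operators can override the default in ozone-site.xml; the value accepts storage-size suffixes such as KB, MB, and GB. An illustrative override (the 256MB figure is an example, not a recommendation):

    <property>
      <name>hdds.datanode.metadata.rocksdb.cache.size</name>
      <value>256MB</value>
    </property>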
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java
index 6c258eda7cd0..efbc24730af7 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java
@@ -17,28 +17,40 @@
*/
package org.apache.hadoop.ozone.container.metadata;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hdds.StringUtils;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter;
import org.apache.hadoop.hdds.utils.db.*;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList;
import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;
+import org.rocksdb.BlockBasedTableConfig;
+import org.rocksdb.BloomFilter;
+import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.DBOptions;
+import org.rocksdb.LRUCache;
+import org.rocksdb.RocksDB;
import org.rocksdb.Statistics;
import org.rocksdb.StatsLevel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.concurrent.ConcurrentHashMap;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS_OFF;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE_DEFAULT;
/**
* Implementation of the {@link DatanodeStore} interface that contains
@@ -55,10 +68,15 @@ public abstract class AbstractDatanodeStore implements DatanodeStore {
private Table<String, ChunkInfoList> deletedBlocksTable;
private static final Logger LOG =
- LoggerFactory.getLogger(AbstractDatanodeStore.class);
+ LoggerFactory.getLogger(AbstractDatanodeStore.class);
private DBStore store;
private final AbstractDatanodeDBDefinition dbDef;
private final long containerID;
+ private final ColumnFamilyOptions cfOptions;
+
+ private static final DBProfile DEFAULT_PROFILE = DBProfile.DISK;
+ private static final Map<ConfigurationSource, ColumnFamilyOptions>
+     OPTIONS_CACHE = new ConcurrentHashMap<>();
/**
* Constructs the metadata store and starts the DB services.
@@ -67,8 +85,16 @@ public abstract class AbstractDatanodeStore implements DatanodeStore {
* @throws IOException - on Failure.
*/
protected AbstractDatanodeStore(ConfigurationSource config, long containerID,
- AbstractDatanodeDBDefinition dbDef)
- throws IOException {
+ AbstractDatanodeDBDefinition dbDef) throws IOException {
+
+ // The same config instance is used on each datanode, so we can share the
+ // corresponding column family options, providing a single shared cache
+ // for all containers on a datanode.
+ // computeIfAbsent is atomic, so concurrent container creation cannot
+ // install two ColumnFamilyOptions objects for the same config.
+ cfOptions = OPTIONS_CACHE.computeIfAbsent(config,
+     AbstractDatanodeStore::buildColumnFamilyOptions);
+
this.dbDef = dbDef;
this.containerID = containerID;
start(config);
@@ -76,9 +102,9 @@ protected AbstractDatanodeStore(ConfigurationSource config, long containerID,
@Override
public void start(ConfigurationSource config)
- throws IOException {
+ throws IOException {
if (this.store == null) {
- DBOptions options = new DBOptions();
+ DBOptions options = DEFAULT_PROFILE.getDBOptions();
options.setCreateIfMissing(true);
options.setCreateMissingColumnFamilies(true);
@@ -93,7 +119,8 @@ public void start(ConfigurationSource config)
}
this.store = DBStoreBuilder.newBuilder(config, dbDef)
- .setDBOption(options)
+ .setDBOptions(options)
+ .setDefaultCFOptions(cfOptions)
.build();
// Use the DatanodeTable wrapper to disable the table iterator on
@@ -179,6 +206,12 @@ public void compactDB() throws IOException {
store.compactDB();
}
+ @VisibleForTesting
+ public static Map<ConfigurationSource, ColumnFamilyOptions>
+     getColumnFamilyOptionsCache() {
+ return Collections.unmodifiableMap(OPTIONS_CACHE);
+ }
+
private static void checkTableStatus(Table<?, ?> table, String name)
throws IOException {
String logMessage = "Unable to get a reference to %s table. Cannot " +
@@ -191,6 +224,26 @@ private static void checkTableStatus(Table<?, ?> table, String name)
}
}
+ private static ColumnFamilyOptions buildColumnFamilyOptions(
+ ConfigurationSource config) {
+ long cacheSize = (long) config.getStorageSize(
+ HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE,
+ HDDS_DATANODE_METADATA_ROCKSDB_CACHE_SIZE_DEFAULT,
+ StorageUnit.BYTES);
+
+ // Load the native RocksDB library so RocksDB objects can be created.
+ RocksDB.loadLibrary();
+
+ BlockBasedTableConfig tableConfig = new BlockBasedTableConfig();
+ tableConfig.setBlockCache(new LRUCache(cacheSize)) // cacheSize is in bytes
+     .setPinL0FilterAndIndexBlocksInCache(true)
+     .setFilterPolicy(new BloomFilter());
+
+ return DEFAULT_PROFILE
+ .getColumnFamilyOptions()
+ .setTableFormatConfig(tableConfig);
+ }
+
/**
* Block Iterator for KeyValue Container. This block iterator returns blocks
* which match with the {@link MetadataKeyFilters.KeyPrefixFilter}. If no
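To make the sharing mechanics above concrete: handing the same LRUCache object to the BlockBasedTableConfig of several RocksDB instances gives them one jointly bounded block cache, which is what reusing a single ColumnFamilyOptions per ConfigurationSource achieves for all container DBs on a datanode. A minimal sketch against the raw RocksDB Java API, independent of Ozone's DBStoreBuilder; paths and the size are placeholders:

    // Sketch: two RocksDB instances sharing one LRU block cache.
    import org.rocksdb.BlockBasedTableConfig;
    import org.rocksdb.BloomFilter;
    import org.rocksdb.LRUCache;
    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;
    import org.rocksdb.RocksDBException;

    public final class SharedBlockCacheSketch {
      public static void main(String[] args) throws RocksDBException {
        RocksDB.loadLibrary();

        // One cache object, reused by every DB opened with these options.
        LRUCache sharedCache = new LRUCache(64L * 1024 * 1024); // 64 MB in bytes

        BlockBasedTableConfig tableConfig = new BlockBasedTableConfig()
            .setBlockCache(sharedCache)
            .setPinL0FilterAndIndexBlocksInCache(true)
            .setFilterPolicy(new BloomFilter());

        Options options = new Options()
            .setCreateIfMissing(true)
            .setTableFormatConfig(tableConfig);

        // Both databases draw on the same 64 MB budget instead of
        // allocating 64 MB each.
        try (RocksDB db1 = RocksDB.open(options, "/tmp/container-1");
             RocksDB db2 = RocksDB.open(options, "/tmp/container-2")) {
          db1.put("key".getBytes(), "value".getBytes());
          db2.put("key".getBytes(), "value".getBytes());
        }
      }
    }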
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
index 79cf6a7e1ecc..c2b487be2933 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
@@ -20,6 +20,7 @@
import org.apache.hadoop.conf.StorageUnit;
import org.apache.hadoop.hdds.client.BlockID;
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
@@ -37,6 +38,7 @@
import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.ozone.container.common.utils.ReferenceCountedDB;
@@ -50,6 +52,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mockito.Mockito;
+import org.rocksdb.ColumnFamilyOptions;
import java.io.File;
@@ -79,7 +82,6 @@ public class TestKeyValueContainer {
@Rule
public TemporaryFolder folder = new TemporaryFolder();
- private OzoneConfiguration conf;
private String scmId = UUID.randomUUID().toString();
private VolumeSet volumeSet;
private RoundRobinVolumeChoosingPolicy volumeChoosingPolicy;
@@ -89,6 +91,11 @@ public class TestKeyValueContainer {
private final ChunkLayOutVersion layout;
+ // Use one configuration object across parameterized runs of tests.
+ // This preserves the column family options in the container options
+ // cache for testContainersShareColumnFamilyOptions.
+ private static final OzoneConfiguration CONF = new OzoneConfiguration();
+
public TestKeyValueContainer(ChunkLayOutVersion layout) {
this.layout = layout;
}
@@ -100,10 +107,9 @@ public static Iterable<Object[]>
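The test diff is truncated above, but the comment it adds points to a testContainersShareColumnFamilyOptions case built on the new getColumnFamilyOptionsCache() accessor. A hedged sketch of what that assertion could look like; this body is illustrative, not the patch's actual test:

    // Illustrative sketch of the shared-options assertion, assuming the
    // containers under test were all created with the same static CONF.
    import java.util.Map;

    import org.apache.hadoop.hdds.conf.ConfigurationSource;
    import org.apache.hadoop.ozone.container.metadata.AbstractDatanodeStore;
    import org.junit.Assert;
    import org.junit.Test;
    import org.rocksdb.ColumnFamilyOptions;

    public class SharedColumnFamilyOptionsSketch {
      @Test
      public void testContainersShareColumnFamilyOptions() {
        // Every store built from the same ConfigurationSource should map to
        // a single ColumnFamilyOptions entry in the static cache.
        Map<ConfigurationSource, ColumnFamilyOptions> cache =
            AbstractDatanodeStore.getColumnFamilyOptionsCache();
        Assert.assertEquals(1, cache.size());
      }
    }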