diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 2d8f8c06c851..db76fd0de4c4 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -143,6 +143,10 @@ public final class ScmConfigKeys { public static final String OZONE_CHUNK_READ_BUFFER_DEFAULT_SIZE_DEFAULT = "64KB"; + public static final String OZONE_CHUNK_LIST_INCREMENTAL = + "ozone.chunk.list.incremental"; + public static final boolean OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT = false; + public static final String OZONE_SCM_CONTAINER_LAYOUT_KEY = "ozone.scm.container.layout"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java index 0e54decb73c2..f4efe1790bf7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/upgrade/HDDSLayoutFeature.java @@ -41,7 +41,9 @@ public enum HDDSLayoutFeature implements LayoutFeature { WEBUI_PORTS_IN_DATANODEDETAILS(6, "Adding HTTP and HTTPS ports " + "to DatanodeDetails."), HADOOP_PRC_PORTS_IN_DATANODEDETAILS(7, "Adding Hadoop RPC ports " + - "to DatanodeDetails."); + "to DatanodeDetails."), + HBASE_SUPPORT(8, "Datanode RocksDB Schema Version 3 has an extra table " + + "for the last chunk of blocks to support HBase."); ////////////////////////////// ////////////////////////////// diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index b8774f47b1f5..cb37a7b8b374 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -840,6 +840,17 @@ 
(ozone.client.bytes.per.checksum) corresponding to the chunk. + + ozone.chunk.list.incremental + false + OZONE, CLIENT, DATANODE, PERFORMANCE + + By default, a writer client sends the full chunk list of a block when it + sends PutBlock requests. Changing this configuration to true will send + only an incremental chunk list, which reduces metadata overhead and improves + hsync performance. + + ozone.scm.container.layout + FILE_PER_BLOCK diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index 38ca691ec121..b5dd97309ce2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.container.common.helpers.BlockData; @@ -36,11 +37,15 @@ import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; +import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import com.google.common.base.Preconditions; + import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.BCSID_MISMATCH; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_BLOCK; import static 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNKNOWN_BCSID; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNSUPPORTED_REQUEST; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_CHUNK_LIST_INCREMENTAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,8 +61,8 @@ public class BlockManagerImpl implements BlockManager { private ConfigurationSource config; private static final String DB_NULL_ERR_MSG = "DB cannot be null here"; - private static final String NO_SUCH_BLOCK_ERR_MSG = - "Unable to find the block."; + public static final String INCREMENTAL_CHUNK_LIST = "incremental"; + public static final String FULL_CHUNK = "full"; // Default Read Buffer capacity when Checksum is not present private final int defaultReadBufferCapacity; @@ -103,6 +108,15 @@ public long putBlock(Container container, BlockData data, public static long persistPutBlock(KeyValueContainer container, BlockData data, ConfigurationSource config, boolean endOfBlock) throws IOException { + boolean incrementalEnabled = + config.getBoolean(OZONE_CHUNK_LIST_INCREMENTAL, + OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT); + if (incrementalEnabled && !VersionedDatanodeFeatures.isFinalized( + HDDSLayoutFeature.HBASE_SUPPORT)) { + throw new StorageContainerException("DataNode has not finalized " + + "upgrading to a version that supports incremental chunk list.", + UNSUPPORTED_REQUEST); + } Preconditions.checkNotNull(data, "BlockData cannot be null for put " + "operation."); Preconditions.checkState(data.getContainerID() >= 0, "Container Id " + @@ -145,7 +159,6 @@ public static long persistPutBlock(KeyValueContainer container, // update the blockData as well as BlockCommitSequenceId here try (BatchOperation batch = db.getStore().getBatchHandler() .initBatchOperation()) { - // If the block does not exist in the pendingPutBlockCache of the // 
container, then check the DB to ascertain if it exists or not. // If block exists in cache, blockCount should not be incremented. @@ -158,8 +171,8 @@ public static long persistPutBlock(KeyValueContainer container, } } - db.getStore().getBlockDataTable().putWithBatch( - batch, containerData.getBlockKey(localID), data); + db.getStore().putBlockByID(batch, incrementalEnabled, localID, data, + containerData, endOfBlock); if (bcsId != 0) { db.getStore().getMetadataTable().putWithBatch( batch, containerData.getBcsIdKey(), bcsId); @@ -354,14 +367,6 @@ public void shutdown() { private BlockData getBlockByID(DBHandle db, BlockID blockID, KeyValueContainerData containerData) throws IOException { - String blockKey = containerData.getBlockKey(blockID.getLocalID()); - - BlockData blockData = db.getStore().getBlockDataTable().get(blockKey); - if (blockData == null) { - throw new StorageContainerException(NO_SUCH_BLOCK_ERR_MSG + - " BlockID : " + blockID, NO_SUCH_BLOCK); - } - - return blockData; + return db.getStore().getBlockByID(blockID, containerData); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java index e1b10e6df571..fec2a3f7d220 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java @@ -77,4 +77,6 @@ public ConfigurationSource getConfig() { public DBColumnFamilyDefinition getFinalizeBlocksColumnFamily() { return null; } + public abstract DBColumnFamilyDefinition + getLastChunkInfoColumnFamily(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java index b949a191453a..36200d890aa9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java @@ -63,6 +63,8 @@ public abstract class AbstractDatanodeStore implements DatanodeStore { private Table blockDataTable; + private Table lastChunkInfoTable; + private Table blockDataTableWithIterator; private Table deletedBlocksTable; @@ -186,6 +188,12 @@ public void start(ConfigurationSource config) finalizeBlocksTableWithIterator); checkTableStatus(finalizeBlocksTable, finalizeBlocksTable.getName()); } + + if (dbDef.getLastChunkInfoColumnFamily() != null) { + lastChunkInfoTable = new DatanodeTable<>( + dbDef.getLastChunkInfoColumnFamily().getTable(this.store)); + checkTableStatus(lastChunkInfoTable, lastChunkInfoTable.getName()); + } } } @@ -217,6 +225,11 @@ public Table getBlockDataTable() { return blockDataTable; } + @Override + public Table getLastChunkInfoTable() { + return lastChunkInfoTable; + } + @Override public Table getDeletedBlocksTable() { return deletedBlocksTable; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java index a002eef3f72a..d34edb3a48a7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java @@ -102,6 +102,12 @@ public DBColumnFamilyDefinition getMetadataColumnFamily() { return DELETED_BLOCKS; } + @Override + public DBColumnFamilyDefinition + 
getLastChunkInfoColumnFamily() { + return null; + } + @Override public List> getColumnFamilies(String name) { return COLUMN_FAMILIES.get(name); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java index 87a283e45836..d5aeb45f602a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java @@ -101,6 +101,15 @@ public class DatanodeSchemaThreeDBDefinition Long.class, LongCodec.get()); + public static final DBColumnFamilyDefinition + LAST_CHUNK_INFO = + new DBColumnFamilyDefinition<>( + "last_chunk_info", + String.class, + FixedLengthStringCodec.get(), + BlockData.class, + BlockData.getCodec()); + private static String separator = ""; private static final Map> @@ -109,8 +118,8 @@ public class DatanodeSchemaThreeDBDefinition METADATA, DELETED_BLOCKS, DELETE_TRANSACTION, - FINALIZE_BLOCKS); - + FINALIZE_BLOCKS, + LAST_CHUNK_INFO); public DatanodeSchemaThreeDBDefinition(String dbPath, ConfigurationSource config) { @@ -134,6 +143,7 @@ public DatanodeSchemaThreeDBDefinition(String dbPath, DELETED_BLOCKS.setCfOptions(cfOptions); DELETE_TRANSACTION.setCfOptions(cfOptions); FINALIZE_BLOCKS.setCfOptions(cfOptions); + LAST_CHUNK_INFO.setCfOptions(cfOptions); } @Override @@ -158,6 +168,12 @@ public DBColumnFamilyDefinition getMetadataColumnFamily() { return DELETED_BLOCKS; } + @Override + public DBColumnFamilyDefinition + getLastChunkInfoColumnFamily() { + return LAST_CHUNK_INFO; + } + public DBColumnFamilyDefinition getDeleteTransactionsColumnFamily() { return DELETE_TRANSACTION; diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java index e0e491a9ea65..e23c96996c22 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java @@ -86,6 +86,15 @@ public class DatanodeSchemaTwoDBDefinition Long.class, LongCodec.get()); + public static final DBColumnFamilyDefinition + LAST_CHUNK_INFO = + new DBColumnFamilyDefinition<>( + "last_chunk_info", + String.class, + FixedLengthStringCodec.get(), + BlockData.class, + BlockData.getCodec()); + public DatanodeSchemaTwoDBDefinition(String dbPath, ConfigurationSource config) { super(dbPath, config); @@ -97,7 +106,8 @@ public DatanodeSchemaTwoDBDefinition(String dbPath, METADATA, DELETED_BLOCKS, DELETE_TRANSACTION, - FINALIZE_BLOCKS); + FINALIZE_BLOCKS, + LAST_CHUNK_INFO); @Override public Map> getMap() { @@ -121,6 +131,12 @@ public DBColumnFamilyDefinition getMetadataColumnFamily() { return DELETED_BLOCKS; } + @Override + public DBColumnFamilyDefinition + getLastChunkInfoColumnFamily() { + return LAST_CHUNK_INFO; + } + public DBColumnFamilyDefinition getDeleteTransactionsColumnFamily() { return DELETE_TRANSACTION; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStore.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStore.java index 4ca81a03722e..4abfb60c4f4b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStore.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStore.java @@ -18,22 +18,31 @@ 
package org.apache.hadoop.ozone.container.metadata; import com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter; +import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.BatchOperationHandler; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList; import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import java.io.Closeable; import java.io.IOException; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_BLOCK; + /** * Interface for interacting with datanode databases. */ public interface DatanodeStore extends Closeable { + String NO_SUCH_BLOCK_ERR_MSG = + "Unable to find the block."; /** * Start datanode manager. @@ -84,6 +93,13 @@ public interface DatanodeStore extends Closeable { */ Table getFinalizeBlocksTable(); + /** + * A Table that keeps the metadata of the last chunk of blocks. + * + * @return Table + */ + Table getLastChunkInfoTable(); + /** * Helper to create and write batch transactions. 
*/ @@ -112,4 +128,28 @@ BlockIterator getFinalizeBlockIterator(long containerID, default void compactionIfNeeded() throws Exception { } + + default BlockData getBlockByID(BlockID blockID, + KeyValueContainerData containerData) throws IOException { + String blockKey = containerData.getBlockKey(blockID.getLocalID()); + + // check block data table + BlockData blockData = getBlockDataTable().get(blockKey); + + if (blockData == null) { + throw new StorageContainerException( + NO_SUCH_BLOCK_ERR_MSG + " BlockID : " + blockID, NO_SUCH_BLOCK); + } + + return blockData; + } + + default void putBlockByID(BatchOperation batch, boolean incremental, + long localID, BlockData data, KeyValueContainerData containerData, + boolean endOfBlock) + throws IOException { + // old client: override chunk list. + getBlockDataTable().putWithBatch( + batch, containerData.getBlockKey(localID), data); + } }