@@ -143,6 +143,10 @@ public final class ScmConfigKeys {
public static final String OZONE_CHUNK_READ_BUFFER_DEFAULT_SIZE_DEFAULT =
"64KB";

public static final String OZONE_CHUNK_LIST_INCREMENTAL =
"ozone.chunk.list.incremental";
public static final boolean OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT = false;

public static final String OZONE_SCM_CONTAINER_LAYOUT_KEY =
"ozone.scm.container.layout";

@@ -41,7 +41,9 @@ public enum HDDSLayoutFeature implements LayoutFeature {
WEBUI_PORTS_IN_DATANODEDETAILS(6, "Adding HTTP and HTTPS ports " +
"to DatanodeDetails."),
HADOOP_PRC_PORTS_IN_DATANODEDETAILS(7, "Adding Hadoop RPC ports " +
"to DatanodeDetails.");
"to DatanodeDetails."),
HBASE_SUPPORT(8, "Datanode RocksDB Schema Version 3 has an extra table " +
"for the last chunk of blocks to support HBase.)");

////////////////////////////// //////////////////////////////

11 changes: 11 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -840,6 +840,17 @@
(ozone.client.bytes.per.checksum) corresponding to the chunk.
</description>
</property>
<property>
<name>ozone.chunk.list.incremental</name>
<value>false</value>
<tag>OZONE, CLIENT, DATANODE, PERFORMANCE</tag>
<description>
By default, a writer client sends the full chunk list of a block in each
PutBlock request. Setting this configuration to true makes the client send
only an incremental chunk list, which reduces metadata overhead and
improves hsync performance.
</description>
</property>
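
As a usage note for reviewers, here is a minimal sketch of enabling the new flag from client code. It assumes the standard OzoneConfiguration / OzoneClientFactory setup; only the configuration key itself comes from this PR.

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.hadoop.ozone.client.OzoneClientFactory;

public class IncrementalChunkListExample {
  public static void main(String[] args) throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    // Opt in to incremental chunk lists on PutBlock; the default is false.
    conf.setBoolean("ozone.chunk.list.incremental", true);

    // Clients created from this configuration send incremental chunk lists;
    // a datanode that has not finalized the HBASE_SUPPORT layout feature
    // rejects such a PutBlock with UNSUPPORTED_REQUEST.
    try (OzoneClient client = OzoneClientFactory.getRpcClient(conf)) {
      // write keys and call hsync() as usual
    }
  }
}
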
<property>
<name>ozone.scm.container.layout</name>
<value>FILE_PER_BLOCK</value>
@@ -27,6 +27,7 @@
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
import org.apache.hadoop.hdds.utils.db.BatchOperation;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
@@ -36,11 +37,15 @@
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager;
import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;

import com.google.common.base.Preconditions;

import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.BCSID_MISMATCH;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_BLOCK;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNKNOWN_BCSID;
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNSUPPORTED_REQUEST;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_CHUNK_LIST_INCREMENTAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -56,8 +61,8 @@ public class BlockManagerImpl implements BlockManager {
private ConfigurationSource config;

private static final String DB_NULL_ERR_MSG = "DB cannot be null here";
private static final String NO_SUCH_BLOCK_ERR_MSG =
"Unable to find the block.";
public static final String INCREMENTAL_CHUNK_LIST = "incremental";
public static final String FULL_CHUNK = "full";

// Default Read Buffer capacity when Checksum is not present
private final int defaultReadBufferCapacity;
@@ -103,6 +108,15 @@ public long putBlock(Container container, BlockData data,
public static long persistPutBlock(KeyValueContainer container,
BlockData data, ConfigurationSource config, boolean endOfBlock)
throws IOException {
boolean incrementalEnabled =
config.getBoolean(OZONE_CHUNK_LIST_INCREMENTAL,
Contributor @ChenSammi commented on Jan 10, 2024:
The indent here looks like 8. It's expected to be 4. It's odd that there is no checkstyle warning.

OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT);
A Contributor commented:
Getting OZONE_CHUNK_LIST_INCREMENTAL can move to BlockManagerImpl constructor to avoid reading in every putBlock call.

The Contributor Author replied:
Yeah, I understand, but this is a static method, and it uses less than 0.1% of CPU time.

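For context, a rough sketch of the refactoring the reviewer has in mind, reading the flag once at construction time; the field name is hypothetical and this is not what the PR does:

// Hypothetical constructor-based variant of the flag lookup (not in the PR).
public BlockManagerImpl(ConfigurationSource conf) {
  this.config = conf;
  // "incrementalChunkList" would be a new instance field, read once here
  // instead of on every putBlock call.
  this.incrementalChunkList = conf.getBoolean(
      OZONE_CHUNK_LIST_INCREMENTAL, OZONE_CHUNK_LIST_INCREMENTAL_DEFAULT);
}

As the author notes, persistPutBlock is static, so it could not read such an instance field without further changes.
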
if (incrementalEnabled && !VersionedDatanodeFeatures.isFinalized(
HDDSLayoutFeature.HBASE_SUPPORT)) {
throw new StorageContainerException("DataNode has not finalized " +
"upgrading to a version that supports incremental chunk list.",
UNSUPPORTED_REQUEST);
}
Preconditions.checkNotNull(data, "BlockData cannot be null for put " +
"operation.");
Preconditions.checkState(data.getContainerID() >= 0, "Container Id " +
@@ -145,7 +159,6 @@ public static long persistPutBlock(KeyValueContainer container,
// update the blockData as well as BlockCommitSequenceId here
try (BatchOperation batch = db.getStore().getBatchHandler()
.initBatchOperation()) {

// If the block does not exist in the pendingPutBlockCache of the
// container, then check the DB to ascertain if it exists or not.
// If block exists in cache, blockCount should not be incremented.
@@ -158,8 +171,8 @@
}
}

db.getStore().getBlockDataTable().putWithBatch(
batch, containerData.getBlockKey(localID), data);
db.getStore().putBlockByID(batch, incrementalEnabled, localID, data,
containerData, endOfBlock);
if (bcsId != 0) {
db.getStore().getMetadataTable().putWithBatch(
batch, containerData.getBcsIdKey(), bcsId);
@@ -354,14 +367,6 @@ public void shutdown() {

private BlockData getBlockByID(DBHandle db, BlockID blockID,
KeyValueContainerData containerData) throws IOException {
String blockKey = containerData.getBlockKey(blockID.getLocalID());

BlockData blockData = db.getStore().getBlockDataTable().get(blockKey);
if (blockData == null) {
throw new StorageContainerException(NO_SUCH_BLOCK_ERR_MSG +
" BlockID : " + blockID, NO_SUCH_BLOCK);
}

return blockData;
return db.getStore().getBlockByID(blockID, containerData);
}
}
@@ -77,4 +77,6 @@ public ConfigurationSource getConfig() {
public DBColumnFamilyDefinition<String, Long> getFinalizeBlocksColumnFamily() {
return null;
}
public abstract DBColumnFamilyDefinition<String, BlockData>
getLastChunkInfoColumnFamily();
}
@@ -63,6 +63,8 @@ public abstract class AbstractDatanodeStore implements DatanodeStore {

private Table<String, BlockData> blockDataTable;

private Table<String, BlockData> lastChunkInfoTable;

private Table<String, BlockData> blockDataTableWithIterator;

private Table<String, ChunkInfoList> deletedBlocksTable;
@@ -186,6 +188,12 @@ public void start(ConfigurationSource config)
finalizeBlocksTableWithIterator);
checkTableStatus(finalizeBlocksTable, finalizeBlocksTable.getName());
}

if (dbDef.getLastChunkInfoColumnFamily() != null) {
lastChunkInfoTable = new DatanodeTable<>(
dbDef.getLastChunkInfoColumnFamily().getTable(this.store));
checkTableStatus(lastChunkInfoTable, lastChunkInfoTable.getName());
}
}
}

@@ -217,6 +225,11 @@ public Table<String, BlockData> getBlockDataTable() {
return blockDataTable;
}

@Override
public Table<String, BlockData> getLastChunkInfoTable() {
return lastChunkInfoTable;
}

@Override
public Table<String, ChunkInfoList> getDeletedBlocksTable() {
return deletedBlocksTable;
@@ -102,6 +102,12 @@ public DBColumnFamilyDefinition<String, Long> getMetadataColumnFamily() {
return DELETED_BLOCKS;
}

@Override
public DBColumnFamilyDefinition<String, BlockData>
getLastChunkInfoColumnFamily() {
return null;
}

@Override
public List<DBColumnFamilyDefinition<?, ?>> getColumnFamilies(String name) {
return COLUMN_FAMILIES.get(name);
@@ -101,6 +101,15 @@ public class DatanodeSchemaThreeDBDefinition
Long.class,
LongCodec.get());

public static final DBColumnFamilyDefinition<String, BlockData>
LAST_CHUNK_INFO =
new DBColumnFamilyDefinition<>(
"last_chunk_info",
String.class,
FixedLengthStringCodec.get(),
BlockData.class,
BlockData.getCodec());

private static String separator = "";

private static final Map<String, DBColumnFamilyDefinition<?, ?>>
@@ -109,8 +118,8 @@ public class DatanodeSchemaThreeDBDefinition
METADATA,
DELETED_BLOCKS,
DELETE_TRANSACTION,
FINALIZE_BLOCKS);

FINALIZE_BLOCKS,
LAST_CHUNK_INFO);

public DatanodeSchemaThreeDBDefinition(String dbPath,
ConfigurationSource config) {
@@ -134,6 +143,7 @@ public DatanodeSchemaThreeDBDefinition(String dbPath,
DELETED_BLOCKS.setCfOptions(cfOptions);
DELETE_TRANSACTION.setCfOptions(cfOptions);
FINALIZE_BLOCKS.setCfOptions(cfOptions);
LAST_CHUNK_INFO.setCfOptions(cfOptions);
}

@Override
@@ -158,6 +168,12 @@ public DBColumnFamilyDefinition<String, Long> getMetadataColumnFamily() {
return DELETED_BLOCKS;
}

@Override
public DBColumnFamilyDefinition<String, BlockData>
getLastChunkInfoColumnFamily() {
return LAST_CHUNK_INFO;
}

public DBColumnFamilyDefinition<String, DeletedBlocksTransaction>
getDeleteTransactionsColumnFamily() {
return DELETE_TRANSACTION;
@@ -86,6 +86,15 @@ public class DatanodeSchemaTwoDBDefinition
Long.class,
LongCodec.get());

public static final DBColumnFamilyDefinition<String, BlockData>
LAST_CHUNK_INFO =
new DBColumnFamilyDefinition<>(
"last_chunk_info",
String.class,
FixedLengthStringCodec.get(),
BlockData.class,
BlockData.getCodec());

public DatanodeSchemaTwoDBDefinition(String dbPath,
ConfigurationSource config) {
super(dbPath, config);
@@ -97,7 +106,8 @@ public DatanodeSchemaTwoDBDefinition(String dbPath,
METADATA,
DELETED_BLOCKS,
DELETE_TRANSACTION,
FINALIZE_BLOCKS);
FINALIZE_BLOCKS,
LAST_CHUNK_INFO);

@Override
public Map<String, DBColumnFamilyDefinition<?, ?>> getMap() {
@@ -121,6 +131,12 @@ public DBColumnFamilyDefinition<String, Long> getMetadataColumnFamily() {
return DELETED_BLOCKS;
}

@Override
public DBColumnFamilyDefinition<String, BlockData>
getLastChunkInfoColumnFamily() {
return LAST_CHUNK_INFO;
}

public DBColumnFamilyDefinition<Long, DeletedBlocksTransaction>
getDeleteTransactionsColumnFamily() {
return DELETE_TRANSACTION;
@@ -18,22 +18,31 @@
package org.apache.hadoop.ozone.container.metadata;

import com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter;
import org.apache.hadoop.hdds.utils.db.BatchOperation;
import org.apache.hadoop.hdds.utils.db.BatchOperationHandler;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList;
import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;

import java.io.Closeable;
import java.io.IOException;

import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_BLOCK;

/**
* Interface for interacting with datanode databases.
*/
public interface DatanodeStore extends Closeable {
String NO_SUCH_BLOCK_ERR_MSG =
"Unable to find the block.";

/**
* Start datanode manager.
@@ -84,6 +93,13 @@ public interface DatanodeStore extends Closeable {
*/
Table<String, Long> getFinalizeBlocksTable();

/**
* A Table that keeps the metadata of the last chunk of blocks.
*
* @return Table
*/
Table<String, BlockData> getLastChunkInfoTable();

/**
* Helper to create and write batch transactions.
*/
@@ -112,4 +128,28 @@ BlockIterator<Long> getFinalizeBlockIterator(long containerID,

default void compactionIfNeeded() throws Exception {
}

default BlockData getBlockByID(BlockID blockID,
KeyValueContainerData containerData) throws IOException {
String blockKey = containerData.getBlockKey(blockID.getLocalID());

// check block data table
BlockData blockData = getBlockDataTable().get(blockKey);

if (blockData == null) {
throw new StorageContainerException(
NO_SUCH_BLOCK_ERR_MSG + " BlockID : " + blockID, NO_SUCH_BLOCK);
}

return blockData;
}

default void putBlockByID(BatchOperation batch, boolean incremental,
long localID, BlockData data, KeyValueContainerData containerData,
boolean endOfBlock)
throws IOException {
// old client: override chunk list.
getBlockDataTable().putWithBatch(
batch, containerData.getBlockKey(localID), data);
}
}
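
To illustrate how the new lastChunkInfoTable and the putBlockByID hook are meant to fit together, here is a minimal, hypothetical override for a schema-v3 store implementation. It is not the PR's actual implementation: the merge of previously persisted chunks into the final block record is omitted, and the branching only sketches the intended behavior.

// Hypothetical override in a schema-v3 DatanodeStore implementation (sketch only).
@Override
public void putBlockByID(BatchOperation batch, boolean incremental,
    long localID, BlockData data, KeyValueContainerData containerData,
    boolean endOfBlock) throws IOException {
  String blockKey = containerData.getBlockKey(localID);
  if (!incremental || endOfBlock) {
    // Full chunk list from an old client, or the final PutBlock of the
    // block: write the complete record and drop any parked last-chunk entry.
    getBlockDataTable().putWithBatch(batch, blockKey, data);
    getLastChunkInfoTable().deleteWithBatch(batch, blockKey);
  } else {
    // Incremental update from an hsync'ing client: only the still-open last
    // chunk changed, so park its metadata instead of rewriting the block.
    getLastChunkInfoTable().putWithBatch(batch, blockKey, data);
  }
}
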