Merged

28 commits
c1d94aa: HDDS-12926. remove *.tmp.* exclusion in DU (sumitagrawl, May 20, 2025)
0eabb6e: Merge branch 'apache:master' into master (sumitagrawl, May 21, 2025)
d39a809: Merge branch 'apache:master' into master (sumitagrawl, Jun 4, 2025)
44e2cb8: HDDS-13176. containerIds table value format change to proto from string (sumitagrawl, Jun 9, 2025)
15f8143: fix checkstyle (sumitagrawl, Jun 9, 2025)
9e5522a: fix test failure (sumitagrawl, Jun 9, 2025)
99b6ef3: fix copilot comment (sumitagrawl, Jun 9, 2025)
82eeb0d: add test cases (sumitagrawl, Jun 10, 2025)
1f39476: fix checkstyle (sumitagrawl, Jun 10, 2025)
bd5f494: fix review comment (sumitagrawl, Jun 11, 2025)
f89fc31: fix checkstyle (sumitagrawl, Jun 11, 2025)
ec8e08f: review comment fix (sumitagrawl, Jun 13, 2025)
fc5ba25: fix review comments (sumitagrawl, Jun 13, 2025)
b3c3c12: fix review comment (sumitagrawl, Jun 13, 2025)
6a1d9b2: fix review comment (sumitagrawl, Jun 13, 2025)
2f8040c: Merge branch 'apache:master' into master (sumitagrawl, Jun 16, 2025)
788aad7: Merge branch 'apache:master' into master (sumitagrawl, Jun 23, 2025)
1052e97: Merge remote-tracking branch 'origin/master' into HDDS-13176 (sumitagrawl, Jun 23, 2025)
6e22964: fix review comments (sumitagrawl, Jun 23, 2025)
2850588: Merge branch 'apache:master' into master (sumitagrawl, Jun 26, 2025)
ec08da8: Merge remote-tracking branch 'origin/master' into HDDS-13176 (sumitagrawl, Jun 26, 2025)
fa05a56: fix failure after merge (sumitagrawl, Jun 26, 2025)
d14ae78: Merge branch 'apache:master' into master (sumitagrawl, Jul 10, 2025)
8b28beb: Merge remote-tracking branch 'origin/master' into HDDS-13176 (sumitagrawl, Jul 10, 2025)
073b74b: checkstyle fix (sumitagrawl, Jul 11, 2025)
b4dc071: Merge branch 'apache:master' into master (sumitagrawl, Jul 14, 2025)
c1ff360: Merge remote-tracking branch 'origin/master' into HDDS-13176 (sumitagrawl, Jul 14, 2025)
9af70bd: review fix (sumitagrawl, Jul 22, 2025)
HDDSLayoutFeature.java

@@ -41,7 +41,8 @@ public enum HDDSLayoutFeature implements LayoutFeature {
HADOOP_PRC_PORTS_IN_DATANODEDETAILS(7, "Adding Hadoop RPC ports " +
"to DatanodeDetails."),
HBASE_SUPPORT(8, "Datanode RocksDB Schema Version 3 has an extra table " +
"for the last chunk of blocks to support HBase.)");
"for the last chunk of blocks to support HBase.)"),
WITNESSED_CONTAINER_DB_PROTO_VALUE(9, "ContainerID table schema to use value type as proto");

////////////////////////////// //////////////////////////////
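The new layout feature gates the on-disk schema change: until a datanode finalizes WITNESSED_CONTAINER_DB_PROTO_VALUE, it must keep reading and writing the old string-valued containerIds table. The gating pattern, as used later in this PR in WitnessedContainerMetadataStoreImpl:

    // Upgrade gate: fall back to the pre-upgrade string-valued table
    // until the layout feature is finalized on this datanode.
    if (!VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.WITNESSED_CONTAINER_DB_PROTO_VALUE)) {
      return previousVersionTables.getContainerIdsTable();
    }
    return containerCreateInfoTable;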

ContainerSet.java

@@ -23,6 +23,7 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.protobuf.Message;
import jakarta.annotation.Nullable;
import java.io.IOException;
import java.time.Clock;
import java.util.ArrayList;
@@ -43,11 +44,12 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.common.utils.ContainerLogger;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.metadata.ContainerCreateInfo;
import org.apache.hadoop.ozone.container.metadata.WitnessedContainerMetadataStore;
import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerScanner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -67,26 +69,28 @@ public class ContainerSet implements Iterable<Container<?>> {
new ConcurrentSkipListMap<>();
private final Clock clock;
private long recoveringTimeout;
private final Table<ContainerID, String> containerIdsTable;
@Nullable
private final WitnessedContainerMetadataStore containerMetadataStore;
// Handler that will be invoked when a scan of a container in this set is requested.
private OnDemandContainerScanner containerScanner;

public static ContainerSet newReadOnlyContainerSet(long recoveringTimeout) {
return new ContainerSet(null, recoveringTimeout);
}

public static ContainerSet newRwContainerSet(Table<ContainerID, String> containerIdsTable, long recoveringTimeout) {
Objects.requireNonNull(containerIdsTable, "containerIdsTable == null");
return new ContainerSet(containerIdsTable, recoveringTimeout);
public static ContainerSet newRwContainerSet(
WitnessedContainerMetadataStore metadataStore, long recoveringTimeout) {
Objects.requireNonNull(metadataStore, "WitnessedContainerMetadataStore == null");
return new ContainerSet(metadataStore, recoveringTimeout);
}

private ContainerSet(Table<ContainerID, String> continerIdsTable, long recoveringTimeout) {
this(continerIdsTable, recoveringTimeout, null);
private ContainerSet(WitnessedContainerMetadataStore containerMetadataStore, long recoveringTimeout) {
this(containerMetadataStore, recoveringTimeout, null);
}

ContainerSet(Table<ContainerID, String> continerIdsTable, long recoveringTimeout, Clock clock) {
ContainerSet(WitnessedContainerMetadataStore containerMetadataStore, long recoveringTimeout, Clock clock) {
this.clock = clock != null ? clock : Clock.systemUTC();
this.containerIdsTable = continerIdsTable;
this.containerMetadataStore = containerMetadataStore;
this.recoveringTimeout = recoveringTimeout;
}

@@ -188,13 +192,7 @@ private boolean addContainer(Container<?> container, boolean overwrite) throws
LOG.debug("Container with container Id {} is added to containerMap",
containerId);
}
try {
if (containerIdsTable != null) {
containerIdsTable.put(ContainerID.valueOf(containerId), containerState.toString());
}
} catch (IOException e) {
throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION);
}
updateContainerIdTable(containerId, containerState);
missingContainerSet.remove(containerId);
if (container.getContainerData().getState() == RECOVERING) {
recoveringContainerMap.put(
@@ -209,6 +207,17 @@ private boolean addContainer(Container<?> container, boolean overwrite) throws
}
}

private void updateContainerIdTable(long containerId, State containerState) throws StorageContainerException {
if (null != containerMetadataStore) {
try {
containerMetadataStore.getContainerCreateInfoTable().put(ContainerID.valueOf(containerId),
ContainerCreateInfo.valueOf(containerState));
} catch (IOException e) {
throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION);
}
}
}

/**
* Returns the Container with specified containerId.
* @param containerId ID of the container to get
@@ -270,13 +279,7 @@ private boolean removeContainer(long containerId, boolean markMissing, boolean r
}
Container<?> removed = containerMap.remove(containerId);
if (removeFromDB) {
try {
if (containerIdsTable != null) {
containerIdsTable.delete(ContainerID.valueOf(containerId));
}
} catch (IOException e) {
throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION);
}
deleteFromContainerTable(containerId);
}
if (removed == null) {
LOG.debug("Container with containerId {} is not present in " +
@@ -289,6 +292,16 @@ private boolean removeContainer(long containerId, boolean markMissing, boolean r
}
}

private void deleteFromContainerTable(long containerId) throws StorageContainerException {
if (null != containerMetadataStore) {
try {
containerMetadataStore.getContainerCreateInfoTable().delete(ContainerID.valueOf(containerId));
} catch (IOException e) {
throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION);
}
}
}

/**
* Removes the Recovering Container matching with specified containerId.
* @param containerId ID of the container to remove.
@@ -503,10 +516,6 @@ public Set<Long> getMissingContainerSet() {
return missingContainerSet;
}

public Table<ContainerID, String> getContainerIdsTable() {
return containerIdsTable;
}

/**
* Builds the missing container set by taking a diff between total no
* containers actually found and number of containers which actually
ContainerCreateInfo.java (new file)
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.container.metadata;

import java.util.function.Supplier;
import net.jcip.annotations.Immutable;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.utils.db.Codec;
import org.apache.hadoop.hdds.utils.db.DelegatedCodec;
import org.apache.hadoop.hdds.utils.db.Proto3Codec;
import org.apache.ratis.util.MemoizedSupplier;

/**
* ContainerCreateInfo holds the container state and other information recorded at container creation.
* This class is immutable.
*/
@Immutable
public final class ContainerCreateInfo {
private static final Codec<ContainerCreateInfo> CODEC = new DelegatedCodec<>(
Proto3Codec.get(ContainerProtos.ContainerCreateInfo.getDefaultInstance()),
ContainerCreateInfo::getFromProtobuf, ContainerCreateInfo::getProtobuf,
ContainerCreateInfo.class);

private final ContainerProtos.ContainerDataProto.State state;
private final Supplier<ContainerProtos.ContainerCreateInfo> proto;

public static Codec<ContainerCreateInfo> getCodec() {
return CODEC;
}

public static Codec<ContainerCreateInfo> getNewCodec() {
return CODEC;
}

private ContainerCreateInfo(ContainerProtos.ContainerDataProto.State state) {
this.state = state;
this.proto = MemoizedSupplier.valueOf(
() -> ContainerProtos.ContainerCreateInfo.newBuilder().setState(state).build());
}

/**
* Factory method for creation of ContainerCreateInfo.
* @param state State
* @return ContainerCreateInfo.
*/
public static ContainerCreateInfo valueOf(final ContainerProtos.ContainerDataProto.State state) {
return new ContainerCreateInfo(state);
}

public ContainerProtos.ContainerCreateInfo getProtobuf() {
return proto.get();
}

public static ContainerCreateInfo getFromProtobuf(ContainerProtos.ContainerCreateInfo proto) {
return ContainerCreateInfo.valueOf(proto.getState());
}

public ContainerProtos.ContainerDataProto.State getState() {
return state;
}
}
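A minimal usage sketch of the new codec, assuming the byte[]-based toPersistedFormat/fromPersistedFormat methods declared by org.apache.hadoop.hdds.utils.db.Codec (run inside a method that may throw IOException; OPEN is one of the ContainerDataProto.State values):

    // Illustrative round-trip through the proto-backed codec; the table value
    // is now serialized ContainerCreateInfo proto bytes instead of a string.
    Codec<ContainerCreateInfo> codec = ContainerCreateInfo.getCodec();
    ContainerCreateInfo info =
        ContainerCreateInfo.valueOf(ContainerProtos.ContainerDataProto.State.OPEN);
    byte[] bytes = codec.toPersistedFormat(info);
    ContainerCreateInfo decoded = codec.fromPersistedFormat(bytes);
    assert decoded.getState() == ContainerProtos.ContainerDataProto.State.OPEN;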
WitnessedContainerDBDefinition.java

@@ -22,25 +22,24 @@
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition;
import org.apache.hadoop.hdds.utils.db.DBDefinition;
import org.apache.hadoop.hdds.utils.db.StringCodec;
import org.apache.hadoop.ozone.OzoneConsts;

/**
* Class for defining the schema for master volume in a datanode.
*/
public final class WitnessedContainerDBDefinition extends DBDefinition.WithMap {

private static final String CONTAINER_IDS_TABLE_NAME = "containerIds";
private static final String CONTAINER_CREATE_INFO_TABLE_NAME = "ContainerCreateInfoTable";

public static final DBColumnFamilyDefinition<ContainerID, String>
CONTAINER_IDS_TABLE = new DBColumnFamilyDefinition<>(
CONTAINER_IDS_TABLE_NAME,
public static final DBColumnFamilyDefinition<ContainerID, ContainerCreateInfo>
CONTAINER_CREATE_INFO_TABLE_DEF = new DBColumnFamilyDefinition<>(
CONTAINER_CREATE_INFO_TABLE_NAME,
ContainerID.getCodec(),
StringCodec.get());
ContainerCreateInfo.getCodec());

private static final Map<String, DBColumnFamilyDefinition<?, ?>>
COLUMN_FAMILIES = DBColumnFamilyDefinition.newUnmodifiableMap(
CONTAINER_IDS_TABLE);
CONTAINER_CREATE_INFO_TABLE_DEF);

private static final WitnessedContainerDBDefinition INSTANCE = new WitnessedContainerDBDefinition();

@@ -62,7 +61,7 @@ public String getLocationConfigKey() {
return ScmConfigKeys.OZONE_SCM_DATANODE_ID_DIR;
}

DBColumnFamilyDefinition<ContainerID, String> getContainerIdsTable() {
return CONTAINER_IDS_TABLE;
DBColumnFamilyDefinition<ContainerID, ContainerCreateInfo> getContainerCreateInfoTableDef() {
return CONTAINER_CREATE_INFO_TABLE_DEF;
}
}
WitnessedContainerMetadataStore.java

@@ -29,5 +29,5 @@ public interface WitnessedContainerMetadataStore extends DBStoreManager {
*
* @return Table
*/
Table<ContainerID, String> getContainerIdsTable();
Table<ContainerID, ContainerCreateInfo> getContainerCreateInfoTable();
}
WitnessedContainerMetadataStoreImpl.java

@@ -22,21 +22,28 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
import org.apache.hadoop.hdds.utils.db.CodecException;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
import org.apache.hadoop.hdds.utils.db.DelegatedCodec;
import org.apache.hadoop.hdds.utils.db.RocksDatabaseException;
import org.apache.hadoop.hdds.utils.db.StringCodec;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions;
import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures;

/**
* Class for interacting with database in the master volume of a datanode.
*/
public final class WitnessedContainerMetadataStoreImpl extends AbstractRDBStore<WitnessedContainerDBDefinition>
implements WitnessedContainerMetadataStore {

private Table<ContainerID, String> containerIdsTable;
private Table<ContainerID, ContainerCreateInfo> containerCreateInfoTable;
private PreviousVersionTables previousVersionTables;

private static final ConcurrentMap<String, WitnessedContainerMetadataStore> INSTANCES =
new ConcurrentHashMap<>();

@@ -67,13 +74,53 @@ private WitnessedContainerMetadataStoreImpl(ConfigurationSource config, boolean
@Override
protected DBStore initDBStore(DBStoreBuilder dbStoreBuilder, ManagedDBOptions options, ConfigurationSource config)
throws RocksDatabaseException, CodecException {
previousVersionTables = new PreviousVersionTables();
previousVersionTables.addTables(dbStoreBuilder);
final DBStore dbStore = dbStoreBuilder.build();
this.containerIdsTable = this.getDbDef().getContainerIdsTable().getTable(dbStore);
previousVersionTables.init(dbStore);
this.containerCreateInfoTable = this.getDbDef().getContainerCreateInfoTableDef().getTable(dbStore);
return dbStore;
}

@Override
public Table<ContainerID, String> getContainerIdsTable() {
return containerIdsTable;
public Table<ContainerID, ContainerCreateInfo> getContainerCreateInfoTable() {
if (!VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.WITNESSED_CONTAINER_DB_PROTO_VALUE)) {
[Review thread on this check]

Contributor: Let us create a wrapper Table implementation which would automatically switch to the finalized table once the layout upgrade is finalized. We can return the wrapped Table interface, which means the caller of this method can keep the value in memory and we wouldn't have to change any caller behaviour. Right now the caller has to call witnessedMetadataStore.getContainerCreateInfoTable().put or delete every time; we can get this completely abstracted out. [A rough sketch of this proposed wrapper follows this file's diff.]

Contributor: We could have this wrapped object present as a member in the class itself, so on finalize all we need to update is a boolean value in the wrapped table.

Contributor: This can lead to bugs if called incorrectly. It is important that we abstract this out.

Contributor (author): Created the above JIRA for reference. I do not agree with caching the table, as the reference would need to be updated again after an upgrade, or in other future cases. It can be discussed further if there is real value.

Contributor: We have multiple instances in the code base where we cache the reference to the table. The expectation, from what I understand, is that once the metadata store is initialized the caller is free to cache the reference; right now this model doesn't prevent that. We should either come up with a model which prevents caching the reference altogether, which would mean redesigning the interface, or make the implementation abstracted so that it is foolproof against reference caching.

Contributor (@swamirishi, Jul 22, 2025): A few instances...

    MinHeapIterator(OMMetadataManager omMetadataManager, String prefixKey,
        String startKey, String volumeName, String bucketName,
        Table... tables) throws IOException {

    private <VALUE> void recalculateUsages(
        Table<String, VALUE> table, Map<String, CountPair> prefixUsageMap,
        String strType, boolean haveValue) throws UncheckedIOException,
        UncheckedExecutionException {

    public PipelineStateManagerImpl(
        Table<PipelineID, Pipeline> pipelineStore, NodeManager nodeManager,
        DBTransactionBuffer buffer) throws IOException {
      this.pipelineStateMap = new PipelineStateMap();
      this.nodeManager = nodeManager;
      this.pipelineStore = pipelineStore;

Our entire code base is spread with such use cases. If you are proposing something different, then we should design the interface to prevent future bugs.

Contributor (author): It can be taken up in the JIRA mentioned above for designing and defining the interface.

[End review thread]
return previousVersionTables.getContainerIdsTable();
}
return containerCreateInfoTable;
}

public PreviousVersionTables getPreviousVersionTables() {
return previousVersionTables;
}

/**
* Holds the old-version tables required during upgrade; these are initialized only while the layout feature is not yet finalized.
*/
public static class PreviousVersionTables {
private static final String CONTAINER_IDS_STR_VAL_TABLE = "containerIds";
private Table<ContainerID, ContainerCreateInfo> containerIdsTable;

public PreviousVersionTables() {
}

public void addTables(DBStoreBuilder dbStoreBuilder) {
if (!VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.WITNESSED_CONTAINER_DB_PROTO_VALUE)) {
dbStoreBuilder.addTable(CONTAINER_IDS_STR_VAL_TABLE);
}
}

public void init(DBStore dbStore) throws RocksDatabaseException, CodecException {
if (!VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.WITNESSED_CONTAINER_DB_PROTO_VALUE)) {
this.containerIdsTable = dbStore.getTable(CONTAINER_IDS_STR_VAL_TABLE, ContainerID.getCodec(),
new DelegatedCodec<>(StringCodec.get(),
(strVal) -> ContainerCreateInfo.valueOf(ContainerProtos.ContainerDataProto.State.valueOf(strVal)),
(obj) -> obj.getState().name(), ContainerCreateInfo.class));
}
}

public Table<ContainerID, ContainerCreateInfo> getContainerIdsTable() {
return containerIdsTable;
}
}
}
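For context, a minimal sketch of the wrapper Table proposed in the review thread above. The class is hypothetical and not part of this PR; it assumes Table#put and Table#delete throw IOException, and reuses the fields defined in this file:

    // Hypothetical sketch of the reviewer-proposed wrapper; not part of this PR.
    // Callers could safely cache a reference to this object because every call
    // re-resolves the delegate table from the current finalization state.
    private final class FinalizationAwareCreateInfoTable {
      private volatile boolean finalized =
          VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.WITNESSED_CONTAINER_DB_PROTO_VALUE);

      private Table<ContainerID, ContainerCreateInfo> delegate() {
        return finalized ? containerCreateInfoTable
            : previousVersionTables.getContainerIdsTable();
      }

      void put(ContainerID id, ContainerCreateInfo info) throws IOException {
        delegate().put(id, info);
      }

      void delete(ContainerID id) throws IOException {
        delegate().delete(id);
      }

      // Invoked once when the layout feature finalizes; after this, all cached
      // references transparently read and write the proto-valued table.
      void onFinalize() {
        finalized = true;
      }
    }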
OzoneContainer.java

@@ -197,8 +197,7 @@ public OzoneContainer(HddsDatanodeService hddsDatanodeService,
OZONE_RECOVERING_CONTAINER_TIMEOUT,
OZONE_RECOVERING_CONTAINER_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS);
this.witnessedContainerMetadataStore = WitnessedContainerMetadataStoreImpl.get(conf);
containerSet = ContainerSet.newRwContainerSet(witnessedContainerMetadataStore.getContainerIdsTable(),
recoveringContainerTimeout);
containerSet = ContainerSet.newRwContainerSet(witnessedContainerMetadataStore, recoveringContainerTimeout);
volumeSet.setGatherContainerUsages(this::gatherContainerUsages);
metadataScanner = null;

@@ -351,7 +350,8 @@ public void buildContainerSet() throws IOException {
for (Thread volumeThread : volumeThreads) {
volumeThread.join();
}
try (TableIterator<ContainerID, ContainerID> itr = containerSet.getContainerIdsTable().keyIterator()) {
try (TableIterator<ContainerID, ContainerID> itr
= getWitnessedContainerMetadataStore().getContainerCreateInfoTable().keyIterator()) {
final Map<ContainerID, Long> containerIds = new HashMap<>();
while (itr.hasNext()) {
containerIds.put(itr.next(), 0L);
@@ -686,4 +686,7 @@ public void compactDb() {
}
}

public WitnessedContainerMetadataStore getWitnessedContainerMetadataStore() {
return witnessedContainerMetadataStore;
}
}