-
Notifications
You must be signed in to change notification settings - Fork 588
HDDS-11650. ContainerId list to track all containers created in a datanode #7402
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4d2ea78
87a4809
5603b56
c048a5e
f4c538f
f22f0d1
c94734a
e579d0e
2c376bc
4b77481
5027029
c5392d0
7c4837a
1ae494b
bdc2e50
06ca347
8f98ab9
7d7f078
108bf82
5b3d27a
af0f757
b97d874
09b2dfe
082cfc9
3766bc3
46ee375
564ae17
af144a2
79e5de8
b0ffe5d
7a0e341
730d75e
5446f4a
9cbc45f
261f8fc
4320d50
ac3918c
3d9431a
50b27bf
827bc86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,8 +23,12 @@ | |
| import com.google.common.collect.ImmutableMap; | ||
| import com.google.protobuf.Message; | ||
| import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; | ||
| import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; | ||
|
|
||
| import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; | ||
| import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; | ||
| import org.apache.hadoop.hdds.utils.db.InMemoryTestTable; | ||
| import org.apache.hadoop.hdds.utils.db.Table; | ||
| import org.apache.hadoop.ozone.container.common.interfaces.Container; | ||
| import org.apache.hadoop.ozone.container.common.statemachine.StateContext; | ||
| import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; | ||
|
|
@@ -65,10 +69,24 @@ public class ContainerSet implements Iterable<Container<?>> { | |
| new ConcurrentSkipListMap<>(); | ||
| private Clock clock; | ||
| private long recoveringTimeout; | ||
| private final Table<Long, String> containerIdsTable; | ||
|
|
||
| /** | ||
| * Test-only constructor: builds a container set whose container IDs are tracked in a fresh | ||
| * {@code InMemoryTestTable} instead of a caller-supplied table. | ||
| * | ||
| * @param recoveringTimeout value stored as this set's recovering timeout | ||
| */ | ||
| @VisibleForTesting | ||
| public ContainerSet(long recoveringTimeout) { | ||
| this(new InMemoryTestTable<>(), recoveringTimeout); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a writable (non read-only) container set backed by the given container IDs table. | ||
| * Delegates to the three-argument constructor with {@code readOnly = false}, so a {@code null} table is rejected. | ||
| * | ||
| * @param containerIdsTable table used to record the IDs of containers added to this set; must not be null | ||
| * @param recoveringTimeout value stored as this set's recovering timeout | ||
| * @throws IllegalArgumentException if {@code containerIdsTable} is null | ||
| */ | ||
| public ContainerSet(Table<Long, String> containerIdsTable, long recoveringTimeout) { | ||
| this(containerIdsTable, recoveringTimeout, false); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a container set. | ||
| * | ||
| * @param containerIdsTable table used to record the IDs of containers added to this set; may be null only when | ||
| * {@code readOnly} is true, in which case no IDs are written or deleted | ||
| * @param recoveringTimeout value stored as this set's recovering timeout | ||
| * @param readOnly when false, a non-null {@code containerIdsTable} is mandatory | ||
| * @throws IllegalArgumentException if {@code readOnly} is false and {@code containerIdsTable} is null | ||
| */ | ||
| public ContainerSet(Table<Long, String> containerIdsTable, long recoveringTimeout, boolean readOnly) { | ||
| // Validate the argument before touching any field so a rejected call leaves nothing half-initialised. | ||
| if (!readOnly && containerIdsTable == null) { | ||
| throw new IllegalArgumentException("Container table cannot be null when container set is not read only"); | ||
| } | ||
| this.clock = Clock.system(ZoneOffset.UTC); | ||
| this.containerIdsTable = containerIdsTable; | ||
| this.recoveringTimeout = recoveringTimeout; | ||
| } | ||
|
|
||
| public long getCurrentTime() { | ||
|
|
@@ -85,22 +103,64 @@ public void setRecoveringTimeout(long recoveringTimeout) { | |
| this.recoveringTimeout = recoveringTimeout; | ||
| } | ||
|
|
||
| /** | ||
| * Adds a container to the container map, refusing containers whose ID is currently marked as missing. | ||
| * The missing check ({@code ensureContainerNotMissing}) runs before the map is touched, and the map insert is a | ||
| * {@code putIfAbsent}, so a container that is already present is never replaced. | ||
| * | ||
| * @param container container to be added | ||
| * @return If container is added to containerMap returns true, otherwise | ||
| * false | ||
| * @throws StorageContainerException if the container ID is in the missing container set, or if recording the | ||
| * container ID in the container IDs table fails with an {@code IOException} | ||
| */ | ||
| public boolean addContainer(Container<?> container) throws StorageContainerException { | ||
| final boolean overwriteMissing = false; | ||
| return addContainer(container, overwriteMissing); | ||
| } | ||
|
|
||
| /** | ||
| * Adds a container to the container map even if its ID is currently marked as missing. | ||
| * "Overwrite" here only means that the missing-container check is skipped; on a successful insert the ID is | ||
| * removed from the missing container set. The map insert is still a {@code putIfAbsent}, so a container that is | ||
| * already present in the map is not replaced. | ||
| * | ||
| * @param container container to be added | ||
| * @return If container is added to containerMap returns true, otherwise | ||
| * false | ||
| * @throws StorageContainerException if recording the container ID in the container IDs table fails with an | ||
| * {@code IOException} | ||
| */ | ||
| public boolean addContainerByOverwriteMissingContainer(Container<?> container) throws StorageContainerException { | ||
| final boolean overwriteMissing = true; | ||
| return addContainer(container, overwriteMissing); | ||
| } | ||
|
|
||
| /** | ||
| * Verifies that the given container ID is not in the missing container set. | ||
| * | ||
| * @param containerId ID of the container to check | ||
| * @param state container state; used only to build the error message | ||
| * @throws StorageContainerException with result {@code CONTAINER_MISSING} if the ID is marked as missing | ||
| */ | ||
| public void ensureContainerNotMissing(long containerId, State state) throws StorageContainerException { | ||
| if (!missingContainerSet.contains(containerId)) { | ||
| return; | ||
| } | ||
| final String message = String.format( | ||
| "Container with container Id %d with state : %s is missing in the DN.", containerId, state); | ||
| throw new StorageContainerException(message, ContainerProtos.Result.CONTAINER_MISSING); | ||
| } | ||
|
|
||
| /** | ||
| * Add Container to container map. | ||
| * @param container container to be added | ||
| * @param overwrite if true should overwrite the container if the container was missing. | ||
| * @return If container is added to containerMap returns true, otherwise | ||
| * false | ||
| */ | ||
| public boolean addContainer(Container<?> container) throws | ||
| private boolean addContainer(Container<?> container, boolean overwrite) throws | ||
| StorageContainerException { | ||
| Preconditions.checkNotNull(container, "container cannot be null"); | ||
|
|
||
| long containerId = container.getContainerData().getContainerID(); | ||
| State containerState = container.getContainerData().getState(); | ||
| if (!overwrite) { | ||
| ensureContainerNotMissing(containerId, containerState); | ||
| } | ||
| if (containerMap.putIfAbsent(containerId, container) == null) { | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("Container with container Id {} is added to containerMap", | ||
| containerId); | ||
| } | ||
| try { | ||
| if (containerIdsTable != null) { | ||
| containerIdsTable.put(containerId, containerState.toString()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @swamirishi, why not put the bcsid? Then we can get it back (instead of setting it to zero) when rebuilding the container set.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't need the bcsid, since the bcsid is already stored in the ContainerTable. We don't use the value anywhere today, but eventually we want to store the replica index for the EC case. We intend to use this for correctness. See https://issues.apache.org/jira/browse/HDDS-12722
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently, it put the |
||
| } | ||
| } catch (IOException e) { | ||
| throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION); | ||
| } | ||
| missingContainerSet.remove(containerId); | ||
| // wish we could have done this from ContainerData.setState | ||
| container.getContainerData().commitSpace(); | ||
| if (container.getContainerData().getState() == RECOVERING) { | ||
|
|
@@ -122,21 +182,69 @@ public boolean addContainer(Container<?> container) throws | |
| * @return Container | ||
| */ | ||
| public Container<?> getContainer(long containerId) { | ||
| Preconditions.checkState(containerId >= 0, | ||
| "Container Id cannot be negative."); | ||
| Preconditions.checkState(containerId >= 0, "Container Id cannot be negative."); | ||
| return containerMap.get(containerId); | ||
| } | ||
|
|
||
| /** | ||
| * Removes a container from the in-memory container map and deletes its ID from the container IDs table (when a | ||
| * table is configured). Intended for the case where the container data has been removed completely from the node. | ||
| * The ID is not added to the missing container set. | ||
| * | ||
| * @param containerId ID of the container to remove | ||
| * @return True if container is removed from containerMap. | ||
| * @throws StorageContainerException if deleting the ID from the container IDs table fails with an | ||
| * {@code IOException} | ||
| */ | ||
| public boolean removeContainer(long containerId) throws StorageContainerException { | ||
| final boolean markMissing = false; | ||
| final boolean removeFromDB = true; | ||
| return removeContainer(containerId, markMissing, removeFromDB); | ||
| } | ||
|
|
||
| /** | ||
| * Removes a container from the in-memory container map only. The container IDs table and the missing container | ||
| * set are left untouched. Use this when the container is still present on disk and only its in-memory state | ||
| * needs to be replaced. | ||
| * | ||
| * @param containerId ID of the container to remove | ||
| * @return True if container is removed from containerMap. | ||
| * @throws StorageContainerException declared by the shared removal helper | ||
| */ | ||
| public boolean removeContainerOnlyFromMemory(long containerId) throws StorageContainerException { | ||
| final boolean markMissing = false; | ||
| final boolean removeFromDB = false; | ||
| return removeContainer(containerId, markMissing, removeFromDB); | ||
| } | ||
|
|
||
| /** | ||
| * Marks a container as missing: its ID is first added to the missing container set and the container is then | ||
| * removed from the in-memory container map. The container IDs table is left untouched. | ||
| * | ||
| * @param containerId ID of the container to mark as missing | ||
| * @return True if container is removed from containerMap. | ||
| * @throws StorageContainerException declared by the shared removal helper | ||
| */ | ||
| public boolean removeMissingContainer(long containerId) throws StorageContainerException { | ||
| final boolean markMissing = true; | ||
| final boolean removeFromDB = false; | ||
| return removeContainer(containerId, markMissing, removeFromDB); | ||
| } | ||
|
|
||
| /** | ||
| * Removes the Container matching with specified containerId. | ||
| * @param containerId ID of the container to remove | ||
| * @return If container is removed from containerMap returns true, otherwise | ||
| * false | ||
| */ | ||
| public boolean removeContainer(long containerId) { | ||
| private boolean removeContainer(long containerId, boolean markMissing, boolean removeFromDB) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. false, false: This happens when a duplicate container is found on startup; the container info in memory is swapped by removing the old entry and adding the updated container info.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was under the impression that once a container is created, it will remain in the DB forever.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't have any use of that container information. Why keep it if it is not required? |
||
| throws StorageContainerException { | ||
| Preconditions.checkState(containerId >= 0, | ||
| "Container Id cannot be negative."); | ||
| //We need to add to missing container set before removing containerMap since there could be write chunk operation | ||
| // that could recreate the container in another volume if we remove it from the map before adding to missing | ||
| // container. | ||
| if (markMissing) { | ||
| missingContainerSet.add(containerId); | ||
| } | ||
| Container<?> removed = containerMap.remove(containerId); | ||
| if (removeFromDB) { | ||
| try { | ||
| if (containerIdsTable != null) { | ||
| containerIdsTable.delete(containerId); | ||
| } | ||
| } catch (IOException e) { | ||
| throw new StorageContainerException(e, ContainerProtos.Result.IO_EXCEPTION); | ||
| } | ||
| } | ||
| if (removed == null) { | ||
| LOG.debug("Container with containerId {} is not present in " + | ||
| "containerMap", containerId); | ||
|
|
@@ -190,20 +298,20 @@ public int containerCount() { | |
| * | ||
| * @param context StateContext | ||
| */ | ||
| public void handleVolumeFailures(StateContext context) { | ||
| public void handleVolumeFailures(StateContext context) throws StorageContainerException { | ||
| AtomicBoolean failedVolume = new AtomicBoolean(false); | ||
| AtomicInteger containerCount = new AtomicInteger(0); | ||
| containerMap.values().forEach(c -> { | ||
| for (Container<?> c : containerMap.values()) { | ||
| ContainerData data = c.getContainerData(); | ||
| if (data.getVolume().isFailed()) { | ||
| removeContainer(data.getContainerID()); | ||
| removeMissingContainer(data.getContainerID()); | ||
| LOG.debug("Removing Container {} as the Volume {} " + | ||
| "has failed", data.getContainerID(), data.getVolume()); | ||
| "has failed", data.getContainerID(), data.getVolume()); | ||
| failedVolume.set(true); | ||
| containerCount.incrementAndGet(); | ||
| ContainerLogger.logLost(data, "Volume failure"); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| if (failedVolume.get()) { | ||
| try { | ||
|
|
@@ -362,6 +470,10 @@ public Set<Long> getMissingContainerSet() { | |
| return missingContainerSet; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the container IDs table this set was constructed with. | ||
| * | ||
| * @return the backing table; may be null, since the constructor only rejects a null table when the set is not | ||
| * read only | ||
| */ | ||
| public Table<Long, String> getContainerIdsTable() { | ||
| return this.containerIdsTable; | ||
| } | ||
|
|
||
| /** | ||
| * Builds the missing container set by taking a diff between total no | ||
| * containers actually found and number of containers which actually | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@swamirishi, it won't overwrite when
`overwrite` is true. It will just `ensureContainerNotMissing`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would do a putToMap which would overwrite the existing data in the Map. Skipping ensureContainerNotMissing will do this since ensureContainerNotMissing will throw an exception
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is
`putToMap`? Do you mean `containerMap.putIfAbsent(..)`? When
`putIfAbsent(..)` returns non-null, it will Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah the container would be either in the missingContainerSet or containerMap. It cannot be in both. On overwrite we are saying we will skip missingContainerSet check and forcefully overwrite the missing container replica.