Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ public void init() throws Exception {
taskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(15));
conf.setFromObject(taskConfig);

conf.set("ozone.scm.stale.node.interval", "10s");
conf.set("ozone.scm.dead.node.interval", "20s");
conf.set("ozone.scm.stale.node.interval", "6s");
conf.set("ozone.scm.dead.node.interval", "10s");
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1)
.includeRecon(true).build();
cluster.waitForClusterToBeReady();
Expand Down Expand Up @@ -102,9 +102,6 @@ public void testSyncSCMContainerInfo() throws Exception {
final ContainerInfo container2 = scmContainerManager.allocateContainer(
RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.ONE), "admin");
reconContainerManager.allocateContainer(
RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.ONE), "admin");
scmContainerManager.updateContainerState(container1.containerID(),
HddsProtos.LifeCycleEvent.FINALIZE);
scmContainerManager.updateContainerState(container2.containerID(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.ozone.recon.ReconConstants.CONTAINER_COUNT;
import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FETCH_COUNT;
import static org.apache.hadoop.ozone.recon.ReconConstants.TOTAL_KEYS;
import static org.apache.hadoop.ozone.recon.ReconConstants.TOTAL_USED_BYTES;

Expand All @@ -65,6 +66,7 @@ public class ContainerHealthTask extends ReconScmTask {

private static final Logger LOG =
LoggerFactory.getLogger(ContainerHealthTask.class);
public static final int FETCH_COUNT = Integer.parseInt(DEFAULT_FETCH_COUNT);

private ReadWriteLock lock = new ReentrantReadWriteLock(true);

Expand Down Expand Up @@ -131,8 +133,23 @@ public void triggerContainerHealthCheck() {
LOG.info("Container Health task thread took {} milliseconds to" +
" process {} existing database records.",
Time.monotonicNow() - start, existingCount);

checkAndProcessContainers(unhealthyContainerStateStatsMap, currentTime);
processedContainers.clear();
} finally {
lock.writeLock().unlock();
}
}

private void checkAndProcessContainers(
Map<UnHealthyContainerStates, Map<String, Long>>
unhealthyContainerStateStatsMap, long currentTime) {
ContainerID startID = ContainerID.valueOf(1);
List<ContainerInfo> containers = containerManager.getContainers(startID,
FETCH_COUNT);
long start;
while (!containers.isEmpty()) {
start = Time.monotonicNow();
final List<ContainerInfo> containers = containerManager.getContainers();
containers.stream()
.filter(c -> !processedContainers.contains(c))
.forEach(c -> processContainer(c, currentTime,
Expand All @@ -142,9 +159,13 @@ public void triggerContainerHealthCheck() {
" processing {} containers.", Time.monotonicNow() - start,
containers.size());
logUnhealthyContainerStats(unhealthyContainerStateStatsMap);
processedContainers.clear();
} finally {
lock.writeLock().unlock();
if (containers.size() > FETCH_COUNT) {
startID = ContainerID.valueOf(
containers.get(containers.size() - 1).getContainerID());
containers.clear();
containers = containerManager.getContainers(startID, FETCH_COUNT);
}
containers.clear();
Copy link
Contributor

@ArafatKhan2198 ArafatKhan2198 Jan 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The containers.clear(); statement inside the if block is meant to clear the list before fetching the next batch. However, the last containers.clear(); statement just outside the if block seems redundant since the loop condition (while (!containers.isEmpty())) ensures that it's only executed when containers is empty.?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ArafatKhan2198 I couldn't understand this comment, as I don't see containers.clear() outside the loop. Can you pls clarify ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the earlier mistake in the comment. I have now corrected it. Please take a look.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, we have a case where last iteration in batch will not execute the if condition , so need to clear -off any in memory.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
package org.apache.hadoop.ozone.recon.fsck;

import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_UNHEALTHY;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep assertThat instead of restoring assertTrue.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

Expand Down Expand Up @@ -96,7 +98,8 @@ public void testRun() throws Exception {
List<ContainerInfo> mockContainers = getMockContainers(7);
when(scmMock.getScmServiceProvider()).thenReturn(scmClientMock);
when(scmMock.getContainerManager()).thenReturn(containerManagerMock);
when(containerManagerMock.getContainers()).thenReturn(mockContainers);
when(containerManagerMock.getContainers(any(ContainerID.class),
anyInt())).thenReturn(mockContainers);
for (ContainerInfo c : mockContainers) {
when(containerManagerMock.getContainer(c.containerID())).thenReturn(c);
when(scmClientMock.getContainerWithPipeline(c.getContainerID()))
Expand Down Expand Up @@ -137,7 +140,7 @@ public void testRun() throws Exception {
.thenReturn(Collections.emptySet());

List<UnhealthyContainers> all = unHealthyContainersTableHandle.findAll();
assertThat(all).isEmpty();
assertTrue(all.isEmpty());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep assertThat, added by HDDS-10034.

Suggested change
assertTrue(all.isEmpty());
assertThat(all).isEmpty();


long currentTime = System.currentTimeMillis();
ReconTaskStatusDao reconTaskStatusDao = getDao(ReconTaskStatusDao.class);
Expand All @@ -151,7 +154,7 @@ public void testRun() throws Exception {
reconTaskStatusDao, containerHealthSchemaManager,
placementMock, reconTaskConfig, reconContainerMetadataManager);
containerHealthTask.start();
LambdaTestUtils.await(6000, 1000, () ->
LambdaTestUtils.await(60000, 1000, () ->
(unHealthyContainersTableHandle.count() == 6));
UnhealthyContainers rec =
unHealthyContainersTableHandle.fetchByContainerId(1L).get(0);
Expand Down Expand Up @@ -192,7 +195,8 @@ public void testRun() throws Exception {

ReconTaskStatus taskStatus =
reconTaskStatusDao.findById(containerHealthTask.getTaskName());
assertThat(taskStatus.getLastUpdatedTimestamp()).isGreaterThan(currentTime);
assertTrue(taskStatus.getLastUpdatedTimestamp() >
currentTime);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep assertThat, added by HDDS-10034.

Suggested change
assertTrue(taskStatus.getLastUpdatedTimestamp() >
currentTime);
assertThat(taskStatus.getLastUpdatedTimestamp()).isGreaterThan(currentTime);


// Now run the job again, to check that relevant records are updated or
// removed as appropriate. Need to adjust the return value for all the mocks
Expand Down Expand Up @@ -267,7 +271,8 @@ public void testDeletedContainer() throws Exception {
List<ContainerInfo> mockContainers = getMockContainers(3);
when(scmMock.getScmServiceProvider()).thenReturn(scmClientMock);
when(scmMock.getContainerManager()).thenReturn(containerManagerMock);
when(containerManagerMock.getContainers()).thenReturn(mockContainers);
when(containerManagerMock.getContainers(any(ContainerID.class),
anyInt())).thenReturn(mockContainers);
for (ContainerInfo c : mockContainers) {
when(containerManagerMock.getContainer(c.containerID())).thenReturn(c);
when(scmClientMock.getContainerWithPipeline(c.getContainerID()))
Expand Down Expand Up @@ -299,7 +304,7 @@ public void testDeletedContainer() throws Exception {
.thenReturn(new ContainerWithPipeline(mockContainers.get(0), null));

List<UnhealthyContainers> all = unHealthyContainersTableHandle.findAll();
assertThat(all).isEmpty();
assertTrue(all.isEmpty());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep assertThat, added by HDDS-10034.

Suggested change
assertTrue(all.isEmpty());
assertThat(all).isEmpty();


long currentTime = System.currentTimeMillis();
ReconTaskStatusDao reconTaskStatusDao = getDao(ReconTaskStatusDao.class);
Expand Down Expand Up @@ -327,7 +332,8 @@ public void testDeletedContainer() throws Exception {

ReconTaskStatus taskStatus =
reconTaskStatusDao.findById(containerHealthTask.getTaskName());
assertThat(taskStatus.getLastUpdatedTimestamp()).isGreaterThan(currentTime);
assertTrue(taskStatus.getLastUpdatedTimestamp() >
currentTime);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep assertThat, added by HDDS-10034.

Suggested change
assertTrue(taskStatus.getLastUpdatedTimestamp() >
currentTime);
assertThat(taskStatus.getLastUpdatedTimestamp()).isGreaterThan(currentTime);

}

private Set<ContainerReplica> getMockReplicas(
Expand Down