diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java index 9dfd94dcb454..9a17d1fc711e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java @@ -163,6 +163,12 @@ ContainerReplicaProto getContainerReport() */ long getBlockCommitSequenceId(); + /** + * Returns if the container metadata should be checked. The result depends + * on the state of the container. + */ + boolean shouldScanMetadata(); + /** * check and report the structural integrity of the container. * @return true if the integrity checks pass diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java index 776d63b7045d..b14d624a7f02 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -859,6 +859,19 @@ private ContainerReplicaProto.State getHddsState() */ public File getContainerDBFile() { return KeyValueContainerLocationUtil.getContainerDBFile(containerData); + + } + + @Override + public boolean shouldScanMetadata() { + boolean shouldScan = + getContainerState() != ContainerDataProto.State.UNHEALTHY; + if (!shouldScan && LOG.isDebugEnabled()) { + LOG.debug("Container {} in state {} should not have its metadata " + + "scanned.", + containerData.getContainerID(), containerData.getState()); + } + return shouldScan; } @Override @@ -873,12 +886,13 @@ public boolean scanMetaData() { @Override public boolean shouldScanData() { boolean shouldScan = - containerData.getState() == ContainerDataProto.State.CLOSED - || containerData.getState() == ContainerDataProto.State.QUASI_CLOSED; + getContainerState() == ContainerDataProto.State.CLOSED + || getContainerState() == ContainerDataProto.State.QUASI_CLOSED; if (!shouldScan && LOG.isDebugEnabled()) { LOG.debug("Container {} in state {} should not have its data scanned.", containerData.getContainerID(), containerData.getState()); } + return shouldScan; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java index 44f1bdd102a7..ee67b68d3d18 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java @@ -70,9 +70,7 @@ public void scanContainer(Container container) throws IOException { return; } - // Full data scan also does a metadata scan. If a full data scan was done - // recently, we can skip this metadata scan. - if (ContainerUtils.recentlyScanned(container, minScanGap, LOG)) { + if (!shouldScan(container)) { return; } @@ -89,4 +87,11 @@ public void scanContainer(Container container) throws IOException { public ContainerMetadataScannerMetrics getMetrics() { return this.metrics; } + + private boolean shouldScan(Container container) { + // Full data scan also does a metadata scan. If a full data scan was done + // recently, we can skip this metadata scan. + return container.shouldScanMetadata() && + !ContainerUtils.recentlyScanned(container, minScanGap, LOG); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java index 3e0ddcd67f68..17e8efb86566 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java @@ -180,6 +180,7 @@ public static void setupMockContainer( when(c.shouldScanData()).thenReturn(shouldScanData); when(c.scanData(any(DataTransferThrottler.class), any(Canceler.class))) .thenReturn(scanDataSuccess); + when(c.shouldScanMetadata()).thenReturn(true); Mockito.lenient().when(c.scanMetaData()).thenReturn(scanMetaDataSuccess); when(c.getContainerData().getVolume()).thenReturn(vol); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index dea5d344df08..eb644d035038 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -20,7 +20,10 @@ package org.apache.hadoop.ozone.container.ozoneimpl; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import org.apache.hadoop.hdfs.util.Canceler; +import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeEach; @@ -30,13 +33,17 @@ import java.util.Optional; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.when; /** * Unit tests for the background container data scanner. @@ -134,6 +141,45 @@ public void testScanTimestampUpdated() throws Exception { eq(corruptData.getContainerData().getContainerID()), any()); } + @Test + @Override + public void testUnhealthyContainerNotRescanned() throws Exception { + Container unhealthy = mockKeyValueContainer(); + when(unhealthy.scanMetaData()).thenReturn(true); + when(unhealthy.scanData(any(DataTransferThrottler.class), + any(Canceler.class))).thenReturn(false); + + setContainers(unhealthy, healthy); + + // First iteration should find the unhealthy container. + scanner.runIteration(); + verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); + ContainerDataScannerMetrics metrics = scanner.getMetrics(); + assertEquals(1, metrics.getNumScanIterations()); + assertEquals(2, metrics.getNumContainersScanned()); + assertEquals(1, metrics.getNumUnHealthyContainers()); + + // The unhealthy container should have been moved to the unhealthy state. + Mockito.verify(unhealthy.getContainerData(), atMostOnce()) + .setState(UNHEALTHY); + // Update the mock to reflect this. + Mockito.when(unhealthy.getContainerState()).thenReturn(UNHEALTHY); + assertFalse(unhealthy.shouldScanData()); + + // Clear metrics to check the next run. + metrics.resetNumContainersScanned(); + metrics.resetNumUnhealthyContainers(); + + scanner.runIteration(); + // The only invocation of unhealthy on this container should have been from + // the previous scan. + verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); + // This iteration should skip the already unhealthy container. + assertEquals(2, metrics.getNumScanIterations()); + assertEquals(1, metrics.getNumContainersScanned()); + assertEquals(0, metrics.getNumUnHealthyContainers()); + } + /** * A datanode will have one background data scanner per volume. When the * volume fails, the scanner thread should be terminated. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java index 5fe9975bddeb..712e89fde4d3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java @@ -20,7 +20,10 @@ package org.apache.hadoop.ozone.container.ozoneimpl; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import org.apache.hadoop.hdfs.util.Canceler; +import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeEach; @@ -30,12 +33,17 @@ import java.util.Optional; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.when; /** * Unit tests for the background container metadata scanner. @@ -115,6 +123,46 @@ public void testUnhealthyContainersDetected() throws Exception { verifyContainerMarkedUnhealthy(openContainer, never()); } + @Test + @Override + public void testUnhealthyContainerNotRescanned() throws Exception { + Container unhealthy = mockKeyValueContainer(); + when(unhealthy.scanMetaData()).thenReturn(false); + when(unhealthy.scanData( + any(DataTransferThrottler.class), any(Canceler.class))) + .thenReturn(true); + + setContainers(unhealthy, healthy); + + // First iteration should find the unhealthy container. + scanner.runIteration(); + verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); + ContainerMetadataScannerMetrics metrics = scanner.getMetrics(); + assertEquals(1, metrics.getNumScanIterations()); + assertEquals(2, metrics.getNumContainersScanned()); + assertEquals(1, metrics.getNumUnHealthyContainers()); + + // The unhealthy container should have been moved to the unhealthy state. + Mockito.verify(unhealthy.getContainerData(), atMostOnce()) + .setState(UNHEALTHY); + // Update the mock to reflect this. + Mockito.when(unhealthy.getContainerState()).thenReturn(UNHEALTHY); + assertFalse(unhealthy.shouldScanMetadata()); + + // Clear metrics to check the next run. + metrics.resetNumContainersScanned(); + metrics.resetNumUnhealthyContainers(); + + scanner.runIteration(); + // The only invocation of unhealthy on this container should have been from + // the previous scan. + verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); + // This iteration should skip the already unhealthy container. + assertEquals(2, metrics.getNumScanIterations()); + assertEquals(1, metrics.getNumContainersScanned()); + assertEquals(0, metrics.getNumUnHealthyContainers()); + } + /** * A datanode will have one metadata scanner thread for the whole process. * When a volume fails, any the containers queued for scanning in that volume diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java index 08a5b651360f..504d4304f1f2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java @@ -21,6 +21,8 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.mockito.Mockito; @@ -30,13 +32,17 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; import static org.apache.hadoop.hdds.conf.OzoneConfiguration.newInstanceOf; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED; import static org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration.CONTAINER_SCAN_MIN_GAP_DEFAULT; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -68,7 +74,11 @@ public abstract class TestContainerScannersAbstract { protected ContainerScannerConfiguration conf; protected ContainerController controller; + + private Collection> containers; + public void setup() { + containers = new ArrayList<>(); conf = newInstanceOf(ContainerScannerConfiguration.class); conf.setMetadataScanInterval(0); conf.setDataScanInterval(0); @@ -100,6 +110,9 @@ public abstract void testPreviouslyScannedContainerIsScanned() @Test public abstract void testWithVolumeFailure() throws Exception; + @Test + public abstract void testUnhealthyContainerNotRescanned() throws Exception; + // HELPER METHODS protected void setScannedTimestampOld(Container container) { @@ -123,12 +136,47 @@ protected void setScannedTimestampRecent(Container container) { } protected void verifyContainerMarkedUnhealthy( - Container container, VerificationMode invocationTimes) + Container container, VerificationMode invocationTimes) throws Exception { Mockito.verify(controller, invocationTimes).markContainerUnhealthy( container.getContainerData().getContainerID()); } + /** + * Mock a KeyValueContainer implementation instead of a container + * interface like ContainerTestUtils#setupMockContainer. + * This allows testing that the shouldScanData method skips unhealthy + * containers. + */ + protected Container mockKeyValueContainer() { + KeyValueContainer unhealthy = Mockito.mock(KeyValueContainer.class); + + KeyValueContainerData data = mock(KeyValueContainerData.class); + when(data.getContainerID()).thenReturn(CONTAINER_SEQ_ID.incrementAndGet()); + when(unhealthy.getContainerData()).thenReturn(data); + when(unhealthy.getContainerState()).thenReturn(CLOSED); + // The above mocks should be enough for the scanners to call this method + // and test it. + when(unhealthy.shouldScanData()).thenCallRealMethod(); + assertTrue(unhealthy.shouldScanData()); + when(unhealthy.shouldScanMetadata()).thenCallRealMethod(); + assertTrue(unhealthy.shouldScanMetadata()); + + when(unhealthy.getContainerData().getVolume()).thenReturn(vol); + + return unhealthy; + } + + /** + * Add a container to be returned by the mock ContainerController. + */ + protected void setContainers(Container... containers) { + this.containers = Arrays.stream(containers).collect(Collectors.toList()); + when(controller.getContainers(vol)) + .thenAnswer(i -> this.containers.iterator()); + when(controller.getContainers()).thenReturn(this.containers); + } + private ContainerController mockContainerController() { // healthy container ContainerTestUtils.setupMockContainer(healthy, @@ -147,8 +195,7 @@ private ContainerController mockContainerController() { ContainerTestUtils.setupMockContainer(openCorruptMetadata, false, false, false, CONTAINER_SEQ_ID, vol); - Collection> containers = Arrays.asList( - healthy, corruptData, openCorruptMetadata); + containers.addAll(Arrays.asList(healthy, corruptData, openCorruptMetadata)); ContainerController mock = mock(ContainerController.class); when(mock.getContainers(vol)).thenReturn(containers.iterator()); when(mock.getContainers()).thenReturn(containers); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index 8f1f2f2f9086..e89cecfae083 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -20,8 +20,9 @@ package org.apache.hadoop.ozone.container.ozoneimpl; import org.apache.commons.compress.utils.Lists; +import org.apache.hadoop.hdfs.util.Canceler; +import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -38,13 +39,17 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.when; /** * Unit tests for the on-demand container scanner. @@ -217,7 +222,43 @@ public void testWithVolumeFailure() throws Exception { assertEquals(0, metrics.getNumUnHealthyContainers()); } - private void scanContainer(Container container) + @Test + @Override + public void testUnhealthyContainerNotRescanned() throws Exception { + Container unhealthy = mockKeyValueContainer(); + when(unhealthy.scanMetaData()).thenReturn(true); + when(unhealthy.scanData( + any(DataTransferThrottler.class), any(Canceler.class))) + .thenReturn(false); + + // First iteration should find the unhealthy container. + scanContainer(unhealthy); + verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); + OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics(); + assertEquals(1, metrics.getNumContainersScanned()); + assertEquals(1, metrics.getNumUnHealthyContainers()); + + // The unhealthy container should have been moved to the unhealthy state. + Mockito.verify(unhealthy.getContainerData(), atMostOnce()) + .setState(UNHEALTHY); + // Update the mock to reflect this. + Mockito.when(unhealthy.getContainerState()).thenReturn(UNHEALTHY); + assertFalse(unhealthy.shouldScanData()); + + // Clear metrics to check the next run. + metrics.resetNumContainersScanned(); + metrics.resetNumUnhealthyContainers(); + + scanContainer(unhealthy); + // The only invocation of unhealthy on this container should have been from + // the previous scan. + verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); + // This iteration should skip the already unhealthy container. + assertEquals(0, metrics.getNumContainersScanned()); + assertEquals(0, metrics.getNumUnHealthyContainers()); + } + + private void scanContainer(Container container) throws Exception { OnDemandContainerDataScanner.init(conf, controller); Optional> scanFuture =