Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.AbstractConstruct;
import org.yaml.snakeyaml.constructor.SafeConstructor;
import org.yaml.snakeyaml.error.YAMLException;
import org.yaml.snakeyaml.introspector.BeanAccess;
import org.yaml.snakeyaml.introspector.Property;
import org.yaml.snakeyaml.introspector.PropertyUtils;
Expand Down Expand Up @@ -167,8 +168,20 @@ public static ContainerData readContainer(InputStream input)
Yaml yaml = new Yaml(containerDataConstructor, representer);
yaml.setBeanAccess(BeanAccess.FIELD);

containerData = (ContainerData)
yaml.load(input);
try {
containerData = yaml.load(input);
} catch (YAMLException ex) {
// Unchecked exception. Convert to IOException since an error with one
// container file is not fatal for the whole thread or datanode.
throw new IOException(ex);
}

if (containerData == null) {
// If Yaml#load returned null, then the file is empty. This is valid yaml
// but considered an error in this case since we have lost data about
// the container.
throw new IOException("Failed to load container file. File is empty.");
}

return containerData;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ public class ContainerScannerConfiguration {
// only for log
public static final String HDDS_CONTAINER_SCRUB_ENABLED =
"hdds.container.scrub.enabled";
public static final String HDDS_CONTAINER_SCRUB_DEV_DATA_ENABLED =
"hdds.container.scrub.dev.data.enabled";
public static final String HDDS_CONTAINER_SCRUB_DEV_METADATA_ENABLED =
"hdds.container.scrub.dev.metadata.enabled";
public static final String METADATA_SCAN_INTERVAL_KEY =
"hdds.container.scrub.metadata.scan.interval";
public static final String DATA_SCAN_INTERVAL_KEY =
Expand Down Expand Up @@ -69,9 +73,25 @@ public class ContainerScannerConfiguration {
type = ConfigType.BOOLEAN,
defaultValue = "false",
tags = {ConfigTag.STORAGE},
description = "Config parameter to enable container scanner.")
description = "Config parameter to enable all container scanners.")
private boolean enabled = false;

@Config(key = "dev.data.scan.enabled",
type = ConfigType.BOOLEAN,
defaultValue = "true",
tags = {ConfigTag.STORAGE},
description = "Can be used to disable the background container data " +
"scanner for developer testing purposes.")
private boolean dataScanEnabled = true;

@Config(key = "dev.metadata.scan.enabled",
type = ConfigType.BOOLEAN,
defaultValue = "true",
tags = {ConfigTag.STORAGE},
description = "Can be used to disable the background container metadata" +
" scanner for developer testing purposes.")
private boolean metadataScanEnabled = true;

@Config(key = "metadata.scan.interval",
type = ConfigType.TIME,
defaultValue = "3h",
Expand Down Expand Up @@ -163,6 +183,14 @@ public boolean isEnabled() {
return enabled;
}

public boolean isDataScanEnabled() {
return dataScanEnabled;
}

public boolean isMetadataScanEnabled() {
return metadataScanEnabled;
}

public void setMetadataScanInterval(long metadataScanInterval) {
this.metadataScanInterval = metadataScanInterval;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,16 @@ private void startContainerScrub() {
return;
}
initOnDemandContainerScanner(c);
initMetadataScanner(c);
initContainerScanner(c);

// This config is for testing the scanners in isolation.
if (c.isMetadataScanEnabled()) {
initMetadataScanner(c);
}

// This config is for testing the scanners in isolation.
if (c.isDataScanEnabled()) {
initContainerScanner(c);
}
}

private void initContainerScanner(ContainerScannerConfiguration c) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ log4j.threshold=ALL
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n

Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.hadoop.ozone.dn.scanner;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.ozone.container.ozoneimpl.BackgroundContainerDataScanner;
import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.util.Collection;
import java.util.concurrent.TimeUnit;

/**
* Integration tests for the background container data scanner. This scanner
* checks all data and metadata in the container.
*/
@RunWith(Parameterized.class)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: since it is new code, it would be great if you use JUnit-5 and its parameterized tests.

public class TestBackgroundContainerDataScannerIntegration
extends TestContainerScannerIntegrationAbstract {

private final ContainerCorruptions corruption;

@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> supportedCorruptionTypes() {
// Background container data scanner should be able to detect all errors.
return ContainerCorruptions.getAllParamsExcept();
}

@BeforeClass
public static void init() throws Exception {
OzoneConfiguration ozoneConfig = new OzoneConfiguration();
ozoneConfig.setBoolean(
ContainerScannerConfiguration.HDDS_CONTAINER_SCRUB_ENABLED, true);
// Make sure the background metadata scanner does not detect failures
// before the data scanner under test does.
ozoneConfig.setBoolean(
ContainerScannerConfiguration.HDDS_CONTAINER_SCRUB_DEV_METADATA_ENABLED,
false);
// Make the background data scanner run frequently to reduce test time.
ozoneConfig.setTimeDuration(
ContainerScannerConfiguration.DATA_SCAN_INTERVAL_KEY,
SCAN_INTERVAL.getSeconds(), TimeUnit.SECONDS);
buildCluster(ozoneConfig);
}

public TestBackgroundContainerDataScannerIntegration(
ContainerCorruptions corruption) {
this.corruption = corruption;
}

/**
* {@link BackgroundContainerDataScanner} should detect corrupted blocks
* in a closed container without client interaction.
*/
@Test
public void testCorruptionDetected() throws Exception {
long containerID = writeDataThenCloseContainer();
// Container corruption has not yet been introduced.
Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED,
getDnContainer(containerID).getContainerState());

corruption.applyTo(getDnContainer(containerID));
// Wait for the scanner to detect corruption.
GenericTestUtils.waitFor(() ->
getDnContainer(containerID).getContainerState() ==
ContainerProtos.ContainerDataProto.State.UNHEALTHY,
500, 5000);

// Wait for SCM to get a report of the unhealthy replica.
waitForScmToSeeUnhealthyReplica(containerID);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.hadoop.ozone.dn.scanner;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
import org.apache.hadoop.ozone.container.ozoneimpl.BackgroundContainerMetadataScanner;
import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.time.Duration;
import java.util.Collection;
import java.util.concurrent.TimeUnit;

/**
* Integration tests for the background container metadata scanner. This
* scanner does a quick check of container metadata to find obvious failures
* faster than a full data scan.
*/
@RunWith(Parameterized.class)
public class TestBackgroundContainerMetadataScannerIntegration
extends TestContainerScannerIntegrationAbstract {

private final ContainerCorruptions corruption;

@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> supportedCorruptionTypes() {
return ContainerCorruptions.getAllParamsExcept(
ContainerCorruptions.MISSING_BLOCK,
ContainerCorruptions.CORRUPT_BLOCK,
ContainerCorruptions.TRUNCATED_BLOCK);
}

@BeforeClass
public static void init() throws Exception {
OzoneConfiguration ozoneConfig = new OzoneConfiguration();
// Speed up SCM closing of open container when an unhealthy replica is
// reported.
ReplicationManager.ReplicationManagerConfiguration rmConf = ozoneConfig
.getObject(ReplicationManager.ReplicationManagerConfiguration.class);
rmConf.setInterval(Duration.ofSeconds(1));
ozoneConfig.setFromObject(rmConf);

ozoneConfig.setBoolean(
ContainerScannerConfiguration.HDDS_CONTAINER_SCRUB_ENABLED, true);
// Make sure the background data scanner does not detect failures
// before the metadata scanner under test does.
ozoneConfig.setBoolean(
ContainerScannerConfiguration.HDDS_CONTAINER_SCRUB_DEV_DATA_ENABLED,
false);
// Make the background metadata scanner run frequently to reduce test time.
ozoneConfig.setTimeDuration(
ContainerScannerConfiguration.METADATA_SCAN_INTERVAL_KEY,
SCAN_INTERVAL.getSeconds(), TimeUnit.SECONDS);
buildCluster(ozoneConfig);
}

public TestBackgroundContainerMetadataScannerIntegration(
ContainerCorruptions corruption) {
this.corruption = corruption;
}

/**
* {@link BackgroundContainerMetadataScanner} should detect corrupted metadata
* in open or closed containers without client interaction.
*/
@Test
public void testCorruptionDetected() throws Exception {
// Write data to an open and closed container.
long closedContainerID = writeDataThenCloseContainer();
Assert.assertEquals(ContainerProtos.ContainerDataProto.State.CLOSED,
getDnContainer(closedContainerID).getContainerState());
long openContainerID = writeDataToOpenContainer();
Assert.assertEquals(ContainerProtos.ContainerDataProto.State.OPEN,
getDnContainer(openContainerID).getContainerState());

// Corrupt both containers.
corruption.applyTo(getDnContainer(closedContainerID));
corruption.applyTo(getDnContainer(openContainerID));
// Wait for the scanner to detect corruption.
GenericTestUtils.waitFor(() ->
getDnContainer(closedContainerID).getContainerState() ==
ContainerProtos.ContainerDataProto.State.UNHEALTHY,
500, 5000);
GenericTestUtils.waitFor(() ->
getDnContainer(openContainerID).getContainerState() ==
ContainerProtos.ContainerDataProto.State.UNHEALTHY,
500, 5000);

// Wait for SCM to get reports of the unhealthy replicas.
waitForScmToSeeUnhealthyReplica(closedContainerID);
waitForScmToSeeUnhealthyReplica(openContainerID);
// Once the unhealthy replica is reported, the open container's lifecycle
// state in SCM should move to closed.
waitForScmToCloseContainer(openContainerID);
}
}
Loading