diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index cc9be3892bed..c34a443d15c0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.annotation.InterfaceStability; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.fs.SpaceUsageSource; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions; import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; @@ -285,6 +286,8 @@ public void cleanDeletedContainerDir() { @Override public synchronized VolumeCheckResult check(@Nullable Boolean unused) throws Exception { + checkVolumeUsages(); + VolumeCheckResult result = super.check(unused); if (isDbLoadFailure()) { @@ -346,6 +349,33 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException return VolumeCheckResult.HEALTHY; } + @VisibleForTesting + public void checkVolumeUsages() { + boolean isEnoughSpaceAvailable = true; + SpaceUsageSource currentUsage = getCurrentUsage(); + long getFreeSpaceToSpare = getFreeSpaceToSpare(currentUsage.getCapacity()); + if (currentUsage.getAvailable() < getFreeSpaceToSpare) { + LOG.warn("Volume {} has insufficient space for write operation. Available: {}, Free space to spare: {}", + getStorageDir(), currentUsage.getAvailable(), getFreeSpaceToSpare); + isEnoughSpaceAvailable = false; + } else if (committedBytes.get() > 0 && currentUsage.getAvailable() < committedBytes.get() + getFreeSpaceToSpare) { + LOG.warn("Volume {} has insufficient space for on-going container write operation. " + + "Committed: {}, Available: {}, Free space to spare: {}", + getStorageDir(), committedBytes.get(), currentUsage.getAvailable(), getFreeSpaceToSpare); + isEnoughSpaceAvailable = false; + } + + volumeInfoMetrics.setAvailableSpaceInsufficient(!isEnoughSpaceAvailable); + + if (!getVolumeUsage().map(VolumeUsage::isReservedUsagesInRange).orElse(true)) { + LOG.warn("Volume {} reserved usages is higher than actual allocated reserved space.", + getStorageDir()); + volumeInfoMetrics.setReservedCrossesLimit(true); + } else { + volumeInfoMetrics.setReservedCrossesLimit(false); + } + } + /** * add "delta" bytes to committed space in the volume. * diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java index c864f416cdb4..c8cb2a522159 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java @@ -28,6 +28,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.Interns; import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.ozone.OzoneConsts; @@ -57,6 +58,10 @@ public class VolumeInfoMetrics implements MetricsSource { private final HddsVolume volume; @Metric("Returns the RocksDB compact times of the Volume") private MutableRate dbCompactLatency; + @Metric("Volume reserved space crosses reserved usages limit") + private MutableGaugeInt reservedCrossesLimit; + @Metric("Volume available space is insufficient") + private MutableGaugeInt availableSpaceInsufficient; /** * @param identifier Typically, path to volume root. E.g. /data/hdds @@ -119,6 +124,30 @@ public void dbCompactTimesNanoSecondsIncr(long time) { dbCompactLatency.add(time); } + public int getAvailableSpaceInsufficient() { + return availableSpaceInsufficient.value(); + } + + public void setAvailableSpaceInsufficient(boolean isInSufficient) { + if (isInSufficient) { + availableSpaceInsufficient.set(1); + } else { + availableSpaceInsufficient.set(0); + } + } + + public int getReservedCrossesLimit() { + return reservedCrossesLimit.value(); + } + + public void setReservedCrossesLimit(boolean isLimitCrossed) { + if (isLimitCrossed) { + reservedCrossesLimit.set(1); + } else { + reservedCrossesLimit.set(0); + } + } + /** * Return the Container Count of the Volume. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java index 84f04ec87149..0fbfb474b402 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java @@ -238,4 +238,14 @@ private static long getReserved(ConfigurationSource conf, String rootDir, return (long) Math.ceil(capacity * percentage); } + + public boolean isReservedUsagesInRange() { + SpaceUsageSource spaceUsageSource = realUsage(); + long reservedUsed = getOtherUsed(spaceUsageSource); + if (reservedInBytes > 0 && reservedUsed > reservedInBytes) { + LOG.warn("Reserved usages {} is higher than actual allocated reserved space {}.", reservedUsed, reservedInBytes); + return false; + } + return true; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java index a5daf91889a2..0e90b091fef1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java @@ -538,6 +538,36 @@ public void testDBDirFailureDetected() throws Exception { volume.shutdown(); } + @Test + public void testVolumeUsagesMetrics() throws Exception { + // Build a volume with mocked usage, with reserved: 100B, Min free: 10B + CONF.set("hdds.datanode.volume.min.free.space", "10B"); + volumeBuilder.usageCheckFactory(MockSpaceUsageCheckFactory.of(new SpaceUsageSource.Fixed(1000, 100, 700), + Duration.ZERO, inMemory(new AtomicLong(0)))); + HddsVolume volume = volumeBuilder.build(); + volume.incCommittedBytes(100); + + // available space (>= 0) available - committed - min.free.space = 100 - 100 - 10 = -10, + // insufficient space unavailable + volume.checkVolumeUsages(); + assertEquals(1, volume.getVolumeInfoStats().getAvailableSpaceInsufficient()); + // reserved used = capacity - available - used = 1000 - 100 - 700 = 200 more than 100B for reserved, + // reserve usages crosses limit true + assertEquals(1, volume.getVolumeInfoStats().getReservedCrossesLimit()); + + // remove committed, sufficient space is available, reset the flag of metrics + volume.incCommittedBytes(-100); + volume.checkVolumeUsages(); + assertEquals(0, volume.getVolumeInfoStats().getAvailableSpaceInsufficient()); + + // reduce available less then min.free.space + volume.incrementUsedSpace(100); + volume.checkVolumeUsages(); + assertEquals(1, volume.getVolumeInfoStats().getAvailableSpaceInsufficient()); + + volume.shutdown(); + } + private MutableVolumeSet createDbVolumeSet() throws IOException { File dbVolumeDir = Files.createDirectory(folder.resolve("NewDir")).toFile(); CONF.set(OzoneConfigKeys.HDDS_DATANODE_CONTAINER_DB_DIR,