From 57ea5fd1416607554677042bf18de18debc803cb Mon Sep 17 00:00:00 2001 From: slfan1989 Date: Sat, 5 Oct 2024 15:33:14 +0800 Subject: [PATCH 1/2] HDDS-11531. Collect iowait and system on the node. --- .../container/io/BackgroundIOAnalyzer.java | 190 ++++++++++++++++++ .../ozone/container/io/DataNodeIOMetrics.java | 63 ++++++ .../container/io/IOAnalyzerConfiguration.java | 55 +++++ .../container/ozoneimpl/OzoneContainer.java | 8 + 4 files changed, 316 insertions(+) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java new file mode 100644 index 000000000000..5e793e25342b --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.io; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.hadoop.util.Shell; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; + +public class BackgroundIOAnalyzer extends Thread { + + private static final Logger LOG = + LoggerFactory.getLogger(BackgroundIOAnalyzer.class); + + private static final String NAME = "BackgroundIOAnalyzer"; + private static final String PROCFS_STAT = "/proc/stat"; + private static final String PROCFS_CPU = "cpu"; + private DataNodeIOMetrics metrics; + private final AtomicBoolean stopping; + private final long remainingSleep; + + public BackgroundIOAnalyzer(IOAnalyzerConfiguration conf) { + this.metrics = DataNodeIOMetrics.create(); + this.stopping = new AtomicBoolean(false); + this.remainingSleep = conf.getIOAnalyzerInterval(); + setName(NAME); + setDaemon(true); + } + + @Override + public void run() { + try { + while (!stopping.get()) { + analyzerIoWaitAndSystem(); + } + LOG.info("{} exiting.", this); + } catch (Exception e) { + LOG.error("{} exiting because of exception ", this, e); + } finally { + if (metrics != null) { + metrics.unregister(); + } + } + } + + /** + * Analyzes the usage of IOWait and System metrics. + * + *

Drive Types

+ * + * + *

Monitoring Purpose

+ *

Monitoring IOWait and System metrics is crucial for:

+ * + * + *

Data Collection Approach

+ *

This method employs a lightweight strategy to gather relevant data by:

+ * + * + *

Metrics Breakdown

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
user | nice | system | idle | iowait | irq | softirq | steal | guest | guest_nice
1067263464817921665141335547939726143614217949886055070272000
+ * + */ + private void analyzerIoWaitAndSystem() { + + if (!Shell.LINUX) { + LOG.warn("Analyzing IO: We currently only support Linux systems."); + return; + } + + try (BufferedReader reader = new BufferedReader(new FileReader(PROCFS_STAT))) { + String line; + while ((line = reader.readLine()) != null) { + if (line.startsWith(PROCFS_CPU)) { + String[] values = line.split("\\s+"); + if(ArrayUtils.isNotEmpty(values)) { + + // Step1. Retrieve all CPU system time data. + long user = Long.parseLong(values[1]); + long nice = Long.parseLong(values[2]); + long system = Long.parseLong(values[3]); + long idle = Long.parseLong(values[4]); + long iowait = Long.parseLong(values[5]); + + // Step2. Calculate total CPU time. + long totalCpuTime = user + nice + system + idle + iowait; + + // Step3. Calculate the ratio. + long iowaitRatio = (long) Math.floor((double) iowait / totalCpuTime * 100); + metrics.setDNIoWait(iowaitRatio); + + long systemRatio = (long) Math.floor((double) system / totalCpuTime * 100); + metrics.setDNSystem(systemRatio); + + LOG.debug("IO Analyzer : IoWait = {}, System = {}.", iowaitRatio, systemRatio); + } + break; + } + } + } catch (IOException e) { + LOG.error("An error occurred during the Analyzing IO process.", e); + } + + // We collect IO performance data at regular intervals, + // which is usually every 30 seconds. 
+ handleRemainingSleep(remainingSleep); + } + + public final void handleRemainingSleep(long remainingSleep) { + if (remainingSleep > 0) { + try { + Thread.sleep(remainingSleep); + } catch (InterruptedException ignored) { + stopping.set(true); + LOG.warn("Background IOAnalyzer was interrupted."); + Thread.currentThread().interrupt(); + } + } + } + + public synchronized void shutdown() { + if (stopping.compareAndSet(false, true)) { + this.interrupt(); + try { + this.join(); + } catch (InterruptedException ex) { + LOG.warn("Unexpected exception while stopping io analyzer.", ex); + Thread.currentThread().interrupt(); + } + } + + if (metrics != null) { + metrics.unregister(); + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java new file mode 100644 index 000000000000..28f6a57851c7 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */ +package org.apache.hadoop.ozone.container.io; + +import org.apache.hadoop.hdds.annotation.InterfaceAudience.Private; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; + +@Private +@Metrics(about = "Datanode io metrics", context = "io") +public class DataNodeIOMetrics { + private final String name; + private final MetricsSystem ms; + + @Metric + private MutableGaugeLong ioWaitGauge; + + @Metric + private MutableGaugeLong systemGauge; + + public DataNodeIOMetrics(String name, MetricsSystem ms) { + this.name = name; + this.ms = ms; + } + + public void setDNIoWait(long ioWait) { + ioWaitGauge.set(ioWait); + } + + public void setDNSystem(long system) { + systemGauge.set(system); + } + + public static DataNodeIOMetrics create() { + MetricsSystem ms = DefaultMetricsSystem.instance(); + String name = "DataNodeIOMetrics"; + return ms.register(name, null, new DataNodeIOMetrics(name, ms)); + } + + public void unregister() { + ms.unregisterSource(name); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java new file mode 100644 index 000000000000..0039de66657e --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.hadoop.ozone.container.io; + +import org.apache.hadoop.hdds.conf.Config; +import org.apache.hadoop.hdds.conf.ConfigGroup; +import org.apache.hadoop.hdds.conf.ConfigTag; +import org.apache.hadoop.hdds.conf.ConfigType; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; + +@ConfigGroup(prefix = "hdds.datanode.io") +public class IOAnalyzerConfiguration { + + public static final long IO_ANALYZER_INTERVAL_DEFAULT = + Duration.ofDays(7).toMillis(); + + @Config(key = "enabled", + type = ConfigType.BOOLEAN, + defaultValue = "true", + tags = {ConfigTag.DATANODE}, + description = "Config parameter to enable datanode io analyzer.") + private boolean enabled = true; + + @Config(key = "io.analyzer.interval", + type = ConfigType.TIME, + defaultValue = "30s", + tags = {ConfigTag.STORAGE}, + description = "The time interval for acquiring IO data is set to 30s.") + private long iOAnalyzerInterval = IO_ANALYZER_INTERVAL_DEFAULT; + + public long getIOAnalyzerInterval() { + return iOAnalyzerInterval; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 
e40fa635c121..2f0be59ca43f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -56,6 +56,8 @@ import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.common.volume.StorageVolume.VolumeType; import org.apache.hadoop.ozone.container.common.volume.StorageVolumeChecker; +import org.apache.hadoop.ozone.container.io.BackgroundIOAnalyzer; +import org.apache.hadoop.ozone.container.io.IOAnalyzerConfiguration; import org.apache.hadoop.ozone.container.keyvalue.statemachine.background.StaleRecoveringContainerScrubbingService; import org.apache.hadoop.ozone.container.replication.ContainerImporter; import org.apache.hadoop.ozone.container.replication.ReplicationServer; @@ -126,6 +128,7 @@ public class OzoneContainer { private DatanodeDetails datanodeDetails; private StateContext context; private ScheduledExecutorService dbCompactionExecutorService; + private BackgroundIOAnalyzer ioAnalyzer; private final ContainerMetrics metrics; @@ -284,6 +287,10 @@ public OzoneContainer(HddsDatanodeService hddsDatanodeService, initializingStatus = new AtomicReference<>(InitializingStatus.UNINITIALIZED); + + IOAnalyzerConfiguration c = config.getObject( + IOAnalyzerConfiguration.class); + ioAnalyzer = new BackgroundIOAnalyzer(c); } /** @@ -513,6 +520,7 @@ public void stop() { blockDeletingService.shutdown(); recoveringContainerScrubbingService.shutdown(); ContainerMetrics.remove(); + ioAnalyzer.shutdown(); } public void handleVolumeFailures() { From f57364832c43fa9fd6ed2907903a828ee70878a9 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Sat, 15 Feb 2025 22:02:44 +0100 Subject: [PATCH 2/2] fix header and imports --- .../container/io/BackgroundIOAnalyzer.java | 33 +++++++++--------- .../ozone/container/io/DataNodeIOMetrics.java | 28 +++++++-------- 
.../container/io/IOAnalyzerConfiguration.java | 34 ++++++++----------- 3 files changed, 43 insertions(+), 52 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java index 5e793e25342b..5ce4941fa6aa 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/BackgroundIOAnalyzer.java @@ -1,31 +1,30 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.hadoop.ozone.container.io; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.hadoop.util.Shell; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +package org.apache.hadoop.ozone.container.io; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.hadoop.util.Shell; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class BackgroundIOAnalyzer extends Thread { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java index 28f6a57851c7..ece4b0eefd52 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/DataNodeIOMetrics.java @@ -1,22 +1,20 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + package org.apache.hadoop.ozone.container.io; import org.apache.hadoop.hdds.annotation.InterfaceAudience.Private; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java index 0039de66657e..36ffb817f594 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/io/IOAnalyzerConfiguration.java @@ -1,33 +1,27 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + package org.apache.hadoop.ozone.container.io; +import java.time.Duration; import org.apache.hadoop.hdds.conf.Config; import org.apache.hadoop.hdds.conf.ConfigGroup; import org.apache.hadoop.hdds.conf.ConfigTag; import org.apache.hadoop.hdds.conf.ConfigType; -import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.time.Duration; @ConfigGroup(prefix = "hdds.datanode.io") public class IOAnalyzerConfiguration {