-
Notifications
You must be signed in to change notification settings - Fork 25.9k
Strengthen handling of unavailable cgroup stats #21094
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
4d00b38
Strengthen handling of unavailable cgroup stats
jasontedor 29ed3ba
Check for existence of cgroup stats upfront
jasontedor 4c8a2cc
Make explicit upfront checking for cgroup stats
jasontedor c322b76
Add Javadocs for OsProbe#areCgroupStatsAvailable
jasontedor 8c7d7be
Remove obsolete leniency when reading cgroups
jasontedor 1d53577
Fix serialization bug in OsStats
jasontedor File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ | |
| import java.lang.reflect.Method; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.util.Collections; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Locale; | ||
|
|
@@ -211,7 +212,7 @@ private String readSingleLine(final Path path) throws IOException { | |
| * @return a map from subsystems to the control group for the | ||
| * Elasticsearch process. | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code /proc/self/cgroup} | ||
| * {@code /proc/self/cgroup} | ||
| */ | ||
| private Map<String, String> getControlGroups() throws IOException { | ||
| final List<String> lines = readProcSelfCgroup(); | ||
|
|
@@ -248,7 +249,7 @@ private Map<String, String> getControlGroups() throws IOException { | |
| * | ||
| * @return the lines from {@code /proc/self/cgroup} | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code /proc/self/cgroup} | ||
| * {@code /proc/self/cgroup} | ||
| */ | ||
| @SuppressForbidden(reason = "access /proc/self/cgroup") | ||
| List<String> readProcSelfCgroup() throws IOException { | ||
|
|
@@ -266,7 +267,7 @@ List<String> readProcSelfCgroup() throws IOException { | |
| * process for the {@code cpuacct} subsystem | ||
| * @return the total CPU time in nanoseconds | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpuacct.usage} for the control group | ||
| * {@code cpuacct.usage} for the control group | ||
| */ | ||
| private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException { | ||
| return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup)); | ||
|
|
@@ -284,7 +285,7 @@ private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOExce | |
| * subsystem | ||
| * @return the line from {@code cpuacct.usage} | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpuacct.usage} for the control group | ||
| * {@code cpuacct.usage} for the control group | ||
| */ | ||
| @SuppressForbidden(reason = "access /sys/fs/cgroup/cpuacct") | ||
| String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOException { | ||
|
|
@@ -300,7 +301,7 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx | |
| * process for the {@code cpuacct} subsystem | ||
| * @return the CFS quota period in microseconds | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpu.cfs_period_us} for the control group | ||
| * {@code cpu.cfs_period_us} for the control group | ||
| */ | ||
| private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throws IOException { | ||
| return Long.parseLong(readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup)); | ||
|
|
@@ -318,7 +319,7 @@ private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throw | |
| * subsystem | ||
| * @return the line from {@code cpu.cfs_period_us} | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpu.cfs_period_us} for the control group | ||
| * {@code cpu.cfs_period_us} for the control group | ||
| */ | ||
| @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") | ||
| String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOException { | ||
|
|
@@ -334,9 +335,9 @@ String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOEx | |
| * process for the {@code cpuacct} subsystem | ||
| * @return the CFS quota in microseconds | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpu.cfs_quota_us} for the control group | ||
| * {@code cpu.cfs_quota_us} for the control group | ||
| */ | ||
| private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException { | ||
| private long getCgroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException { | ||
| return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup)); | ||
| } | ||
|
|
||
|
|
@@ -352,7 +353,7 @@ private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws | |
| * subsystem | ||
| * @return the line from {@code cpu.cfs_quota_us} | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpu.cfs_quota_us} for the control group | ||
| * {@code cpu.cfs_quota_us} for the control group | ||
| */ | ||
| @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") | ||
| String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws IOException { | ||
|
|
@@ -367,7 +368,7 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws I | |
| * process for the {@code cpuacct} subsystem | ||
| * @return the CPU time statistics | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpu.stat} for the control group | ||
| * {@code cpu.stat} for the control group | ||
| */ | ||
| private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup) throws IOException { | ||
| final List<String> lines = readSysFsCgroupCpuAcctCpuStat(controlGroup); | ||
|
|
@@ -399,11 +400,11 @@ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup | |
| * group to which the Elasticsearch process belongs for the | ||
| * {@code cpu} subsystem. These lines represent the CPU time | ||
| * statistics and have the form | ||
| * | ||
| * <p> | ||
| * nr_periods \d+ | ||
| * nr_throttled \d+ | ||
| * throttled_time \d+ | ||
| * | ||
| * <p> | ||
| * where {@code nr_periods} is the number of period intervals | ||
| * as specified by {@code cpu.cfs_period_us} that have elapsed, | ||
| * {@code nr_throttled} is the number of times tasks in the given | ||
|
|
@@ -414,10 +415,9 @@ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup | |
| * @param controlGroup the control group to which the Elasticsearch | ||
| * process belongs for the {@code cpu} | ||
| * subsystem | ||
| * | ||
| * @return the lines from {@code cpu.stat} | ||
| * @throws IOException if an I/O exception occurs reading | ||
| * {@code cpu.stat} for the control group | ||
| * {@code cpu.stat} for the control group | ||
| */ | ||
| @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") | ||
| List<String> readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOException { | ||
|
|
@@ -426,6 +426,27 @@ List<String> readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE | |
| return lines; | ||
| } | ||
|
|
||
| /** | ||
| * Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, | ||
| * {@code /sys/fs/cgroup/cpu}, and {@code /sys/fs/cgroup/cpuacct}. | ||
| * <p> | ||
| * Only the existence of each path is tested; nothing is read from them here. The paths are | ||
| * checked in the order listed above and the method returns {@code false} at the first one | ||
| * that does not exist, so later paths are not examined in that case. | ||
| * <p> | ||
| * {@code getCgroup()} calls this before reading any control group data and returns | ||
| * {@code null} when the result is {@code false}. | ||
| * | ||
| * @return {@code true} if the stats are available, otherwise | ||
| * {@code false} | ||
| */ | ||
| @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, and /sys/fs/cgroup/cpuacct") | ||
| private boolean areCgroupStatsAvailable() { | ||
| if (!Files.exists(PathUtils.get("/proc/self/cgroup"))) { | ||
| return false; | ||
| } | ||
| if (!Files.exists(PathUtils.get("/sys/fs/cgroup/cpu"))) { | ||
| return false; | ||
| } | ||
| if (!Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct"))) { | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| /** | ||
| * Basic cgroup stats. | ||
| * | ||
|
|
@@ -434,16 +455,30 @@ List<String> readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE | |
| */ | ||
| private OsStats.Cgroup getCgroup() { | ||
| try { | ||
| final Map<String, String> controllerMap = getControlGroups(); | ||
| final String cpuControlGroup = controllerMap.get("cpu"); | ||
| final String cpuAcctControlGroup = controllerMap.get("cpuacct"); | ||
| return new OsStats.Cgroup( | ||
| cpuAcctControlGroup, | ||
| getCgroupCpuAcctUsageNanos(cpuAcctControlGroup), | ||
| cpuControlGroup, | ||
| getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup), | ||
| getCGroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup), | ||
| getCgroupCpuAcctCpuStat(cpuControlGroup)); | ||
| if (!areCgroupStatsAvailable()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
| return null; | ||
| } else { | ||
| final Map<String, String> controllerMap = getControlGroups(); | ||
| assert !controllerMap.isEmpty(); | ||
|
|
||
| final String cpuAcctControlGroup = controllerMap.get("cpuacct"); | ||
| assert cpuAcctControlGroup != null; | ||
| final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup); | ||
|
|
||
| final String cpuControlGroup = controllerMap.get("cpu"); | ||
| assert cpuControlGroup != null; | ||
| final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup); | ||
| final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup); | ||
| final OsStats.Cgroup.CpuStat cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup); | ||
|
|
||
| return new OsStats.Cgroup( | ||
| cpuAcctControlGroup, | ||
| cgroupCpuAcctUsageNanos, | ||
| cpuControlGroup, | ||
| cgroupCpuAcctCpuCfsPeriodMicros, | ||
| cgroupCpuAcctCpuCfsQuotaMicros, | ||
| cpuStat); | ||
| } | ||
| } catch (final IOException e) { | ||
| if (logger.isDebugEnabled()) { | ||
| logger.debug("error reading control group stats", e); | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍