From 4d00b38c7e5dfe4760f847fb2dfd325ca491937d Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 24 Oct 2016 10:46:14 -0400 Subject: [PATCH 1/6] Strengthen handling of unavailable cgroup stats On some systems, cgroups will be available but not configured. And in some cases, cgroups will be configured, but not for the subsystems that we are expecting (e.g., cpu and cpuacct). This commit strengthens the handling of cgroup stats on such systems. --- .../org/elasticsearch/monitor/os/OsProbe.java | 197 +++++++++++------- .../org/elasticsearch/monitor/os/OsStats.java | 16 +- 2 files changed, 137 insertions(+), 76 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java index 346a3915cbf34..e11553509c974 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java @@ -32,6 +32,7 @@ import java.lang.reflect.Method; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Locale; @@ -215,22 +216,27 @@ private String readSingleLine(final Path path) throws IOException { */ private Map getControlGroups() throws IOException { final List lines = readProcSelfCgroup(); - final Map controllerMap = new HashMap<>(); - for (final String line : lines) { - final Matcher matcher = CONTROL_GROUP_PATTERN.matcher(line); - // note that Matcher#matches must be invoked as - // matching is lazy; this can not happen in an assert - // as assertions might not be enabled - final boolean matches = matcher.matches(); - assert matches : line; - // at this point we have captured the subsystems and the - // control group - final String[] controllers = matcher.group(1).split(","); - for (final String controller : controllers) { - controllerMap.put(controller, matcher.group(2)); + if (lines == null || lines.isEmpty()) { + return Collections.emptyMap(); + } + else { + final Map controllerMap = new HashMap<>(); + for (final String line : lines) { + final Matcher matcher = CONTROL_GROUP_PATTERN.matcher(line); + // note that Matcher#matches must be invoked as + // matching is lazy; this can not happen in an assert + // as assertions might not be enabled + final boolean matches = matcher.matches(); + assert matches : line; + // at this point we have captured the subsystems and the + // control group + final String[] controllers = matcher.group(1).split(","); + for (final String controller : controllers) { + controllerMap.put(controller, matcher.group(2)); + } } + return controllerMap; } - return controllerMap; } /** @@ -246,15 +252,21 @@ private Map getControlGroups() throws IOException { * bound to the hierarchy, and the last field representing the * control group. * - * @return the lines from {@code /proc/self/cgroup} + * @return the lines from {@code /proc/self/cgroup} or {@code null} + * if this file does not exist * @throws IOException if an I/O exception occurs reading - * {@code /proc/self/cgroup} + * {@code /proc/self/cgroup} */ @SuppressForbidden(reason = "access /proc/self/cgroup") List readProcSelfCgroup() throws IOException { - final List lines = Files.readAllLines(PathUtils.get("/proc/self/cgroup")); - assert lines != null && !lines.isEmpty(); - return lines; + final Path path = PathUtils.get("/proc/self/cgroup"); + if (!Files.exists(path)) { + return null; + } else { + final List lines = Files.readAllLines(path); + assert lines != null; + return lines; + } } /** @@ -264,12 +276,17 @@ List readProcSelfCgroup() throws IOException { * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the total CPU time in nanoseconds + * @return the total CPU time in nanoseconds or -1 if unavailable * @throws IOException if an I/O exception occurs reading - * {@code cpuacct.usage} for the control group + * {@code cpuacct.usage} for the control group */ private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException { - return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup)); + final String line = readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup); + if (line == null) { + return -1; + } else { + return Long.parseLong(line); + } } /** @@ -282,13 +299,19 @@ private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOExce * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpuacct} * subsystem - * @return the line from {@code cpuacct.usage} + * @return the line from {@code cpuacct.usage} or {@code null} if + * this file does not exist * @throws IOException if an I/O exception occurs reading - * {@code cpuacct.usage} for the control group + * {@code cpuacct.usage} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpuacct") String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOException { - return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage")); + final Path path = PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage"); + if (!Files.exists(path)) { + return null; + } else { + return readSingleLine(path); + } } /** @@ -298,12 +321,17 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the CFS quota period in microseconds + * @return the CFS quota period in microsecondsor -1 if unavailable * @throws IOException if an I/O exception occurs reading - * {@code cpu.cfs_period_us} for the control group + * {@code cpu.cfs_period_us} for the control group */ private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throws IOException { - return Long.parseLong(readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup)); + final String line = readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup); + if (line == null) { + return -1; + } else { + return Long.parseLong(line); + } } /** @@ -316,13 +344,19 @@ private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throw * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpu} * subsystem - * @return the line from {@code cpu.cfs_period_us} + * @return the line from {@code cpu.cfs_period_us} or {@code null} + * if this file does not exist * @throws IOException if an I/O exception occurs reading - * {@code cpu.cfs_period_us} for the control group + * {@code cpu.cfs_period_us} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOException { - return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_period_us")); + final Path path = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_period_us"); + if (!Files.exists(path)) { + return null; + } else { + return readSingleLine(path); + } } /** @@ -332,12 +366,17 @@ String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOEx * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the CFS quota in microseconds + * @return the CFS quota in microseconds or -1 if unavailable * @throws IOException if an I/O exception occurs reading - * {@code cpu.cfs_quota_us} for the control group + * {@code cpu.cfs_quota_us} for the control group */ private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException { - return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup)); + final String line = readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup); + if (line == null) { + return -1; + } else { + return Long.parseLong(line); + } } /** @@ -350,13 +389,19 @@ private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpu} * subsystem - * @return the line from {@code cpu.cfs_quota_us} + * @return the line from {@code cpu.cfs_quota_us} or {@code null} + * if this file does not exist * @throws IOException if an I/O exception occurs reading - * {@code cpu.cfs_quota_us} for the control group + * {@code cpu.cfs_quota_us} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws IOException { - return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_quota_us")); + final Path path = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_quota_us"); + if (!Files.exists(path)) { + return null; + } else { + return readSingleLine(path); + } } /** @@ -365,33 +410,37 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws I * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the CPU time statistics + * @return the CPU time statistics or {@code null} if unavailable * @throws IOException if an I/O exception occurs reading - * {@code cpu.stat} for the control group + * {@code cpu.stat} for the control group */ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup) throws IOException { final List lines = readSysFsCgroupCpuAcctCpuStat(controlGroup); - long numberOfPeriods = -1; - long numberOfTimesThrottled = -1; - long timeThrottledNanos = -1; - for (final String line : lines) { - final String[] fields = line.split("\\s+"); - switch (fields[0]) { - case "nr_periods": - numberOfPeriods = Long.parseLong(fields[1]); - break; - case "nr_throttled": - numberOfTimesThrottled = Long.parseLong(fields[1]); - break; - case "throttled_time": - timeThrottledNanos = Long.parseLong(fields[1]); - break; + if (lines == null) { + return null; + } else { + long numberOfPeriods = -1; + long numberOfTimesThrottled = -1; + long timeThrottledNanos = -1; + for (final String line : lines) { + final String[] fields = line.split("\\s+"); + switch (fields[0]) { + case "nr_periods": + numberOfPeriods = Long.parseLong(fields[1]); + break; + case "nr_throttled": + numberOfTimesThrottled = Long.parseLong(fields[1]); + break; + case "throttled_time": + timeThrottledNanos = Long.parseLong(fields[1]); + break; + } } + assert numberOfPeriods != -1; + assert numberOfTimesThrottled != -1; + assert timeThrottledNanos != -1; + return new OsStats.Cgroup.CpuStat(numberOfPeriods, numberOfTimesThrottled, timeThrottledNanos); } - assert numberOfPeriods != -1; - assert numberOfTimesThrottled != -1; - assert timeThrottledNanos != -1; - return new OsStats.Cgroup.CpuStat(numberOfPeriods, numberOfTimesThrottled, timeThrottledNanos); } /** @@ -399,11 +448,11 @@ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup * group to which the Elasticsearch process belongs for the * {@code cpu} subsystem. These lines represent the CPU time * statistics and have the form - * + *

* nr_periods \d+ * nr_throttled \d+ * throttled_time \d+ - * + *

* where {@code nr_periods} is the number of period intervals * as specified by {@code cpu.cfs_period_us} that have elapsed, * {@code nr_throttled} is the number of times tasks in the given @@ -414,16 +463,21 @@ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpu} * subsystem - * - * @return the lines from {@code cpu.stat} + * @return the lines from {@code cpu.stat} or {@code null} if this + * file does not exist * @throws IOException if an I/O exception occurs reading - * {@code cpu.stat} for the control group + * {@code cpu.stat} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOException { - final List lines = Files.readAllLines(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.stat")); - assert lines != null && lines.size() == 3; - return lines; + final Path path = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.stat"); + if (!Files.exists(path)) { + return null; + } else { + final List lines = Files.readAllLines(path); + assert lines != null && lines.size() == 3; + return lines; + } } /** @@ -435,15 +489,18 @@ List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE private OsStats.Cgroup getCgroup() { try { final Map controllerMap = getControlGroups(); + if (controllerMap.isEmpty()) { + return null; + } final String cpuControlGroup = controllerMap.get("cpu"); final String cpuAcctControlGroup = controllerMap.get("cpuacct"); return new OsStats.Cgroup( cpuAcctControlGroup, - getCgroupCpuAcctUsageNanos(cpuAcctControlGroup), + cpuAcctControlGroup == null ? -1 : getCgroupCpuAcctUsageNanos(cpuAcctControlGroup), cpuControlGroup, - getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup), - getCGroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup), - getCgroupCpuAcctCpuStat(cpuControlGroup)); + cpuControlGroup == null ? -1 : getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup), + cpuControlGroup == null ? -1 : getCGroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup), + cpuControlGroup == null ? null : getCgroupCpuAcctCpuStat(cpuControlGroup)); } catch (final IOException e) { if (logger.isDebugEnabled()) { logger.debug("error reading control group stats", e); diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java index 8a7a842e9de1d..1579c059d2e8c 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java @@ -360,26 +360,26 @@ public Cgroup( this.cpuControlGroup = cpuControlGroup; this.cpuCfsPeriodMicros = cpuCfsPeriodMicros; this.cpuCfsQuotaMicros = cpuCfsQuotaMicros; - this.cpuStat = Objects.requireNonNull(cpuStat); + this.cpuStat = cpuStat; } Cgroup(final StreamInput in) throws IOException { - cpuAcctControlGroup = in.readString(); + cpuAcctControlGroup = in.readOptionalString(); cpuAcctUsageNanos = in.readLong(); cpuControlGroup = in.readString(); cpuCfsPeriodMicros = in.readLong(); cpuCfsQuotaMicros = in.readLong(); - cpuStat = new CpuStat(in); + cpuStat = in.readOptionalWriteable(CpuStat::new); } @Override public void writeTo(final StreamOutput out) throws IOException { - out.writeString(cpuAcctControlGroup); + out.writeOptionalString(cpuAcctControlGroup); out.writeLong(cpuAcctUsageNanos); out.writeString(cpuControlGroup); out.writeLong(cpuCfsPeriodMicros); out.writeLong(cpuCfsQuotaMicros); - cpuStat.writeTo(out); + out.writeOptionalWriteable(cpuStat); } @Override @@ -397,7 +397,11 @@ public XContentBuilder toXContent(final XContentBuilder builder, final Params pa builder.field("control_group", cpuControlGroup); builder.field("cfs_period_micros", cpuCfsPeriodMicros); builder.field("cfs_quota_micros", cpuCfsQuotaMicros); - cpuStat.toXContent(builder, params); + if (cpuStat == null) { + builder.field("stat", (Object) null); + } else { + cpuStat.toXContent(builder, params); + } } builder.endObject(); } From 29ed3ba68f122c3f064e631a3f8205ce118fa570 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 24 Oct 2016 11:54:05 -0400 Subject: [PATCH 2/6] Check for existence of cgroup stats upfront This commit checks for the existence of cgroup stats upfront rather than allowing leniency in the cgroup methods. --- .../org/elasticsearch/monitor/os/OsProbe.java | 170 ++++++++---------- .../elasticsearch/bootstrap/security.policy | 2 + 2 files changed, 75 insertions(+), 97 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java index e11553509c974..0d32c88d43b78 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java @@ -212,11 +212,11 @@ private String readSingleLine(final Path path) throws IOException { * @return a map from subsystems to the control group for the * Elasticsearch process. * @throws IOException if an I/O exception occurs reading - * {@code /proc/self/cgroup} + * {@code /proc/self/cgroup} */ private Map getControlGroups() throws IOException { final List lines = readProcSelfCgroup(); - if (lines == null || lines.isEmpty()) { + if (lines.isEmpty()) { return Collections.emptyMap(); } else { @@ -252,21 +252,15 @@ private Map getControlGroups() throws IOException { * bound to the hierarchy, and the last field representing the * control group. * - * @return the lines from {@code /proc/self/cgroup} or {@code null} - * if this file does not exist + * @return the lines from {@code /proc/self/cgroup} * @throws IOException if an I/O exception occurs reading * {@code /proc/self/cgroup} */ @SuppressForbidden(reason = "access /proc/self/cgroup") List readProcSelfCgroup() throws IOException { - final Path path = PathUtils.get("/proc/self/cgroup"); - if (!Files.exists(path)) { - return null; - } else { - final List lines = Files.readAllLines(path); - assert lines != null; - return lines; - } + final List lines = Files.readAllLines(PathUtils.get("/proc/self/cgroup")); + assert lines != null; + return lines; } /** @@ -276,17 +270,12 @@ List readProcSelfCgroup() throws IOException { * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the total CPU time in nanoseconds or -1 if unavailable + * @return the total CPU time in nanoseconds * @throws IOException if an I/O exception occurs reading * {@code cpuacct.usage} for the control group */ private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException { - final String line = readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup); - if (line == null) { - return -1; - } else { - return Long.parseLong(line); - } + return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup)); } /** @@ -299,19 +288,13 @@ private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOExce * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpuacct} * subsystem - * @return the line from {@code cpuacct.usage} or {@code null} if - * this file does not exist + * @return the line from {@code cpuacct.usage} * @throws IOException if an I/O exception occurs reading * {@code cpuacct.usage} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpuacct") String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOException { - final Path path = PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage"); - if (!Files.exists(path)) { - return null; - } else { - return readSingleLine(path); - } + return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage")); } /** @@ -321,17 +304,12 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the CFS quota period in microsecondsor -1 if unavailable + * @return the CFS quota period in microseconds * @throws IOException if an I/O exception occurs reading * {@code cpu.cfs_period_us} for the control group */ private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throws IOException { - final String line = readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup); - if (line == null) { - return -1; - } else { - return Long.parseLong(line); - } + return Long.parseLong(readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup)); } /** @@ -344,19 +322,13 @@ private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throw * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpu} * subsystem - * @return the line from {@code cpu.cfs_period_us} or {@code null} - * if this file does not exist + * @return the line from {@code cpu.cfs_period_us} * @throws IOException if an I/O exception occurs reading * {@code cpu.cfs_period_us} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOException { - final Path path = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_period_us"); - if (!Files.exists(path)) { - return null; - } else { - return readSingleLine(path); - } + return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_period_us")); } /** @@ -366,17 +338,12 @@ String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOEx * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the CFS quota in microseconds or -1 if unavailable + * @return the CFS quota in microseconds * @throws IOException if an I/O exception occurs reading * {@code cpu.cfs_quota_us} for the control group */ - private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException { - final String line = readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup); - if (line == null) { - return -1; - } else { - return Long.parseLong(line); - } + private long getCgroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException { + return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup)); } /** @@ -389,19 +356,13 @@ private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpu} * subsystem - * @return the line from {@code cpu.cfs_quota_us} or {@code null} - * if this file does not exist + * @return the line from {@code cpu.cfs_quota_us} * @throws IOException if an I/O exception occurs reading * {@code cpu.cfs_quota_us} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws IOException { - final Path path = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_quota_us"); - if (!Files.exists(path)) { - return null; - } else { - return readSingleLine(path); - } + return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_quota_us")); } /** @@ -410,37 +371,33 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws I * * @param controlGroup the control group for the Elasticsearch * process for the {@code cpuacct} subsystem - * @return the CPU time statistics or {@code null} if unavailable + * @return the CPU time statistics * @throws IOException if an I/O exception occurs reading * {@code cpu.stat} for the control group */ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup) throws IOException { final List lines = readSysFsCgroupCpuAcctCpuStat(controlGroup); - if (lines == null) { - return null; - } else { - long numberOfPeriods = -1; - long numberOfTimesThrottled = -1; - long timeThrottledNanos = -1; - for (final String line : lines) { - final String[] fields = line.split("\\s+"); - switch (fields[0]) { - case "nr_periods": - numberOfPeriods = Long.parseLong(fields[1]); - break; - case "nr_throttled": - numberOfTimesThrottled = Long.parseLong(fields[1]); - break; - case "throttled_time": - timeThrottledNanos = Long.parseLong(fields[1]); - break; - } + long numberOfPeriods = -1; + long numberOfTimesThrottled = -1; + long timeThrottledNanos = -1; + for (final String line : lines) { + final String[] fields = line.split("\\s+"); + switch (fields[0]) { + case "nr_periods": + numberOfPeriods = Long.parseLong(fields[1]); + break; + case "nr_throttled": + numberOfTimesThrottled = Long.parseLong(fields[1]); + break; + case "throttled_time": + timeThrottledNanos = Long.parseLong(fields[1]); + break; } - assert numberOfPeriods != -1; - assert numberOfTimesThrottled != -1; - assert timeThrottledNanos != -1; - return new OsStats.Cgroup.CpuStat(numberOfPeriods, numberOfTimesThrottled, timeThrottledNanos); } + assert numberOfPeriods != -1; + assert numberOfTimesThrottled != -1; + assert timeThrottledNanos != -1; + return new OsStats.Cgroup.CpuStat(numberOfPeriods, numberOfTimesThrottled, timeThrottledNanos); } /** @@ -463,21 +420,15 @@ private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup * @param controlGroup the control group to which the Elasticsearch * process belongs for the {@code cpu} * subsystem - * @return the lines from {@code cpu.stat} or {@code null} if this - * file does not exist + * @return the lines from {@code cpu.stat} * @throws IOException if an I/O exception occurs reading * {@code cpu.stat} for the control group */ @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu") List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOException { - final Path path = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.stat"); - if (!Files.exists(path)) { - return null; - } else { - final List lines = Files.readAllLines(path); - assert lines != null && lines.size() == 3; - return lines; - } + final List lines = Files.readAllLines(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.stat")); + assert lines != null && lines.size() == 3; + return lines; } /** @@ -486,21 +437,46 @@ List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE * @return basic cgroup stats, or {@code null} if an I/O exception * occurred reading the cgroup stats */ + @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, and /sys/fs/cgroup/cpuacct") private OsStats.Cgroup getCgroup() { try { + if (!Files.exists(PathUtils.get("/proc/self/cgroup"))) { + return null; + } final Map controllerMap = getControlGroups(); if (controllerMap.isEmpty()) { return null; } - final String cpuControlGroup = controllerMap.get("cpu"); + final String cpuAcctControlGroup = controllerMap.get("cpuacct"); + final long cgroupCpuAcctUsageNanos; + if (cpuAcctControlGroup == null || !Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct"))) { + cgroupCpuAcctUsageNanos = -1; + } else { + cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup); + } + + final String cpuControlGroup = controllerMap.get("cpu"); + final long cgroupCpuAcctCpuCfsPeriodMicros; + final long cgroupCpuAcctCpuCfsQuotaMicros; + final OsStats.Cgroup.CpuStat cpuStat; + if (cpuControlGroup == null || !Files.exists(PathUtils.get("/sys/fs/cgroup/cpu"))) { + cgroupCpuAcctCpuCfsPeriodMicros = -1; + cgroupCpuAcctCpuCfsQuotaMicros = -1; + cpuStat = null; + } else { + cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup); + cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup); + cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup); + } + return new OsStats.Cgroup( cpuAcctControlGroup, - cpuAcctControlGroup == null ? -1 : getCgroupCpuAcctUsageNanos(cpuAcctControlGroup), + cgroupCpuAcctUsageNanos, cpuControlGroup, - cpuControlGroup == null ? -1 : getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup), - cpuControlGroup == null ? -1 : getCGroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup), - cpuControlGroup == null ? null : getCgroupCpuAcctCpuStat(cpuControlGroup)); + cgroupCpuAcctCpuCfsPeriodMicros, + cgroupCpuAcctCpuCfsQuotaMicros, + cpuStat); } catch (final IOException e) { if (logger.isDebugEnabled()) { logger.debug("error reading control group stats", e); diff --git a/core/src/main/resources/org/elasticsearch/bootstrap/security.policy b/core/src/main/resources/org/elasticsearch/bootstrap/security.policy index dfc00dcb01ac8..cbd1f93491b68 100644 --- a/core/src/main/resources/org/elasticsearch/bootstrap/security.policy +++ b/core/src/main/resources/org/elasticsearch/bootstrap/security.policy @@ -124,6 +124,8 @@ grant { // control group stats on Linux permission java.io.FilePermission "/proc/self/cgroup", "read"; + permission java.io.FilePermission "/sys/fs/cgroup/cpu", "read"; permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read"; + permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read"; permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read"; }; From 4c8a2cc72d7dddd460cc28117c6d1623f77374fb Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 24 Oct 2016 14:27:22 -0400 Subject: [PATCH 3/6] Make explicit upfront checking for cgroup stats This commit refactors the logic for the upfront checking whether the relevant cgroup stats are available or not, and adds a note to the docs. --- .../org/elasticsearch/monitor/os/OsProbe.java | 70 +++++++++---------- .../org/elasticsearch/monitor/os/OsStats.java | 18 ++--- docs/reference/cluster/nodes-stats.asciidoc | 5 ++ 3 files changed, 47 insertions(+), 46 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java index 0d32c88d43b78..b0365236b0f62 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java @@ -259,7 +259,7 @@ private Map getControlGroups() throws IOException { @SuppressForbidden(reason = "access /proc/self/cgroup") List readProcSelfCgroup() throws IOException { final List lines = Files.readAllLines(PathUtils.get("/proc/self/cgroup")); - assert lines != null; + assert lines != null && !lines.isEmpty(); return lines; } @@ -431,6 +431,19 @@ List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE return lines; } + private boolean areCgroupStatsAvailable() { + if (!Files.exists(PathUtils.get("/proc/self/cgroup"))) { + return false; + } + if (!Files.exists(PathUtils.get("/sys/fs/cgroup/cpu"))) { + return false; + } + if (!Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct"))) { + return false; + } + return true; + } + /** * Basic cgroup stats. * @@ -440,43 +453,30 @@ List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, and /sys/fs/cgroup/cpuacct") private OsStats.Cgroup getCgroup() { try { - if (!Files.exists(PathUtils.get("/proc/self/cgroup"))) { - return null; - } - final Map controllerMap = getControlGroups(); - if (controllerMap.isEmpty()) { + if (!areCgroupStatsAvailable()) { return null; - } - - final String cpuAcctControlGroup = controllerMap.get("cpuacct"); - final long cgroupCpuAcctUsageNanos; - if (cpuAcctControlGroup == null || !Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct"))) { - cgroupCpuAcctUsageNanos = -1; - } else { - cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup); - } - - final String cpuControlGroup = controllerMap.get("cpu"); - final long cgroupCpuAcctCpuCfsPeriodMicros; - final long cgroupCpuAcctCpuCfsQuotaMicros; - final OsStats.Cgroup.CpuStat cpuStat; - if (cpuControlGroup == null || !Files.exists(PathUtils.get("/sys/fs/cgroup/cpu"))) { - cgroupCpuAcctCpuCfsPeriodMicros = -1; - cgroupCpuAcctCpuCfsQuotaMicros = -1; - cpuStat = null; } else { - cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup); - cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup); - cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup); + final Map controllerMap = getControlGroups(); + assert !controllerMap.isEmpty(); + + final String cpuAcctControlGroup = controllerMap.get("cpuacct"); + assert cpuAcctControlGroup != null; + final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup); + + final String cpuControlGroup = controllerMap.get("cpu"); + assert cpuControlGroup != null; + final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup); + final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup); + final OsStats.Cgroup.CpuStat cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup); + + return new OsStats.Cgroup( + cpuAcctControlGroup, + cgroupCpuAcctUsageNanos, + cpuControlGroup, + cgroupCpuAcctCpuCfsPeriodMicros, + cgroupCpuAcctCpuCfsQuotaMicros, + cpuStat); } - - return new OsStats.Cgroup( - cpuAcctControlGroup, - cgroupCpuAcctUsageNanos, - cpuControlGroup, - cgroupCpuAcctCpuCfsPeriodMicros, - cgroupCpuAcctCpuCfsQuotaMicros, - cpuStat); } catch (final IOException e) { if (logger.isDebugEnabled()) { logger.debug("error reading control group stats", e); diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java index 1579c059d2e8c..8c1cda5d554a8 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java @@ -355,21 +355,21 @@ public Cgroup( final long cpuCfsPeriodMicros, final long cpuCfsQuotaMicros, final CpuStat cpuStat) { - this.cpuAcctControlGroup = cpuAcctControlGroup; + this.cpuAcctControlGroup = Objects.requireNonNull(cpuAcctControlGroup); this.cpuAcctUsageNanos = cpuAcctUsageNanos; - this.cpuControlGroup = cpuControlGroup; + this.cpuControlGroup = Objects.requireNonNull(cpuControlGroup); this.cpuCfsPeriodMicros = cpuCfsPeriodMicros; this.cpuCfsQuotaMicros = cpuCfsQuotaMicros; - this.cpuStat = cpuStat; + this.cpuStat = Objects.requireNonNull(cpuStat); } Cgroup(final StreamInput in) throws IOException { - cpuAcctControlGroup = in.readOptionalString(); + cpuAcctControlGroup = in.readString(); cpuAcctUsageNanos = in.readLong(); cpuControlGroup = in.readString(); cpuCfsPeriodMicros = in.readLong(); cpuCfsQuotaMicros = in.readLong(); - cpuStat = in.readOptionalWriteable(CpuStat::new); + cpuStat = new CpuStat(in); } @Override @@ -379,7 +379,7 @@ public void writeTo(final StreamOutput out) throws IOException { out.writeString(cpuControlGroup); out.writeLong(cpuCfsPeriodMicros); out.writeLong(cpuCfsQuotaMicros); - out.writeOptionalWriteable(cpuStat); + cpuStat.writeTo(out); } @Override @@ -397,11 +397,7 @@ public XContentBuilder toXContent(final XContentBuilder builder, final Params pa builder.field("control_group", cpuControlGroup); builder.field("cfs_period_micros", cpuCfsPeriodMicros); builder.field("cfs_quota_micros", cpuCfsQuotaMicros); - if (cpuStat == null) { - builder.field("stat", (Object) null); - } else { - cpuStat.toXContent(builder, params); - } + cpuStat.toXContent(builder, params); } builder.endObject(); } diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc index a9b21d8ddaa7d..94d954f066444 100644 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ b/docs/reference/cluster/nodes-stats.asciidoc @@ -250,6 +250,11 @@ the operating system: The total amount of time (in nanoseconds) for which all tasks in the same cgroup as the Elasticsearch process have been throttled. +NOTE: For the cgroup stats to be visible, cgroups must be compiled into +the kernal, the `cpu` and `cpuacct` cgroup subsystems must be +configured and stats must be readable from `/sys/fs/cgroup/cpu` +and `/sys/fs/cgroup/cpuacct`. + [float] [[process-stats]] ==== Process statistics From c322b7619add537b7d2e186349b8f912a62c6189 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 24 Oct 2016 14:58:53 -0400 Subject: [PATCH 4/6] Add Javadocs for OsProbe#areCgroupStatsAvailable This commit adds Javadocs for a convenience method OsProbe#areCgroupStatsAvailable. --- .../main/java/org/elasticsearch/monitor/os/OsProbe.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java index b0365236b0f62..94349a59c9edf 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java @@ -431,6 +431,14 @@ List readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOE return lines; } + /** + * Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, + * {@code /sys/fs/cgroup/cpu}, and {@code /sys/fs/cgroup/cpuacct}. + * + * @return {@code true} if the stats are available, otherwise + * {@code false} + */ + @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, and /sys/fs/cgroup/cpuacct") private boolean areCgroupStatsAvailable() { if (!Files.exists(PathUtils.get("/proc/self/cgroup"))) { return false; @@ -450,7 +458,6 @@ private boolean areCgroupStatsAvailable() { * @return basic cgroup stats, or {@code null} if an I/O exception * occurred reading the cgroup stats */ - @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, and /sys/fs/cgroup/cpuacct") private OsStats.Cgroup getCgroup() { try { if (!areCgroupStatsAvailable()) { From 8c7d7be253f5c524c6d1a8aa0ae36af2c438401e Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 24 Oct 2016 15:53:43 -0400 Subject: [PATCH 5/6] Remove obsolete leniency when reading cgroups This commit removes some leniency when reading cgroups that was leftover after a previous refactoring. --- .../org/elasticsearch/monitor/os/OsProbe.java | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java index 94349a59c9edf..49c24fad9f4d5 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java @@ -216,27 +216,22 @@ private String readSingleLine(final Path path) throws IOException { */ private Map getControlGroups() throws IOException { final List lines = readProcSelfCgroup(); - if (lines.isEmpty()) { - return Collections.emptyMap(); - } - else { - final Map controllerMap = new HashMap<>(); - for (final String line : lines) { - final Matcher matcher = CONTROL_GROUP_PATTERN.matcher(line); - // note that Matcher#matches must be invoked as - // matching is lazy; this can not happen in an assert - // as assertions might not be enabled - final boolean matches = matcher.matches(); - assert matches : line; - // at this point we have captured the subsystems and the - // control group - final String[] controllers = matcher.group(1).split(","); - for (final String controller : controllers) { - controllerMap.put(controller, matcher.group(2)); - } + final Map controllerMap = new HashMap<>(); + for (final String line : lines) { + final Matcher matcher = CONTROL_GROUP_PATTERN.matcher(line); + // note that Matcher#matches must be invoked as + // matching is lazy; this can not happen in an assert + // as assertions might not be enabled + final boolean matches = matcher.matches(); + assert matches : line; + // at this point we have captured the subsystems and the + // control group + final String[] controllers = matcher.group(1).split(","); + for (final String controller : controllers) { + controllerMap.put(controller, matcher.group(2)); } - return controllerMap; } + return controllerMap; } /** From 1d53577c0254a37a26997459c4eb453d29011c18 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Mon, 24 Oct 2016 15:55:00 -0400 Subject: [PATCH 6/6] Fix serialization bug in OsStats This commit fixes a serialization bug in OsStats when serializing Cgroup instances. Namely, the control groups were made non-optional in a previous refactoring but the commit that cleaned this up left one case of writing a string as optional. --- core/src/main/java/org/elasticsearch/monitor/os/OsStats.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java index 8c1cda5d554a8..fa3c6aa861def 100644 --- a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java +++ b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java @@ -374,7 +374,7 @@ public Cgroup( @Override public void writeTo(final StreamOutput out) throws IOException { - out.writeOptionalString(cpuAcctControlGroup); + out.writeString(cpuAcctControlGroup); out.writeLong(cpuAcctUsageNanos); out.writeString(cpuControlGroup); out.writeLong(cpuCfsPeriodMicros);