From a440bfad0a47d58eb7a81b5f20c243fb35b3c6a5 Mon Sep 17 00:00:00 2001
From: Stef Nestor <26751266+stefnestor@users.noreply.github.com>
Date: Thu, 25 Jul 2024 17:20:58 -0600
Subject: [PATCH 1/5] Add link to flood-stage watermark exception message
---
.../disk-usage-exceeded.asciidoc | 73 +++++++++++++------
.../geoip/EnterpriseGeoIpDownloaderTests.java | 5 +-
.../ingest/geoip/GeoIpDownloaderTests.java | 5 +-
.../indices/delete/DeleteIndexBlocksIT.java | 5 +-
.../cluster/metadata/IndexMetadata.java | 4 +-
.../elasticsearch/common/ReferenceDocs.java | 1 +
.../common/reference-docs-links.json | 3 +-
7 files changed, 67 insertions(+), 29 deletions(-)
diff --git a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
index 2b3fcc1b6df9f..83e3092d62d7a 100644
--- a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
+++ b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
@@ -4,31 +4,40 @@
++++
Watermark errors
++++
-:keywords: {es}, high watermark, low watermark, full disk
+:keywords: {es}, high watermark, low watermark, full disk, flood stage watermark
When a data node is critically low on disk space and has reached the
<>, the following
-error is logged: `Error: disk usage exceeded flood-stage watermark, index has read-only-allow-delete block`.
+error is logged: `Error: disk usage exceeded flood-stage watermark, index has
+read-only-allow-delete block`.
-To prevent a full disk, when a node reaches this watermark, {es} blocks writes
+To prevent a full disk, when a node reaches this watermark, {es} <>
to any index with a shard on the node. If the block affects related system
-indices, {kib} and other {stack} features may become unavailable.
+indices, {kib} and other {stack} features may become unavailable. For example,
+this could induce {kib}'s `Kibana Server is not Ready yet`
+{kibana-ref}/access.html#not-ready[error message].
{es} will automatically remove the write block when the affected node's disk
-usage goes below the <>. To
-achieve this, {es} automatically moves some of the affected node's shards to
-other nodes in the same data tier.
+usage falls below the <>.
+To achieve this, {es} attempts to rebalances some of the affected node's shards
+to other nodes in the same data tier.
-To verify that shards are moving off the affected node, use the <>.
+[[fix-watermark-errors-rebalance]]
+==== Monitor rebalancing
+
+To verify that shards are moving off the affected node until it falls below the
+high watermark, use the <<cat-shards,cat shards API>> and <<cat-recovery,cat recovery API>>:
[source,console]
----
GET _cat/shards?v=true
+
+GET _cat/recovery?v=true&active_only=true
----
-If shards remain on the node, use the <> to get an explanation for their allocation status.
+If shards remain on the node keeping it above the high watermark, use the
+<> to get an
+explanation for their allocation status.
[source,console]
----
@@ -44,8 +53,12 @@ GET _cluster/allocation/explain
// TEST[s/"primary": false,/"primary": false/]
// TEST[s/"current_node": "my-node"//]
-To immediately restore write operations, you can temporarily increase the disk
-watermarks and remove the write block.
+[[fix-watermark-errors-temporary]]
+==== Temporary relief
+
+To immediately restore write operations, you can temporarily increase the
+<> and remove the
+<>.
[source,console]
----
@@ -67,21 +80,15 @@ PUT */_settings?expand_wildcards=all
{
"index.blocks.read_only_allow_delete": null
}
-----
-// TEST[s/^/PUT my-index\n/]
-
-As a long-term solution, we recommend you add nodes to the affected data tiers
-or upgrade existing nodes to increase disk space. To free up additional disk
-space, you can delete unneeded indices using the <>.
-[source,console]
-----
-DELETE my-index
+PUT .kibana*/_settings?expand_wildcards=all
+{
+ "index.blocks.write": null
+}
----
// TEST[s/^/PUT my-index\n/]
-When a long-term solution is in place, reset or reconfigure the disk watermarks.
+When a long-term solution is in place, reset or reconfigure the disk watermarks:
[source,console]
----
@@ -99,3 +106,21 @@ PUT _cluster/settings
}
}
----
+
+[[fix-watermark-errors-resolve]]
+==== Resolve
+
+As a long-term solution, we recommend you do whichever of the following best
+suits your use case:
+
+* add nodes to the affected <>
+
+* upgrade existing nodes to increase disk space
++
+TIP: On {ess}, https://support.elastic.co[Elastic Support] intervention may
+become necessary if <> reaches `status:red`.
+
+* delete unneeded indices using the <>
+
+* update related <> to push indices
+through to later <>
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java
index 58cb566165db2..203ecaea72c0e 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java
@@ -27,6 +27,7 @@
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.hash.MessageDigests;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
@@ -476,7 +477,9 @@ public void testUpdateDatabasesWriteBlock() {
"index ["
+ geoIpIndex
+ "] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, "
- + "index has read-only-allow-delete block];"
+ + "index has read-only-allow-delete block; for more information, see "
+ + ReferenceDocs.FLOOD_STAGE_WATERMARK
+ + "];"
)
);
verifyNoInteractions(httpClient);
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java
index 06b2605bd6d41..984bd37181fe7 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java
@@ -28,6 +28,7 @@
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
@@ -590,7 +591,9 @@ public void testUpdateDatabasesWriteBlock() {
"index ["
+ geoIpIndex
+ "] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, "
- + "index has read-only-allow-delete block];"
+ + "index has read-only-allow-delete block; for more information, see "
+ + ReferenceDocs.FLOOD_STAGE_WATERMARK
+ + "];"
)
);
verifyNoInteractions(httpClient);
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexBlocksIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexBlocksIT.java
index 3560b74189d1d..415cfff459a67 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexBlocksIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexBlocksIT.java
@@ -12,6 +12,7 @@
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
+import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESIntegTestCase;
@@ -68,7 +69,9 @@ public void testClusterBlockMessageHasIndexName() {
ClusterBlockException e = expectThrows(ClusterBlockException.class, prepareIndex("test").setId("1").setSource("foo", "bar"));
assertEquals(
"index [test] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, "
- + "index has read-only-allow-delete block];",
+ + "index has read-only-allow-delete block; for more information, see "
+ + ReferenceDocs.FLOOD_STAGE_WATERMARK
+ + "];",
e.getMessage()
);
} finally {
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
index 32a3af0c341e5..742439c9a2484 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java
@@ -26,6 +26,7 @@
import org.elasticsearch.cluster.routing.allocation.IndexMetadataUpdater;
import org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider;
import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
+import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.StreamInput;
@@ -129,7 +130,8 @@ public class IndexMetadata implements Diffable, ToXContentFragmen
);
public static final ClusterBlock INDEX_READ_ONLY_ALLOW_DELETE_BLOCK = new ClusterBlock(
12,
- "disk usage exceeded flood-stage watermark, index has read-only-allow-delete block",
+ "disk usage exceeded flood-stage watermark, index has read-only-allow-delete block; for more information, see "
+ + ReferenceDocs.FLOOD_STAGE_WATERMARK,
false,
false,
true,
diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java
index a87f3b3d4bda0..c11d369e3cc76 100644
--- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java
+++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java
@@ -79,6 +79,7 @@ public enum ReferenceDocs {
S3_COMPATIBLE_REPOSITORIES,
LUCENE_MAX_DOCS_LIMIT,
MAX_SHARDS_PER_NODE,
+ FLOOD_STAGE_WATERMARK,
// this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner
;
diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json
index 0d11629803ced..e325aea9f1089 100644
--- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json
+++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json
@@ -39,5 +39,6 @@
"SNAPSHOT_REPOSITORY_ANALYSIS": "repo-analysis-api.html",
"S3_COMPATIBLE_REPOSITORIES": "repository-s3.html#repository-s3-compatible-services",
"LUCENE_MAX_DOCS_LIMIT": "size-your-shards.html#troubleshooting-max-docs-limit",
- "MAX_SHARDS_PER_NODE": "size-your-shards.html#troubleshooting-max-shards-open"
+ "MAX_SHARDS_PER_NODE": "size-your-shards.html#troubleshooting-max-shards-open",
+ "FLOOD_STAGE_WATERMARK": "fix-watermark-errors.html"
}
From a789f01ad48d39ef521ea8ee5b8cd40a4184e48f Mon Sep 17 00:00:00 2001
From: Stef Nestor <26751266+stefnestor@users.noreply.github.com>
Date: Thu, 25 Jul 2024 17:28:39 -0600
Subject: [PATCH 2/5] Update docs/changelog/111315.yaml
---
docs/changelog/111315.yaml | 5 +++++
1 file changed, 5 insertions(+)
create mode 100644 docs/changelog/111315.yaml
diff --git a/docs/changelog/111315.yaml b/docs/changelog/111315.yaml
new file mode 100644
index 0000000000000..0e2e56898b51c
--- /dev/null
+++ b/docs/changelog/111315.yaml
@@ -0,0 +1,5 @@
+pr: 111315
+summary: Add link to flood-stage watermark exception message
+area: Allocation
+type: enhancement
+issues: []
From 585b31277f3dd307d8c0ec21190db392d6bf2f8b Mon Sep 17 00:00:00 2001
From: Stef Nestor <26751266+stefnestor@users.noreply.github.com>
Date: Thu, 25 Jul 2024 17:30:02 -0600
Subject: [PATCH 3/5] typo
---
.../troubleshooting/common-issues/disk-usage-exceeded.asciidoc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
index 83e3092d62d7a..e7c3db1c60458 100644
--- a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
+++ b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
@@ -19,7 +19,7 @@ this could induce {kib}'s `Kibana Server is not Ready yet`
{es} will automatically remove the write block when the affected node's disk
usage falls below the <>.
-To achieve this, {es} attempts to rebalances some of the affected node's shards
+To achieve this, {es} attempts to rebalance some of the affected node's shards
to other nodes in the same data tier.
[[fix-watermark-errors-rebalance]]
From e215ff90ea6e1de1675f3bc310ee2a98695d6295 Mon Sep 17 00:00:00 2001
From: Stef Nestor <26751266+stefnestor@users.noreply.github.com>
Date: Fri, 26 Jul 2024 13:58:31 -0600
Subject: [PATCH 4/5] question
---
.../common-issues/disk-usage-exceeded.asciidoc | 5 -----
1 file changed, 5 deletions(-)
diff --git a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
index e7c3db1c60458..369528987ed86 100644
--- a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
+++ b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
@@ -80,11 +80,6 @@ PUT */_settings?expand_wildcards=all
{
"index.blocks.read_only_allow_delete": null
}
-
-PUT .kibana*/_settings?expand_wildcards=all
-{
- "index.blocks.write": null
-}
----
// TEST[s/^/PUT my-index\n/]
From 061109a5efb30a33c2d841cf8d23e151746f58b5 Mon Sep 17 00:00:00 2001
From: Stef Nestor <26751266+stefnestor@users.noreply.github.com>
Date: Wed, 31 Jul 2024 17:12:20 -0600
Subject: [PATCH 5/5] feedback
Co-authored-by: David Turner
---
.../common-issues/disk-usage-exceeded.asciidoc | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
index 369528987ed86..728d805db7a30 100644
--- a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
+++ b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc
@@ -8,8 +8,7 @@
When a data node is critically low on disk space and has reached the
<>, the following
-error is logged: `Error: disk usage exceeded flood-stage watermark, index has
-read-only-allow-delete block`.
+error is logged: `Error: disk usage exceeded flood-stage watermark, index has read-only-allow-delete block`.
To prevent a full disk, when a node reaches this watermark, {es} <>
to any index with a shard on the node. If the block affects related system
@@ -20,7 +19,7 @@ this could induce {kib}'s `Kibana Server is not Ready yet`
{es} will automatically remove the write block when the affected node's disk
usage falls below the <>.
To achieve this, {es} attempts to rebalance some of the affected node's shards
-to other nodes in the same data tier.
+to other nodes in the same data tier.
[[fix-watermark-errors-rebalance]]
==== Monitor rebalancing