Skip to content

Commit 35ae826

Browse files
committed
Merge pull request #16745 from s1monw/probe_write_access
Assert that we can write to all data paths on startup. Today we might start a node while some of its paths do not have the required permissions. This commit goes through all data directories, as well as the index, shard and state directories, and ensures we have write access. To make this work across all operating systems, we try to write a real file and remove it again in each of those directories.
2 parents 1e15ae6 + 387f047 commit 35ae826

File tree

4 files changed

+251
-65
lines changed

4 files changed

+251
-65
lines changed

core/src/main/java/org/elasticsearch/env/NodeEnvironment.java

+96-45
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@
4040
import org.elasticsearch.common.settings.Settings;
4141
import org.elasticsearch.common.unit.ByteSizeValue;
4242
import org.elasticsearch.common.unit.TimeValue;
43+
import org.elasticsearch.gateway.MetaDataStateFormat;
4344
import org.elasticsearch.index.Index;
4445
import org.elasticsearch.index.IndexSettings;
4546
import org.elasticsearch.index.shard.ShardId;
47+
import org.elasticsearch.index.shard.ShardPath;
4648
import org.elasticsearch.index.store.FsDirectoryService;
4749
import org.elasticsearch.monitor.fs.FsInfo;
4850
import org.elasticsearch.monitor.fs.FsProbe;
@@ -76,7 +78,7 @@
7678
/**
7779
* A component that holds all data paths for a single node.
7880
*/
79-
public class NodeEnvironment extends AbstractComponent implements Closeable {
81+
public final class NodeEnvironment extends AbstractComponent implements Closeable {
8082
public static class NodePath {
8183
/* ${data.paths}/nodes/{node.id} */
8284
public final Path path;
@@ -167,63 +169,71 @@ public NodeEnvironment(Settings settings, Environment environment) throws IOExce
167169
localNodeId = -1;
168170
return;
169171
}
170-
171172
final NodePath[] nodePaths = new NodePath[environment.dataWithClusterFiles().length];
172173
final Lock[] locks = new Lock[nodePaths.length];
173-
sharedDataPath = environment.sharedDataFile();
174-
175-
int localNodeId = -1;
176-
IOException lastException = null;
177-
int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
178-
for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
179-
for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
180-
Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
181-
Files.createDirectories(dir);
182-
183-
try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
184-
logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
185-
try {
186-
locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
187-
nodePaths[dirIndex] = new NodePath(dir, environment);
188-
localNodeId = possibleLockId;
189-
} catch (LockObtainFailedException ex) {
190-
logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
174+
boolean success = false;
175+
176+
try {
177+
sharedDataPath = environment.sharedDataFile();
178+
int localNodeId = -1;
179+
IOException lastException = null;
180+
int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
181+
for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
182+
for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
183+
Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
184+
Files.createDirectories(dir);
185+
186+
try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
187+
logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
188+
try {
189+
locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
190+
nodePaths[dirIndex] = new NodePath(dir, environment);
191+
localNodeId = possibleLockId;
192+
} catch (LockObtainFailedException ex) {
193+
logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
194+
// release all the ones that were obtained up until now
195+
releaseAndNullLocks(locks);
196+
break;
197+
}
198+
199+
} catch (IOException e) {
200+
logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
201+
lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
191202
// release all the ones that were obtained up until now
192203
releaseAndNullLocks(locks);
193204
break;
194205
}
195-
196-
} catch (IOException e) {
197-
logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
198-
lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
199-
// release all the ones that were obtained up until now
200-
releaseAndNullLocks(locks);
206+
}
207+
if (locks[0] != null) {
208+
// we found a lock, break
201209
break;
202210
}
203211
}
204-
if (locks[0] != null) {
205-
// we found a lock, break
206-
break;
207-
}
208-
}
209212

210-
if (locks[0] == null) {
211-
throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
213+
if (locks[0] == null) {
214+
throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
212215
+ Arrays.toString(environment.dataWithClusterFiles()), lastException);
213-
}
216+
}
214217

215-
this.localNodeId = localNodeId;
216-
this.locks = locks;
217-
this.nodePaths = nodePaths;
218+
this.localNodeId = localNodeId;
219+
this.locks = locks;
220+
this.nodePaths = nodePaths;
218221

219-
if (logger.isDebugEnabled()) {
220-
logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
221-
}
222-
223-
maybeLogPathDetails();
224-
maybeLogHeapDetails();
222+
if (logger.isDebugEnabled()) {
223+
logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
224+
}
225225

226-
applySegmentInfosTrace(settings);
226+
maybeLogPathDetails();
227+
maybeLogHeapDetails();
228+
229+
applySegmentInfosTrace(settings);
230+
assertCanWrite();
231+
success = true;
232+
} finally {
233+
if (success == false) {
234+
IOUtils.closeWhileHandlingException(locks);
235+
}
236+
}
227237
}
228238

229239
private static void releaseAndNullLocks(Lock[] locks) {
@@ -793,7 +803,7 @@ private static Set<ShardId> findAllShardsForIndex(Path indexPath, Index index) t
793803
}
794804

795805
@Override
796-
public void close() {
806+
public final void close() {
797807
if (closed.compareAndSet(false, true) && locks != null) {
798808
for (Lock lock : locks) {
799809
try {
@@ -909,4 +919,45 @@ public static Path shardStatePathToDataPath(Path shardPath) {
909919

910920
return shardPath.getParent().getParent().getParent();
911921
}
922+
923+
/**
924+
* This is a best effort to ensure that we actually have write permissions to write in all our data directories.
925+
* This prevents disasters if nodes are started under the wrong username etc.
926+
*/
927+
private void assertCanWrite() throws IOException {
928+
for (Path path : nodeDataPaths()) { // check node-paths are writable
929+
tryWriteTempFile(path);
930+
}
931+
for (String index : this.findAllIndices()) {
932+
for (Path path : this.indexPaths(index)) { // check index paths are writable
933+
Path statePath = path.resolve(MetaDataStateFormat.STATE_DIR_NAME);
934+
tryWriteTempFile(statePath);
935+
tryWriteTempFile(path);
936+
}
937+
for (ShardId shardID : this.findAllShardIds(new Index(index, IndexMetaData.INDEX_UUID_NA_VALUE))) {
938+
Path[] paths = this.availableShardPaths(shardID);
939+
for (Path path : paths) { // check shard paths are writable
940+
Path indexDir = path.resolve(ShardPath.INDEX_FOLDER_NAME);
941+
Path statePath = path.resolve(MetaDataStateFormat.STATE_DIR_NAME);
942+
Path translogDir = path.resolve(ShardPath.TRANSLOG_FOLDER_NAME);
943+
tryWriteTempFile(indexDir);
944+
tryWriteTempFile(translogDir);
945+
tryWriteTempFile(statePath);
946+
tryWriteTempFile(path);
947+
}
948+
}
949+
}
950+
}
951+
952+
private static void tryWriteTempFile(Path path) throws IOException {
953+
if (Files.exists(path)) {
954+
Path resolve = path.resolve(".es_temp_file");
955+
try {
956+
Files.createFile(resolve);
957+
Files.deleteIfExists(resolve);
958+
} catch (IOException ex) {
959+
throw new IOException("failed to write in data directory [" + path + "] write permission is required", ex);
960+
}
961+
}
962+
}
912963
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.env;
20+
21+
import org.elasticsearch.common.io.PathUtils;
22+
import org.elasticsearch.common.settings.Settings;
23+
import org.elasticsearch.test.ESTestCase;
24+
import org.elasticsearch.test.PosixPermissionsResetter;
25+
import org.junit.BeforeClass;
26+
27+
import java.io.IOException;
28+
import java.nio.file.Files;
29+
import java.nio.file.Path;
30+
import java.nio.file.attribute.PosixFileAttributeView;
31+
import java.nio.file.attribute.PosixFilePermission;
32+
import java.util.Arrays;
33+
import java.util.Collections;
34+
import java.util.HashSet;
35+
36+
public class NodeEnvironmentEvilTests extends ESTestCase {
37+
38+
private static boolean isPosix;
39+
40+
@BeforeClass
41+
public static void checkPosix() throws IOException {
42+
isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null;
43+
}
44+
45+
public void testMissingWritePermission() throws IOException {
46+
assumeTrue("posix filesystem", isPosix);
47+
final String[] tempPaths = tmpPaths();
48+
Path path = PathUtils.get(randomFrom(tempPaths));
49+
try (PosixPermissionsResetter attr = new PosixPermissionsResetter(path)) {
50+
attr.setPermissions(new HashSet<>(Arrays.asList(PosixFilePermission.OTHERS_READ, PosixFilePermission.GROUP_READ, PosixFilePermission.OWNER_READ)));
51+
Settings build = Settings.builder()
52+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
53+
.putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
54+
IOException ioException = expectThrows(IOException.class, () -> {
55+
new NodeEnvironment(build, new Environment(build));
56+
});
57+
assertTrue(ioException.getMessage(), ioException.getMessage().startsWith(path.toString()));
58+
}
59+
}
60+
61+
public void testMissingWritePermissionOnIndex() throws IOException {
62+
assumeTrue("posix filesystem", isPosix);
63+
final String[] tempPaths = tmpPaths();
64+
Path path = PathUtils.get(randomFrom(tempPaths));
65+
Path fooIndex = path.resolve("elasticsearch").resolve("nodes").resolve("0").resolve(NodeEnvironment.INDICES_FOLDER).resolve("foo");
66+
Files.createDirectories(fooIndex);
67+
try (PosixPermissionsResetter attr = new PosixPermissionsResetter(fooIndex)) {
68+
attr.setPermissions(new HashSet<>(Arrays.asList(PosixFilePermission.OTHERS_READ, PosixFilePermission.GROUP_READ, PosixFilePermission.OWNER_READ)));
69+
Settings build = Settings.builder()
70+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
71+
.putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
72+
IOException ioException = expectThrows(IOException.class, () -> {
73+
new NodeEnvironment(build, new Environment(build));
74+
});
75+
assertTrue(ioException.getMessage(), ioException.getMessage().startsWith("failed to write in data directory"));
76+
}
77+
}
78+
79+
public void testMissingWritePermissionOnShard() throws IOException {
80+
assumeTrue("posix filesystem", isPosix);
81+
final String[] tempPaths = tmpPaths();
82+
Path path = PathUtils.get(randomFrom(tempPaths));
83+
Path fooIndex = path.resolve("elasticsearch").resolve("nodes").resolve("0").resolve(NodeEnvironment.INDICES_FOLDER).resolve("foo");
84+
Path fooShard = fooIndex.resolve("0");
85+
Path fooShardIndex = fooShard.resolve("index");
86+
Path fooShardTranslog = fooShard.resolve("translog");
87+
Path fooShardState = fooShard.resolve("_state");
88+
Path pick = randomFrom(fooShard, fooShardIndex, fooShardTranslog, fooShardState);
89+
Files.createDirectories(pick);
90+
try (PosixPermissionsResetter attr = new PosixPermissionsResetter(pick)) {
91+
attr.setPermissions(new HashSet<>(Arrays.asList(PosixFilePermission.OTHERS_READ, PosixFilePermission.GROUP_READ, PosixFilePermission.OWNER_READ)));
92+
Settings build = Settings.builder()
93+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toAbsolutePath().toString())
94+
.putArray(Environment.PATH_DATA_SETTING.getKey(), tempPaths).build();
95+
IOException ioException = expectThrows(IOException.class, () -> {
96+
new NodeEnvironment(build, new Environment(build));
97+
});
98+
assertTrue(ioException.getMessage(), ioException.getMessage().startsWith("failed to write in data directory"));
99+
}
100+
}
101+
}

qa/evil-tests/src/test/java/org/elasticsearch/plugins/InstallPluginCommandTests.java

+3-20
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.elasticsearch.common.settings.Settings;
5252
import org.elasticsearch.env.Environment;
5353
import org.elasticsearch.test.ESTestCase;
54+
import org.elasticsearch.test.PosixPermissionsResetter;
5455
import org.junit.BeforeClass;
5556

5657
@LuceneTestCase.SuppressFileSystems("*")
@@ -63,24 +64,6 @@ public static void checkPosix() throws IOException {
6364
isPosix = Files.getFileAttributeView(createTempFile(), PosixFileAttributeView.class) != null;
6465
}
6566

66-
/** Stores the posix attributes for a path and resets them on close. */
67-
static class PosixPermissionsResetter implements AutoCloseable {
68-
private final PosixFileAttributeView attributeView;
69-
final Set<PosixFilePermission> permissions;
70-
public PosixPermissionsResetter(Path path) throws IOException {
71-
attributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
72-
assertNotNull(attributeView);
73-
permissions = attributeView.readAttributes().permissions();
74-
}
75-
@Override
76-
public void close() throws IOException {
77-
attributeView.setPermissions(permissions);
78-
}
79-
public void setPermissions(Set<PosixFilePermission> newPermissions) throws IOException {
80-
attributeView.setPermissions(newPermissions);
81-
}
82-
}
83-
8467
/** Creates a test environment with bin, config and plugins directories. */
8568
static Environment createEnv() throws IOException {
8669
Path home = createTempDir();
@@ -103,7 +86,7 @@ static void writeJar(Path jar, String... classes) throws IOException {
10386
}
10487
}
10588
}
106-
89+
10790
static String writeZip(Path structure, String prefix) throws IOException {
10891
Path zip = createTempDir().resolve(structure.getFileName() + ".zip");
10992
try (ZipOutputStream stream = new ZipOutputStream(Files.newOutputStream(zip))) {
@@ -382,7 +365,7 @@ public void testBinPermissions() throws Exception {
382365
Files.createFile(binDir.resolve("somescript"));
383366
String pluginZip = createPlugin("fake", pluginDir);
384367
try (PosixPermissionsResetter binAttrs = new PosixPermissionsResetter(env.binFile())) {
385-
Set<PosixFilePermission> perms = new HashSet<>(binAttrs.permissions);
368+
Set<PosixFilePermission> perms = binAttrs.getCopyPermissions();
386369
// make sure at least one execute perm is missing, so we know we forced it during installation
387370
perms.remove(PosixFilePermission.GROUP_EXECUTE);
388371
binAttrs.setPermissions(perms);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.test;
20+
21+
import org.junit.Assert;
22+
23+
import java.io.IOException;
24+
import java.nio.file.Files;
25+
import java.nio.file.Path;
26+
import java.nio.file.attribute.PosixFileAttributeView;
27+
import java.nio.file.attribute.PosixFilePermission;
28+
import java.util.HashSet;
29+
import java.util.Set;
30+
31+
/** Stores the posix attributes for a path and resets them on close. */
32+
public class PosixPermissionsResetter implements AutoCloseable {
33+
private final PosixFileAttributeView attributeView;
34+
private final Set<PosixFilePermission> permissions;
35+
public PosixPermissionsResetter(Path path) throws IOException {
36+
attributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
37+
Assert.assertNotNull(attributeView);
38+
permissions = attributeView.readAttributes().permissions();
39+
}
40+
@Override
41+
public void close() throws IOException {
42+
attributeView.setPermissions(permissions);
43+
}
44+
public void setPermissions(Set<PosixFilePermission> newPermissions) throws IOException {
45+
attributeView.setPermissions(newPermissions);
46+
}
47+
48+
public Set<PosixFilePermission> getCopyPermissions() {
49+
return new HashSet<>(permissions);
50+
}
51+
}

0 commit comments

Comments
 (0)