@@ -40,9 +40,11 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.gateway.MetaDataStateFormat;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.shard.ShardId;
+import org.elasticsearch.index.shard.ShardPath;
 import org.elasticsearch.index.store.FsDirectoryService;
 import org.elasticsearch.monitor.fs.FsInfo;
 import org.elasticsearch.monitor.fs.FsProbe;
@@ -76,7 +78,7 @@
 /**
  * A component that holds all data paths for a single node.
  */
-public class NodeEnvironment extends AbstractComponent implements Closeable {
+public final class NodeEnvironment extends AbstractComponent implements Closeable {
     public static class NodePath {
         /* ${data.paths}/nodes/{node.id} */
         public final Path path;
@@ -167,63 +169,71 @@ public NodeEnvironment(Settings settings, Environment environment) throws IOException {
             localNodeId = -1;
             return;
         }
-
         final NodePath[] nodePaths = new NodePath[environment.dataWithClusterFiles().length];
         final Lock[] locks = new Lock[nodePaths.length];
-        sharedDataPath = environment.sharedDataFile();
-
-        int localNodeId = -1;
-        IOException lastException = null;
-        int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
-        for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
-            for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
-                Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
-                Files.createDirectories(dir);
-
-                try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
-                    logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
-                    try {
-                        locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
-                        nodePaths[dirIndex] = new NodePath(dir, environment);
-                        localNodeId = possibleLockId;
-                    } catch (LockObtainFailedException ex) {
-                        logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
+        boolean success = false;
+
+        try {
+            sharedDataPath = environment.sharedDataFile();
+            int localNodeId = -1;
+            IOException lastException = null;
+            int maxLocalStorageNodes = MAX_LOCAL_STORAGE_NODES_SETTING.get(settings);
+            for (int possibleLockId = 0; possibleLockId < maxLocalStorageNodes; possibleLockId++) {
+                for (int dirIndex = 0; dirIndex < environment.dataWithClusterFiles().length; dirIndex++) {
+                    Path dir = environment.dataWithClusterFiles()[dirIndex].resolve(NODES_FOLDER).resolve(Integer.toString(possibleLockId));
+                    Files.createDirectories(dir);
+
+                    try (Directory luceneDir = FSDirectory.open(dir, NativeFSLockFactory.INSTANCE)) {
+                        logger.trace("obtaining node lock on {} ...", dir.toAbsolutePath());
+                        try {
+                            locks[dirIndex] = luceneDir.obtainLock(NODE_LOCK_FILENAME);
+                            nodePaths[dirIndex] = new NodePath(dir, environment);
+                            localNodeId = possibleLockId;
+                        } catch (LockObtainFailedException ex) {
+                            logger.trace("failed to obtain node lock on {}", dir.toAbsolutePath());
+                            // release all the ones that were obtained up until now
+                            releaseAndNullLocks(locks);
+                            break;
+                        }
+
+                    } catch (IOException e) {
+                        logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
+                        lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
                         // release all the ones that were obtained up until now
                         releaseAndNullLocks(locks);
                         break;
                     }
-
-                } catch (IOException e) {
-                    logger.trace("failed to obtain node lock on {}", e, dir.toAbsolutePath());
-                    lastException = new IOException("failed to obtain lock on " + dir.toAbsolutePath(), e);
-                    // release all the ones that were obtained up until now
-                    releaseAndNullLocks(locks);
+                }
+                if (locks[0] != null) {
+                    // we found a lock, break
                     break;
                 }
             }
-            if (locks[0] != null) {
-                // we found a lock, break
-                break;
-            }
-        }

-        if (locks[0] == null) {
-            throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
+            if (locks[0] == null) {
+                throw new IllegalStateException("Failed to obtain node lock, is the following location writable?: "
                     + Arrays.toString(environment.dataWithClusterFiles()), lastException);
-        }
+            }

-        this.localNodeId = localNodeId;
-        this.locks = locks;
-        this.nodePaths = nodePaths;
+            this.localNodeId = localNodeId;
+            this.locks = locks;
+            this.nodePaths = nodePaths;

-        if (logger.isDebugEnabled()) {
-            logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
-        }
-
-        maybeLogPathDetails();
-        maybeLogHeapDetails();
+            if (logger.isDebugEnabled()) {
+                logger.debug("using node location [{}], local_node_id [{}]", nodePaths, localNodeId);
+            }

-        applySegmentInfosTrace(settings);
+            maybeLogPathDetails();
+            maybeLogHeapDetails();
+
+            applySegmentInfosTrace(settings);
+            assertCanWrite();
+            success = true;
+        } finally {
+            if (success == false) {
+                IOUtils.closeWhileHandlingException(locks);
+            }
+        }
     }

     private static void releaseAndNullLocks(Lock[] locks) {
@@ -793,7 +803,7 @@ private static Set<ShardId> findAllShardsForIndex(Path indexPath, Index index) throws IOException
     }

     @Override
-    public void close() {
+    public final void close() {
        if (closed.compareAndSet(false, true) && locks != null) {
            for (Lock lock : locks) {
                try {
@@ -909,4 +919,45 @@ public static Path shardStatePathToDataPath(Path shardPath) {

         return shardPath.getParent().getParent().getParent();
     }
+
+    /**
+     * This is a best effort to ensure that we actually have write permissions to write in all our data directories.
+     * This prevents disasters if nodes are started under the wrong username etc.
+     */
+    private void assertCanWrite() throws IOException {
+        for (Path path : nodeDataPaths()) { // check node-paths are writable
+            tryWriteTempFile(path);
+        }
+        for (String index : this.findAllIndices()) {
+            for (Path path : this.indexPaths(index)) { // check index paths are writable
+                Path statePath = path.resolve(MetaDataStateFormat.STATE_DIR_NAME);
+                tryWriteTempFile(statePath);
+                tryWriteTempFile(path);
+            }
+            for (ShardId shardID : this.findAllShardIds(new Index(index, IndexMetaData.INDEX_UUID_NA_VALUE))) {
+                Path[] paths = this.availableShardPaths(shardID);
+                for (Path path : paths) { // check shard paths are writable
+                    Path indexDir = path.resolve(ShardPath.INDEX_FOLDER_NAME);
+                    Path statePath = path.resolve(MetaDataStateFormat.STATE_DIR_NAME);
+                    Path translogDir = path.resolve(ShardPath.TRANSLOG_FOLDER_NAME);
+                    tryWriteTempFile(indexDir);
+                    tryWriteTempFile(translogDir);
+                    tryWriteTempFile(statePath);
+                    tryWriteTempFile(path);
+                }
+            }
+        }
+    }
+
+    private static void tryWriteTempFile(Path path) throws IOException {
+        if (Files.exists(path)) {
+            Path resolve = path.resolve(".es_temp_file");
+            try {
+                Files.createFile(resolve);
+                Files.deleteIfExists(resolve);
+            } catch (IOException ex) {
+                throw new IOException("failed to write in data directory [" + path + "] write permission is required", ex);
+            }
+        }
+    }
 }
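
tryWriteTempFile is the whole probe: if the directory exists, create a marker file and immediately delete it, wrapping any failure in an IOException that names the offending path. A self-contained version you can run against an arbitrary directory; the WriteProbe class name, the .probe_tmp marker, and the main harness are invented for illustration (the commit itself uses .es_temp_file):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public final class WriteProbe {

    /** Best-effort check that the current user can create files under {@code path}. */
    static void tryWriteTempFile(Path path) throws IOException {
        if (Files.exists(path)) { // only probe directories that are actually present
            Path marker = path.resolve(".probe_tmp");
            try {
                Files.createFile(marker);     // fails if writing is not permitted (or a stale marker exists)
                Files.deleteIfExists(marker); // clean up the marker immediately
            } catch (IOException ex) {
                throw new IOException("failed to write in data directory [" + path
                        + "]; write permission is required", ex);
            }
        }
    }

    public static void main(String[] args) throws IOException {
        Path dir = Paths.get(args.length > 0 ? args[0] : ".");
        tryWriteTempFile(dir);
        System.out.println(dir.toAbsolutePath() + " is writable");
    }
}

Note the check is deliberately best effort: assertCanWrite() only walks paths that already exist and cannot catch permission changes made after startup, but since it runs from the constructor, a node started under the wrong username now fails fast at boot instead of at some later point when it first tries to persist state.
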