Skip to content

Commit 3210ae3

Browse files
committed
HADOOP-16233. S3AFileStatus to declare that isEncrypted() is always true (#685)
This is needed to fix up some confusion about how job.addCache() handles S3A paths and all their parent dirs: the files are downloaded by the NM without using the DTs of the user submitting the job. This means that when you submit jobs to an EC2 cluster with lower IAM permissions than the user, cached resources don't get downloaded and the job doesn't start. Production code changes: * S3AFileStatus passes "true" as the superclass's encrypted flag during construction. Tests: * Base AbstractContractOpenTest can control whether zero byte files created in tests are encrypted. Not done via an XML attribute, just a subclass point. Thoughts? * Verify that the filecache considers paths to not have the permissions which trigger reduced-privilege downloads. * And extend ITestDelegatedMRJob to test a completely different bucket (open street map), to verify that cached resources do get their tokens picked up. Docs: * Advise FS developers to say all files are encrypted. It's otherwise harmless and it'll stop other people seeing impossible-to-debug error messages on app launch. Contributed by Steve Loughran. Change-Id: Ifaae4c9d735ccc5eafeebd2584b65daf2d4e5da3
1 parent 084fb9d commit 3210ae3

File tree

5 files changed

+101
-3
lines changed

5 files changed

+101
-3
lines changed

hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,17 @@ can be queried to find if the path has an ACL. `getFileStatus(Path p).isEncrypte
9696
can be queried to find if the path is encrypted. `getFileStatus(Path p).isErasureCoded()`
9797
will tell if the path is erasure coded or not.
9898

99+
YARN's distributed cache lets applications add paths to be cached across
100+
containers and applications via `Job.addCacheFile()` and `Job.addCacheArchive()`.
101+
The cache treats world-readable resource paths as shareable across
102+
applications, and downloads them differently, unless they are declared as encrypted.
103+
104+
To avoid failures during container launching, especially when delegation tokens
105+
are used, filesystems and object stores which do not implement POSIX access permissions
106+
for both files and directories, MUST always return `true` to the `isEncrypted()`
107+
predicate. This can be done by setting the `encrypted` flag to true when creating
108+
the `FileStatus` instance.
109+
99110
### `Path getHomeDirectory()`
100111

101112
The function `getHomeDirectory` returns the home directory for the FileSystem

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,20 @@ public void testFsIsEncrypted() throws Exception {
7373
final Path path = path("file");
7474
createFile(getFileSystem(), path, false, new byte[0]);
7575
final FileStatus stat = getFileSystem().getFileStatus(path);
76-
assertFalse("Expecting false for stat.isEncrypted()",
76+
assertEquals("Result wrong for for isEncrypted() in " + stat,
77+
areZeroByteFilesEncrypted(),
7778
stat.isEncrypted());
7879
}
7980

81+
/**
82+
* Are zero byte files encrypted. This is implicitly
83+
* false for filesystems which do not encrypt.
84+
* @return true iff zero byte files are encrypted.
85+
*/
86+
protected boolean areZeroByteFilesEncrypted() {
87+
return false;
88+
}
89+
8090
@Test
8191
public void testOpenReadDir() throws Throwable {
8292
describe("create & read a directory");

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ public S3AFileStatus(boolean isemptydir,
5454
public S3AFileStatus(Tristate isemptydir,
5555
Path path,
5656
String owner) {
57-
super(0, true, 1, 0, 0, path);
57+
super(0, true, 1, 0, 0, 0,
58+
null, null, null, null,
59+
path, false, true, false);
5860
isEmptyDirectory = isemptydir;
5961
setOwner(owner);
6062
setGroup(owner);
@@ -70,7 +72,9 @@ public S3AFileStatus(Tristate isemptydir,
7072
*/
7173
public S3AFileStatus(long length, long modification_time, Path path,
7274
long blockSize, String owner) {
73-
super(length, false, 1, blockSize, modification_time, path);
75+
super(length, false, 1, blockSize, modification_time, 0,
76+
null, null, null, null,
77+
path, false, true, false);
7478
isEmptyDirectory = Tristate.FALSE;
7579
setOwner(owner);
7680
setGroup(owner);

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractOpen.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,13 @@ protected Configuration createConfiguration() {
4545
protected AbstractFSContract createContract(Configuration conf) {
4646
return new S3AContract(conf);
4747
}
48+
49+
/**
50+
* S3A always declares zero byte files as encrypted.
51+
* @return true, always.
52+
*/
53+
@Override
54+
protected boolean areZeroByteFilesEncrypted() {
55+
return true;
56+
}
4857
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.mapreduce.filecache;
20+
21+
import java.io.IOException;
22+
import java.net.URI;
23+
import java.util.HashMap;
24+
import java.util.Map;
25+
26+
import org.junit.Test;
27+
28+
import org.apache.hadoop.fs.FileStatus;
29+
import org.apache.hadoop.fs.Path;
30+
import org.apache.hadoop.fs.s3a.S3AFileStatus;
31+
import org.apache.hadoop.test.HadoopTestBase;
32+
33+
/**
34+
* Test how S3A resources are scoped in YARN caching.
35+
* In this package to make use of package-private methods of
36+
* {@link ClientDistributedCacheManager}.
37+
*/
38+
public class TestS3AResourceScope extends HadoopTestBase {
39+
40+
private static final Path PATH = new Path("s3a://example/path");
41+
42+
@Test
43+
public void testS3AFilesArePrivate() throws Throwable {
44+
S3AFileStatus status = new S3AFileStatus(false, PATH, "self");
45+
assertTrue("Not encrypted: " + status, status.isEncrypted());
46+
assertNotExecutable(status);
47+
}
48+
49+
@Test
50+
public void testS3AFilesArePrivateOtherContstructor() throws Throwable {
51+
S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self");
52+
assertTrue("Not encrypted: " + status, status.isEncrypted());
53+
assertNotExecutable(status);
54+
}
55+
56+
private void assertNotExecutable(final S3AFileStatus status)
57+
throws IOException {
58+
Map<URI, FileStatus> cache = new HashMap<>();
59+
cache.put(PATH.toUri(), status);
60+
assertFalse("Should not have been executable " + status,
61+
ClientDistributedCacheManager.ancestorsHaveExecutePermissions(
62+
null, PATH, cache));
63+
}
64+
}

0 commit comments

Comments
 (0)