
Commit a52e08e

Author: Arthur Rand

[SPARK-609] use environment based secrets for S3Job (apache#237)

* use environment based secrets for S3Job
* add documentation, remove old secrets test

1 parent 772d064 · commit a52e08e

2 files changed: +41 -31 lines changed


docs/hdfs.md

Lines changed: 22 additions & 4 deletions
@@ -1,14 +1,15 @@
 ---
-post_title: Integration with HDFS
+post_title: Integration with HDFS and S3
 nav_title: HDFS
 menu_order: 20
 enterprise: 'no'
 ---

+# HDFS

 If you plan to read and write from HDFS using Spark, there are two Hadoop configuration files that should be included on Spark's classpath: `hdfs-site.xml`, which provides default behaviors for the HDFS client. `core-site.xml`, which sets the default filesystem name. You can specify the location of these files at install time or for each job.

-# Spark Installation
+## Spark Installation
 Within the Spark service configuration, set `hdfs.config-url` to be a URL that serves your `hdfs-site.xml` and `core-site.xml`, use this example where `http://mydomain.com/hdfs-config/hdfs-site.xml` and `http://mydomain.com/hdfs-config/core-site.xml` are valid URLs:

 ```json
@@ -20,10 +21,10 @@ Within the Spark service configuration, set `hdfs.config-url` to be a URL that s
 ```
 This can also be done through the UI. If you are using the default installation of HDFS from Mesosphere this is probably `http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints`.

-# Adding HDFS files per-job
+## Adding HDFS configuration files per-job
 To add the configuration files manually for a job, use `--conf spark.mesos.uris=<location_of_hdfs-site.xml>,<location_of_core-site.xml>`. This will download the files to the sandbox of the Driver Spark application, and DC/OS Spark will automatically load these files into the correct location. **Note** It is important these files are called `hdfs-site.xml` and `core-site.xml`.

-## Spark Checkpointing
+### Spark Checkpointing

 In order to use spark with checkpointing make sure you follow the instructions [here](https://spark.apache.org/docs/latest/streaming-programming-guide.html#checkpointing) and use an hdfs directory as the checkpointing directory. For example:
 ```
@@ -33,6 +34,23 @@ ssc.checkpoint(checkpointDirectory)
 ```
 That hdfs directory will be automatically created on hdfs and spark streaming app will work from checkpointed data even in the presence of application restarts/failures.

+# S3
+You can read/write files to S3 using environment-based secrets to pass your AWS credentials. Your credentials must first be uploaded to the DC/OS secret store:
+
+```
+dcos security secrets create <secret_path_for_key_id> -v <AWS_ACCESS_KEY_ID>
+dcos security secrets create <secret_path_for_secret_key> -v <AWS_SECRET_ACCESS_KEY>
+```
+Then your Spark jobs can get these credentials directly:
+
+```
+dcos spark run --submit-args="\
+...
+--conf spark.mesos.containerizer=mesos # required for secrets
+--conf spark.mesos.driver.secret.names=<secret_path_for_key_id>,<secret_path_for_secret_key>
+--conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY
+...
+```

 [8]: http://spark.apache.org/docs/latest/configuration.html#inheriting-hadoop-cluster-configuration
 [9]: https://docs.mesosphere.com/services/spark/2.1.0-2.2.0-1/limitations/
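For orientation, this is what the documented flow gives a job at runtime: with `spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY`, the secrets surface inside the driver as ordinary environment variables. The PySpark sketch below is not part of this commit; the `s3a://` scheme, bucket name, and paths are placeholders (the repo's own `S3Job` is a Scala class that reads `s3n` URLs in the test).

```python
import os

from pyspark.sql import SparkSession

# Sketch only: assumes the job was submitted with
#   --conf spark.mesos.containerizer=mesos
#   --conf spark.mesos.driver.secret.names=<secret_path_for_key_id>,<secret_path_for_secret_key>
#   --conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY
spark = SparkSession.builder.appName("s3-env-secrets-sketch").getOrCreate()

# The injected secrets arrive as plain environment variables; hand them to the Hadoop S3 client.
hadoop_conf = spark.sparkContext._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.access.key", os.environ["AWS_ACCESS_KEY_ID"])
hadoop_conf.set("fs.s3a.secret.key", os.environ["AWS_SECRET_ACCESS_KEY"])

# Placeholder bucket and paths, mirroring the linecount example used in the tests.
lines = spark.read.text("s3a://my-bucket/linecount.txt")
lines.write.mode("overwrite").text("s3a://my-bucket/linecount-out")
```

The point of the change is that the credentials never appear in the submit command or the Mesos task configuration; they only exist in the secret store and in the driver's environment.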

tests/test_spark.py

Lines changed: 19 additions & 27 deletions
@@ -167,19 +167,33 @@ def _check_task_network_info(task):

 @pytest.mark.sanity
 def test_s3():
+    def make_credential_secret(envvar, secret_path):
+        rc, stdout, stderr = sdk_cmd.run_raw_cli("security secrets create {p} -v {e}"
+                                                 .format(p=secret_path, e=os.environ[envvar]))
+        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
+            secret=secret_path, envvar=envvar, err=stderr, out=stdout)
+
+    LOGGER.info("Creating AWS secrets")
+
+    aws_access_key_secret_path = "aws_access_key_id"
+    aws_secret_access_key_path = "aws_secret_access_key"
+
+    make_credential_secret(envvar="AWS_ACCESS_KEY_ID", secret_path="/{}".format(aws_access_key_secret_path))
+    make_credential_secret(envvar="AWS_SECRET_ACCESS_KEY", secret_path="/{}".format(aws_secret_access_key_path))
+
     linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
     s3.upload_file(linecount_path)

     app_args = "--readUrl {} --writeUrl {}".format(
         s3.s3n_url('linecount.txt'),
         s3.s3n_url("linecount-out"))

-    args = ["--conf",
-            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
-                os.environ["AWS_ACCESS_KEY_ID"]),
+    args = ["--conf", "spark.mesos.containerizer=mesos",
             "--conf",
-            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
-                os.environ["AWS_SECRET_ACCESS_KEY"]),
+            "spark.mesos.driver.secret.names=/{key},/{secret}".format(
+                key=aws_access_key_secret_path, secret=aws_secret_access_key_path),
+            "--conf",
+            "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
             "--class", "S3Job"]
     utils.run_tests(app_url=utils._scala_test_jar_url(),
                     app_args=app_args,
@@ -229,28 +243,6 @@ def test_marathon_group():
     #shakedown.uninstall_package_and_wait(SPARK_PACKAGE_NAME, app_id)


-@pytest.mark.sanity
-@pytest.mark.secrets
-def test_secrets():
-    properties_file_path = os.path.join(THIS_DIR, "resources", "secrets-opts.txt")
-    # Create secret
-    shakedown.run_dcos_command('security secrets create /{} --value {}'.format(SECRET_NAME, SECRET_CONTENTS))
-
-    secret_file_name = "secret_file"
-    output = "Contents of file {}: {}".format(secret_file_name, SECRET_CONTENTS)
-    args = ["--properties-file", properties_file_path,
-            "--class", "SecretsJob"]
-    try:
-        utils.run_tests(app_url=utils._scala_test_jar_url(),
-                        app_args=secret_file_name,
-                        expected_output=output,
-                        args=args)
-
-    finally:
-        # Delete secret
-        shakedown.run_dcos_command('security secrets delete /{}'.format(SECRET_NAME))
-
-
 @pytest.mark.sanity
 def test_cli_multiple_spaces():
     utils.run_tests(app_url=utils.SPARK_EXAMPLES,
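One gap worth noting: the rewritten `test_s3()` creates the two secrets but never deletes them, whereas the removed `test_secrets()` cleaned up in a `finally` block. A hypothetical teardown (not part of this commit) could reuse the same `sdk_cmd.run_raw_cli` helper the test already depends on:

```python
def delete_credential_secret(secret_path):
    # Mirrors make_credential_secret(), but removes the secret once the test is done.
    rc, stdout, stderr = sdk_cmd.run_raw_cli("security secrets delete {p}".format(p=secret_path))
    assert rc == 0, "Failed to delete secret {p}, stderr: {err}, stdout: {out}".format(
        p=secret_path, err=stderr, out=stdout)

# e.g. at the end of test_s3(), or in a pytest fixture finalizer:
# delete_credential_secret("/aws_access_key_id")
# delete_credential_secret("/aws_secret_access_key")
```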
