Commit d012cfa

Update spark_ec2.py to use new spark-ec2 scripts

1 parent 2435b7b

2 files changed (+32, -12)
ec2-variables.sh (new file)

11 additions, 0 deletions

@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# These variables are automatically filled in by the mesos-ec2 script.
+export MESOS_MASTERS="{{master_list}}"
+export MESOS_SLAVES="{{slave_list}}"
+export MESOS_ZOO_LIST="{{zoo_list}}"
+export MESOS_HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
+export MESOS_MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
+export MESOS_SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
+export MODULES="{{modules}}"
+export SWAP="{{swap}}"
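The {{...}} tokens are placeholders that spark_ec2.py's deploy_files fills in from the template_vars mapping shown further down in this diff. As a rough illustration of that kind of templating (a minimal sketch assuming plain string substitution, not the exact code in spark_ec2.py):

# Minimal sketch, not the actual spark_ec2.py code: fill {{name}}
# placeholders in a template with values from a dict, the way
# deploy_files renders ec2-variables.sh from template_vars.
def fill_template(text, template_vars):
    for name, value in template_vars.items():
        text = text.replace("{{" + name + "}}", value)
    return text

# Hypothetical values, matching the shape of template_vars below.
example_vars = {
    "swap": "1024",
    "modules": "\n".join(["ephemeral-hdfs", "persistent-hdfs", "mesos"]),
}
print(fill_template('export SWAP="{{swap}}"', example_vars))
# prints: export SWAP="1024"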

ec2/spark_ec2.py

21 additions, 12 deletions
@@ -358,25 +358,31 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
 # Deploy configuration files and run setup scripts on a newly launched
 # or started EC2 cluster.
 def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_key):
-  print "Deploying files to master..."
-  deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, zoo_nodes)
+  if opts.cluster_type == "mesos":
+    modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos']
+  elif opts.cluster_type == "standalone":
+    modules = ['ephemeral-hdfs', 'persistent-hdfs', 'spark-standalone']
+
   master = master_nodes[0].public_dns_name
   if deploy_ssh_key:
     print "Copying SSH key %s to master..." % opts.identity_file
     ssh(master, opts, 'mkdir -p ~/.ssh')
     scp(master, opts, opts.identity_file, '~/.ssh/id_rsa')
     ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa')
+
+  # NOTE: We should clone the repository before running deploy_files to prevent
+  # ec2-variables.sh from being overwritten
+  ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/shivaram/spark-ec2.git")
+  print "Deploying files to master..."
+  deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes,
+               zoo_nodes, modules)
   print "Running setup on master..."
-  if opts.cluster_type == "mesos":
-    setup_mesos_cluster(master, opts)
-  elif opts.cluster_type == "standalone":
-    setup_standalone_cluster(master, slave_nodes, opts)
+  setup_spark_cluster(master, opts)
   print "Done!"
 
-def setup_mesos_cluster(master, opts):
-  ssh(master, opts, "chmod u+x mesos-ec2/setup")
-  ssh(master, opts, "mesos-ec2/setup %s %s %s %s" %
-    ("generic", "none", "master", opts.swap))
+def setup_spark_cluster(master, opts):
+  ssh(master, opts, "chmod u+x spark-ec2/setup.sh")
+  ssh(master, opts, "spark-ec2/setup.sh")
 
 def setup_standalone_cluster(master, slave_nodes, opts):
   slave_ips = '\n'.join([i.public_dns_name for i in slave_nodes])
@@ -427,7 +433,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
-def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes):
+def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes,
+                 modules):
   active_master = master_nodes[0].public_dns_name
 
   num_disks = get_num_disks(opts.instance_type)
@@ -459,7 +466,9 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes):
     "cluster_url": cluster_url,
     "hdfs_data_dirs": hdfs_data_dirs,
     "mapred_local_dirs": mapred_local_dirs,
-    "spark_local_dirs": spark_local_dirs
+    "spark_local_dirs": spark_local_dirs,
+    "swap": str(opts.swap),
+    "modules": '\n'.join(modules)
   }
 
   # Create a temp directory in which we will place all the files to be
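With these two additions, the swap size and the newline-joined module list reach the master through the rendered ec2-variables.sh as the SWAP and MODULES environment variables. A minimal sketch of how a script on the master could read them back (illustrative only; the real spark-ec2 setup scripts are shell, and only the variable names come from the template above):

import os

# Illustrative only: recover the values that ec2-variables.sh exports.
modules = [m for m in os.environ.get("MODULES", "").split("\n") if m]
swap = os.environ.get("SWAP", "")
for module in modules:
    print("setting up module %s (swap setting: %s)" % (module, swap))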
