@@ -358,25 +358,31 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
 # Deploy configuration files and run setup scripts on a newly launched
 # or started EC2 cluster.
 def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_key):
-  print "Deploying files to master..."
-  deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, zoo_nodes)
+  if opts.cluster_type == "mesos":
+    modules = ['ephemeral-hdfs', 'persistent-hdfs', 'mesos']
+  elif opts.cluster_type == "standalone":
+    modules = ['ephemeral-hdfs', 'persistent-hdfs', 'spark-standalone']
+
   master = master_nodes[0].public_dns_name
   if deploy_ssh_key:
     print "Copying SSH key %s to master..." % opts.identity_file
     ssh(master, opts, 'mkdir -p ~/.ssh')
     scp(master, opts, opts.identity_file, '~/.ssh/id_rsa')
     ssh(master, opts, 'chmod 600 ~/.ssh/id_rsa')
+
+  # NOTE: We should clone the repository before running deploy_files to prevent
+  # ec2-variables.sh from being overwritten
+  ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/shivaram/spark-ec2.git")
+  print "Deploying files to master..."
+  deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes,
+               zoo_nodes, modules)
   print "Running setup on master..."
-  if opts.cluster_type == "mesos":
-    setup_mesos_cluster(master, opts)
-  elif opts.cluster_type == "standalone":
-    setup_standalone_cluster(master, slave_nodes, opts)
+  setup_spark_cluster(master, opts)
   print "Done!"
 
-def setup_mesos_cluster(master, opts):
-  ssh(master, opts, "chmod u+x mesos-ec2/setup")
-  ssh(master, opts, "mesos-ec2/setup %s %s %s %s" %
-    ("generic", "none", "master", opts.swap))
+def setup_spark_cluster(master, opts):
+  ssh(master, opts, "chmod u+x spark-ec2/setup.sh")
+  ssh(master, opts, "spark-ec2/setup.sh")
 
 def setup_standalone_cluster(master, slave_nodes, opts):
   slave_ips = '\n'.join([i.public_dns_name for i in slave_nodes])
@@ -427,7 +433,8 @@ def get_num_disks(instance_type):
 # cluster (e.g. lists of masters and slaves). Files are only deployed to
 # the first master instance in the cluster, and we expect the setup
 # script to be run on that instance to copy them to other nodes.
-def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes):
+def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes,
+                 modules):
   active_master = master_nodes[0].public_dns_name
 
   num_disks = get_num_disks(opts.instance_type)
@@ -459,7 +466,9 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes):
     "cluster_url": cluster_url,
     "hdfs_data_dirs": hdfs_data_dirs,
     "mapred_local_dirs": mapred_local_dirs,
-    "spark_local_dirs": spark_local_dirs
+    "spark_local_dirs": spark_local_dirs,
+    "swap": str(opts.swap),
+    "modules": '\n'.join(modules)
   }
 
   # Create a temp directory in which we will place all the files to be
0 commit comments