@@ -51,7 +51,7 @@
    raw_input = input
    xrange = range

-SPARK_EC2_VERSION = "1.6.0"
+SPARK_EC2_VERSION = "2.0.0"
SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

VALID_SPARK_VERSIONS = set([
@@ -76,6 +76,7 @@
    "1.5.1",
    "1.5.2",
    "1.6.0",
+    "2.0.0",
])

SPARK_TACHYON_MAP = {
@@ -94,6 +95,7 @@
    "1.5.1": "0.7.1",
    "1.5.2": "0.7.1",
    "1.6.0": "0.8.2",
+    "2.0.0": "0.8.2",
}

DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
@@ -192,12 +194,12 @@ def parse_args():
        help="If you have multiple profiles (AWS or boto config), you can configure " +
             "additional, named profiles by using this option (default: %default)")
    parser.add_option(
-        "-t", "--instance-type", default="m1.large",
+        "-t", "--instance-type", default="g2.2xlarge",
        help="Type of instance to launch (default: %default). " +
             "WARNING: must be 64-bit; small instances won't work")
    parser.add_option(
-        "-m", "--master-instance-type", default="",
-        help="Master instance type (leave empty for same as instance-type)")
+        "-m", "--master-instance-type", default="t2.micro",
+        help="Master instance type (default: %default).")
    parser.add_option(
        "-r", "--region", default="us-east-1",
        help="EC2 region used to launch instances in, or to find them in (default: %default)")
@@ -271,7 +273,7 @@ def parse_args():
        help="If specified, launch slaves as spot instances with the given " +
             "maximum price (in dollars)")
    parser.add_option(
-        "--ganglia", action="store_true", default=True,
+        "--ganglia", action="store_true", default=False,
        help="Setup Ganglia monitoring on cluster (default: %default). NOTE: " +
             "the Ganglia page will be publicly accessible")
    parser.add_option(
@@ -295,8 +297,8 @@ def parse_args():
        help="Extra options to give to master through SPARK_MASTER_OPTS variable " +
             "(e.g -Dspark.worker.timeout=180)")
    parser.add_option(
-        "--user-data", type="string", default="",
-        help="Path to a user-data file (most AMIs interpret this as an initialization script)")
+        "--user-data", type="string", default=SPARK_EC2_DIR + "/gpu/user_data.txt",
+        help="Path to a user-data file (default: %default) (most AMIs interpret this as an initialization script)")
    parser.add_option(
        "--authorized-address", type="string", default="0.0.0.0/0",
        help="Address to authorize on created security groups (default: %default)")
@@ -327,6 +329,9 @@ def parse_args():
    parser.add_option(
        "--instance-profile-name", default=None,
        help="IAM profile name to launch instances under")
+    parser.add_option(
+        "-g", "--gpu", action="store_true", default=True,
+        help="enable GPU exploitation (default: %default)")

    (opts, args) = parser.parse_args()
    if len(args) != 2:
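Note (illustrative, not part of the patch): because the new -g/--gpu option uses action="store_true" with default=True and no complementary --no-gpu option is added, GPU mode is effectively always enabled. A minimal optparse sketch of what the new defaults resolve to when no flags are passed:

    # Illustrative sketch only: mirrors the option declarations added above,
    # not the full parse_args() implementation.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("-t", "--instance-type", default="g2.2xlarge")
    parser.add_option("-m", "--master-instance-type", default="t2.micro")
    parser.add_option("-g", "--gpu", action="store_true", default=True)
    (opts, args) = parser.parse_args([])

    print(opts.instance_type)         # g2.2xlarge
    print(opts.master_instance_type)  # t2.micro
    print(opts.gpu)                   # True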
@@ -444,13 +449,35 @@ def get_validate_spark_version(version, repo):
    "t2.large":   "hvm",
}

+# Source: http://aws.amazon.com/amazon-linux-ami/
+# Last Updated: 2016-01-02
+EC2_AMAZON_HVM_AMI = {
+    "us-east-1":      "ami-60b6c60a",
+    "us-west-2":      "ami-f0091d91",
+    "us-west-1":      "ami-d5ea86b5",
+    "eu-west-1":      "ami-bff32ccc",
+    "eu-central-1":   "ami-bc5b48d0",
+    "ap-southeast-1": "ami-c9b572aa",
+    "ap-northeast-1": "ami-383c1956",
+    "ap-southeast-2": "ami-48d38c2b",
+    "sa-east-1":      "ami-6817af04",
+}

def get_tachyon_version(spark_version):
    return SPARK_TACHYON_MAP.get(spark_version, "")


# Attempt to resolve an appropriate AMI given the architecture and region of the request.
def get_spark_ami(opts):
+    if opts.gpu:
+        if opts.region in EC2_AMAZON_HVM_AMI:
+            ami = EC2_AMAZON_HVM_AMI[opts.region]
+            print("Spark AMI: " + ami)
+            return ami
+        else:
+            print("Could not resolve AMAZON AMI for region: " + opts.region, file=stderr)
+            sys.exit(1)
+
    if opts.instance_type in EC2_INSTANCE_TYPES:
        instance_type = EC2_INSTANCE_TYPES[opts.instance_type]
    else:
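For illustration only (FakeOpts below is a hypothetical stand-in for the parsed options object, not part of the patch): with --gpu set, get_spark_ami() now resolves the AMI from the region-keyed EC2_AMAZON_HVM_AMI table and returns before the regular Spark AMI lookup runs:

    # Hypothetical stand-in for the optparse result; illustrative only.
    class FakeOpts(object):
        gpu = True
        region = "us-east-1"
        instance_type = "g2.2xlarge"

    ami = get_spark_ami(FakeOpts())  # prints "Spark AMI: ami-60b6c60a"
    assert ami == EC2_AMAZON_HVM_AMI["us-east-1"]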
@@ -851,11 +878,25 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    )

    print("Running setup on master...")
-    setup_spark_cluster(master, opts)
+    setup_spark_cluster(master, slave_nodes, opts)
    print("Done!")


-def setup_spark_cluster(master, opts):
+def setup_spark_cluster(master, slave_nodes, opts):
+    if opts.gpu:
+        scp(master, opts, "%s/gpu/spark_init.sh" % SPARK_EC2_DIR, "spark-ec2/spark/init.sh")
+        scp(master, opts, "%s/gpu/spark-defaults.conf.add" % SPARK_EC2_DIR, "spark-ec2/templates/root/spark/conf/spark-defaults.conf.add")
+        scp(master, opts, "%s/gpu/spark-env.sh.add" % SPARK_EC2_DIR, "spark-ec2/templates/root/spark/conf/spark-env.sh.add")
+        ssh(master, opts, "cat spark-ec2/templates/root/spark/conf/spark-defaults.conf.add >> spark-ec2/templates/root/spark/conf/spark-defaults.conf")
+        ssh(master, opts, "cat spark-ec2/templates/root/spark/conf/spark-env.sh.add >> spark-ec2/templates/root/spark/conf/spark-env.sh")
+        scp(master, opts, "%s/gpu/create_image.sh" % SPARK_EC2_DIR, "create_image.sh")
+        ssh(master, opts, "chmod u+x ./create_image.sh")
+        ssh(master, opts, "./create_image.sh > ./create_image_master.log")
+        for slave in slave_nodes:
+            slave_address = get_dns_name(slave, opts.private_ips)
+            scp(slave_address, opts, "%s/gpu/create_image.sh" % SPARK_EC2_DIR, "create_image.sh")
+            ssh(slave_address, opts, "chmod u+x ./create_image.sh")
+            ssh(slave_address, opts, "./create_image.sh gpu > ./create_image_slave.log")
    ssh(master, opts, "chmod u+x spark-ec2/setup.sh")
    ssh(master, opts, "spark-ec2/setup.sh")
    print("Spark standalone cluster started at http://%s:8080" % master)
@@ -1180,6 +1221,29 @@ def ssh(host, opts, command):
            tries = tries + 1


+def scp(host, opts, src, dst):
+    tries = 0
+    while True:
+        try:
+            return subprocess.check_call(
+                ['scp'] + ssh_args(opts) +
+                [stringify_command(src), '%s@%s:%s' % (opts.user, host, stringify_command(dst))])
+        except subprocess.CalledProcessError as e:
+            if tries > 5:
+                # If this was an ssh failure, provide the user with hints.
+                if e.returncode == 255:
+                    raise UsageError(
+                        "Failed to SCP to remote host {0}.\n"
+                        "Please check that you have provided the correct --identity-file and "
+                        "--key-pair parameters and try again.".format(host))
+                else:
+                    raise e
+            print("Error executing remote command, retrying after 30 seconds: {0}".format(e),
+                  file=stderr)
+            time.sleep(30)
+            tries = tries + 1
+
+

# Backported from Python 2.7 for compatiblity with 2.6 (See SPARK-1990)
def _check_output(*popenargs, **kwargs):
    if 'stdout' in kwargs:
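For context (illustrative, not part of the patch): the new scp() helper follows the same retry pattern as the existing ssh() helper, raising a UsageError once tries exceeds five and otherwise retrying every 30 seconds, so the GPU setup code above can push local files to cluster nodes the same way it runs remote commands:

    # Hypothetical standalone usage; the paths mirror the calls made in
    # setup_spark_cluster() above.
    scp(master, opts, "%s/gpu/create_image.sh" % SPARK_EC2_DIR, "create_image.sh")
    ssh(master, opts, "chmod u+x ./create_image.sh")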