
Commit 695816e

Merge pull request #46 from douglaz/configurable_workers

Configurable workers

2 parents: df2d208 + 65f0cfe

3 files changed: +22 −0 lines

README.md

Lines changed: 3 additions & 0 deletions
@@ -34,6 +34,9 @@ and can be used to install any pre-requisites.
 {{mapred_local_dirs}}
 {{spark_local_dirs}}
 {{default_spark_mem}}
+{{spark_worker_instances}}
+{{spark_worker_cores}}
+{{spark_master_opts}}
 
 You can add new variables by modifying `deploy_templates.py`
 
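As the README notes, new variables come from `deploy_templates.py`: any key added to its `template_vars` dict becomes a `{{...}}` placeholder in every template. A hypothetical sketch of that pattern (the `spark_worker_memory` name and its default are illustrative, not part of this change):

import os

# Each key in template_vars becomes a {{...}} placeholder available to
# all template files; this mirrors the entries added in this PR.
template_vars = {
    # Hypothetical new variable, usable as {{spark_worker_memory}}:
    "spark_worker_memory": os.getenv("SPARK_WORKER_MEMORY", "512m"),
}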

deploy_templates.py

Lines changed: 14 additions & 0 deletions
@@ -12,6 +12,7 @@
 
 # Find system memory in KB and compute Spark's default limit from that
 mem_command = "cat /proc/meminfo | grep MemTotal | awk '{print $2}'"
+cpu_command = "nproc"
 
 master_ram_kb = int(
     os.popen(mem_command).read().strip())
@@ -20,8 +21,14 @@
 
 slave_mem_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\
     (first_slave, mem_command)
+
+slave_cpu_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\
+    (first_slave, cpu_command)
+
 slave_ram_kb = int(os.popen(slave_mem_command).read().strip())
 
+slave_cpus = int(os.popen(slave_cpu_command).read().strip())
+
 system_ram_kb = min(slave_ram_kb, master_ram_kb)
 
 system_ram_mb = system_ram_kb / 1024
@@ -42,6 +49,10 @@
 # Make tachyon_mb as spark_mb for now.
 tachyon_mb = spark_mb
 
+worker_instances = int(os.getenv("SPARK_WORKER_INSTANCES", 1))
+# Distribute cpu cores equally among worker instances
+worker_cores = max(slave_cpus / worker_instances, 1)
+
 template_vars = {
   "master_list": os.getenv("MASTERS"),
   "active_master": os.getenv("MASTERS").split("\n")[0],
@@ -50,6 +61,9 @@
   "mapred_local_dirs": os.getenv("MAPRED_LOCAL_DIRS"),
   "spark_local_dirs": os.getenv("SPARK_LOCAL_DIRS"),
   "default_spark_mem": "%dm" % spark_mb,
+  "spark_worker_instances": "%d" % worker_instances,
+  "spark_worker_cores": "%d" % worker_cores,
+  "spark_master_opts": os.getenv("SPARK_MASTER_OPTS"),
   "spark_version": os.getenv("SPARK_VERSION"),
   "shark_version": os.getenv("SHARK_VERSION"),
   "hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),

templates/root/spark/conf/spark-env.sh

Lines changed: 5 additions & 0 deletions
@@ -20,12 +20,17 @@ export SPARK_MEM={{default_spark_mem}}
 SPARK_JAVA_OPTS+=" -Dspark.local.dir={{spark_local_dirs}}"
 export SPARK_JAVA_OPTS
 
+export SPARK_MASTER_OPTS="{{spark_master_opts}}"
+
 export HADOOP_HOME="/root/ephemeral-hdfs"
 export SPARK_LIBRARY_PATH="/root/ephemeral-hdfs/lib/native/"
 export SPARK_MASTER_IP={{active_master}}
 export MASTER=`cat /root/spark-ec2/cluster-url`
 export SPARK_CLASSPATH=$SPARK_CLASSPATH":/root/ephemeral-hdfs/conf"
 
+export SPARK_WORKER_INSTANCES={{spark_worker_instances}}
+export SPARK_WORKER_CORES={{spark_worker_cores}}
+
 # Bind Spark's web UIs to this machine's public EC2 hostname:
 export SPARK_PUBLIC_DNS=`wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname`
 
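Since spark-env.sh now exports both worker settings, a quick post-deploy check can confirm a slave is not oversubscribed. A sketch of such a check (not part of this repo), assuming the rendered spark-env.sh has been sourced into the environment:

import os
import subprocess

# Compare the rendered worker settings against the slave's actual CPU count.
instances = int(os.environ["SPARK_WORKER_INSTANCES"])
cores = int(os.environ["SPARK_WORKER_CORES"])
nproc = int(subprocess.check_output(["nproc"]).strip())

assert instances * cores <= nproc, "workers would oversubscribe CPU cores"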
