177 changes: 9 additions & 168 deletions bin/spark-shell
@@ -19,9 +19,8 @@

 #
 # Shell script for starting the Spark Shell REPL
-# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
-# if those two env vars are set in spark-env.sh but MASTER is not.

+args="$@"
 cygwin=false
 case "`uname`" in
 CYGWIN*) cygwin=true;;
@@ -30,133 +29,16 @@ esac
 # Enter posix mode for bash
 set -o posix

+if [[ "$@" == *--help* ]]; then
+  echo "Usage: ./bin/spark-shell [options]"
+  ./bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+fi
+
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"

-SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
-DEFAULT_MASTER="local[*]"
-MASTER=${MASTER:-""}
-
-info_log=0
-
-#CLI Color Templates
-txtund=$(tput sgr 0 1)          # Underline
-txtbld=$(tput bold)             # Bold
-bldred=${txtbld}$(tput setaf 1) # red
-bldyel=${txtbld}$(tput setaf 3) # yellow
-bldblu=${txtbld}$(tput setaf 4) # blue
-bldwht=${txtbld}$(tput setaf 7) # white
-txtrst=$(tput sgr0)             # Reset
-info=${bldwht}*${txtrst}        # Feedback
-pass=${bldblu}*${txtrst}
-warn=${bldred}*${txtrst}
-ques=${bldblu}?${txtrst}
-
-# Helper function to describe the script usage
-function usage() {
-  cat << EOF
-${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
-
-${txtbld}OPTIONS${txtrst}:
-  -h  --help             : Print this help information.
-  -c  --cores            : The maximum number of cores to be used by the Spark Shell.
-  -em --executor-memory  : The memory used by each executor of the Spark Shell, the number
-                           is followed by m for megabytes or g for gigabytes, e.g. "1g".
-  -dm --driver-memory    : The memory used by the Spark Shell, the number is followed
-                           by m for megabytes or g for gigabytes, e.g. "1g".
-  -m  --master           : A full string that describes the Spark Master, defaults to "local[*]"
-                           e.g. "spark://localhost:7077".
-  --log-conf             : Enables logging of the supplied SparkConf as INFO at start of the
-                           Spark Context.
-
-e.g.
-  spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
-
-EOF
-}
-
-function out_error(){
-  echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
-  usage
-  exit 1
-}
-
-function log_info(){
-  [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
-}
-
-function log_warn(){
-  echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
-}
-
-# PATTERNS used to validate more than one optional arg.
-ARG_FLAG_PATTERN="^-"
-MEM_PATTERN="^[0-9]+[m|g|M|G]$"
-NUM_PATTERN="^[0-9]+$"
-PORT_PATTERN="^[0-9]+$"
-
-# Setters for optional args.
-function set_cores(){
-  CORE_PATTERN="^[0-9]+$"
-  if [[ "$1" =~ $CORE_PATTERN ]]; then
-    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
-  else
-    out_error "wrong format for $2"
-  fi
-}
-
-function set_em(){
-  if [[ $1 =~ $MEM_PATTERN ]]; then
-    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
-  else
-    out_error "wrong format for $2"
-  fi
-}
-
-function set_dm(){
-  if [[ $1 =~ $MEM_PATTERN ]]; then
-    export SPARK_DRIVER_MEMORY=$1
-  else
-    out_error "wrong format for $2"
-  fi
-}
-
-function set_spark_log_conf(){
-  SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
-}
-
-function set_spark_master(){
-  if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
-    export MASTER="$1"
-  else
-    out_error "wrong format for $2"
-  fi
-}
-
-function resolve_spark_master(){
-  # Set MASTER from spark-env if possible
-  DEFAULT_SPARK_MASTER_PORT=7077
-  if [ -z "$MASTER" ]; then
-    . $FWDIR/bin/load-spark-env.sh
-    if [ -n "$SPARK_MASTER_IP" ]; then
-      SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
-      export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
-    fi
-  fi
-
-  if [ -z "$MASTER" ]; then
-    export MASTER="$DEFAULT_MASTER"
-  fi
-
-}
-
 function main(){
-  log_info "Base Directory set to $FWDIR"
-
-  resolve_spark_master
-  log_info "Spark Master is $MASTER"
-
-  log_info "Spark REPL options $SPARK_REPL_OPTS"
   if $cygwin; then
     # Workaround for issue involving JLine and Cygwin
     # (see http://sourceforge.net/p/jline/bugs/40/).
@@ -165,55 +47,14 @@ function main(){
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+    $FWDIR/bin/spark-submit spark-internal "$args" --class org.apache.spark.repl.Main
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_REPL_OPTS
-    $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+    $FWDIR/bin/spark-submit spark-internal "$args" --class org.apache.spark.repl.Main
   fi
 }

-for option in "$@"
-do
-  case $option in
-  -h | --help )
-    usage
-    exit 1
-    ;;
-  -c | --cores)
-    shift
-    _1=$1
-    shift
-    set_cores $_1 "-c/--cores"
-    ;;
-  -em | --executor-memory)
-    shift
-    _1=$1
-    shift
-    set_em $_1 "-em/--executor-memory"
-    ;;
-  -dm | --driver-memory)
-    shift
-    _1=$1
-    shift
-    set_dm $_1 "-dm/--driver-memory"
-    ;;
-  -m | --master)
-    shift
-    _1=$1
-    shift
-    set_spark_master $_1 "-m/--master"
-    ;;
-  --log-conf)
-    shift
-    set_spark_log_conf "true"
-    info_log=1
-    ;;
-  ?)
-    ;;
-  esac
-done
-
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
 # binary distribution of Spark where Scala is not installed
 exit_status=127
10 changes: 5 additions & 5 deletions bin/spark-submit
@@ -21,15 +21,15 @@ export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
 ORIG_ARGS=$@

 while (($#)); do
-  if [ $1 = "--deploy-mode" ]; then
+  if [ "$1" = "--deploy-mode" ]; then
     DEPLOY_MODE=$2
-  elif [ $1 = "--driver-memory" ]; then
+  elif [ "$1" = "--driver-memory" ]; then
     DRIVER_MEMORY=$2
-  elif [ $1 = "--driver-library-path" ]; then
+  elif [ "$1" = "--driver-library-path" ]; then
     export _SPARK_LIBRARY_PATH=$2
-  elif [ $1 = "--driver-class-path" ]; then
+  elif [ "$1" = "--driver-class-path" ]; then
     export SPARK_CLASSPATH="$SPARK_CLASSPATH:$2"
-  elif [ $1 = "--driver-java-options" ]; then
+  elif [ "$1" = "--driver-java-options" ]; then
     export SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $2"
   fi
   shift
core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -38,6 +38,12 @@ object SparkSubmit {

 private var clusterManager: Int = LOCAL

+/**
+ * A special jar name that indicates the class being run is inside of Spark itself,
+ * and therefore no user jar is needed.
+ */
+private val RESERVED_JAR_NAME = "spark-internal"
+
 def main(args: Array[String]) {
   val appArgs = new SparkSubmitArguments(args)
   if (appArgs.verbose) {
@@ -113,7 +119,7 @@

 if (!deployOnCluster) {
   childMainClass = appArgs.mainClass
-  childClasspath += appArgs.primaryResource
+  if (appArgs.primaryResource != RESERVED_JAR_NAME) childClasspath += appArgs.primaryResource
Contributor: nit: put this in {}?

 } else if (clusterManager == YARN) {
   childMainClass = "org.apache.spark.deploy.yarn.Client"
   childArgs += ("--jar", appArgs.primaryResource)
Expand Down
core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -108,7 +108,7 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
 deployMode = Option(deployMode).getOrElse(System.getenv("DEPLOY_MODE"))

 // Global defaults. These should be kept to a minimum to avoid confusing behavior.
-master = Option(master).getOrElse("local")
+master = Option(master).getOrElse("local[*]")
 }

 /** Ensure that required fields exist. Call this only once all defaults are loaded. */
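One line of context for the default change: `local` runs Spark with a single worker thread, while `local[*]` uses one thread per logical core. A minimal sketch of the `Option(...).getOrElse` fallback pattern used above (names are illustrative):

```scala
object MasterDefault {
  // Option(x) is None when x is null, so an explicitly supplied master wins
  // and "local[*]" (one worker thread per logical core) is the fallback;
  // previously the fallback was the single-threaded "local".
  def resolve(master: String): String = Option(master).getOrElse("local[*]")

  def main(args: Array[String]): Unit = {
    println(resolve(null))                // prints: local[*]
    println(resolve("spark://host:7077")) // prints: spark://host:7077
  }
}
```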
5 changes: 3 additions & 2 deletions repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -963,8 +963,9 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
 val master = this.master match {
   case Some(m) => m
   case None => {
-    val prop = System.getenv("MASTER")
-    if (prop != null) prop else "local[*]"
+    val envMaster = sys.env.get("MASTER")
+    val propMaster = sys.props.get("spark.master")
+    envMaster.getOrElse(propMaster.getOrElse("local[*]"))
Contributor: How about envMaster.orElse(propMaster).getOrElse("local[*]")

Contributor (author): mind = blown

   }
 }
 master
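The merged code nests two `getOrElse` calls; the reviewer's `orElse` chain expresses the same precedence left to right. A standalone sketch (not the PR's code verbatim) showing the two forms agree:

```scala
object MasterPrecedence {
  def main(args: Array[String]): Unit = {
    // Precedence: MASTER env var, then the spark.master system property,
    // then the local[*] default.
    val envMaster  = sys.env.get("MASTER")
    val propMaster = sys.props.get("spark.master")

    val nested  = envMaster.getOrElse(propMaster.getOrElse("local[*]"))
    val chained = envMaster.orElse(propMaster).getOrElse("local[*]")

    assert(nested == chained) // equivalent for every combination of settings
    println(chained)
  }
}
```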