diff --git a/assembly/pom.xml b/assembly/pom.xml
index aee572b64a80..36cc60784383 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -152,6 +152,16 @@
+    <profile>
+      <id>connect</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
     <profile>
       <id>kubernetes</id>
diff --git a/bin/spark-connect-shell b/bin/spark-connect-shell
new file mode 100755
index 000000000000..9026c81e70d8
--- /dev/null
+++ b/bin/spark-connect-shell
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The shell script to start a spark-shell with spark connect enabled.
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+# This requires building Spark with `-Pconnect`, e.g. `build/sbt -Pconnect package`.
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
\ No newline at end of file
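For orientation (not part of the patch): the `spark.plugins` setting passed by `bin/spark-connect-shell` is ordinary Spark configuration, so its rough programmatic equivalent is the sketch below; the `local[*]` master is an assumption for a local run.

    // Sketch only: roughly what `--conf spark.plugins=...` amounts to when
    // building a classic SparkSession yourself; `local[*]` is an assumed master.
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .master("local[*]")
      .config("spark.plugins", "org.apache.spark.sql.connect.SparkConnectPlugin")
      .getOrCreate()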
diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect
index 2f2ce7df08c1..62d0d36b4410 100755
--- a/connector/connect/bin/spark-connect
+++ b/connector/connect/bin/spark-connect
@@ -17,17 +17,16 @@
 # limitations under the License.
 #
 
+# Starts spark-connect with server logs printed to standard output. The script rebuilds the
+# server dependencies and starts the server at the default port. This can be used to debug the
+# client during client development.
+
 # Go to the Spark project root directory
 FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
 cd "$FWDIR"
 export SPARK_HOME=$FWDIR
 
-SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
-SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi)
-
 # Build the jars needed for spark submit and spark connect
-build/sbt "${SCALA_ARG}" -Phive package
-
-CONNECT_JAR=`ls "${SPARK_HOME}"/connector/connect/server/target/scala-"${SCALA_BINARY_VER}"/spark-connect-assembly*.jar | paste -sd ',' -`
+build/sbt -Phive -Pconnect package
 
-exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR"
\ No newline at end of file
+exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.connect.SimpleSparkConnectService "$@"
\ No newline at end of file
diff --git a/connector/connect/bin/spark-connect-scala-client.sc b/connector/connect/bin/spark-connect-scala-client.sc
index a8d1856498ce..9cb4f92417d6 100644
--- a/connector/connect/bin/spark-connect-scala-client.sc
+++ b/connector/connect/bin/spark-connect-scala-client.sc
@@ -22,6 +22,7 @@ val sessionBuilder = SparkSession.builder()
 val spark = if (conStr.isEmpty) sessionBuilder.build() else sessionBuilder.remote(conStr).build()
 import spark.implicits._
 import spark.sql
+println("Spark session available as 'spark'.")
 println(
   """
     |   _____                  __      ______                            __
diff --git a/connector/connect/bin/spark-connect-shell b/connector/connect/bin/spark-connect-shell
new file mode 100755
index 000000000000..b31ba1bf140d
--- /dev/null
+++ b/connector/connect/bin/spark-connect-shell
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The spark connect shell for development. This shell script builds the spark connect server with
+# all dependencies and starts the server at the default port.
+# Use `/bin/spark-connect-shell` instead if rebuilding the dependency jars is not needed.
+
+# Go to the Spark project root directory
+FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
+cd "$FWDIR"
+export SPARK_HOME=$FWDIR
+
+# Build the jars needed for spark shell and spark connect
+build/sbt -Phive -Pconnect package
+
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
\ No newline at end of file
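Once the server started by these scripts is up, a client session can be opened the same way `spark-connect-scala-client.sc` above does it. A minimal sketch, assuming a default local server reachable at `sc://localhost`:

    // Mirrors spark-connect-scala-client.sc above; `sc://localhost` is an
    // assumed connection string for a locally started server.
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().remote("sc://localhost").build()
    spark.sql("select 1").show()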
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 49359fedee95..8487c4826153 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -119,6 +119,10 @@ For instance, you can build the Spark Streaming module using:
 
 where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as defined in `streaming/pom.xml` file.
 
+## Building with Spark Connect support
+
+    ./build/mvn -Pconnect -DskipTests clean package
+
 ## Continuous Compilation
 
 We use the scala-maven-plugin which supports incremental and continuous compilation. E.g.
diff --git a/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala b/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala
index a68b112ed2b9..eaca4ad6ee29 100644
--- a/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala
+++ b/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala
@@ -121,6 +121,11 @@ object Main extends Logging {
       sparkContext = sparkSession.sparkContext
       sparkSession
     } catch {
+      case e: ClassNotFoundException if isShellSession && e.getMessage.contains(
+          "org.apache.spark.sql.connect.SparkConnectPlugin") =>
+        logError("Failed to load spark connect plugin.")
+        logError("You need to build Spark with -Pconnect.")
+        sys.exit(1)
       case e: Exception if isShellSession =>
         logError("Failed to initialize Spark session.", e)
         sys.exit(1)
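On the error path above: classes named in `spark.plugins` are loaded reflectively at startup and must implement `org.apache.spark.api.plugin.SparkPlugin`, which is why a build without `-Pconnect` surfaces as a `ClassNotFoundException` for the plugin class. A minimal conforming plugin, as a sketch (`NoOpPlugin` is a hypothetical name):

    // Sketch of the SparkPlugin contract; `NoOpPlugin` is hypothetical.
    // Returning null means no driver-side or executor-side component.
    import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, SparkPlugin}

    class NoOpPlugin extends SparkPlugin {
      override def driverPlugin(): DriverPlugin = null
      override def executorPlugin(): ExecutorPlugin = null
    }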