From ce0369f1bb9c68ffe6bf753b0328a75ac5323185 Mon Sep 17 00:00:00 2001 From: Zhen Li Date: Wed, 8 Mar 2023 12:23:10 -0800 Subject: [PATCH 1/3] Fix the spark-connect script --- connector/connect/bin/spark-connect | 11 +++++++++-- .../connect/bin/spark-connect-scala-client | 17 +++++++++-------- connector/connect/bin/spark-connect-shell | 8 ++++++-- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect index 62d0d36b4410..f9cea880e854 100755 --- a/connector/connect/bin/spark-connect +++ b/connector/connect/bin/spark-connect @@ -26,7 +26,14 @@ FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)" cd "$FWDIR" export SPARK_HOME=$FWDIR +# Determine the Scala version used in Spark +SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` +SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi) + # Build the jars needed for spark submit and spark connect -build/sbt -Phive -Pconnect package +build/sbt "${SCALA_ARG}" -Phive -Pconnect package + +# This jar is already in the classpath, but the submit commands wants a jar as the input. +CONNECT_JAR=`ls "${SPARK_HOME}"/assembly/target/scala-"${SCALA_BINARY_VER}"/jars/spark-connect_*.jar | paste -sd ',' -` -exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.connect.SimpleSparkConnectService "$@" \ No newline at end of file +exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR" \ No newline at end of file diff --git a/connector/connect/bin/spark-connect-scala-client b/connector/connect/bin/spark-connect-scala-client index 902091a74de0..72e23794f5fb 100755 --- a/connector/connect/bin/spark-connect-scala-client +++ b/connector/connect/bin/spark-connect-scala-client @@ -34,17 +34,18 @@ FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)" cd "$FWDIR" export SPARK_HOME=$FWDIR -# Build the jars needed for spark connect JVM client -build/sbt "sql/package;connect-client-jvm/assembly" - -CONNECT_CLASSPATH="$(build/sbt -DcopyDependencies=false "export connect-client-jvm/fullClasspath" | grep jar | tail -n1)" -SQL_CLASSPATH="$(build/sbt -DcopyDependencies=false "export sql/fullClasspath" | grep jar | tail -n1)" - -INIT_SCRIPT="${SPARK_HOME}"/connector/connect/bin/spark-connect-scala-client.sc - # Determine the Scala version used in Spark SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` SCALA_VER=`grep "scala.version" "${SPARK_HOME}/pom.xml" | grep ${SCALA_BINARY_VER} | head -n1 | awk -F '[<>]' '{print $3}'` SCALA_BIN="${SPARK_HOME}/build/scala-${SCALA_VER}/bin/scala" +SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi) + +# Build the jars needed for spark connect JVM client +build/sbt "${SCALA_ARG}" "sql/package;connect-client-jvm/assembly" + +CONNECT_CLASSPATH="$(build/sbt "${SCALA_ARG}" -DcopyDependencies=false "export connect-client-jvm/fullClasspath" | grep jar | tail -n1)" +SQL_CLASSPATH="$(build/sbt "${SCALA_ARG}" -DcopyDependencies=false "export sql/fullClasspath" | grep jar | tail -n1)" + +INIT_SCRIPT="${SPARK_HOME}"/connector/connect/bin/spark-connect-scala-client.sc exec "${SCALA_BIN}" -cp "$CONNECT_CLASSPATH:$SQL_CLASSPATH" -i $INIT_SCRIPT \ No newline at end of file diff --git a/connector/connect/bin/spark-connect-shell b/connector/connect/bin/spark-connect-shell index b31ba1bf140d..ab443b2e1116 100755 --- a/connector/connect/bin/spark-connect-shell +++ b/connector/connect/bin/spark-connect-shell @@ -26,7 +26,11 @@ FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)" cd "$FWDIR" export SPARK_HOME=$FWDIR -# Build the jars needed for spark shell and spark connect -build/sbt -Phive -Pconnect package +# Determine the Scala version used in Spark +SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` +SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi) + +# Build the jars needed for spark submit and spark connect +build/sbt "${SCALA_ARG}" -Phive -Pconnect package exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@" \ No newline at end of file From de73e7e955a8e10a0ec3f0f03bd53b3dc07994bd Mon Sep 17 00:00:00 2001 From: Zhen Li Date: Wed, 8 Mar 2023 13:40:35 -0800 Subject: [PATCH 2/3] Add end of line --- connector/connect/bin/spark-connect | 2 +- connector/connect/bin/spark-connect-scala-client | 2 +- connector/connect/bin/spark-connect-shell | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect index f9cea880e854..b7d05202d50e 100755 --- a/connector/connect/bin/spark-connect +++ b/connector/connect/bin/spark-connect @@ -36,4 +36,4 @@ build/sbt "${SCALA_ARG}" -Phive -Pconnect package # This jar is already in the classpath, but the submit commands wants a jar as the input. CONNECT_JAR=`ls "${SPARK_HOME}"/assembly/target/scala-"${SCALA_BINARY_VER}"/jars/spark-connect_*.jar | paste -sd ',' -` -exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR" \ No newline at end of file +exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR" diff --git a/connector/connect/bin/spark-connect-scala-client b/connector/connect/bin/spark-connect-scala-client index 72e23794f5fb..7eabcb0ba320 100755 --- a/connector/connect/bin/spark-connect-scala-client +++ b/connector/connect/bin/spark-connect-scala-client @@ -48,4 +48,4 @@ SQL_CLASSPATH="$(build/sbt "${SCALA_ARG}" -DcopyDependencies=false "export sql/f INIT_SCRIPT="${SPARK_HOME}"/connector/connect/bin/spark-connect-scala-client.sc -exec "${SCALA_BIN}" -cp "$CONNECT_CLASSPATH:$SQL_CLASSPATH" -i $INIT_SCRIPT \ No newline at end of file +exec "${SCALA_BIN}" -cp "$CONNECT_CLASSPATH:$SQL_CLASSPATH" -i $INIT_SCRIPT diff --git a/connector/connect/bin/spark-connect-shell b/connector/connect/bin/spark-connect-shell index ab443b2e1116..d571c3da8f5b 100755 --- a/connector/connect/bin/spark-connect-shell +++ b/connector/connect/bin/spark-connect-shell @@ -33,4 +33,4 @@ SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; el # Build the jars needed for spark submit and spark connect build/sbt "${SCALA_ARG}" -Phive -Pconnect package -exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@" \ No newline at end of file +exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@" From 8a6a6ef5c1b9af7dff8b7d8487c291cbcfce8251 Mon Sep 17 00:00:00 2001 From: Zhen Li Date: Wed, 8 Mar 2023 14:33:02 -0800 Subject: [PATCH 3/3] Always use scala version --- connector/connect/bin/spark-connect | 2 +- connector/connect/bin/spark-connect-scala-client | 2 +- connector/connect/bin/spark-connect-shell | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect index b7d05202d50e..772a88a04f3e 100755 --- a/connector/connect/bin/spark-connect +++ b/connector/connect/bin/spark-connect @@ -28,7 +28,7 @@ export SPARK_HOME=$FWDIR # Determine the Scala version used in Spark SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` -SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi) +SCALA_ARG="-Pscala-${SCALA_BINARY_VER}" # Build the jars needed for spark submit and spark connect build/sbt "${SCALA_ARG}" -Phive -Pconnect package diff --git a/connector/connect/bin/spark-connect-scala-client b/connector/connect/bin/spark-connect-scala-client index 7eabcb0ba320..8c5e687ef245 100755 --- a/connector/connect/bin/spark-connect-scala-client +++ b/connector/connect/bin/spark-connect-scala-client @@ -38,7 +38,7 @@ export SPARK_HOME=$FWDIR SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` SCALA_VER=`grep "scala.version" "${SPARK_HOME}/pom.xml" | grep ${SCALA_BINARY_VER} | head -n1 | awk -F '[<>]' '{print $3}'` SCALA_BIN="${SPARK_HOME}/build/scala-${SCALA_VER}/bin/scala" -SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi) +SCALA_ARG="-Pscala-${SCALA_BINARY_VER}" # Build the jars needed for spark connect JVM client build/sbt "${SCALA_ARG}" "sql/package;connect-client-jvm/assembly" diff --git a/connector/connect/bin/spark-connect-shell b/connector/connect/bin/spark-connect-shell index d571c3da8f5b..0fcf831e03db 100755 --- a/connector/connect/bin/spark-connect-shell +++ b/connector/connect/bin/spark-connect-shell @@ -28,7 +28,7 @@ export SPARK_HOME=$FWDIR # Determine the Scala version used in Spark SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` -SCALA_ARG=$(if [ "${SCALA_BINARY_VER}" == "2.13" ]; then echo "-Pscala-2.13"; else echo ""; fi) +SCALA_ARG="-Pscala-${SCALA_BINARY_VER}" # Build the jars needed for spark submit and spark connect build/sbt "${SCALA_ARG}" -Phive -Pconnect package