diff --git a/connector/protobuf-assembly/pom.xml b/connector/protobuf-assembly/pom.xml new file mode 100644 index 000000000000..8fcd904443f2 --- /dev/null +++ b/connector/protobuf-assembly/pom.xml @@ -0,0 +1,90 @@ + + + + + 4.0.0 + + org.apache.spark + spark-parent_2.12 + 4.0.0-SNAPSHOT + ../../pom.xml + + + spark-protobuf-assembly_2.12 + + protobuf-assembly + + jar + Spark Protobuf Assembly + https://spark.apache.org/ + + + + org.apache.spark + spark-protobuf_${scala.binary.version} + ${project.version} + + + com.google.protobuf + protobuf-java + compile + + + com.google.protobuf + protobuf-java-util + compile + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + org.apache.maven.plugins + maven-shade-plugin + + false + false + + + org.apache.spark:spark-protobuf_${scala.binary.version} + com.google.protobuf:* + + + + + com.google.protobuf + ${spark.shade.packageName}.spark_protobuf.protobuf + + com.google.protobuf.** + + + + + + *:* + + google/protobuf/** + + + + + + + + diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 26b5b601bccd..72e0a5a34d9f 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -90,38 +90,6 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - org.apache.maven.plugins - maven-shade-plugin - - false - false - - - com.google.protobuf:* - - - - - com.google.protobuf - ${spark.shade.packageName}.spark_protobuf.protobuf - - com.google.protobuf.** - - - - - - *:* - - google/protobuf/** - - - - - - diff --git a/pom.xml b/pom.xml index 76e3596edd43..c86a9a41c782 100644 --- a/pom.xml +++ b/pom.xml @@ -106,6 +106,7 @@ connector/connect/common connector/connect/client/jvm connector/protobuf + connector/protobuf-assembly diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index bd65d3c4bd4a..49e5e5f14784 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -45,8 +45,10 
@@ object BuildCommons { private val buildLocation = file(".").getAbsoluteFile.getParentFile - val sqlProjects@Seq(catalyst, sql, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro, protobuf) = Seq( - "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro", "protobuf" + val sqlProjects@Seq(catalyst, sql, hive, hiveThriftServer, + tokenProviderKafka010, sqlKafka010, avro, protobuf, protobufAssembly) = Seq( + "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro", + "protobuf", "protobuf-assembly" ).map(ProjectRef(buildLocation, _)) val streamingProjects@Seq(streaming, streamingKafka010) = @@ -414,7 +416,7 @@ object SparkBuild extends PomBuild { Seq( spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn, unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, - commonUtils, sqlApi + commonUtils, sqlApi, protobufAssembly ).contains(x) } @@ -463,6 +465,7 @@ object SparkBuild extends PomBuild { /* Protobuf settings */ enable(SparkProtobuf.settings)(protobuf) + enable(SparkProtobufAssembly.settings)(protobufAssembly) // SPARK-14738 - Remove docker tests from main Spark build // enable(DockerIntegrationTests.settings)(dockerIntegrationTests) @@ -957,10 +960,34 @@ object SparkProtobuf { PB.gens.descriptorSet -> target.value / "generated-test-sources/descriptor-set-sbt.desc", // The above creates single descriptor file with all the proto files. This is different from // Maven, which create one descriptor file for each proto file. 
- ), + ) + ) ++ { + val sparkProtocExecPath = sys.props.get("spark.protoc.executable.path") + if (sparkProtocExecPath.isDefined) { + Seq( + PB.protocExecutable := file(sparkProtocExecPath.get) + ) + } else { + Seq.empty + } + } +} + +object SparkProtobufAssembly { + import BuildCommons.protoVersion + + lazy val settings = Seq( + // For some reason the resolution from the imported Maven build does not work for some + // of these dependencies that we need to shade later on. + libraryDependencies += "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf", + + dependencyOverrides += "com.google.protobuf" % "protobuf-java" % protoVersion, (assembly / test) := { }, + (assembly / assemblyJarName) := + s"spark-protobuf-assembly_${scalaBinaryVersion.value}-${version.value}.jar", + (assembly / logLevel) := Level.Info, // Exclude `scala-library` from assembly. @@ -978,7 +1005,8 @@ object SparkProtobuf { }, (assembly / assemblyShadeRules) := Seq( - ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.spark_protobuf.protobuf.@1").inAll, + ShadeRule.rename( + "com.google.protobuf.**" -> "org.sparkproject.spark_protobuf.protobuf.@1").inAll, ), (assembly / assemblyMergeStrategy) := { @@ -986,17 +1014,8 @@ object SparkProtobuf { // Drop all proto files that are not needed as artifacts of the build. case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard case _ => MergeStrategy.first - }, - ) ++ { - val sparkProtocExecPath = sys.props.get("spark.protoc.executable.path") - if (sparkProtocExecPath.isDefined) { - Seq( - PB.protocExecutable := file(sparkProtocExecPath.get) - ) - } else { - Seq.empty - } - } + ) } object Unsafe { @@ -1535,7 +1554,6 @@ object CopyDependencies { // Later, when the dependencies are copied, we manually copy the shaded Jar only.
val fid = (LocalProject("connect") / assembly).value val fidClient = (LocalProject("connect-client-jvm") / assembly).value - val fidProtobuf = (LocalProject("protobuf") / assembly).value (Compile / dependencyClasspath).value.map(_.data) .filter { jar => jar.isFile() } @@ -1551,9 +1569,6 @@ object CopyDependencies { } else if (jar.getName.contains("connect-client-jvm") && !SbtPomKeys.profiles.value.contains("noshade-connect-client-jvm")) { Files.copy(fidClient.toPath, destJar.toPath) - } else if (jar.getName.contains("spark-protobuf") && - !SbtPomKeys.profiles.value.contains("noshade-protobuf")) { - Files.copy(fidProtobuf.toPath, destJar.toPath) } else { Files.copy(jar.toPath(), destJar.toPath()) }