diff --git a/connector/protobuf-assembly/pom.xml b/connector/protobuf-assembly/pom.xml
new file mode 100644
index 000000000000..8fcd904443f2
--- /dev/null
+++ b/connector/protobuf-assembly/pom.xml
@@ -0,0 +1,90 @@
+
+
+
+
+ 4.0.0
+
+ org.apache.spark
+ spark-parent_2.12
+ 4.0.0-SNAPSHOT
+ ../../pom.xml
+
+
+ spark-protobuf-assembly_2.12
+
+ protobuf-assembly
+
+ jar
+ Spark Protobuf Assembly
+ https://spark.apache.org/
+
+
+
+ org.apache.spark
+ spark-protobuf_${scala.binary.version}
+ ${project.version}
+
+
+ com.google.protobuf
+ protobuf-java
+ compile
+
+
+ com.google.protobuf
+ protobuf-java-util
+ compile
+
+
+
+ target/scala-${scala.binary.version}/classes
+ target/scala-${scala.binary.version}/test-classes
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+ false
+ false
+
+
+ org.apache.spark:spark-protobuf_${scala.binary.version}
+ com.google.protobuf:*
+
+
+
+
+ com.google.protobuf
+ ${spark.shade.packageName}.spark_protobuf.protobuf
+
+ com.google.protobuf.**
+
+
+
+
+
+ *:*
+
+ google/protobuf/**
+
+
+
+
+
+
+
+
diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml
index 26b5b601bccd..72e0a5a34d9f 100644
--- a/connector/protobuf/pom.xml
+++ b/connector/protobuf/pom.xml
@@ -90,38 +90,6 @@
target/scala-${scala.binary.version}/classes
target/scala-${scala.binary.version}/test-classes
-
-
- org.apache.maven.plugins
- maven-shade-plugin
-
- false
- false
-
-
- com.google.protobuf:*
-
-
-
-
- com.google.protobuf
- ${spark.shade.packageName}.spark_protobuf.protobuf
-
- com.google.protobuf.**
-
-
-
-
-
- *:*
-
- google/protobuf/**
-
-
-
-
-
-
diff --git a/pom.xml b/pom.xml
index 76e3596edd43..c86a9a41c782 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,6 +106,7 @@
connector/connect/common
connector/connect/client/jvm
connector/protobuf
+ connector/protobuf-assembly
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index bd65d3c4bd4a..49e5e5f14784 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -45,8 +45,10 @@ object BuildCommons {
private val buildLocation = file(".").getAbsoluteFile.getParentFile
- val sqlProjects@Seq(catalyst, sql, hive, hiveThriftServer, tokenProviderKafka010, sqlKafka010, avro, protobuf) = Seq(
- "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro", "protobuf"
+ val sqlProjects@Seq(catalyst, sql, hive, hiveThriftServer,
+ tokenProviderKafka010, sqlKafka010, avro, protobuf, protobufAssembly) = Seq(
+ "catalyst", "sql", "hive", "hive-thriftserver", "token-provider-kafka-0-10", "sql-kafka-0-10", "avro",
+ "protobuf", "protobuf-assembly"
).map(ProjectRef(buildLocation, _))
val streamingProjects@Seq(streaming, streamingKafka010) =
@@ -414,7 +416,7 @@ object SparkBuild extends PomBuild {
Seq(
spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn,
unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient,
- commonUtils, sqlApi
+ commonUtils, sqlApi, protobufAssembly
).contains(x)
}
@@ -463,6 +465,7 @@ object SparkBuild extends PomBuild {
/* Protobuf settings */
enable(SparkProtobuf.settings)(protobuf)
+ enable(SparkProtobufAssembly.settings)(protobufAssembly)
// SPARK-14738 - Remove docker tests from main Spark build
// enable(DockerIntegrationTests.settings)(dockerIntegrationTests)
@@ -957,10 +960,34 @@ object SparkProtobuf {
PB.gens.descriptorSet -> target.value / "generated-test-sources/descriptor-set-sbt.desc",
// The above creates single descriptor file with all the proto files. This is different from
// Maven, which create one descriptor file for each proto file.
- ),
+ )
+ ) ++ {
+ val sparkProtocExecPath = sys.props.get("spark.protoc.executable.path")
+ if (sparkProtocExecPath.isDefined) {
+ Seq(
+ PB.protocExecutable := file(sparkProtocExecPath.get)
+ )
+ } else {
+ Seq.empty
+ }
+ }
+}
+
+object SparkProtobufAssembly {
+ import BuildCommons.protoVersion
+
+ lazy val settings = Seq(
+ // For some reason the resolution from the imported Maven build does not work for some
+ // of these dependencies that we need to shade later on.
+ libraryDependencies += "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf",
+
+ dependencyOverrides += "com.google.protobuf" % "protobuf-java" % protoVersion,
(assembly / test) := { },
+ (assembly / assemblyJarName) :=
+ s"spark-protobuf-assembly_${scalaBinaryVersion.value}-${version.value}.jar",
+
(assembly / logLevel) := Level.Info,
// Exclude `scala-library` from assembly.
@@ -978,7 +1005,8 @@ object SparkProtobuf {
},
(assembly / assemblyShadeRules) := Seq(
- ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.spark_protobuf.protobuf.@1").inAll,
+ ShadeRule.rename(
+ "com.google.protobuf.**" -> "org.sparkproject.spark_protobuf.protobuf.@1").inAll,
),
(assembly / assemblyMergeStrategy) := {
@@ -986,17 +1014,8 @@ object SparkProtobuf {
// Drop all proto files that are not needed as artifacts of the build.
case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard
case _ => MergeStrategy.first
- },
- ) ++ {
- val sparkProtocExecPath = sys.props.get("spark.protoc.executable.path")
- if (sparkProtocExecPath.isDefined) {
- Seq(
- PB.protocExecutable := file(sparkProtocExecPath.get)
- )
- } else {
- Seq.empty
}
- }
+ )
}
object Unsafe {
@@ -1535,7 +1554,6 @@ object CopyDependencies {
// Later, when the dependencies are copied, we manually copy the shaded Jar only.
val fid = (LocalProject("connect") / assembly).value
val fidClient = (LocalProject("connect-client-jvm") / assembly).value
- val fidProtobuf = (LocalProject("protobuf") / assembly).value
(Compile / dependencyClasspath).value.map(_.data)
.filter { jar => jar.isFile() }
@@ -1551,9 +1569,6 @@ object CopyDependencies {
} else if (jar.getName.contains("connect-client-jvm") &&
!SbtPomKeys.profiles.value.contains("noshade-connect-client-jvm")) {
Files.copy(fidClient.toPath, destJar.toPath)
- } else if (jar.getName.contains("spark-protobuf") &&
- !SbtPomKeys.profiles.value.contains("noshade-protobuf")) {
- Files.copy(fidProtobuf.toPath, destJar.toPath)
} else {
Files.copy(jar.toPath(), destJar.toPath())
}