-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-28042][K8S] Support using volume mount as local storage #24879
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
6fca505
ef66c87
5610fe4
f071c2c
45c8bc5
9392dad
6e5fcf6
2abb8e9
c29dd7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,8 @@ package org.apache.spark.deploy.k8s.features | |
|
|
||
| import java.util.UUID | ||
|
|
||
| import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} | ||
| import collection.JavaConverters._ | ||
|
||
| import io.fabric8.kubernetes.api.model._ | ||
|
|
||
| import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} | ||
| import org.apache.spark.deploy.k8s.Config._ | ||
|
|
@@ -40,24 +41,33 @@ private[spark] class LocalDirsFeatureStep( | |
| private val useLocalDirTmpFs = conf.get(KUBERNETES_LOCAL_DIRS_TMPFS) | ||
|
|
||
| override def configurePod(pod: SparkPod): SparkPod = { | ||
| val localDirVolumes = resolvedLocalDirs | ||
| .zipWithIndex | ||
| .map { case (localDir, index) => | ||
| new VolumeBuilder() | ||
| .withName(s"spark-local-dir-${index + 1}") | ||
| .withNewEmptyDir() | ||
| .withMedium(if (useLocalDirTmpFs) "Memory" else null) | ||
| .endEmptyDir() | ||
| .build() | ||
| } | ||
| val localDirVolumeMounts = localDirVolumes | ||
| .zip(resolvedLocalDirs) | ||
| .map { case (localDirVolume, localDirPath) => | ||
| new VolumeMountBuilder() | ||
| .withName(localDirVolume.getName) | ||
| .withMountPath(localDirPath) | ||
| .build() | ||
| } | ||
| var localDirs = findLocalDirVolumeMount(pod) | ||
| var localDirVolumes : Seq[Volume] = Seq() | ||
| var localDirVolumeMounts : Seq[VolumeMount] = Seq() | ||
|
|
||
| if (localDirs.isEmpty) { | ||
| localDirs = resolvedLocalDirs.toSeq | ||
|
||
| localDirVolumes = resolvedLocalDirs | ||
| .zipWithIndex | ||
| .map { case (_, index) => | ||
| new VolumeBuilder() | ||
| .withName(s"spark-local-dir-${index + 1}") | ||
| .withNewEmptyDir() | ||
| .withMedium(if (useLocalDirTmpFs) "Memory" else null) | ||
| .endEmptyDir() | ||
| .build() | ||
| } | ||
|
|
||
| localDirVolumeMounts = localDirVolumes | ||
| .zip(resolvedLocalDirs) | ||
| .map { case (localDirVolume, localDirPath) => | ||
| new VolumeMountBuilder() | ||
| .withName(localDirVolume.getName) | ||
| .withMountPath(localDirPath) | ||
| .build() | ||
| } | ||
| } | ||
|
|
||
| val podWithLocalDirVolumes = new PodBuilder(pod.pod) | ||
| .editSpec() | ||
| .addToVolumes(localDirVolumes: _*) | ||
|
|
@@ -66,10 +76,22 @@ private[spark] class LocalDirsFeatureStep( | |
| val containerWithLocalDirVolumeMounts = new ContainerBuilder(pod.container) | ||
| .addNewEnv() | ||
| .withName("SPARK_LOCAL_DIRS") | ||
| .withValue(resolvedLocalDirs.mkString(",")) | ||
| .withValue(localDirs.mkString(",")) | ||
| .endEnv() | ||
| .addToVolumeMounts(localDirVolumeMounts: _*) | ||
| .build() | ||
| SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) | ||
| } | ||
|
|
||
| def findLocalDirVolumeMount(pod: SparkPod): Seq[String] = { | ||
|
||
| val localDirVolumes = pod.pod.getSpec.getVolumes.asScala | ||
|
||
| .filter(v => v.getName.startsWith("spark-local-dir-")) | ||
|
||
|
|
||
| localDirVolumes.map { volume => pod.container.getVolumeMounts.asScala | ||
|
||
| .find(m => m.getName.equals(volume.getName)) match { | ||
|
||
| case Some(m) => m.getMountPath | ||
| case _ => "" | ||
| } | ||
| }.filter(s => s.length > 0) | ||
|
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,12 +43,12 @@ private[spark] class KubernetesDriverBuilder { | |
| new DriverServiceFeatureStep(conf), | ||
| new MountSecretsFeatureStep(conf), | ||
| new EnvSecretsFeatureStep(conf), | ||
| new LocalDirsFeatureStep(conf), | ||
| new MountVolumesFeatureStep(conf), | ||
| new DriverCommandFeatureStep(conf), | ||
| new HadoopConfDriverFeatureStep(conf), | ||
| new KerberosConfDriverFeatureStep(conf), | ||
| new PodTemplateConfigMapStep(conf)) | ||
| new PodTemplateConfigMapStep(conf), | ||
| new LocalDirsFeatureStep(conf)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not move this right after
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think some volume mounts maybe setup in PodTemplateConfigMapStep, so I move to last to ensure that.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, since the intent is for users to provide custom local dir volumes via either mount volumes or pod templates it needs to appear after both
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either is fine, just wanted to point out that the pod template is initialized before any step is executed (i.e. |
||
|
|
||
| val spec = KubernetesDriverSpec( | ||
| initialPod, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,8 @@ package org.apache.spark.deploy.k8s.features | |
| import io.fabric8.kubernetes.api.model.{EnvVarBuilder, VolumeBuilder, VolumeMountBuilder} | ||
|
|
||
| import org.apache.spark.{SparkConf, SparkFunSuite} | ||
| import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} | ||
| import org.apache.spark.deploy.k8s.{KubernetesHostPathVolumeConf, KubernetesTestConf, KubernetesVolumeSpec, SparkPod} | ||
|
||
| import org.apache.spark.deploy.k8s.Config._ | ||
| import org.apache.spark.deploy.k8s.submit.JavaMainAppResource | ||
| import org.apache.spark.util.SparkConfWithEnv | ||
|
|
||
| class LocalDirsFeatureStepSuite extends SparkFunSuite { | ||
|
|
@@ -116,4 +115,33 @@ class LocalDirsFeatureStepSuite extends SparkFunSuite { | |
| .withValue(defaultLocalDir) | ||
| .build()) | ||
| } | ||
|
|
||
| test("local dir on mounted volume") { | ||
dongjoon-hyun marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| val volumeConf = KubernetesVolumeSpec( | ||
| "spark-local-dir-test", | ||
| "/tmp", | ||
| "", | ||
| false, | ||
| KubernetesHostPathVolumeConf("/hostPath/tmp") | ||
| ) | ||
| val mountVolumeConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) | ||
| val mountVolumeStep = new MountVolumesFeatureStep(mountVolumeConf) | ||
| val configuredPod = mountVolumeStep.configurePod(SparkPod.initialPod()) | ||
|
|
||
| val sparkConf = new SparkConfWithEnv(Map()) | ||
| val localDirConf = KubernetesTestConf.createDriverConf(sparkConf) | ||
|
||
| val localDirStep = new LocalDirsFeatureStep(localDirConf, defaultLocalDir) | ||
| val newConfiguredPod = localDirStep.configurePod(configuredPod) | ||
|
|
||
| assert(newConfiguredPod.pod.getSpec.getVolumes.size() === 1) | ||
| assert(newConfiguredPod.pod.getSpec.getVolumes.get(0).getHostPath.getPath === "/hostPath/tmp") | ||
| assert(newConfiguredPod.container.getVolumeMounts.size() === 1) | ||
| assert(newConfiguredPod.container.getVolumeMounts.get(0).getMountPath === "/tmp") | ||
| assert(newConfiguredPod.container.getVolumeMounts.get(0).getName === "spark-local-dir-test") | ||
| assert(newConfiguredPod.container.getEnv.get(0) === | ||
| new EnvVarBuilder() | ||
| .withName("SPARK_LOCAL_DIRS") | ||
| .withValue("/tmp") | ||
| .build()) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"If no volume is set..."
Should mention the config (spark.local.dir), which is preferred over the env variable.