stackabletech · adwk67 · May 22, 2024 · May 21, 2024 · May 21, 2024 · May 22, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,9 +6,14 @@ All notable changes to this project will be documented in this file.
 
 ### Changed
 
-- Update Rust dependency versions, most notably operator-rs 0.67.1 ([#401])
+- Update Rust dependency versions, most notably operator-rs 0.67.1 ([#401])
+
+### Fixed
+
+- Use the CPU resource values exactly as defined for the pod requests and limits, while still rounding up the value used for task parallelism ([#408]).
 
 [#401]: https://github.com/stackabletech/spark-k8s-operator/pull/401
+[#408]: https://github.com/stackabletech/spark-k8s-operator/pull/408
 
 ## [24.3.0] - 2024-03-20
 

diff --git a/docs/modules/spark-k8s/pages/usage-guide/resources.adoc b/docs/modules/spark-k8s/pages/usage-guide/resources.adoc
@@ -58,26 +58,32 @@ To illustrate resource configuration consider the use-case where resources are d
 
 === CPU
 
-CPU request and limit will be rounded up to the next integer value, resulting in the following:
+CPU request and limit will be used as defined in the custom resource, resulting in the following:
 
 
 |===
-|CRD |Spark conf
+|CRD |spark.kubernetes.{driver/executor} cores|spark.{driver/executor} cores (rounded up)
 
+|1800m
 |1800m
 |2
 
+|100m
 |100m
 |1
 
+|1.5
 |1.5
 |2
 
+|2
 |2
 |2
 |===
 
-Spark allows CPU limits to be set for the driver and executor using Spark settings (`spark.{driver|executor}.cores}`) as well as Kubernetes-specific ones (`spark.kubernetes.{driver,executor}.{request|limit}.cores`). `spark.kubernetes.executor.request.cores` takes precedence over `spark.executor.cores` in determining the pod CPU request, but does not affect task parallelism (the number of tasks an executor can run concurrently), so for this reason `spark.executor.cores` is set to the value of `spark.kubernetes.executor.limit.cores`.
+`spark.kubernetes.{driver,executor}.{request|limit}.cores` determine the actual pod CPU requests and limits and are taken directly from the manifest as defined by the user.
+`spark.{driver|executor}.cores` are set to the rounded-up value of the manifest settings.
+Task parallelism (the number of tasks an executor can run concurrently) is determined by `spark.executor.cores`.
 
 === Memory
 

diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs
@@ -915,15 +915,17 @@ fn resources_to_driver_props(
         ..
     } = &driver_config.resources
     {
-        let min_cores = cores_from_quantity(min.0.clone())?;
-        let max_cores = cores_from_quantity(max.0.clone())?;
-        // will have default value from resources to apply if nothing set specifically
-        props.insert("spark.driver.cores".to_string(), max_cores.clone());
+        let driver_cores = cores_from_quantity(max.0.clone())?;
+        // take rounded value for driver.cores but actual values for the pod
+        props.insert("spark.driver.cores".to_string(), driver_cores.clone());
         props.insert(
             "spark.kubernetes.driver.request.cores".to_string(),
-            min_cores,
+            min.0.clone(),
+        );
+        props.insert(
+            "spark.kubernetes.driver.limit.cores".to_string(),
+            max.0.clone(),
         );
-        props.insert("spark.kubernetes.driver.limit.cores".to_string(), max_cores);
     }
 
     if let Resources {
@@ -955,17 +957,16 @@ fn resources_to_executor_props(
         ..
     } = &executor_config.resources
     {
-        let min_cores = cores_from_quantity(min.0.clone())?;
-        let max_cores = cores_from_quantity(max.0.clone())?;
-        // will have default value from resources to apply if nothing set specifically
-        props.insert("spark.executor.cores".to_string(), max_cores.clone());
+        let executor_cores = cores_from_quantity(max.0.clone())?;
+        // take rounded value for executor.cores (to determine the parallelism) but actual values for the pod
+        props.insert("spark.executor.cores".to_string(), executor_cores.clone());
         props.insert(
             "spark.kubernetes.executor.request.cores".to_string(),
-            min_cores,
+            min.0.clone(),
         );
         props.insert(
             "spark.kubernetes.executor.limit.cores".to_string(),
-            max_cores,
+            max.0.clone(),
         );
     }
 
@@ -1154,7 +1155,7 @@ mod tests {
             ),
             (
                 "spark.kubernetes.driver.request.cores".to_string(),
-                "1".to_string(),
+                "250m".to_string(),
             ),
         ]
         .into_iter()
@@ -1194,7 +1195,7 @@ mod tests {
             ("spark.executor.memory".to_string(), "128m".to_string()), // 128 and not 512 because memory overhead is subtracted
             (
                 "spark.kubernetes.executor.request.cores".to_string(),
-                "1".to_string(),
+                "250m".to_string(),
             ),
             (
                 "spark.kubernetes.executor.limit.cores".to_string(),

diff --git a/tests/templates/kuttl/resources/10-assert.yaml.j2 b/tests/templates/kuttl/resources/10-assert.yaml.j2
@@ -33,10 +33,10 @@ spec:
       resources:
       # these resources are set via Spark submit properties like "spark.driver.cores"
         limits:
-          cpu: "2"
+          cpu: 1200m
           memory: 1Gi
         requests:
-          cpu: "1"
+          cpu: 300m
           memory: 1Gi
 ---
 apiVersion: v1
@@ -55,5 +55,5 @@ spec:
           cpu: "2"
           memory: 1Gi
         requests:
-          cpu: "2"
+          cpu: 1250m
           memory: 1Gi
diff --git a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2
@@ -36,7 +36,7 @@ spec:
         enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
       resources:
         cpu:
-          min: 200m
+          min: 300m
           max: 1200m
         memory:
           limit: 1024Mi