-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-45527][CORE] Use fraction to do the resource calculation #43494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
ee71e46
2d68843
fbd647d
58899b2
cd1c0ef
51ac5cb
d02a3be
07c42b3
2127a7b
61bcb34
eb7f918
18084e6
e9e7a26
877cacf
8657837
6ca3567
1e590bc
0be3176
3b08d1e
c772b33
c2da6e3
196e11b
7c11b6c
347196f
ab4c48e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -191,7 +191,10 @@ private[spark] class CoarseGrainedExecutorBackend( | |
| } else { | ||
| val taskDesc = TaskDescription.decode(data.value) | ||
| logInfo("Got assigned task " + taskDesc.taskId) | ||
| taskResources.put(taskDesc.taskId, taskDesc.resources) | ||
| // Convert resources amounts into ResourceInformation | ||
| val resources = taskDesc.resources.map { case (rName, addressesAmounts) => | ||
| rName -> new ResourceInformation(rName, addressesAmounts.keys.toSeq.sorted.toArray)} | ||
| taskResources.put(taskDesc.taskId, resources) | ||
|
||
| executor.launchTask(this, taskDesc) | ||
| } | ||
|
|
||
|
|
@@ -271,7 +274,7 @@ private[spark] class CoarseGrainedExecutorBackend( | |
| } | ||
|
|
||
| override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer): Unit = { | ||
| val resources = taskResources.getOrDefault(taskId, Map.empty[String, ResourceInformation]) | ||
| val resources = executor.runningTasks.get(taskId).taskDescription.resources | ||
| val cpus = executor.runningTasks.get(taskId).taskDescription.cpus | ||
| val msg = StatusUpdate(executorId, taskId, state, data, cpus, resources) | ||
| if (TaskState.isFinished(state)) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,42 @@ package org.apache.spark.resource | |
| import scala.collection.mutable | ||
|
|
||
| import org.apache.spark.SparkException | ||
| import org.apache.spark.resource.ResourceAmountUtils.RESOURCE_TOTAL_AMOUNT | ||
|
|
||
| private[spark] object ResourceAmountUtils { | ||
| /** | ||
| * Using "double" to do the resource calculation may encounter a problem of precision loss. Eg | ||
| * | ||
| * scala> val taskAmount = 1.0 / 9 | ||
| * taskAmount: Double = 0.1111111111111111 | ||
| * | ||
| * scala> var total = 1.0 | ||
| * total: Double = 1.0 | ||
| * | ||
| * scala> for (i <- 1 to 9 ) { | ||
| * | if (total >= taskAmount) { | ||
| * | total -= taskAmount | ||
| * | println(s"assign $taskAmount for task $i, total left: $total") | ||
| * | } else { | ||
| * | println(s"ERROR Can't assign $taskAmount for task $i, total left: $total") | ||
| * | } | ||
| * | } | ||
| * assign 0.1111111111111111 for task 1, total left: 0.8888888888888888 | ||
| * assign 0.1111111111111111 for task 2, total left: 0.7777777777777777 | ||
| * assign 0.1111111111111111 for task 3, total left: 0.6666666666666665 | ||
| * assign 0.1111111111111111 for task 4, total left: 0.5555555555555554 | ||
| * assign 0.1111111111111111 for task 5, total left: 0.44444444444444425 | ||
| * assign 0.1111111111111111 for task 6, total left: 0.33333333333333315 | ||
| * assign 0.1111111111111111 for task 7, total left: 0.22222222222222204 | ||
| * assign 0.1111111111111111 for task 8, total left: 0.11111111111111094 | ||
| * ERROR Can't assign 0.1111111111111111 for task 9, total left: 0.11111111111111094 | ||
| * | ||
| * So we multiply RESOURCE_TOTAL_AMOUNT to convert the double to long to avoid this limitation. | ||
 | * Double can display up to 16 decimal places, so we set the factor to | ||
 | * 10,000,000,000,000,000L (10^16). | ||
| */ | ||
| final val RESOURCE_TOTAL_AMOUNT: Long = 10000000000000000L | ||
|
||
| } | ||
|
|
||
| /** | ||
| * Trait used to help executor/worker allocate resources. | ||
|
|
@@ -29,59 +65,53 @@ private[spark] trait ResourceAllocator { | |
|
|
||
| protected def resourceName: String | ||
| protected def resourceAddresses: Seq[String] | ||
| protected def slotsPerAddress: Int | ||
|
|
||
| /** | ||
| * Map from an address to its availability, a value > 0 means the address is available, | ||
| * while value of 0 means the address is fully assigned. | ||
| * | ||
| * For task resources ([[org.apache.spark.scheduler.ExecutorResourceInfo]]), this value | ||
| * can be a multiple, such that each address can be allocated up to [[slotsPerAddress]] | ||
| * times. | ||
| * Map from an address to its availability default to 1.0 (we multiply RESOURCE_TOTAL_AMOUNT | ||
| * to avoid precision error), a value > 0 means the address is available, while value of | ||
| * 0 means the address is fully assigned. | ||
| */ | ||
| private lazy val addressAvailabilityMap = { | ||
| mutable.HashMap(resourceAddresses.map(_ -> slotsPerAddress): _*) | ||
| mutable.HashMap(resourceAddresses.map(address => address -> RESOURCE_TOTAL_AMOUNT): _*) | ||
| } | ||
|
|
||
| /** | ||
| * Sequence of currently available resource addresses. | ||
| * | ||
| * With [[slotsPerAddress]] greater than 1, [[availableAddrs]] can contain duplicate addresses | ||
| * e.g. with [[slotsPerAddress]] == 2, availableAddrs for addresses 0 and 1 can look like | ||
| * Seq("0", "0", "1"), where address 0 has two assignments available, and 1 has one. | ||
| * Get the amounts of resources that have been multiplied by RESOURCE_TOTAL_AMOUNT. | ||
| * @return the resources amounts | ||
| */ | ||
| def resourcesAmounts: Map[String, Long] = addressAvailabilityMap.toMap | ||
|
|
||
| /** | ||
| * Sequence of currently available resource addresses which are not fully assigned. | ||
| */ | ||
| def availableAddrs: Seq[String] = addressAvailabilityMap | ||
| .flatMap { case (addr, available) => | ||
| (0 until available).map(_ => addr) | ||
| }.toSeq.sorted | ||
| .filter(addresses => addresses._2 > 0).keys.toSeq.sorted | ||
|
|
||
| /** | ||
| * Sequence of currently assigned resource addresses. | ||
tgravescs marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| * | ||
| * With [[slotsPerAddress]] greater than 1, [[assignedAddrs]] can contain duplicate addresses | ||
| * e.g. with [[slotsPerAddress]] == 2, assignedAddrs for addresses 0 and 1 can look like | ||
| * Seq("0", "1", "1"), where address 0 was assigned once, and 1 was assigned twice. | ||
| */ | ||
| private[spark] def assignedAddrs: Seq[String] = addressAvailabilityMap | ||
| .flatMap { case (addr, available) => | ||
| (0 until slotsPerAddress - available).map(_ => addr) | ||
| }.toSeq.sorted | ||
| .filter(addresses => addresses._2 < RESOURCE_TOTAL_AMOUNT).keys.toSeq.sorted | ||
|
|
||
| /** | ||
| * Acquire a sequence of resource addresses (to a launched task), these addresses must be | ||
| * available. When the task finishes, it will return the acquired resource addresses. | ||
| * Throw an Exception if an address is not available or doesn't exist. | ||
| */ | ||
| def acquire(addrs: Seq[String]): Unit = { | ||
| addrs.foreach { address => | ||
| val isAvailable = addressAvailabilityMap.getOrElse(address, | ||
| def acquire(addressesAmounts: Map[String, Long]): Unit = { | ||
| addressesAmounts.foreach { case (address, amount) => | ||
| val prevAmount = addressAvailabilityMap.getOrElse(address, | ||
| throw new SparkException(s"Try to acquire an address that doesn't exist. $resourceName " + | ||
| s"address $address doesn't exist.")) | ||
| if (isAvailable > 0) { | ||
| addressAvailabilityMap(address) -= 1 | ||
| s"address $address doesn't exist.")) | ||
|
|
||
| val left = addressAvailabilityMap(address) - amount | ||
tgravescs marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| if (left < 0) { | ||
| throw new SparkException(s"Try to acquire $resourceName address $address " + | ||
| s"amount: ${amount.toDouble / RESOURCE_TOTAL_AMOUNT}, but only " + | ||
| s"${prevAmount.toDouble / RESOURCE_TOTAL_AMOUNT} left.") | ||
| } else { | ||
| throw new SparkException("Try to acquire an address that is not available. " + | ||
| s"$resourceName address $address is not available.") | ||
| addressAvailabilityMap(address) = left | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -91,16 +121,21 @@ private[spark] trait ResourceAllocator { | |
| * addresses are released when a task has finished. | ||
| * Throw an Exception if an address is not assigned or doesn't exist. | ||
| */ | ||
| def release(addrs: Seq[String]): Unit = { | ||
| addrs.foreach { address => | ||
| val isAvailable = addressAvailabilityMap.getOrElse(address, | ||
| def release (addressesAmounts: Map[String, Long]): Unit = { | ||
tgravescs marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| addressesAmounts.foreach { case (address, amount) => | ||
| val prevAmount = addressAvailabilityMap.getOrElse(address, | ||
| throw new SparkException(s"Try to release an address that doesn't exist. $resourceName " + | ||
| s"address $address doesn't exist.")) | ||
| if (isAvailable < slotsPerAddress) { | ||
| addressAvailabilityMap(address) += 1 | ||
|
|
||
| val total = prevAmount + amount | ||
|
|
||
| if (total > RESOURCE_TOTAL_AMOUNT) { | ||
| throw new SparkException(s"Try to release $resourceName address $address " + | ||
| s"amount: ${amount.toDouble / RESOURCE_TOTAL_AMOUNT}. But the total amount: " + | ||
| s"${total.toDouble / RESOURCE_TOTAL_AMOUNT} " + | ||
| s"after release should be <= 1") | ||
| } else { | ||
| throw new SparkException(s"Try to release an address that is not assigned. $resourceName " + | ||
| s"address $address is not assigned.") | ||
| addressAvailabilityMap(address) = total | ||
| } | ||
| } | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.