diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index aee92ba928b4..0693b9a00a35 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1554,6 +1554,7 @@ private[spark] class DAGScheduler( case sms: ShuffleMapStage if stage.isIndeterminate && !sms.isAvailable => mapOutputTracker.unregisterAllMapAndMergeOutput(sms.shuffleDep.shuffleId) sms.shuffleDep.newShuffleMergeState() + case _ => } @@ -1873,15 +1874,28 @@ private[spark] class DAGScheduler( private[scheduler] def handleTaskCompletion(event: CompletionEvent): Unit = { val task = event.task val stageId = task.stageId + val stageOption = stageIdToStage.get(task.stageId) + val isIndeterministicZombie = event.reason match { + case Success if stageOption.isDefined => + val stage = stageOption.get + (task.stageAttemptId < stage.latestInfo.attemptNumber() + && stage.isIndeterminate) || stage.shouldDiscardResult(task.stageAttemptId) + + case _ => false + } outputCommitCoordinator.taskCompleted( stageId, task.stageAttemptId, task.partitionId, event.taskInfo.attemptNumber, // this is a task attempt number - event.reason) + if (isIndeterministicZombie) { + TaskKilled(reason = "Indeterminate stage needs all tasks to be retried") + } else { + event.reason + }) - if (!stageIdToStage.contains(task.stageId)) { + if (stageOption.isEmpty) { // The stage may have already finished when we get this event -- e.g. maybe it was a // speculative task. It is important that we send the TaskEnd event in any case, so listeners // are properly notified and can chose to handle it. For instance, some listeners are @@ -1893,34 +1907,37 @@ private[spark] class DAGScheduler( return } - val stage = stageIdToStage(task.stageId) + val stage = stageOption.get // Make sure the task's accumulators are updated before any other processing happens, so that // we can post a task end event before any jobs or stages are updated. The accumulators are // only updated in certain cases. event.reason match { case Success => - task match { - case rt: ResultTask[_, _] => - val resultStage = stage.asInstanceOf[ResultStage] - resultStage.activeJob match { - case Some(job) => - // Only update the accumulator once for each result task. - if (!job.finished(rt.outputId)) { - updateAccumulators(event) - } - case None => // Ignore update if task's job has finished. - } - case _ => - updateAccumulators(event) + if (!isIndeterministicZombie) { + task match { + case rt: ResultTask[_, _] => + val resultStage = stage.asInstanceOf[ResultStage] + resultStage.activeJob match { + case Some(job) => + // Only update the accumulator once for each result task. + if (!job.finished(rt.outputId)) { + updateAccumulators(event) + } + case _ => // Ignore update if task's job has finished. + } + case _ => updateAccumulators(event) + } } + case _: ExceptionFailure | _: TaskKilled => updateAccumulators(event) + case _ => } if (trackingCacheVisibility) { // Update rdd blocks' visibility status. 
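// A successful completion coming from a zombie attempt of an indeterminate stage is being
// discarded, so it must not mark the task's RDD blocks as visible.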
blockManagerMaster.updateRDDBlockVisibility( - event.taskInfo.taskId, visible = event.reason == Success) + event.taskInfo.taskId, visible = event.reason == Success && !isIndeterministicZombie) } postTaskEnd(event) @@ -1936,7 +1953,7 @@ private[spark] class DAGScheduler( } task match { - case rt: ResultTask[_, _] => + case rt: ResultTask[_, _] if !isIndeterministicZombie => // Cast to ResultStage here because it's part of the ResultTask // TODO Refactor this out to a function that accepts a ResultStage val resultStage = stage.asInstanceOf[ResultStage] @@ -1984,7 +2001,7 @@ private[spark] class DAGScheduler( logInfo(log"Ignoring result from ${MDC(RESULT, rt)} because its job has finished") } - case smt: ShuffleMapTask => + case smt: ShuffleMapTask if !isIndeterministicZombie => val shuffleStage = stage.asInstanceOf[ShuffleMapStage] // Ignore task completion for old attempt of indeterminate stage val ignoreIndeterminate = stage.isIndeterminate && @@ -2017,6 +2034,8 @@ private[spark] class DAGScheduler( processShuffleMapStageCompletion(shuffleStage) } } + + case _ => // ignore } case FetchFailed(bmAddress, shuffleId, _, mapIndex, reduceId, failureMessage) => @@ -2121,6 +2140,12 @@ private[spark] class DAGScheduler( failedStages += failedStage failedStages += mapStage if (noResubmitEnqueued) { + def generateErrorMessage(stage: Stage): String = { + "A shuffle map stage with indeterminate output was failed and retried. " + + s"However, Spark cannot rollback the $stage to re-process the input data, " + + "and has to fail this job. Please eliminate the indeterminacy by " + + "checkpointing the RDD before repartition and try again." + } // If the map stage is INDETERMINATE, which means the map tasks may return // different result when re-try, we need to re-try all the tasks of the failed // stage and its succeeding stages, because the input data will be changed after the @@ -2147,13 +2172,6 @@ private[spark] class DAGScheduler( } } - def generateErrorMessage(stage: Stage): String = { - "A shuffle map stage with indeterminate output was failed and retried. " + - s"However, Spark cannot rollback the $stage to re-process the input data, " + - "and has to fail this job. Please eliminate the indeterminacy by " + - "checkpointing the RDD before repartition and try again." - } - activeJobs.foreach(job => collectStagesToRollback(job.finalStage :: Nil)) // The stages will be rolled back after checking @@ -2171,7 +2189,12 @@ private[spark] class DAGScheduler( abortStage(mapStage, reason, None) } else { rollingBackStages += mapStage + mapOutputTracker.unregisterAllMapAndMergeOutput( + mapStage.shuffleDep.shuffleId) } + } else { + mapOutputTracker.unregisterAllMapAndMergeOutput( + mapStage.shuffleDep.shuffleId) } case resultStage: ResultStage if resultStage.activeJob.isDefined => @@ -2179,6 +2202,8 @@ private[spark] class DAGScheduler( if (numMissingPartitions < resultStage.numTasks) { // TODO: support to rollback result tasks. 
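// Some result partitions have already completed and their output may have been consumed by
// user code, so they cannot be rolled back and recomputed; failing the job is the only safe
// option here.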
abortStage(resultStage, generateErrorMessage(resultStage), None) + } else { + resultStage.markAllPartitionsMissing() } case _ => @@ -2186,6 +2211,19 @@ private[spark] class DAGScheduler( logInfo(log"The shuffle map stage ${MDC(SHUFFLE_ID, mapStage)} with indeterminate output was failed, " + log"we will roll back and rerun below stages which include itself and all its " + log"indeterminate child stages: ${MDC(STAGES, rollingBackStages)}") + } else if (failedStage.isIndeterminate) { + failedStage match { + case resultStage: ResultStage if resultStage.activeJob.isDefined => + val numMissingPartitions = resultStage.findMissingPartitions().length + if (numMissingPartitions < resultStage.numTasks) { + // TODO: support to rollback result tasks. + abortStage(resultStage, generateErrorMessage(resultStage), None) + } else { + resultStage.markAllPartitionsMissing() + } + + case _ => + } } // We expect one executor failure to trigger many FetchFailures in rapid succession, diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala index 7fdc3186e86b..92f76f9adec8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala @@ -38,6 +38,9 @@ private[spark] class ResultStage( resourceProfileId: Int) extends Stage(id, rdd, partitions.length, parents, firstJobId, callSite, resourceProfileId) { + @volatile + private var discardResultsForAttemptId: Int = -1 + /** * The active job for this result stage. Will be empty if the job has already finished * (e.g., because the job was cancelled). @@ -54,6 +57,14 @@ private[spark] class ResultStage( _activeJob = None } + override def makeNewStageAttempt( + numPartitionsToCompute: Int, + taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty): Unit = { + super.makeNewStageAttempt(numPartitionsToCompute, taskLocalityPreferences) + // clear the attemptId set in the attemptIdAllPartitionsMissing + discardResultsForAttemptId = -1 + } + /** * Returns the sequence of partition ids that are missing (i.e. needs to be computed). 
* @@ -64,5 +75,16 @@ private[spark] class ResultStage( (0 until job.numPartitions).filter(id => !job.finished(id)) } + def markAllPartitionsMissing(): Unit = { + this.discardResultsForAttemptId = this.latestInfo.attemptNumber() + val job = activeJob.get + for (id <- 0 until job.numPartitions) { + job.finished(id) = false + } + } + + override def shouldDiscardResult(attemptId: Int): Boolean = + this.discardResultsForAttemptId >= attemptId + override def toString: String = "ResultStage " + id } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index f35beafd8748..f8420f45482f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -131,4 +131,6 @@ private[scheduler] abstract class Stage( def isIndeterminate: Boolean = { rdd.outputDeterministicLevel == DeterministicLevel.INDETERMINATE } + + def shouldDiscardResult(attemptId: Int): Boolean = false } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 3e507df706ba..0f00ce302962 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -51,7 +51,9 @@ import org.apache.spark.storage.{BlockId, BlockManager, BlockManagerId, BlockMan import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, Clock, LongAccumulator, SystemClock, ThreadUtils, Utils} import org.apache.spark.util.ArrayImplicits._ -class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) +class DAGSchedulerEventProcessLoopTester( + dagScheduler: DAGScheduler, + dagSchedulerInterceptorOpt: Option[DagSchedulerInterceptor] = None) extends DAGSchedulerEventProcessLoop(dagScheduler) { dagScheduler.setEventProcessLoop(this) @@ -64,12 +66,15 @@ class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) if (isProcessing) { // `DAGSchedulerEventProcessLoop` is guaranteed to process events sequentially. So we should // buffer events for sequent processing later instead of processing them recursively. + dagSchedulerInterceptorOpt.foreach(_.beforeAddingDagEventToQueue(event)) eventQueue += event + dagSchedulerInterceptorOpt.foreach(_.afterAddingDagEventToQueue(event)) } else { try { isProcessing = true // Forward event to `onReceive` directly to avoid processing event asynchronously. 
onReceive(event) + dagSchedulerInterceptorOpt.foreach(_.afterDirectProcessingOfDagEvent(event)) } catch { case NonFatal(e) => onError(e) } finally { @@ -175,6 +180,12 @@ class DummyScheduledFuture( class DAGSchedulerSuiteDummyException extends Exception +trait DagSchedulerInterceptor { + def beforeAddingDagEventToQueue(event: DAGSchedulerEvent): Unit = {} + def afterAddingDagEventToQueue(event: DAGSchedulerEvent): Unit = {} + def afterDirectProcessingOfDagEvent(event: DAGSchedulerEvent): Unit = {} +} + class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with TimeLimits { import DAGSchedulerSuite._ @@ -300,6 +311,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti } var sparkListener: EventInfoRecordingListener = null + var dagSchedulerInterceptor: DagSchedulerInterceptor = null var blockManagerMaster: BlockManagerMaster = null var mapOutputTracker: MapOutputTrackerMaster = null @@ -444,7 +456,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti blockManagerMaster, sc.env)) - dagEventProcessLoopTester = new DAGSchedulerEventProcessLoopTester(scheduler) + dagEventProcessLoopTester = new DAGSchedulerEventProcessLoopTester(scheduler, + Option(dagSchedulerInterceptor)) } override def afterEach(): Unit = { @@ -453,6 +466,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti dagEventProcessLoopTester.stop() mapOutputTracker.stop() broadcastManager.stop() + this.dagSchedulerInterceptor = null } finally { super.afterEach() } @@ -3153,25 +3167,43 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti null)) (shuffleId1, shuffleId2) } + private def constructTwoIndeterminateStage(): (Int, Int) = constructTwoStages(true, true) - private def constructTwoIndeterminateStage(): (Int, Int) = { - val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = true) + private def constructTwoStages( + stage1InDeterminate: Boolean, + stage2InDeterminate: Boolean, + isDependencyBetweenStagesTransitive: Boolean = true): (Int, Int) = { + val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = stage1InDeterminate) val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(2)) val shuffleId1 = shuffleDep1.shuffleId - val shuffleMapRdd2 = new MyRDD(sc, 2, List(shuffleDep1), tracker = mapOutputTracker, - indeterminate = true) + val shuffleMapRdd2 = if (isDependencyBetweenStagesTransitive) { + new MyRDD(sc, 2, List(shuffleDep1), tracker = mapOutputTracker, + indeterminate = stage2InDeterminate) + } else { + new MyRDD(sc, 2, Nil, tracker = mapOutputTracker, indeterminate = stage2InDeterminate) + } val shuffleDep2 = new ShuffleDependency(shuffleMapRdd2, new HashPartitioner(2)) val shuffleId2 = shuffleDep2.shuffleId - val finalRdd = new MyRDD(sc, 2, List(shuffleDep2), tracker = mapOutputTracker) + + val finalRdd = if (isDependencyBetweenStagesTransitive) { + new MyRDD(sc, 2, List(shuffleDep2), tracker = mapOutputTracker) + } else { + new MyRDD(sc, 2, List(shuffleDep1, shuffleDep2), tracker = mapOutputTracker) + } submit(finalRdd, Array(0, 1)) + val stageId1 = this.scheduler.shuffleIdToMapStage(shuffleId1).id // Finish the first shuffle map stage. 
- completeShuffleMapStageSuccessfully(0, 0, 2) - assert(mapOutputTracker.findMissingPartitions(shuffleId1) === Some(Seq.empty)) - + completeShuffleMapStageSuccessfully(stageId1, 0, 2) + import org.scalatest.concurrent.Eventually._ + import org.scalatest.matchers.should.Matchers._ + import org.scalatest.time.SpanSugar._ + eventually(timeout(1.minutes), interval(500.milliseconds)) { + mapOutputTracker.findMissingPartitions(shuffleId1) should equal(Some(Nil)) + } (shuffleId1, shuffleId2) } @@ -3185,6 +3217,157 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti "Spark can only do this while using the new shuffle block fetching protocol")) } + + + test("SPARK-51272: retry all the partitions of result stage, if the first result task" + + " has failed and failing ShuffleMap stage is inDeterminate") { + this.dagSchedulerInterceptor = createDagInterceptorForSpark51272( + () => taskSets.find(_.shuffleId.isEmpty).get.tasks(1), "RELEASE_LATCH") + + val numPartitions = 2 + // The first shuffle stage is completed by the below function itself which creates two + // stages. + val (shuffleId1, shuffleId2) = constructTwoStages( + stage1InDeterminate = false, + stage2InDeterminate = true, + isDependencyBetweenStagesTransitive = false) + val shuffleStage1 = this.scheduler.shuffleIdToMapStage(shuffleId1) + val shuffleStage2 = this.scheduler.shuffleIdToMapStage(shuffleId2) + completeShuffleMapStageSuccessfully(shuffleStage2.id, 0, numPartitions) + val resultStage = scheduler.stageIdToStage(2).asInstanceOf[ResultStage] + val activeJob = resultStage.activeJob + assert(activeJob.isDefined) + // The result stage is still waiting for its 2 tasks to complete + assert(resultStage.findMissingPartitions() == Seq.tabulate(numPartitions)(i => i)) + + // The below event is going to initiate the retry of previous indeterminate stages, and also + // the retry of all result tasks. But before the "ResubmitFailedStages" event is added to the + // queue of Scheduler, a successful completion of the result partition task is added to the + // event queue. 
Due to this scenario, the bug surfaces: instead of all the partitions + // of the result stage (2 tasks in total) being retried, only some (1 task) get retried + runEvent( + makeCompletionEvent( + taskSets.find(_.stageId == resultStage.id).get.tasks(0), + FetchFailed(makeBlockManagerId("hostA"), shuffleId1, 0L, 0, 0, "ignored"), + null)) + + import org.scalatest.concurrent.Eventually._ + import org.scalatest.matchers.should.Matchers._ + import org.scalatest.time.SpanSugar._ + eventually(timeout(3.minutes), interval(500.milliseconds)) { + shuffleStage1.latestInfo.attemptNumber() should equal(1) + } + completeShuffleMapStageSuccessfully(shuffleStage1.id, 1, numPartitions) + + eventually(timeout(3.minutes), interval(500.milliseconds)) { + shuffleStage2.latestInfo.attemptNumber() should equal(1) + } + completeShuffleMapStageSuccessfully(shuffleStage2.id, 1, numPartitions) + eventually(timeout(3.minutes), interval(500.milliseconds)) { + resultStage.latestInfo.attemptNumber() should equal(1) + } + org.scalatest.Assertions.assert(resultStage.latestInfo.numTasks == numPartitions) + org.scalatest.Assertions.assert(resultStage.findMissingPartitions().size == numPartitions) + } + + test("SPARK-51272: retry all the partitions of result stage, if the first result task" + " has failed with failing ShuffleStage determinate but result stage has another ShuffleStage" + " which is indeterminate") { + this.dagSchedulerInterceptor = createDagInterceptorForSpark51272( + () => taskSets.find(_.shuffleId.isEmpty).get.tasks(1), "RELEASE_LATCH") + + val numPartitions = 2 + // The first shuffle stage is completed by the below function itself which creates two + // stages. + val (detShuffleId1, indetShuffleId2) = constructTwoStages( + stage1InDeterminate = false, + stage2InDeterminate = true, + isDependencyBetweenStagesTransitive = false) + val detShuffleStage1 = this.scheduler.shuffleIdToMapStage(detShuffleId1) + val inDetshuffleStage2 = this.scheduler.shuffleIdToMapStage(indetShuffleId2) + completeShuffleMapStageSuccessfully(inDetshuffleStage2.id, 0, numPartitions) + assert(mapOutputTracker.findMissingPartitions(indetShuffleId2) === Some(Seq.empty)) + val resultStage = scheduler.stageIdToStage(2).asInstanceOf[ResultStage] + val activeJob = resultStage.activeJob + assert(activeJob.isDefined) + // The result stage is still waiting for its 2 tasks to complete + assert(resultStage.findMissingPartitions() == Seq.tabulate(numPartitions)(i => i)) + + + // The below event will cause the first task of the result stage to fail. + // If the scheduler behaves correctly, the following should happen: + // since the result stage depends on two shuffles, one of which is inDeterminate, + // the retry of the ResultStage should cover both tasks, even though the failed shuffle stage + // is deterministic, because at this point there is no guarantee whether the indeterminate + // shuffle stage 2 has also failed or not. If the inDeterminate stage had hypothetically failed + // for the first result partition but succeeded for the second result partition, then + // re-execution of shuffle stage 2 (indeterminate) will cause wrong results.
So to avoid this, once + // an inDeterminate Result Stage is being retried, no successful partitions should be + // accepted having stale attempt + // + runEvent( + makeCompletionEvent( + taskSets.find(_.shuffleId.isEmpty).get.tasks(0), + FetchFailed(makeBlockManagerId("hostA"), detShuffleId1, 0L, 0, 0, "ignored"), + null)) + + import org.scalatest.concurrent.Eventually._ + import org.scalatest.matchers.should.Matchers._ + import org.scalatest.time.SpanSugar._ + eventually(timeout(3.minutes), interval(500.milliseconds)) { + detShuffleStage1.latestInfo.attemptNumber() should equal(1) + } + completeShuffleMapStageSuccessfully(detShuffleStage1.id, 1, numPartitions) + + // Though the inDetShuffleStage2 has not suffered any loss, but source code of DagScheduler + // has code to remove shuffleoutputs based on the lost BlockManager , which in this case will + // result in loss of output of shuffle2 also. It looses one partition and hence will be + // re-attempted.. + // But that re-attempt should fetch all partitions! + eventually(timeout(3.minutes), interval(500.milliseconds)) { + inDetshuffleStage2.latestInfo.attemptNumber() should equal(1) + } + org.scalatest.Assertions.assert(inDetshuffleStage2.latestInfo.numTasks == 2) + org.scalatest.Assertions.assert(inDetshuffleStage2.findMissingPartitions().size == 2) + completeShuffleMapStageSuccessfully(inDetshuffleStage2.id, 1, numPartitions) + eventually(timeout(3.minutes), interval(500.milliseconds)) { + resultStage.latestInfo.attemptNumber() should equal(1) + } + org.scalatest.Assertions.assert(resultStage.latestInfo.numTasks == numPartitions) + } + + test("SPARK-51272: retry all the partitions of Shuffle stage, if any task of ShuffleStage " + + " has failed and failing ShuffleMap stage is inDeterminate") { + val numPartitions = 2 + this.dagSchedulerInterceptor = createDagInterceptorForSpark51272( + () => taskSets.filter(_.shuffleId.isDefined).maxBy(_.shuffleId.get).tasks(1), + makeMapStatus(host = "hostZZZ", reduces = numPartitions)) + // The first shuffle stage is completed by the below function itself which creates two + // indeterminate stages. + val (shuffleId1, shuffleId2) = constructTwoStages( + stage1InDeterminate = false, + stage2InDeterminate = true, + isDependencyBetweenStagesTransitive = false + ) + // This will trigger the resubmit failed stage and in before adding resubmit message to the + // queue, a successful partition completion event will arrive. + runEvent( + makeCompletionEvent( + taskSets.filter(_.shuffleId.isDefined).maxBy(_.shuffleId.get).tasks(0), + FetchFailed(makeBlockManagerId("hostB"), shuffleId2, 0L, 0, 0, "ignored"), + null)) + + val shuffleStage2 = scheduler.shuffleIdToMapStage(shuffleId2) + import org.scalatest.concurrent.Eventually._ + import org.scalatest.matchers.should.Matchers._ + import org.scalatest.time.SpanSugar._ + + eventually(timeout(30.seconds), interval(500.milliseconds)) { + shuffleStage2.latestInfo.attemptNumber() should equal(1) + } + org.scalatest.Assertions.assert(shuffleStage2.findMissingPartitions().size == numPartitions) + } + test("SPARK-25341: retry all the succeeding stages when the map stage is indeterminate") { val (shuffleId1, shuffleId2) = constructIndeterminateStageFetchFailed() @@ -3192,9 +3375,10 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val failedStages = scheduler.failedStages.toSeq assert(failedStages.map(_.id) == Seq(1, 2)) // Shuffle blocks of "hostC" is lost, so first task of the `shuffleMapRdd2` needs to retry. 
+ // As the ShuffleMapStage is inDeterminate all the partitions need to be retried assert(failedStages.collect { case stage: ShuffleMapStage if stage.shuffleDep.shuffleId == shuffleId2 => stage - }.head.findMissingPartitions() == Seq(0)) + }.head.findMissingPartitions() == Seq(0, 1)) // The result stage is still waiting for its 2 tasks to complete assert(failedStages.collect { case stage: ResultStage => stage @@ -4163,9 +4347,10 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val failedStages = scheduler.failedStages.toSeq assert(failedStages.map(_.id) == Seq(1, 2)) // Shuffle blocks of "hostC" is lost, so first task of the `shuffleMapRdd2` needs to retry. + // As the ShuffleMapStage is inDeterminate all the partitions need to be retried assert(failedStages.collect { case stage: ShuffleMapStage if stage.shuffleDep.shuffleId == shuffleId2 => stage - }.head.findMissingPartitions() == Seq(0)) + }.head.findMissingPartitions() == Seq(0, 1)) // The result stage is still waiting for its 2 tasks to complete assert(failedStages.collect { case stage: ResultStage => stage @@ -5135,6 +5320,54 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti } CompletionEvent(task, reason, result, accumUpdates ++ extraAccumUpdates, metricPeaks, taskInfo) } + + private def createDagInterceptorForSpark51272(latchReleaseTask: () => Task[_], taskResult: Any): + DagSchedulerInterceptor = { + new DagSchedulerInterceptor { + val latch = new CountDownLatch(1) + override def beforeAddingDagEventToQueue(event: DAGSchedulerEvent): Unit = { + event match { + case ResubmitFailedStages => + // Before the ResubmitFailedStages is added to the queue, add the successful + // partition task completion. + runEvent(makeCompletionEvent(latchReleaseTask(), Success, taskResult)) + + case _ => + } + } + + override def afterAddingDagEventToQueue(event: DAGSchedulerEvent): Unit = { + event match { + case CompletionEvent(_, reason, result, _, _, _) => + reason match { + case Success if result == taskResult => latch.countDown() + + case _ => + } + + case _ => + } + } + + override def afterDirectProcessingOfDagEvent(event: DAGSchedulerEvent): Unit = { + event match { + case CompletionEvent(_, reason, _, _, _, _) => + reason match { + case FetchFailed(_, _, _, _, _, _) => + // Do not allow this thread to exit, till spurious sucessfull task + // ( latchRelease task gets in the queue). This would ensure that + // ResubmitFailedStages task will always be processed after the spurious task + // is processed. 
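+ // A bounded wait is used so the suite cannot hang indefinitely if the expected
+ // completion event is never queued.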
+ latch.await(50, TimeUnit.SECONDS) + + case _ => + } + + case _ => + } + } + } + } } class DAGSchedulerAbortStageOffSuite extends DAGSchedulerSuite { diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 0629c24c56dc..5b825dbe103e 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -37,6 +37,12 @@ spark-core_${scala.binary.version} ${project.version} + + org.apache.spark + spark-sql_${scala.binary.version} + ${project.version} + test + org.apache.spark spark-network-yarn_${scala.binary.version} diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 3bf6a6e84a88..7d1f4d7a989a 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -167,7 +167,9 @@ abstract class BaseYarnClusterSuite extends SparkFunSuite with Matchers { extraJars: Seq[String] = Nil, extraConf: Map[String, String] = Map(), extraEnv: Map[String, String] = Map(), - outFile: Option[File] = None): SparkAppHandle.State = { + outFile: Option[File] = None, + testTimeOutParams: TimeoutParams = TimeoutParams.DEFAULT + ): SparkAppHandle.State = { val deployMode = if (clientMode) "client" else "cluster" val propsFile = createConfFile(extraClassPath = extraClassPath, extraConf = extraConf) val env = Map( @@ -181,10 +183,12 @@ abstract class BaseYarnClusterSuite extends SparkFunSuite with Matchers { launcher.setMainClass(klass) launcher.setAppResource(fakeSparkJar.getAbsolutePath()) } + + val numExecsOpt = extraConf.get(EXECUTOR_INSTANCES.key) launcher.setSparkHome(sys.props("spark.test.home")) .setMaster("yarn") .setDeployMode(deployMode) - .setConf(EXECUTOR_INSTANCES.key, "1") + .setConf(EXECUTOR_INSTANCES.key, numExecsOpt.getOrElse("1")) .setConf(SparkLauncher.DRIVER_DEFAULT_JAVA_OPTIONS, s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}") .setPropertiesFile(propsFile) @@ -210,7 +214,8 @@ abstract class BaseYarnClusterSuite extends SparkFunSuite with Matchers { val handle = launcher.startApplication() try { - eventually(timeout(3.minutes), interval(1.second)) { + eventually(timeout(testTimeOutParams.testTimeOut), + interval(testTimeOutParams.timeOutIntervalCheck)) { assert(handle.getState().isFinal()) } } finally { @@ -295,3 +300,9 @@ abstract class BaseYarnClusterSuite extends SparkFunSuite with Matchers { } } + +case class TimeoutParams(testTimeOut: Duration, timeOutIntervalCheck: Duration) + +object TimeoutParams { + val DEFAULT = TimeoutParams(3.minutes, 1.seconds) +} diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/SparkHASuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/SparkHASuite.scala new file mode 100644 index 000000000000..4081d48fead8 --- /dev/null +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/SparkHASuite.scala @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn + +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.concurrent.duration._ + +import com.google.common.io.Files +import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.scalatest.concurrent.Eventually._ + +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{DRIVER_MEMORY, EXECUTOR_CORES, EXECUTOR_INSTANCES, EXECUTOR_MEMORY} +import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerStageSubmitted} +import org.apache.spark.sql.{DataFrame, Encoders, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.IntegerType +import org.apache.spark.tags.ExtendedYarnTest + + +@ExtendedYarnTest +class SparkHASuite extends BaseYarnClusterSuite { + override def newYarnConfig(): YarnConfiguration = new YarnConfiguration() + test("bug SPARK-51016 and SPARK-51272: Indeterminate stage retry giving wrong results") { + testBasicYarnApp( + Map( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", + SQLConf.SHUFFLE_PARTITIONS.key -> "2", + "spark.task.maxFailures" -> "8", + "spark.network.timeout" -> "100000s", + "spark.shuffle.sort.bypassMergeThreshold" -> "1", + "spark.sql.files.maxPartitionNum" -> "2", + "spark.sql.files.minPartitionNum" -> "2", + DRIVER_MEMORY.key -> "512m", + EXECUTOR_CORES.key -> "1", + EXECUTOR_MEMORY.key -> "512m", + EXECUTOR_INSTANCES.key -> "2", + "spark.ui.enabled" -> "false", + "spark.yarn.max.executor.failures" -> "100000" + )) + } + + private def testBasicYarnApp(conf: Map[String, String] = Map()): Unit = { + val result = File.createTempFile("result", null, tempDir) + val finalState = runSpark( + clientMode = true, + mainClassName(SparkHASuite.getClass), + appArgs = Seq(result.getAbsolutePath), + extraConf = conf, + testTimeOutParams = TimeoutParams(30.minutes, 30.seconds)) + checkResult(finalState, result) + } +} + +private object SparkHASuite extends Logging { + + object Counter { + var counter = 0 + var retVal = 12 + + def getHash(): Int = this.synchronized { + counter += 1 + val x = retVal + if (counter % 6 == 0) { + retVal += 1 + } + x + } + } + + private def getOuterJoinDF(spark: SparkSession) = { + import org.apache.spark.sql.functions.udf + val myudf = udf(() => Counter.getHash()).asNondeterministic() + spark.udf.register("myudf", myudf.asNondeterministic()) + + val leftOuter = spark.table("outer").select( + col("strleft"), when(isnull(col("pkLeftt")), myudf(). + cast(IntegerType)). + otherwise(col("pkLeftt")).as("pkLeft")) + + val innerRight = spark.table("inner") + + val outerjoin = leftOuter.hint("SHUFFLE_HASH"). 
+ join(innerRight, col("pkLeft") === col("pkRight"), "left_outer") + outerjoin + } + + def createBaseTables(spark: SparkSession): Unit = { + spark.sql("drop table if exists outer ") + spark.sql("drop table if exists inner ") + val data = Seq( + (java.lang.Integer.valueOf(0), "aa"), + (java.lang.Integer.valueOf(1), "aa"), + (java.lang.Integer.valueOf(1), "aa"), + (java.lang.Integer.valueOf(0), "aa"), + (java.lang.Integer.valueOf(0), "aa"), + (java.lang.Integer.valueOf(0), "aa"), + (null, "bb"), + (null, "bb"), + (null, "bb"), + (null, "bb"), + (null, "bb"), + (null, "bb") + ) + val data1 = Seq( + (java.lang.Integer.valueOf(0), "bb"), + (java.lang.Integer.valueOf(1), "bb")) + val outerDf = spark.createDataset(data)( + Encoders.tuple(Encoders.INT, Encoders.STRING)).toDF("pkLeftt", "strleft") + this.logInfo("saving outer table") + outerDf.write.format("parquet").partitionBy("strleft").saveAsTable("outer") + val innerDf = spark.createDataset(data1)( + Encoders.tuple(Encoders.INT, Encoders.STRING)).toDF("pkRight", "strright") + this.logInfo("saving inner table") + innerDf.write.format("parquet").partitionBy("strright").saveAsTable("inner") + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder() + .appName("Spark51016Suite") + .config("spark.extraListeners", classOf[JobListener].getName) + .getOrCreate() + val sc = SparkContext.getOrCreate() + + val status = new File(args(0)) + var result = "failure" + try { + createBaseTables(spark) + val outerjoin: DataFrame = getOuterJoinDF(spark) + val correctRows = outerjoin.collect() + JobListener.inKillMode = true + JobListener.killWhen = KillPosition.KILL_IN_STAGE_SUBMISSION + for (i <- 0 until 100) { + if (i > 49) { + JobListener.killWhen = KillPosition.KILL_IN_STAGE_COMPLETION + } + try { + eventually(timeout(3.minutes), interval(100.milliseconds)) { + assert(sc.getExecutorIds().size == 2) + } + val rowsAfterRetry = getOuterJoinDF(spark).collect() + if (correctRows.length != rowsAfterRetry.length) { + logInfo(s"encounterted test failure incorrect query result. 
run index = $i ") + } + assert(correctRows.length == rowsAfterRetry.length, + s"correct rows length = ${correctRows.length}," + + s" retry rows length = ${rowsAfterRetry.length}") + val retriedResults = rowsAfterRetry.toBuffer + correctRows.foreach(r => { + val index = retriedResults.indexWhere(x => + + r.getString(0) == x.getString(0) && + ( + (r.getInt(1) < 2 && r.getInt(1) == x.getInt(1) && r.getInt(2) == x.getInt(2) && + r.getString(3) == x.getString(3)) + || + (r.isNullAt(2) && r.isNullAt(3) && x.isNullAt(3) + && x.isNullAt(2)) + + )) + assert(index >= 0) + retriedResults.remove(index) + }) + assert(retriedResults.isEmpty) + logInfo(s"found successful query exec on iter index = $i") + } catch { + case se: SparkException if se.getMessage.contains("Please eliminate the" + + " indeterminacy by checkpointing the RDD before repartition and try again") => + logInfo(s"correctly encountered exception on iter index = $i") + // OK expected + } + } + result = "success" + } finally { + Files.asCharSink(status, StandardCharsets.UTF_8).write(result) + sc.stop() + } + } +} + +object PIDGetter extends Logging { + def getExecutorPIds: Seq[Int] = { + import scala.sys.process._ + val output = Seq("ps", "-ef").#|(Seq("grep", "java")).#|(Seq("grep", "executor-id ")).lazyLines + logInfo(s"pids obtained = ${output.mkString("\n")} ") + if (output.nonEmpty && output.size == 4) { + val execPidsStr = output.map(_.trim).filter(_.endsWith("--resourceProfileId 0")) + logInfo(s"filtered Pid String obtained = ${execPidsStr.mkString("\n")} ") + val pids = execPidsStr.map(str => str.split(" ")(1).toInt).sorted + Seq(pids.head, pids(1)) + } else { + Seq.empty + } + } + + def killExecutor(pid: Int): Unit = { + import scala.sys.process._ + Seq("kill", "-9", pid.toString).! + + } + + def main(args: Array[String]): Unit = { + getExecutorPIds + } +} + +private[spark] class JobListener extends SparkListener with Logging { + private var count: Int = 0 + @volatile + private var pidToKill: Option[Int] = None + + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + if (JobListener.inKillMode) { + val execids = PIDGetter.getExecutorPIds + assert(execids.size == 2) + pidToKill = Option(execids(count % 2)) + logInfo("Pid to kill = " + pidToKill) + count += 1 + } + } + + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + if (stageSubmitted.stageInfo.shuffleDepId.isEmpty && pidToKill.nonEmpty && + JobListener.killWhen == KillPosition.KILL_IN_STAGE_SUBMISSION) { + val pid = pidToKill.get + pidToKill = None + logInfo(s"killing executor for pid = $pid") + PIDGetter.killExecutor(pid) + } + } + + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { + if (stageCompleted.stageInfo.shuffleDepId.exists(_ % 2 == count % 2) && pidToKill.nonEmpty && + JobListener.killWhen == KillPosition.KILL_IN_STAGE_COMPLETION) { + val pid = pidToKill.get + pidToKill = None + logInfo(s"killing executor for pid = $pid") + PIDGetter.killExecutor(pid) + } + } +} + +object KillPosition extends Enumeration { + type KillPosition = Value + val KILL_IN_STAGE_SUBMISSION, KILL_IN_STAGE_COMPLETION, NONE = Value +} + +object JobListener { + @volatile + var inKillMode: Boolean = false + + import KillPosition._ + @volatile + var killWhen: KillPosition = NONE +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 7d4f8c3b2564..3a5beb2362af 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -115,6 +115,23 @@ abstract class Expression extends TreeNode[Expression] { */ lazy val deterministic: Boolean = children.forall(_.deterministic) + /** + * The information conveyed by hasIndeterminism differs from that conveyed by + * [[deterministic]]: an [[Attribute]] that represents an [[Expression]] whose + * [[deterministic]] flag is false will itself report [[deterministic]] as true (leaf + * expressions such as [[AttributeReference]] are always deterministic), yet it will still + * report [[hasIndeterminism]] as true, because the value the attribute stands for was + * produced by an indeterminate computation. In short, [[deterministic]] describes how the + * expression itself evaluates, while [[hasIndeterminism]] describes the nature of the value + * the [[Expression]] represents. + * @return true if the expression's evaluated value is derived from some indeterministic + * quantity. + */ + def hasIndeterminism: Boolean = _hasIndeterminism + + @transient + private lazy val _hasIndeterminism: Boolean = !deterministic || + this.references.exists(_.hasIndeterminism) + def nullable: Boolean /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala index bb67c173b946..ea092093d095 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala @@ -38,7 +38,7 @@ case class ProjectionOverSchema(schema: StructType, output: AttributeSet) { private def getProjection(expr: Expression): Option[Expression] = expr match { case a: AttributeReference if fieldNames.contains(a.name) && output.contains(a) => - Some(a.copy(dataType = schema(a.name).dataType)(a.exprId, a.qualifier)) + Some(a.copy(dataType = schema(a.name).dataType)(a.exprId, a.qualifier, a.hasIndeterminism)) case GetArrayItem(child, arrayItemOrdinal, failOnError) => getProjection(child).map { projection => GetArrayItem(projection, arrayItemOrdinal, failOnError) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 2af6a1ba84ec..f6b8a96dca97 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -111,6 +111,7 @@ abstract class Attribute extends LeafExpression with NamedExpression { @transient override lazy val references: AttributeSet = AttributeSet(this) + override def hasIndeterminism: Boolean = false def withNullability(newNullability: Boolean): Attribute def withQualifier(newQualifier: Seq[String]): Attribute def withName(newName: String): Attribute @@ -194,7 +195,8 @@ case class Alias(child: Expression, name: String)( override def toAttribute: Attribute = { if (resolved) { - AttributeReference(name, child.dataType, child.nullable, metadata)(exprId, qualifier) + AttributeReference(name, child.dataType, child.nullable, metadata)(exprId, qualifier, + this.hasIndeterminism) } else { UnresolvedAttribute.quoted(name) } @@ -274,7 +276,8 @@ case class
AttributeReference( nullable: Boolean = true, override val metadata: Metadata = Metadata.empty)( val exprId: ExprId = NamedExpression.newExprId, - val qualifier: Seq[String] = Seq.empty[String]) + val qualifier: Seq[String] = Seq.empty[String], + override val hasIndeterminism: Boolean = false) extends Attribute with Unevaluable { override lazy val treePatternBits: BitSet = AttributeReferenceTreeBits.bits @@ -312,7 +315,8 @@ case class AttributeReference( } override def newInstance(): AttributeReference = - AttributeReference(name, dataType, nullable, metadata)(qualifier = qualifier) + AttributeReference(name, dataType, nullable, metadata)(qualifier = qualifier, + hasIndeterminism = hasIndeterminism) /** * Returns a copy of this [[AttributeReference]] with changed nullability. @@ -321,7 +325,8 @@ case class AttributeReference( if (nullable == newNullability) { this } else { - AttributeReference(name, dataType, newNullability, metadata)(exprId, qualifier) + AttributeReference(name, dataType, newNullability, metadata)(exprId, qualifier, + hasIndeterminism) } } @@ -329,7 +334,7 @@ case class AttributeReference( if (name == newName) { this } else { - AttributeReference(newName, dataType, nullable, metadata)(exprId, qualifier) + AttributeReference(newName, dataType, nullable, metadata)(exprId, qualifier, hasIndeterminism) } } @@ -340,7 +345,7 @@ case class AttributeReference( if (newQualifier == qualifier) { this } else { - AttributeReference(name, dataType, nullable, metadata)(exprId, newQualifier) + AttributeReference(name, dataType, nullable, metadata)(exprId, newQualifier, hasIndeterminism) } } @@ -348,12 +353,12 @@ case class AttributeReference( if (exprId == newExprId) { this } else { - AttributeReference(name, dataType, nullable, metadata)(newExprId, qualifier) + AttributeReference(name, dataType, nullable, metadata)(newExprId, qualifier, hasIndeterminism) } } override def withMetadata(newMetadata: Metadata): AttributeReference = { - AttributeReference(name, dataType, nullable, newMetadata)(exprId, qualifier) + AttributeReference(name, dataType, nullable, newMetadata)(exprId, qualifier, hasIndeterminism) } override def withDataType(newType: DataType): AttributeReference = { @@ -361,7 +366,7 @@ case class AttributeReference( } override protected final def otherCopyArgs: Seq[AnyRef] = { - exprId :: qualifier :: Nil + exprId :: qualifier :: Boolean.box(hasIndeterminism) :: Nil } /** Used to signal the column used to calculate an eventTime watermark (e.g. 
a#1-T{delayMs}) */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 184f5a2a9485..d8da04ef6456 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -76,7 +76,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB case _ => expr.mapChildren(replace) } - private def prepareEvaluation(expression: Expression): Expression = { + private[expressions] def prepareEvaluation(expression: Expression): Expression = { val serializer = new JavaSerializer(new SparkConf()).newInstance() val resolver = ResolveTimeZone val expr = replace(resolver.resolveTimeZones(expression)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NondeterministicSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NondeterministicSuite.scala index bf1c930c0bd0..29f600347ea0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NondeterministicSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NondeterministicSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, KeyGroupedPartitioning, RangePartitioning} class NondeterministicSuite extends SparkFunSuite with ExpressionEvalHelper { test("MonotonicallyIncreasingID") { @@ -31,4 +32,45 @@ class NondeterministicSuite extends SparkFunSuite with ExpressionEvalHelper { test("InputFileName") { checkEvaluation(InputFileName(), "") } + + test("SPARK-51016: has Indeterministic Component") { + def assertIndeterminancyComponent(expression: Expression): Unit = + assert(prepareEvaluation(expression).hasIndeterminism) + + assertIndeterminancyComponent(MonotonicallyIncreasingID()) + val alias = Alias(Multiply(MonotonicallyIncreasingID(), Literal(100L)), "al1")() + assertIndeterminancyComponent(alias) + // For the attribute created from an Alias with deterministic flag false, the attribute would + // carry forward that information from Alias, via the hasIndeterminism flag value being true. 
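+ // Concretely, for the alias defined above the expected flags are:
+ //   alias.deterministic == false, alias.toAttribute.deterministic == true,
+ //   alias.toAttribute.hasIndeterminism == true.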
+ assertIndeterminancyComponent(alias.toAttribute) + // But the Attribute's deterministic flag would be true ( implying it does not carry forward + // that inDeterministic nature of evaluated quantity which Attribute represents) + assert(prepareEvaluation(alias.toAttribute).deterministic) + + assertIndeterminancyComponent(Multiply(alias.toAttribute, Literal(1000L))) + assertIndeterminancyComponent( + HashPartitioning(Seq(Multiply(MonotonicallyIncreasingID(), Literal(100L))), 5)) + assertIndeterminancyComponent(HashPartitioning(Seq(alias.toAttribute), 5)) + assertIndeterminancyComponent( + RangePartitioning(Seq(SortOrder.apply(alias.toAttribute, Descending)), 5)) + assertIndeterminancyComponent(KeyGroupedPartitioning(Seq(alias.toAttribute), 5)) + } + + test("SPARK-51016: has Deterministic Component") { + def assertNoIndeterminancyComponent(expression: Expression): Unit = + assert(!prepareEvaluation(expression).hasIndeterminism) + + assertNoIndeterminancyComponent(Literal(1000L)) + val alias = Alias(Multiply(Literal(10000L), Literal(100L)), "al1")() + assertNoIndeterminancyComponent(alias) + assertNoIndeterminancyComponent(alias.toAttribute) + assertNoIndeterminancyComponent( + HashPartitioning(Seq(Multiply(Literal(10L), Literal(100L))), 5)) + assertNoIndeterminancyComponent(HashPartitioning(Seq(alias.toAttribute), 5)) + assertNoIndeterminancyComponent( + RangePartitioning(Seq(SortOrder.apply(alias.toAttribute, Descending)), 5)) + assertNoIndeterminancyComponent(KeyGroupedPartitioning(Seq(alias.toAttribute), 5)) + } + + } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index f06e6ed137cc..062a27075185 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -69,7 +69,8 @@ trait PlanTestBase extends PredicateHelper with SQLHelper { self: Suite => protected def rewriteNameFromAttrNullability(plan: LogicalPlan): LogicalPlan = { plan.transformAllExpressions { case a @ AttributeReference(name, _, false, _) => - a.copy(name = s"*$name")(exprId = a.exprId, qualifier = a.qualifier) + a.copy(name = s"*$name")(exprId = a.exprId, qualifier = a.qualifier, + hasIndeterminism = a.hasIndeterminism) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 31a3f53eb719..2b68b2feb07e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -22,15 +22,16 @@ import java.util.function.Supplier import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.internal.config -import org.apache.spark.rdd.{RDD, RDDOperationScope} +import org.apache.spark.rdd.{DeterministicLevel, MapPartitionsRDD, RDD, RDDOperationScope} import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{ShuffleWriteMetricsReporter, ShuffleWriteProcessor} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, UnsafeProjection, UnsafeRow} +import 
org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, Expression, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering import org.apache.spark.sql.catalyst.plans.logical.Statistics @@ -43,6 +44,7 @@ import org.apache.spark.util.{MutablePair, ThreadUtils} import org.apache.spark.util.collection.unsafe.sort.{PrefixComparators, RecordComparator} import org.apache.spark.util.random.XORShiftRandom + /** * Common trait for all shuffle exchange implementations to facilitate pattern matching. */ @@ -452,19 +454,34 @@ object ShuffleExchangeExec { rdd } + val isIndeterministic = newPartitioning match { + case expr: Expression => expr.hasIndeterminism + case _ => false + } + // round-robin function is order sensitive if we don't sort the input. val isOrderSensitive = isRoundRobin && !SQLConf.get.sortBeforeRepartition if (needToCopyObjectsBeforeShuffle(part)) { - newRdd.mapPartitionsWithIndexInternal((_, iter) => { - val getPartitionKey = getPartitionKeyExtractor() - iter.map { row => (part.getPartition(getPartitionKey(row)), row.copy()) } - }, isOrderSensitive = isOrderSensitive) + this.createRddWithPartition( + newRdd, + (_, iter: Iterator[InternalRow]) => { + val getPartitionKey = getPartitionKeyExtractor() + iter.map { row => (part.getPartition(getPartitionKey(row)), row.copy()) } + }, + isIndeterministic, + isOrderSensitive + ) + } else { - newRdd.mapPartitionsWithIndexInternal((_, iter) => { - val getPartitionKey = getPartitionKeyExtractor() - val mutablePair = new MutablePair[Int, InternalRow]() - iter.map { row => mutablePair.update(part.getPartition(getPartitionKey(row)), row) } - }, isOrderSensitive = isOrderSensitive) + this.createRddWithPartition( + newRdd, + (_, iter: Iterator[InternalRow]) => { + val getPartitionKey = getPartitionKeyExtractor() + val mutablePair = new MutablePair[Int, InternalRow]() + iter.map { row => mutablePair.update(part.getPartition(getPartitionKey(row)), row) } + }, + isIndeterministic, + isOrderSensitive) } } @@ -481,6 +498,25 @@ object ShuffleExchangeExec { dependency } + private def createRddWithPartition[T: ClassTag, U: ClassTag]( + rdd: RDD[T], + f: (Int, Iterator[T]) => Iterator[U], + isInDeterminate: Boolean, + isOrderSensitive: Boolean): RDD[U] = if (isInDeterminate) { + RDDOperationScope.withScope(rdd.sparkContext) { + new MapPartitionsRDD( + rdd, + (_: TaskContext, index: Int, iter: Iterator[T]) => f(index, iter), + isOrderSensitive = isOrderSensitive) { + override protected def getOutputDeterministicLevel = + DeterministicLevel.INDETERMINATE + } + } + } else { + rdd.mapPartitionsWithIndexInternal(f, isOrderSensitive = isOrderSensitive) + } + + /** * Create a customized [[ShuffleWriteProcessor]] for SQL which wrap the default metrics reporter * with [[SQLShuffleWriteMetricsReporter]] as new reporter for [[ShuffleWriteProcessor]]. diff --git a/sql/core/src/test/scala/org/apache/spark/scheduler/ShuffleMapStageTest.scala b/sql/core/src/test/scala/org/apache/spark/scheduler/ShuffleMapStageTest.scala new file mode 100644 index 000000000000..4733ae38d918 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/scheduler/ShuffleMapStageTest.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +import org.apache.spark.sql.Encoders +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.LongType + +class ShuffleMapStageTest extends SharedSparkSession { + + test("SPARK-51016: ShuffleMapStage using indeterministic join keys should be INDETERMINATE") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val leftDfBase = spark.createDataset( + Seq((1L, "aa")))( + Encoders.tuple(Encoders.scalaLong, Encoders.STRING)).toDF("pkLeftt", "strleft") + + val rightDf = spark.createDataset( + Seq((1L, "11"), (2L, "22")))( + Encoders.tuple(Encoders.scalaLong, Encoders.STRING)).toDF("pkRight", "strright") + + val leftDf = leftDfBase.select( + col("strleft"), when(isnull(col("pkLeftt")), floor(rand() * Literal(10000000L)). + cast(LongType)). + otherwise(col("pkLeftt")).as("pkLeft")) + + val join = leftDf.hint("shuffle_hash"). + join(rightDf, col("pkLeft") === col("pkRight"), "inner") + val shuffleStages: Array[ShuffleMapStage] = Array.ofDim(2) + spark.sparkContext.addSparkListener(new SparkListener() { + var i = 0 + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + if (stageSubmitted.stageInfo.shuffleDepId.isDefined) { + shuffleStages(i) = + spark.sparkContext.dagScheduler.shuffleIdToMapStage(stageSubmitted.stageInfo.stageId) + i +=1 + } + } + }); + join.collect() + assert(shuffleStages.filter(_.isIndeterminate).size == 1) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala index ec13d48d45f8..cf339559a169 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala @@ -17,14 +17,17 @@ package org.apache.spark.sql.execution -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{DeterministicLevel, RDD} +import org.apache.spark.sql.Encoders import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Literal} import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection, UnknownPartitioning} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec +import org.apache.spark.sql.functions.{col, floor, isnull, rand, when} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -import 
org.apache.spark.sql.types.StringType +import org.apache.spark.sql.types.{LongType, StringType} class ProjectedOrderingAndPartitioningSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { @@ -210,6 +213,37 @@ class ProjectedOrderingAndPartitioningSuite assert(outputOrdering.head.child.asInstanceOf[Attribute].name == "a") assert(outputOrdering.head.sameOrderExpressions.size == 0) } + + test("SPARK-51016: ShuffleRDD using indeterministic join keys should be INDETERMINATE") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val leftDfBase = spark.createDataset( + Seq((1L, "aa"), (null, "aa"), (2L, "bb"), (null, "bb"), (3L, "cc"), (null, "cc")))( + Encoders.tupleEncoder(Encoders.LONG, Encoders.STRING)).toDF("pkLeftt", "strleft") + + val rightDf = spark.createDataset( + Seq((1L, "11"), (2L, "22"), (3L, "33")))( + Encoders.tupleEncoder(Encoders.LONG, Encoders.STRING)).toDF("pkRight", "strright") + + val leftDf = leftDfBase.select( + col("strleft"), when(isnull(col("pkLeftt")), floor(rand() * Literal(10000000L)). + cast(LongType)). + otherwise(col("pkLeftt")).as("pkLeft")) + + val join = leftDf.hint("shuffle_hash"). + join(rightDf, col("pkLeft") === col("pkRight"), "inner") + + join.collect() + val finalPlan = join.queryExecution.executedPlan + val shuffleHJExec = finalPlan.children(0).asInstanceOf[ShuffledHashJoinExec] + assert(shuffleHJExec.left.asInstanceOf[InputAdapter].execute().outputDeterministicLevel == + DeterministicLevel.INDETERMINATE) + + assert(shuffleHJExec.right.asInstanceOf[InputAdapter].execute().outputDeterministicLevel == + DeterministicLevel.UNORDERED) + + assert(shuffleHJExec.execute().outputDeterministicLevel == DeterministicLevel.INDETERMINATE) + } + } } private case class DummyLeafPlanExec(output: Seq[Attribute]) extends LeafExecNode {