diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
index 086f1a4555db..a7f88042d3df 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTensorTile.cpp
@@ -144,6 +144,9 @@ class TileConsumerAndFuseInputProducer final
       auto tilingOp = sliceOp.getSource().getDefiningOp<TilingInterface>();
       if (!tilingOp)
         continue;
+      if (isa<tensor::PadOp>(sliceOp.getSource().getDefiningOp())) {
+        continue;
+      }
       // Restrict to fully parallel ops for now for simplicity.
       auto isParallel = [](utils::IteratorType it) {
         return linalg::isParallelIterator(it);
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp
index 6380fa0fba7c..b8019278836d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUTile.cpp
@@ -117,11 +117,18 @@ static LogicalResult tileAndDistributeToThreads(TilingInterface consumerOp,
   IRRewriter rewriter(context);
   SmallVector<OpFoldResult> tileSizesOfr =
       getAsIndexOpFoldResult(context, tileSizes);
+  scf::SCFTilingOptions tilingOptions;
+  tilingOptions.setTileSizes(tileSizesOfr);
+  scf::SCFTileAndFuseOptions tileAndFuseOptions;
+  tileAndFuseOptions.setTilingOptions(tilingOptions);
+  tileAndFuseOptions.setFusionControlFn(
+      [](tensor::ExtractSliceOp sliceOp, OpResult origProducer,
+         bool isDestinationOperand) -> std::tuple<bool, bool> {
+        return {!isa<tensor::PadOp>(origProducer.getOwner()), false};
+      });
   FailureOr<scf::SCFTileAndFuseResult> tileAndFuseResult =
-      scf::tileConsumerAndFuseProducersUsingSCF(
-          rewriter, consumerOp,
-          scf::SCFTileAndFuseOptions().setTilingOptions(
-              scf::SCFTilingOptions().setTileSizes(tileSizesOfr)));
+      scf::tileConsumerAndFuseProducersUsingSCF(rewriter, consumerOp,
+                                                tileAndFuseOptions);
 
   if (failed(tileAndFuseResult)) {
     return consumerOp.emitOpError("failed tiling and fusing producers");
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
index 154ab37ecc03..659949ab9d06 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/DispatchABI.cpp
@@ -546,7 +546,7 @@ HALDispatchABI::buildScopeAttr(mlir::ModuleOp moduleOp,
                                      /*scopeline=*/1,
                                      LLVM::DISubprogramFlags::Definition |
                                          LLVM::DISubprogramFlags::Optimized,
-                                     subroutineTypeAttr);
+                                     subroutineTypeAttr, /*retainedNodes=*/{});
 }
 
 // Returns the most local DISubprogramAttr starting from |forOp|.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
index 40c8314ba7f5..ce7e3b7d5fc6 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndFuse.cpp
@@ -183,6 +183,9 @@ LogicalResult applyTileAndFuse(RewriterBase &rewriter, Operation *rootOp,
     // Traverse the slices in BFS fashion.
     tensor::ExtractSliceOp candidateSliceOp = candidates.front();
     candidates.pop_front();
+    if (candidateSliceOp.getSource().getDefiningOp<tensor::PadOp>()) {
+      continue;
+    }
 
     // Materialize the slice of the producer in place.
     std::optional<scf::SCFFuseProducerOfSliceResult> fusedProducer =
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 085448c918aa..f6935c777f67 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 085448c918aa3b730cdd3e497892cfeff0ed60a6
+Subproject commit f6935c777f675490ecb2327887dbac5c7d7fce1f