diff --git a/cmake/llvm-hash.txt b/cmake/llvm-hash.txt
index 46ef2941aa59..4faf2ea91f28 100644
--- a/cmake/llvm-hash.txt
+++ b/cmake/llvm-hash.txt
@@ -1 +1 @@
-979132a02d146ec79e2f046e31877516d7f32d20
+ac5dc54d509169d387fcfd495d71853d81c46484
diff --git a/python/src/llvm.cc b/python/src/llvm.cc
index 2941d87f97c2..fa93102ff7ba 100644
--- a/python/src/llvm.cc
+++ b/python/src/llvm.cc
@@ -133,6 +133,7 @@ createTargetMachine(llvm::Module *module, std::string proc,
   bool disableLLVMOpt = mlir::triton::tools::getBoolEnv("DISABLE_LLVM_OPT");
   if (enable_fp_fusion)
     opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
+  opt.NoInfsFPMath = false;
   opt.NoNaNsFPMath = true;
   opt.TrapUnreachable = true;
   opt.MCOptions.AsmVerbose = true;
diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py
index 46fcb3d6a2f6..9a70966edfc6 100644
--- a/python/test/unit/language/test_core.py
+++ b/python/test/unit/language/test_core.py
@@ -1382,9 +1382,7 @@ def kernel(X, Z):
     # atom.add.bf16 is unsupported prior to Hopper so instead we generate an
     # atom.cas add loop on Ampere and prior
     if dst_type == 'bfloat16' and torch.cuda.get_device_capability()[0] < 9:
-        assert "atom.relaxed.gpu.global.cas" in h.asm["ptx"]
-        if sem_str != "relaxed":
-            assert "fence.acq_rel.gpu" in h.asm["ptx"]
+        assert f"atom.{sem_str}.gpu.global.cas" in h.asm["ptx"]
         return
 
     assert f"atom.global.gpu.{sem_str}" in h.asm["ptx"]