[AMDGPU] Return two MMOs for load-to-lds and store-from-lds intrinsics by nhaehnle · Pull Request #175845 · llvm/llvm-project

nhaehnle · 2026-01-13T21:46:42Z

Accurately represent both the load and the store part of those
intrinsics.

The test changes seem to be mostly fairly insignificant changes caused by
subtly different scheduler behavior.

Stack:

⚠️ Part of a stack created by spr. Merging this PR using the GitHub UI may have unexpected results.

llvmbot · 2026-01-13T21:47:19Z

@llvm/pr-subscribers-backend-amdgpu

Author: Nicolai Hähnle (nhaehnle)

Changes

Accurately represent both the load and the store part of those
intrinsics.

The test changes seem to be mostly fairly insignificant changes caused by
subtly different scheduler behavior.

Stack:

[4/4] #175846
[3/4] #175845 ⬅
[2/4] #175844
[1/4] #175843

⚠️ Part of a stack created by spr. Merging this PR using the GitHub UI may have unexpected results.

Full diff: https://github.com/llvm/llvm-project/pull/175845.diff

4 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+56-51)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll (+2-4)
(modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll (+2-4)
(modified) llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll (+3-4)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cc239752cd453..5224f2f093755 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1440,8 +1440,23 @@ void SITargetLowering::getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
       case Intrinsic::amdgcn_struct_buffer_load_lds:
       case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
         unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
+
+        // Entry 0: Load from buffer.
+        // Don't set an offset, since the pointer value always represents the
+        // base of the buffer.
         Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
-        Info.ptrVal = CI.getArgOperand(1);
+        Info.flags &= ~MachineMemOperand::MOStore;
+        Infos.push_back(Info);
+
+        // Entry 1: Store to LDS.
+        // Instruction offset is applied, and an additional per-lane offset
+        // which we simulate using a larger memory type.
+        Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8 * 64);
+        Info.ptrVal = CI.getArgOperand(1); // LDS destination pointer
+        Info.offset = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 2))->getZExtValue();
+        Info.fallbackAddressSpace = AMDGPUAS::LOCAL_ADDRESS;
+        Info.flags &= ~MachineMemOperand::MOLoad;
+        Info.flags |= MachineMemOperand::MOStore;
         Infos.push_back(Info);
         return;
       }
@@ -1634,10 +1649,18 @@ void SITargetLowering::getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
   case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
   case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
   case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
+    // Entry 0: Load from source (global/flat).
     Info.opc = ISD::INTRINSIC_VOID;
     Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
-    Info.ptrVal = CI.getArgOperand(1);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(0); // Global pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;
+    Infos.push_back(Info);
+
+    // Entry 1: Store to LDS (same offset).
+    Info.flags &= ~MachineMemOperand::MOLoad;
+    Info.flags |= MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(1); // LDS pointer
     Infos.push_back(Info);
     return;
   }
@@ -1645,24 +1668,45 @@ void SITargetLowering::getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
   case Intrinsic::amdgcn_global_store_async_from_lds_b32:
   case Intrinsic::amdgcn_global_store_async_from_lds_b64:
   case Intrinsic::amdgcn_global_store_async_from_lds_b128: {
+    // Entry 0: Load from LDS.
     Info.opc = ISD::INTRINSIC_VOID;
     Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
-    Info.ptrVal = CI.getArgOperand(0);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(1); // LDS pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;
+    Infos.push_back(Info);
+
+    // Entry 1: Store to global (same offset).
+    Info.flags &= ~MachineMemOperand::MOLoad;
+    Info.flags |= MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(0); // Global pointer
     Infos.push_back(Info);
     return;
   }
   case Intrinsic::amdgcn_load_to_lds:
   case Intrinsic::amdgcn_global_load_lds: {
-    Info.opc = ISD::INTRINSIC_VOID;
     unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
-    Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
-    Info.ptrVal = CI.getArgOperand(1);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
     auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
-    if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
+    bool IsVolatile = Aux->getZExtValue() & AMDGPU::CPol::VOLATILE;
+
+    // Entry 0: Load from source (global/flat).
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
+    Info.ptrVal = CI.getArgOperand(0); // Source pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(3))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;
+    if (IsVolatile)
       Info.flags |= MachineMemOperand::MOVolatile;
     Infos.push_back(Info);
+
+    // Entry 1: Store to LDS.
+    // Same offset from the instruction, but an additional per-lane offset is
+    // added. Represent that using a wider memory type.
+    Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8 * 64);
+    Info.ptrVal = CI.getArgOperand(1); // LDS destination pointer
+    Info.flags &= ~MachineMemOperand::MOLoad;
+    Info.flags |= MachineMemOperand::MOStore;
+    Infos.push_back(Info);
     return;
   }
   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
@@ -11184,7 +11228,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   SDLoc DL(Op);
   SDValue Chain = Op.getOperand(0);
   unsigned IntrinsicID = Op.getConstantOperandVal(1);
-  MachineFunction &MF = DAG.getMachineFunction();
 
   switch (IntrinsicID) {
   case Intrinsic::amdgcn_exp_compr: {
@@ -11459,29 +11502,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     Ops.push_back(M0Val.getValue(1));                            // Glue
 
     auto *M = cast<MemSDNode>(Op);
-    MachineMemOperand *LoadMMO = M->getMemOperand();
-    // Don't set the offset value here because the pointer points to the base of
-    // the buffer.
-    MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
-
-    MachinePointerInfo StorePtrI = LoadPtrI;
-    LoadPtrI.V = PoisonValue::get(
-        PointerType::get(*DAG.getContext(), AMDGPUAS::BUFFER_RESOURCE));
-    LoadPtrI.AddrSpace = AMDGPUAS::BUFFER_RESOURCE;
-    StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
-
-    auto F = LoadMMO->getFlags() &
-             ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
-    LoadMMO =
-        MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, Size,
-                                LoadMMO->getBaseAlign(), LoadMMO->getAAInfo());
-
-    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
-        StorePtrI, F | MachineMemOperand::MOStore, sizeof(int32_t),
-        LoadMMO->getBaseAlign(), LoadMMO->getAAInfo());
-
     auto *Load = DAG.getMachineNode(Opc, DL, M->getVTList(), Ops);
-    DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
+    DAG.setNodeMemRefs(Load, M->memoperands());
 
     return SDValue(Load, 0);
   }
@@ -11563,25 +11585,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     Ops.push_back(M0Val.getValue(1)); // Glue
 
     auto *M = cast<MemSDNode>(Op);
-    MachineMemOperand *LoadMMO = M->getMemOperand();
-    MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
-    LoadPtrI.Offset = Op->getConstantOperandVal(5);
-    MachinePointerInfo StorePtrI = LoadPtrI;
-    LoadPtrI.V = PoisonValue::get(
-        PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS));
-    LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
-    StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
-    auto F = LoadMMO->getFlags() &
-             ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
-    LoadMMO =
-        MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, Size,
-                                LoadMMO->getBaseAlign(), LoadMMO->getAAInfo());
-    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
-        StorePtrI, F | MachineMemOperand::MOStore, sizeof(int32_t), Align(4),
-        LoadMMO->getAAInfo());
-
     auto *Load = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
-    DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
+    DAG.setNodeMemRefs(Load, M->memoperands());
 
     return SDValue(Load, 0);
   }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll
index 352af044b0a6d..f66ad928d261d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.to.lds.ll
@@ -267,9 +267,8 @@ main_body:
 define amdgpu_ps void @buffer_load_lds_dword_volatile(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
 ; GFX90A-LABEL: buffer_load_lds_dword_volatile:
 ; GFX90A:       ; %bb.0: ; %main_body
-; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX90A-NEXT:    s_mov_b32 m0, s5
-; GFX90A-NEXT:    s_nop 0
+; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX90A-NEXT:    buffer_load_dword v0, s[0:3], 0 offen glc lds
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:256 lds
@@ -278,9 +277,8 @@ define amdgpu_ps void @buffer_load_lds_dword_volatile(ptr addrspace(7) nocapture
 ;
 ; GFX942-LABEL: buffer_load_lds_dword_volatile:
 ; GFX942:       ; %bb.0: ; %main_body
-; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX942-NEXT:    s_mov_b32 m0, s5
-; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX942-NEXT:    buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:256 lds
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll
index 0b40c81af414d..14d2e4ca5f2c3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-lds-dma-volatile-and-nontemporal.ll
@@ -340,18 +340,16 @@ define amdgpu_ps void @load_to_lds_p7_dword_volatile(ptr addrspace(7) inreg %gpt
 define amdgpu_ps void @load_to_lds_p7_dword_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
 ; GFX90A-LABEL: load_to_lds_p7_dword_nontemporal:
 ; GFX90A:       ; %bb.0:
-; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX90A-NEXT:    s_mov_b32 m0, s5
-; GFX90A-NEXT:    s_nop 0
+; GFX90A-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX90A-NEXT:    buffer_load_dword v0, s[0:3], 0 offen glc slc lds
 ; GFX90A-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
 ; GFX90A-NEXT:    s_endpgm
 ;
 ; GFX942-LABEL: load_to_lds_p7_dword_nontemporal:
 ; GFX942:       ; %bb.0:
-; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX942-NEXT:    s_mov_b32 m0, s5
-; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX942-NEXT:    buffer_load_dword v0, s[0:3], 0 offen nt lds
 ; GFX942-NEXT:    buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
 ; GFX942-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll
index a00aca34252b1..74fddfe290818 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-unscoped.ll
@@ -11,15 +11,14 @@ define amdgpu_kernel void @test_waitcnt(ptr addrspace(1) %global_buffer, ptr add
 ; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_load_dword s6, s[0:1], 0x0
 ; CHECK-NEXT:    s_add_u32 s4, s0, 64
 ; CHECK-NEXT:    s_addc_u32 s5, s1, 0
 ; CHECK-NEXT:    s_mov_b32 m0, s2
-; CHECK-NEXT:    s_nop 0
-; CHECK-NEXT:    global_load_lds_dword v0, s[4:5] offset:4
-; CHECK-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v3, s4
+; CHECK-NEXT:    v_mov_b32_e32 v3, s6
 ; CHECK-NEXT:    global_store_dword v0, v3, s[0:1] offset:64
+; CHECK-NEXT:    global_load_lds_dword v0, s[4:5] offset:4
 ; CHECK-NEXT:    ; sched_barrier mask(0x00000000)
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s2
 ; CHECK-NEXT:    v_mov_b32_e32 v2, s3

krzysz00

Thanks for tackling this - one note

krzysz00 · 2026-01-13T22:39:36Z

+        // Entry 1: Store to LDS.
+        // Instruction offset is applied, and an additional per-lane offset
+        // which we simulate using a larger memory type.
+        Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8 * 64);


This exists on gfx10, where the width is 32?

Ugh, you're right. To be precise, it may be either 32 or 64.

krzysz00 · 2026-01-13T22:41:00Z

-    LoadPtrI.V = PoisonValue::get(
-        PointerType::get(*DAG.getContext(), AMDGPUAS::GLOBAL_ADDRESS));
-    LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
-    StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;


Huzzah! It's finally gone!

krzysz00 · 2026-01-13T22:42:40Z

-; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0
 ; GFX942-NEXT:    s_mov_b32 m0, s5
-; GFX942-NEXT:    s_nop 0
+; GFX942-NEXT:    v_add_u32_e32 v0, s4, v0


That's a nice side-effect of this

choikwa · 2026-01-14T00:58:52Z

-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.ptrVal = CI.getArgOperand(0); // Global pointer
+    Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+    Info.flags |= MachineMemOperand::MOLoad;


Info.flags &= ~MachineMemOperand::MOStore;

or is it unnecessary?

The initial Info.flags doesn't set either Load or Store.

choikwa · 2026-01-14T00:59:07Z

+        // base of the buffer.
        Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
-        Info.ptrVal = CI.getArgOperand(1);
+        Info.flags &= ~MachineMemOperand::MOStore;


Info.flags |= MachineMemOperand::MOLoad; ?

The initial Info.flags sets both Load and Store.

Why not just assign store then? Assume both is a conservative default, but I don't expect targets to be trying to read information from the default

There are other flags in play here, including ones based on metadata that are setup in the beginning.

I was trying to keep the change minimal, but I agree that the code is confusing as-is. That said, looking into cleaning it up has uncovered a whole bunch of weirdness and inconsistencies, so I'm going to follow up with a separate change to refactor this.

krzysz00

lgtm

arsenm · 2026-01-22T13:03:39Z

+    Info.memVT =
+        EVT::getIntegerVT(CI.getContext(), Width * 8 * ST.getWavefrontSize());
+    Info.ptrVal = CI.getArgOperand(1); // LDS destination pointer
+    Info.flags &= ~MachineMemOperand::MOLoad;


Same as above, just assign the correct values instead of trying to read whatever happened to be the default

Same note as above.

krzysz00 · 2026-01-22T15:59:25Z

Re assigning vs notification - I think the interesting case is of the cache policy calls for volatile - we don't want to remove that and do want to make sure to add it

... Heck, can we get a test for a volatile global load to LDS (cache policy 1 << 31)

Accurately represent both the load and the store part of those intrinsics. The test changes seem to be mostly fairly insignificant changes caused by subtly different scheduler behavior. commit-id:0269189c

nhaehnle · 2026-02-03T02:15:53Z

Re assigning vs notification - I think the interesting case is of the cache policy calls for volatile - we don't want to remove that and do want to make sure to add it

... Heck, can we get a test for a volatile global load to LDS (cache policy 1 << 31)

What do you have in mind? This stuff is notoriously difficult to test.

I tried to make this change as uninvasive as possible, but I agree that the current flow of the flags is unfortunate. I'll follow up with a change to clean this up. It's a bit non-trivial since looking over it revealed some weirdness and inconsistencies that we'd better fix at the same time.

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

llvm#175845) Accurately represent both the load and the store part of those intrinsics. The test changes seem to be mostly fairly insignificant changes caused by subtly different scheduler behavior.

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

This was referenced Jan 13, 2026

[CodeGen] Remove legacy getTgtMemIntrinsic overload #175846

Merged

[CodeGen] Refactor targets to override the new getTgtMemIntrinsic overload (NFC) #175844

Merged

[CodeGen] Add getTgtMemIntrinsic overload for multiple memory operands (NFC) #175843

Merged

llvmbot added the backend:AMDGPU label Jan 13, 2026

nhaehnle force-pushed the users/nhaehnle/spr/main/c7770af6 branch from 48931ab to 963ecef Compare January 13, 2026 21:47

nhaehnle force-pushed the users/nhaehnle/spr/main/0269189c branch from 1d72df0 to b585dcf Compare January 13, 2026 21:47

nhaehnle requested review from arsenm, choikwa, jayfoad and krzysz00 and removed request for krzysz00 January 13, 2026 21:49

krzysz00 reviewed Jan 13, 2026

View reviewed changes

choikwa reviewed Jan 14, 2026

View reviewed changes

nhaehnle force-pushed the users/nhaehnle/spr/main/c7770af6 branch from 963ecef to 1c445d8 Compare January 14, 2026 22:43

nhaehnle force-pushed the users/nhaehnle/spr/main/0269189c branch from b585dcf to 4fbc3c6 Compare January 14, 2026 22:43

krzysz00 approved these changes Jan 21, 2026

View reviewed changes

arsenm reviewed Jan 22, 2026

View reviewed changes

nhaehnle force-pushed the users/nhaehnle/spr/main/0269189c branch from 4fbc3c6 to 8a444c4 Compare February 2, 2026 21:14

nhaehnle force-pushed the users/nhaehnle/spr/main/c7770af6 branch from 1c445d8 to 940398a Compare February 2, 2026 21:14

Base automatically changed from users/nhaehnle/spr/main/c7770af6 to main February 3, 2026 01:40

[AMDGPU] Return two MMOs for load-to-lds and store-from-lds intrinsics

8050ace

Accurately represent both the load and the store part of those intrinsics. The test changes seem to be mostly fairly insignificant changes caused by subtly different scheduler behavior. commit-id:0269189c

nhaehnle force-pushed the users/nhaehnle/spr/main/0269189c branch from 8a444c4 to 8050ace Compare February 3, 2026 02:13

arsenm approved these changes Feb 3, 2026

View reviewed changes

nhaehnle mentioned this pull request Feb 3, 2026

AMDGPU: Cleanup the handling of flags in getTgtMemIntrinsic #179469

Merged

nhaehnle merged commit 3e1e86e into main Feb 4, 2026
13 of 17 checks passed

nhaehnle deleted the users/nhaehnle/spr/main/0269189c branch February 4, 2026 20:29

Muzammiluddin-Syed-ECE added a commit to iree-org/llvm-project that referenced this pull request Feb 5, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

1077109

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

Muzammiluddin-Syed-ECE added a commit to iree-org/llvm-project that referenced this pull request Feb 6, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

a9ba0fa

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

rkayaith pushed a commit to iree-org/llvm-project that referenced this pull request Feb 9, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

896b733

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

rkayaith pushed a commit to iree-org/llvm-project that referenced this pull request Feb 9, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

741d6f8

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

rkayaith pushed a commit to iree-org/llvm-project that referenced this pull request Feb 9, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

5322774

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

rkayaith pushed a commit to iree-org/llvm-project that referenced this pull request Feb 10, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

2516e6c

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

rkayaith pushed a commit to iree-org/llvm-project that referenced this pull request Feb 10, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

1e1471e

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

rkayaith pushed a commit to iree-org/llvm-project that referenced this pull request Feb 11, 2026

Revert "[AMDGPU] Return two MMOs for load-to-lds and store-from-lds i…

62dbb8e

…ntrinsics (llvm#175845)" This reverts commit 3e1e86e.

Conversation

nhaehnle commented Jan 13, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jan 13, 2026

Uh oh!

krzysz00 left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

krzysz00 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

krzysz00 commented Jan 22, 2026

Uh oh!

nhaehnle commented Feb 3, 2026

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

nhaehnle commented Jan 13, 2026 •

edited

Loading