[WebAssembly][FastISel] Fold i64 extension chains into widened loads #187934
ParkHanbum merged 6 commits into llvm:main
Conversation
Use the originating LoadInst to determine the folded load width instead of re-decoding it from the extension opcode. This keeps the existing behavior while simplifying getFoldedLoadOpcode: the machine opcode is now only used to identify foldable sext patterns and whether they produce an i32 or i64 result.
A narrow sign extension to i64 can be lowered as an unsigned narrow load, an i64.extend_i32_u of the loaded i32, followed by i64.extend8_s or i64.extend16_s. Teach load folding to recognize this explicit promoted sign-extension chain and fold it directly into i64.load8_s / i64.load16_s.
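For reference, this is the chain as it appears in the sext_i8_i64 case of load-ext.ll (the check-line updates are in the diff below); the comments sketch the wasm emitted before and after the fold:

```llvm
; Before: i32.load8_u + i64.extend_i32_u + i64.extend8_s
; After:  a single i64.load8_s
define i64 @sext_i8_i64(ptr %p) {
  %v = load i8, ptr %p
  %e = sext i8 %v to i64
  ret i64 %e
}
```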
When extending a loaded i32 value to i64, FastISel can materialize a COPY before emitting i64.extend_i32_u or i64.extend_i32_s. Recognize this single-use COPY + extend chain during load folding and emit i64.load32_u / i64.load32_s directly instead.
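A minimal IR shape that should reach this path, assuming FastISel selects the i32 load and then widens it (the intermediate COPY exists only in the generated MIR, not in the IR; the function name is illustrative):

```llvm
; FastISel emits: i32.load + COPY + i64.extend_i32_u
; With the fold:  i64.load32_u (i64.load32_s for the sext variant)
define i64 @zext_i32_i64(ptr %p) {
  %v = load i32, ptr %p
  %e = zext i32 %v to i64
  ret i64 %e
}
```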
FastISel already folds the mask generated by zeroExtendToI32 into a narrow unsigned load, but the outer i64.extend_i32_u remains. Teach the AND matcher to look through a single-use outer i64 extension and fold the whole chain into i64.load8_u / i64.load16_u.
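The zext_i8_i64 case in load-ext.ll exercises exactly this chain; the comments sketch the sequences from its check lines:

```llvm
; Before: the 0xff mask is folded into i32.load8_u, but i64.extend_i32_u remains
; After:  a single i64.load8_u
define i64 @zext_i8_i64(ptr %p) {
  %v = load i8, ptr %p
  %e = zext i8 %v to i64
  ret i64 %e
}
```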
FastISel can already fold shift-based sign-extension patterns into signed i32 loads, but the outer i64.extend_i32_s still remains. Teach the shift matcher to look through a single-use outer sign-extend and fold the full chain into i64.load8_s / i64.load16_s.
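The same sext IR as above illustrates this case on MVP targets (the -MVP check lines in load-ext.ll), where the lack of sign-extension operators forces the shift-based lowering:

```llvm
; MVP before: shifts folded into i32.load8_s, but i64.extend_i32_s remains
; MVP after:  a single i64.load8_s
define i64 @sext_i8_i64(ptr %p) {
  %v = load i8, ptr %p
  %e = sext i8 %v to i64
  ret i64 %e
}
```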
@llvm/pr-subscribers-backend-webassembly

Author: hanbeom (ParkHanbum)

Changes

FastISel can already fold some sign- and zero-extending loads, but a number of i64 extension patterns still leave redundant instructions behind.

This patch series extends load folding to recognize several such cases, including:
- promoted sign-extension chains
- copy + i64.extend_i32_{u,s} chains
- AND-based zero-extension chains
- shift-based sign-extension chains

When these patterns originate from narrow integer loads, fold them directly into widened i64 loads instead of materializing intermediate i32 loads followed by redundant i64 extends.

Fixes #179672

Patch is 27.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187934.diff

2 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 9acf7f5113285..b851fe38bec6b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1284,89 +1284,117 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
return true;
}
-static unsigned getSExtLoadOpcode(unsigned Opc, bool A64) {
- switch (Opc) {
- default:
- return WebAssembly::INSTRUCTION_LIST_END;
- case WebAssembly::I32_EXTEND8_S_I32:
- Opc = A64 ? WebAssembly::LOAD8_S_I32_A64 : WebAssembly::LOAD8_S_I32_A32;
- break;
- case WebAssembly::I32_EXTEND16_S_I32:
- Opc = A64 ? WebAssembly::LOAD16_S_I32_A64 : WebAssembly::LOAD16_S_I32_A32;
- break;
- case WebAssembly::I64_EXTEND8_S_I64:
- Opc = A64 ? WebAssembly::LOAD8_S_I64_A64 : WebAssembly::LOAD8_S_I64_A32;
- break;
- case WebAssembly::I64_EXTEND16_S_I64:
- Opc = A64 ? WebAssembly::LOAD16_S_I64_A64 : WebAssembly::LOAD16_S_I64_A32;
- break;
- case WebAssembly::I64_EXTEND32_S_I64:
- case WebAssembly::I64_EXTEND_S_I32:
- Opc = A64 ? WebAssembly::LOAD32_S_I64_A64 : WebAssembly::LOAD32_S_I64_A32;
- break;
- }
-
- return Opc;
-}
-
-static unsigned getZExtLoadOpcodeFromAnd(MachineInstr *MI,
- MachineRegisterInfo &MRI,
- const LoadInst *LI, bool A64) {
- uint64_t Mask = 0;
- bool IsConstant = false;
- for (unsigned I = 1; I <= 2; ++I) {
- Register Reg = MI->getOperand(I).getReg();
- MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
- if (DefMI && (DefMI->getOpcode() == WebAssembly::CONST_I32 ||
- DefMI->getOpcode() == WebAssembly::CONST_I64)) {
- Mask = DefMI->getOperand(1).getImm();
- IsConstant = true;
- break;
+static unsigned getSExtLoadOpcode(unsigned LoadSize, bool I64Result, bool A64) {
+ if (I64Result) {
+ switch (LoadSize) {
+ default:
+ return WebAssembly::INSTRUCTION_LIST_END;
+ case 8:
+ return A64 ? WebAssembly::LOAD8_S_I64_A64 : WebAssembly::LOAD8_S_I64_A32;
+ case 16:
+ return A64 ? WebAssembly::LOAD16_S_I64_A64
+ : WebAssembly::LOAD16_S_I64_A32;
+ case 32:
+ return A64 ? WebAssembly::LOAD32_S_I64_A64
+ : WebAssembly::LOAD32_S_I64_A32;
}
}
- if (!IsConstant)
- return WebAssembly::INSTRUCTION_LIST_END;
-
- unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
- if (Mask != llvm::maskTrailingOnes<uint64_t>(LoadSize))
+ switch (LoadSize) {
+ default:
return WebAssembly::INSTRUCTION_LIST_END;
+ case 8:
+ return A64 ? WebAssembly::LOAD8_S_I32_A64 : WebAssembly::LOAD8_S_I32_A32;
+ case 16:
+ return A64 ? WebAssembly::LOAD16_S_I32_A64 : WebAssembly::LOAD16_S_I32_A32;
+ }
+}
- if (MI->getOpcode() == WebAssembly::AND_I32) {
- if (LoadSize == 8)
- return A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32;
- if (LoadSize == 16)
- return A64 ? WebAssembly::LOAD16_U_I32_A64
- : WebAssembly::LOAD16_U_I32_A32;
- } else if (MI->getOpcode() == WebAssembly::AND_I64) {
- if (LoadSize == 8)
+static unsigned getZExtLoadOpcode(unsigned LoadSize, bool I64Result, bool A64) {
+ if (I64Result) {
+ switch (LoadSize) {
+ default:
+ return WebAssembly::INSTRUCTION_LIST_END;
+ case 8:
return A64 ? WebAssembly::LOAD8_U_I64_A64 : WebAssembly::LOAD8_U_I64_A32;
- if (LoadSize == 16)
+ case 16:
return A64 ? WebAssembly::LOAD16_U_I64_A64
: WebAssembly::LOAD16_U_I64_A32;
- if (LoadSize == 32)
+ case 32:
return A64 ? WebAssembly::LOAD32_U_I64_A64
: WebAssembly::LOAD32_U_I64_A32;
+ }
}
- return WebAssembly::INSTRUCTION_LIST_END;
+ switch (LoadSize) {
+ default:
+ return WebAssembly::INSTRUCTION_LIST_END;
+ case 8:
+ return A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32;
+ case 16:
+ return A64 ? WebAssembly::LOAD16_U_I32_A64 : WebAssembly::LOAD16_U_I32_A32;
+ }
}
-static unsigned getFoldedLoadOpcode(MachineInstr *MI, MachineRegisterInfo &MRI,
- const LoadInst *LI, bool A64) {
- switch (MI->getOpcode()) {
+static bool isFoldableSExtOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
case WebAssembly::I32_EXTEND8_S_I32:
case WebAssembly::I32_EXTEND16_S_I32:
case WebAssembly::I64_EXTEND8_S_I64:
case WebAssembly::I64_EXTEND16_S_I64:
case WebAssembly::I64_EXTEND32_S_I64:
case WebAssembly::I64_EXTEND_S_I32:
- return getSExtLoadOpcode(MI->getOpcode(), A64);
- case WebAssembly::AND_I32:
- case WebAssembly::AND_I64:
- return getZExtLoadOpcodeFromAnd(MI, MRI, LI, A64);
+ return true;
+ }
+}
+
+static bool isI64SExtResult(unsigned Opc) {
+ switch (Opc) {
default:
- return WebAssembly::INSTRUCTION_LIST_END;
+ llvm_unreachable("unexpected opcode");
+ case WebAssembly::I32_EXTEND8_S_I32:
+ case WebAssembly::I32_EXTEND16_S_I32:
+ return false;
+ case WebAssembly::I64_EXTEND8_S_I64:
+ case WebAssembly::I64_EXTEND16_S_I64:
+ case WebAssembly::I64_EXTEND32_S_I64:
+ case WebAssembly::I64_EXTEND_S_I32:
+ return true;
+ }
+}
+
+static unsigned getFoldedLoadOpcode(MachineInstr *MI, MachineRegisterInfo &MRI,
+ const LoadInst *LI, bool A64) {
+ unsigned Opc = MI->getOpcode();
+
+ if (isFoldableSExtOpcode(Opc)) {
+ unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
+ return getSExtLoadOpcode(LoadSize, isI64SExtResult(Opc), A64);
+ }
+
+ return WebAssembly::INSTRUCTION_LIST_END;
+}
+
+static unsigned getFoldedI64LoadOpcode(Register DestReg, const LoadInst *LI,
+ MachineRegisterInfo &MRI, bool A64,
+ MachineInstr *&OuterUserMI,
+ unsigned NarrowOpc) {
+ if (!MRI.hasOneNonDBGUse(DestReg))
+ return NarrowOpc;
+
+ MachineInstr *UserMI = &*MRI.use_instr_nodbg_begin(DestReg);
+ unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
+ switch (UserMI->getOpcode()) {
+ case WebAssembly::I64_EXTEND_U_I32:
+ OuterUserMI = UserMI;
+ return getZExtLoadOpcode(LoadSize, /*I64Result=*/true, A64);
+ case WebAssembly::I64_EXTEND_S_I32:
+ OuterUserMI = UserMI;
+ return getSExtLoadOpcode(LoadSize, /*I64Result=*/true, A64);
+ default:
+ return NarrowOpc;
}
}
@@ -1379,7 +1407,8 @@ static unsigned getFoldedLoadOpcode(MachineInstr *MI, MachineRegisterInfo &MRI,
/// size (32 - LoadBitWidth).
static unsigned matchFoldableShift(MachineInstr *MI, const LoadInst *LI,
MachineRegisterInfo &MRI, bool A64,
- MachineInstr *&UserMI) {
+ MachineInstr *&UserMI,
+ MachineInstr *&OuterUserMI) {
unsigned Opc = MI->getOpcode();
unsigned NewOpc = WebAssembly::INSTRUCTION_LIST_END;
if (Opc != WebAssembly::SHL_I32)
@@ -1410,13 +1439,104 @@ static unsigned matchFoldableShift(MachineInstr *MI, const LoadInst *LI,
if (!IsExpectedConst(ShlAmtDef) || !IsExpectedConst(ShrAmtDef))
return NewOpc;
- if (LoadTy->isIntegerTy(8))
- NewOpc = A64 ? WebAssembly::LOAD8_S_I32_A64 : WebAssembly::LOAD8_S_I32_A32;
- else if (LoadTy->isIntegerTy(16))
- NewOpc =
- A64 ? WebAssembly::LOAD16_S_I32_A64 : WebAssembly::LOAD16_S_I32_A32;
+ unsigned LoadSize = LoadTy->getIntegerBitWidth();
+ unsigned NarrowOpc = getSExtLoadOpcode(LoadSize, /*I64Result=*/false, A64);
+ if (NarrowOpc == WebAssembly::INSTRUCTION_LIST_END)
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ return getFoldedI64LoadOpcode(UserMI->getOperand(0).getReg(), LI, MRI, A64,
+ OuterUserMI, NarrowOpc);
+}
+
+static unsigned matchFoldableSExtFromPromotedI32(MachineInstr *MI,
+ const LoadInst *LI,
+ MachineRegisterInfo &MRI,
+ bool A64,
+ MachineInstr *&UserMI) {
+ if (MI->getOpcode() != WebAssembly::I64_EXTEND_U_I32)
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
+ Register DestReg = MI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(DestReg))
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ UserMI = &*MRI.use_instr_nodbg_begin(DestReg);
+ switch (UserMI->getOpcode()) {
+ default:
+ return WebAssembly::INSTRUCTION_LIST_END;
+ case WebAssembly::I64_EXTEND8_S_I64:
+ if (LoadSize != 8)
+ return WebAssembly::INSTRUCTION_LIST_END;
+ return getSExtLoadOpcode(LoadSize, true, A64);
+ case WebAssembly::I64_EXTEND16_S_I64:
+ if (LoadSize != 16)
+ return WebAssembly::INSTRUCTION_LIST_END;
+ return getSExtLoadOpcode(LoadSize, true, A64);
+ }
+}
+
+static unsigned matchFoldableCopyToI64Ext(MachineInstr *MI, const LoadInst *LI,
+ MachineRegisterInfo &MRI, bool A64,
+ MachineInstr *&OuterUserMI) {
+ if (MI->getOpcode() != WebAssembly::COPY)
+ return WebAssembly::INSTRUCTION_LIST_END;
- return NewOpc;
+ unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
+ if (LoadSize != 32)
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ Register CopyDst = MI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(CopyDst))
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ OuterUserMI = &*MRI.use_instr_nodbg_begin(CopyDst);
+ switch (OuterUserMI->getOpcode()) {
+ default:
+ return WebAssembly::INSTRUCTION_LIST_END;
+ case WebAssembly::I64_EXTEND_U_I32:
+ return getZExtLoadOpcode(LoadSize, true, A64);
+ case WebAssembly::I64_EXTEND_S_I32:
+ return getSExtLoadOpcode(LoadSize, true, A64);
+ }
+}
+
+static unsigned matchFoldableAnd(MachineInstr *MI, const LoadInst *LI,
+ MachineRegisterInfo &MRI, bool A64,
+ MachineInstr *&OuterUserMI) {
+ if (MI->getOpcode() != WebAssembly::AND_I32 &&
+ MI->getOpcode() != WebAssembly::AND_I64)
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ uint64_t Mask = 0;
+ bool IsConstant = false;
+ for (unsigned I = 1; I <= 2; ++I) {
+ Register Reg = MI->getOperand(I).getReg();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
+ if (DefMI && (DefMI->getOpcode() == WebAssembly::CONST_I32 ||
+ DefMI->getOpcode() == WebAssembly::CONST_I64)) {
+ Mask = DefMI->getOperand(1).getImm();
+ IsConstant = true;
+ break;
+ }
+ }
+
+ if (!IsConstant)
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ unsigned LoadSize = LI->getType()->getPrimitiveSizeInBits();
+ if (Mask != llvm::maskTrailingOnes<uint64_t>(LoadSize))
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ if (MI->getOpcode() == WebAssembly::AND_I64)
+ return getZExtLoadOpcode(LoadSize, /*I64Result=*/true, A64);
+
+ unsigned NarrowOpc = getZExtLoadOpcode(LoadSize, /*I64Result=*/false, A64);
+ if (NarrowOpc == WebAssembly::INSTRUCTION_LIST_END)
+ return WebAssembly::INSTRUCTION_LIST_END;
+
+ return getFoldedI64LoadOpcode(MI->getOperand(0).getReg(), LI, MRI, A64,
+ OuterUserMI, NarrowOpc);
}
bool WebAssemblyFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
@@ -1425,13 +1545,27 @@ bool WebAssemblyFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
MachineRegisterInfo &MRI = FuncInfo.MF->getRegInfo();
Register ResultReg;
MachineInstr *UserMI = nullptr;
- unsigned NewOpc;
- if ((NewOpc = getFoldedLoadOpcode(MI, MRI, LI, A64)) !=
+ MachineInstr *OuterUserMI = nullptr;
+ unsigned NewOpc = WebAssembly::INSTRUCTION_LIST_END;
+ if ((NewOpc = matchFoldableSExtFromPromotedI32(MI, LI, MRI, A64, UserMI)) !=
WebAssembly::INSTRUCTION_LIST_END) {
+ ResultReg = UserMI->getOperand(0).getReg();
+ } else if ((NewOpc = matchFoldableCopyToI64Ext(MI, LI, MRI, A64,
+ OuterUserMI)) !=
+ WebAssembly::INSTRUCTION_LIST_END) {
+ ResultReg = OuterUserMI->getOperand(0).getReg();
+ } else if ((NewOpc = matchFoldableAnd(MI, LI, MRI, A64, OuterUserMI)) !=
+ WebAssembly::INSTRUCTION_LIST_END) {
+ ResultReg = OuterUserMI ? OuterUserMI->getOperand(0).getReg()
+ : MI->getOperand(0).getReg();
+ } else if ((NewOpc = getFoldedLoadOpcode(MI, MRI, LI, A64)) !=
+ WebAssembly::INSTRUCTION_LIST_END) {
ResultReg = MI->getOperand(0).getReg();
- } else if ((NewOpc = matchFoldableShift(MI, LI, MRI, A64, UserMI)) !=
+ } else if ((NewOpc =
+ matchFoldableShift(MI, LI, MRI, A64, UserMI, OuterUserMI)) !=
WebAssembly::INSTRUCTION_LIST_END) {
- ResultReg = UserMI->getOperand(0).getReg();
+ ResultReg = OuterUserMI ? OuterUserMI->getOperand(0).getReg()
+ : UserMI->getOperand(0).getReg();
} else {
return false;
}
@@ -1439,6 +1573,11 @@ bool WebAssemblyFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!emitLoad(ResultReg, NewOpc, LI))
return false;
+ if (OuterUserMI) {
+ MachineBasicBlock::iterator OuterIter(OuterUserMI);
+ removeDeadCode(OuterIter, std::next(OuterIter));
+ }
+
if (UserMI) {
MachineBasicBlock::iterator UserIter(UserMI);
removeDeadCode(UserIter, std::next(UserIter));
diff --git a/llvm/test/CodeGen/WebAssembly/load-ext.ll b/llvm/test/CodeGen/WebAssembly/load-ext.ll
index a67c871a74295..4f1edbc8af283 100644
--- a/llvm/test/CodeGen/WebAssembly/load-ext.ll
+++ b/llvm/test/CodeGen/WebAssembly/load-ext.ll
@@ -153,18 +153,15 @@ define i64 @global_sext_i8_i64() {
; WASM32-FAST-LABEL: global_sext_i8_i64:
; WASM32-FAST: .functype global_sext_i8_i64 () -> (i64)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.const $push3=, 0
-; WASM32-FAST-NEXT: i32.load8_u $push2=, gv8($pop3)
-; WASM32-FAST-NEXT: i64.extend_i32_u $push0=, $pop2
-; WASM32-FAST-NEXT: i64.extend8_s $push1=, $pop0
-; WASM32-FAST-NEXT: return $pop1
+; WASM32-FAST-NEXT: i32.const $push1=, 0
+; WASM32-FAST-NEXT: i64.load8_s $push0=, gv8($pop1)
+; WASM32-FAST-NEXT: return $pop0
;
; WASM32-FAST-MVP-LABEL: global_sext_i8_i64:
; WASM32-FAST-MVP: .functype global_sext_i8_i64 () -> (i64)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.const $push2=, 0
-; WASM32-FAST-MVP-NEXT: i32.load8_s $push1=, gv8($pop2)
-; WASM32-FAST-MVP-NEXT: i64.extend_i32_s $push0=, $pop1
+; WASM32-FAST-MVP-NEXT: i32.const $push1=, 0
+; WASM32-FAST-MVP-NEXT: i64.load8_s $push0=, gv8($pop1)
; WASM32-FAST-MVP-NEXT: return $pop0
;
; WASM64-DAG-LABEL: global_sext_i8_i64:
@@ -184,18 +181,15 @@ define i64 @global_sext_i8_i64() {
; WASM64-FAST-LABEL: global_sext_i8_i64:
; WASM64-FAST: .functype global_sext_i8_i64 () -> (i64)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i64.const $push3=, 0
-; WASM64-FAST-NEXT: i32.load8_u $push2=, gv8($pop3)
-; WASM64-FAST-NEXT: i64.extend_i32_u $push0=, $pop2
-; WASM64-FAST-NEXT: i64.extend8_s $push1=, $pop0
-; WASM64-FAST-NEXT: return $pop1
+; WASM64-FAST-NEXT: i64.const $push1=, 0
+; WASM64-FAST-NEXT: i64.load8_s $push0=, gv8($pop1)
+; WASM64-FAST-NEXT: return $pop0
;
; WASM64-FAST-MVP-LABEL: global_sext_i8_i64:
; WASM64-FAST-MVP: .functype global_sext_i8_i64 () -> (i64)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i64.const $push2=, 0
-; WASM64-FAST-MVP-NEXT: i32.load8_s $push1=, gv8($pop2)
-; WASM64-FAST-MVP-NEXT: i64.extend_i32_s $push0=, $pop1
+; WASM64-FAST-MVP-NEXT: i64.const $push1=, 0
+; WASM64-FAST-MVP-NEXT: i64.load8_s $push0=, gv8($pop1)
; WASM64-FAST-MVP-NEXT: return $pop0
%ld = load i8, ptr @gv8
%conv = sext i8 %ld to i64
@@ -220,18 +214,15 @@ define i64 @global_sext_i16_i64() {
; WASM32-FAST-LABEL: global_sext_i16_i64:
; WASM32-FAST: .functype global_sext_i16_i64 () -> (i64)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.const $push3=, 0
-; WASM32-FAST-NEXT: i32.load16_u $push2=, gv16($pop3)
-; WASM32-FAST-NEXT: i64.extend_i32_u $push0=, $pop2
-; WASM32-FAST-NEXT: i64.extend16_s $push1=, $pop0
-; WASM32-FAST-NEXT: return $pop1
+; WASM32-FAST-NEXT: i32.const $push1=, 0
+; WASM32-FAST-NEXT: i64.load16_s $push0=, gv16($pop1)
+; WASM32-FAST-NEXT: return $pop0
;
; WASM32-FAST-MVP-LABEL: global_sext_i16_i64:
; WASM32-FAST-MVP: .functype global_sext_i16_i64 () -> (i64)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.const $push2=, 0
-; WASM32-FAST-MVP-NEXT: i32.load16_s $push1=, gv16($pop2)
-; WASM32-FAST-MVP-NEXT: i64.extend_i32_s $push0=, $pop1
+; WASM32-FAST-MVP-NEXT: i32.const $push1=, 0
+; WASM32-FAST-MVP-NEXT: i64.load16_s $push0=, gv16($pop1)
; WASM32-FAST-MVP-NEXT: return $pop0
;
; WASM64-DAG-LABEL: global_sext_i16_i64:
@@ -251,18 +242,15 @@ define i64 @global_sext_i16_i64() {
; WASM64-FAST-LABEL: global_sext_i16_i64:
; WASM64-FAST: .functype global_sext_i16_i64 () -> (i64)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i64.const $push3=, 0
-; WASM64-FAST-NEXT: i32.load16_u $push2=, gv16($pop3)
-; WASM64-FAST-NEXT: i64.extend_i32_u $push0=, $pop2
-; WASM64-FAST-NEXT: i64.extend16_s $push1=, $pop0
-; WASM64-FAST-NEXT: return $pop1
+; WASM64-FAST-NEXT: i64.const $push1=, 0
+; WASM64-FAST-NEXT: i64.load16_s $push0=, gv16($pop1)
+; WASM64-FAST-NEXT: return $pop0
;
; WASM64-FAST-MVP-LABEL: global_sext_i16_i64:
; WASM64-FAST-MVP: .functype global_sext_i16_i64 () -> (i64)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i64.const $push2=, 0
-; WASM64-FAST-MVP-NEXT: i32.load16_s $push1=, gv16($pop2)
-; WASM64-FAST-MVP-NEXT: i64.extend_i32_s $push0=, $pop1
+; WASM64-FAST-MVP-NEXT: i64.const $push1=, 0
+; WASM64-FAST-MVP-NEXT: i64.load16_s $push0=, gv16($pop1)
; WASM64-FAST-MVP-NEXT: return $pop0
%ld= load i16, ptr @gv16
%conv = sext i16 %ld to i64
@@ -497,16 +485,13 @@ define i64 @sext_i8_i64(ptr %p) {
; WASM32-FAST-LABEL: sext_i8_i64:
; WASM32-FAST: .functype sext_i8_i64 (i32) -> (i64)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.load8_u $push2=, 0($0)
-; WASM32-FAST-NEXT: i64.extend_i32_u $push0=, $pop2
-; WASM32-FAST-NEXT: i64.extend8_s $push1=, $pop0
-; WASM32-FAST-NEXT: return $pop1
+; WASM32-FAST-NEXT: i64.load8_s $push0=, 0($0)
+; WASM32-FAST-NEXT: return $pop0
;
; WASM32-FAST-MVP-LABEL: sext_i8_i64:
; WASM32-FAST-MVP: .functype sext_i8_i64 (i32) -> (i64)
; WASM32-FAST-MVP-NEXT: # %bb.0:
-; WASM32-FAST-MVP-NEXT: i32.load8_s $push1=, 0($0)
-; WASM32-FAST-MVP-NEXT: i64.extend_i32_s $push0=, $pop1
+; WASM32-FAST-MVP-NEXT: i64.load8_s $push0=, 0($0)
; WASM32-FAST-MVP-NEXT: return $pop0
;
; WASM64-DAG-LABEL: sext_i8_i64:
@@ -524,16 +509,13 @@ define i64 @sext_i8_i64(ptr %p) {
; WASM64-FAST-LABEL: sext_i8_i64:
; WASM64-FAST: .functype sext_i8_i64 (i64) -> (i64)
; WASM64-FAST-NEXT: # %bb.0:
-; WASM64-FAST-NEXT: i32.load8_u $push2=, 0($0)
-; WASM64-FAST-NEXT: i64.extend_i32_u $push0=, $pop2
-; WASM64-FAST-NEXT: i64.extend8_s $push1=, $pop0
-; WASM64-FAST-NEXT: return $pop1
+; WASM64-FAST-NEXT: i64.load8_s $push0=, 0($0)
+; WASM64-FAST-NEXT: return $pop0
;
; WASM64-FAST-MVP-LABEL: sext_i8_i64:
; WASM64-FAST-MVP: .functype sext_i8_i64 (i64) -> (i64)
; WASM64-FAST-MVP-NEXT: # %bb.0:
-; WASM64-FAST-MVP-NEXT: i32.load8_s $push1=, 0($0)
-; WASM64-FAST-MVP-NEXT: i64.extend_i32_s $push0=, $pop1
+; WASM64-FAST-MVP-NEXT: i64.load8_s $push0=, 0($0)
; WASM64-FAST-MVP-NEXT: return $pop0
%v = load i8, ptr %p
%e = sext i8 %v to i64
@@ -556,16 +538,14 @@ define i64 @zext_i8_i64(ptr %p) {
; WASM32-FAST-LABEL: zext_i8_i64:
; WASM32-FAST: .functype zext_i8_i64 (i32) -> (i64)
; WASM32-FAST-NEXT: # %bb.0:
-; WASM32-FAST-NEXT: i32.load8_u $push0=, 0($0)
-; WASM32-FAST-NEXT: i64.extend_i32_u $push1=, $pop0
-; WASM32-FAST-NEXT: return $pop1
+; WASM32-FAST-NEXT: i64.load8_u $push0=, 0($0)
+; WASM32-FAST-NEXT: return $pop0
;
; WA...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/34251. Here is the relevant piece of the build log for the reference.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/211/builds/7146. Here is the relevant piece of the build log for the reference.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/17298. Here is the relevant piece of the build log for the reference.
Reverting in #188306 |
…into widened loads" (#188306). Reverts llvm/llvm-project#187934. Breaks bots.
…into widened loads" (#188324). Reverts llvm/llvm-project#188306. Relands llvm/llvm-project#187934. It was reverted by mistake.
…lvm#187934)

FastISel can already fold some sign- and zero-extending loads, but a number of i64 extension patterns still leave redundant instructions behind.

This patch series extends load folding to recognize several such cases, including:
- promoted sign-extension chains
- copy + i64.extend_i32_{u,s} chains
- AND-based zero-extension chains
- shift-based sign-extension chains

When these patterns originate from narrow integer loads, fold them directly into widened i64 loads instead of materializing intermediate i32 loads followed by redundant i64 extends.

Fixes llvm#179672
…d loads" (llvm#188306). Reverts llvm#187934. Breaks bots.
…d loads" (llvm#188324). Reverts llvm#188306. Relands llvm#187934. It was reverted by mistake.
…d loads" (#188306). Reverts llvm/llvm-project#187934. Breaks bots. (cherry picked from commit 03e8fd6)
…d loads" (#188324). Reverts llvm/llvm-project#188306. Relands llvm/llvm-project#187934. It was reverted by mistake. (cherry picked from commit 55bbf7b)
FastISel can already fold some sign- and zero-extending loads, but a
number of i64 extension patterns still leave redundant instructions
behind.
This patch series extends load folding to recognize several such cases, including:
- promoted sign-extension chains
- copy + i64.extend_i32_{u,s} chains
- AND-based zero-extension chains
- shift-based sign-extension chains
When these patterns originate from narrow integer loads, fold them
directly into widened i64 loads instead of materializing intermediate
i32 loads followed by redundant i64 extends.
Fixes #179672