Skip to content

Commit

Permalink
[X86] Support APX promoted RAO-INT and MOVBE instructions (#77431)
Browse files Browse the repository at this point in the history
R16-R31 were added to the GPRs in
#70958.
This patch adds support for the promoted RAO-INT and MOVBE instructions in EVEX
space.

RFC:
https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
  • Loading branch information
XinWang10 authored Jan 26, 2024
1 parent d9d1ae6 commit 02d5680
Show file tree
Hide file tree
Showing 14 changed files with 754 additions and 70 deletions.
68 changes: 38 additions & 30 deletions llvm/lib/Target/X86/X86InstrMisc.td
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
OpSize16, Requires<[Not64BitMode]>;
}

let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32] in {
let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoNDD_Or_NoMOVBE] in {
// This instruction is a consequence of BSWAP32r observing operand size. The
// encoding is valid, but the behavior is undefined.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
Expand Down Expand Up @@ -1090,35 +1090,43 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
let SchedRW = [WriteALULd] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
OpSize16, T8;
def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
OpSize32, T8;
def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
T8;
}
let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)]>,
OpSize16, T8;
def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
[(store (bswap GR32:$src), addr:$dst)]>,
OpSize32, T8;
def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)]>,
T8;
}
// Memory forms of MOVBE for the operand type described by `t`.
//   rm#suffix - opcode `o`, MRMSrcMem: register destination, memory source.
//               The pattern is a bswap of the value loaded through t.LoadNode.
//               Scheduled as WriteALULd.
//   mr#suffix - opcode `o` + 1, MRMDestMem: memory destination, register
//               source. The pattern stores the bswap of the register.
//               Scheduled as WriteStore.
// `suffix` is pasted onto both record names and defaults to the empty string.
multiclass Movbe<bits<8> o, X86TypeInfo t, string suffix = ""> {
def rm#suffix : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
(ins t.MemOperand:$src1), "movbe", unaryop_ndd_args,
[(set t.RegClass:$dst, (bswap (t.LoadNode addr:$src1)))]>,
Sched<[WriteALULd]>;
def mr#suffix : ITy<!add(o, 1), MRMDestMem, t, (outs),
(ins t.MemOperand:$dst, t.RegClass:$src1),
"movbe", unaryop_ndd_args,
[(store (bswap t.RegClass:$src1), addr:$dst)]>,
Sched<[WriteStore]>;
}

// Unsuffixed MOVBE memory forms (MOVBE16rm/mr, MOVBE32rm/mr, MOVBE64rm/mr):
// base opcode 0xF0 (so the mr form is 0xF1), tagged T8, gated on the HasMOVBE
// and NoEGPR predicates. The 16- and 32-bit forms also carry OpSize16/OpSize32.
let Predicates = [HasMOVBE, NoEGPR] in {
defm MOVBE16 : Movbe<0xF0, Xi16>, OpSize16, T8;
defm MOVBE32 : Movbe<0xF0, Xi32>, OpSize32, T8;
defm MOVBE64 : Movbe<0xF0, Xi64>, T8;
}

// "_EVEX"-suffixed MOVBE memory forms (e.g. MOVBE32rm_EVEX): base opcode 0x60
// (so the mr form is 0x61), tagged EVEX and T_MAP4, gated on HasMOVBE, HasEGPR
// and In64BitMode. Only the 16-bit form adds PD.
let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
defm MOVBE16 : Movbe<0x60, Xi16, "_EVEX">, EVEX, T_MAP4, PD;
defm MOVBE32 : Movbe<0x60, Xi32, "_EVEX">, EVEX, T_MAP4;
defm MOVBE64 : Movbe<0x60, Xi64, "_EVEX">, EVEX, T_MAP4;
}

// Register-to-register MOVBE forms for the operand type described by `t`.
// Both records are tagged EVEX and T_MAP4.
//   rr     - opcode 0x61, MRMDestReg; carries the selection pattern
//            (dst = bswap src1).
//   rr_REV - opcode 0x60, MRMSrcReg; has an empty pattern list and is marked
//            DisassembleOnly, so it is never chosen by instruction selection.
multiclass Movberr<X86TypeInfo t> {
def rr : ITy<0x61, MRMDestReg, t, (outs t.RegClass:$dst),
(ins t.RegClass:$src1), "movbe", unaryop_ndd_args,
[(set t.RegClass:$dst, (bswap t.RegClass:$src1))]>,
EVEX, T_MAP4;
def rr_REV : ITy<0x60, MRMSrcReg, t, (outs t.RegClass:$dst),
(ins t.RegClass:$src1), "movbe", unaryop_ndd_args, []>,
EVEX, T_MAP4, DisassembleOnly;
}
// Instantiate the register-to-register forms (MOVBE16rr/rr_REV, MOVBE32rr/...,
// MOVBE64rr/...) with WriteALU scheduling, gated on HasMOVBE, HasNDD and
// In64BitMode. Only the 16-bit form adds PD.
let SchedRW = [WriteALU], Predicates = [HasMOVBE, HasNDD, In64BitMode] in {
defm MOVBE16 : Movberr<Xi16>, PD;
defm MOVBE32 : Movberr<Xi32>;
defm MOVBE64 : Movberr<Xi64>;
}

//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86InstrPredicates.td
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def HasTBM : Predicate<"Subtarget->hasTBM()">;
def NoTBM : Predicate<"!Subtarget->hasTBM()">;
def HasLWP : Predicate<"Subtarget->hasLWP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
// True when the subtarget lacks NDD or lacks MOVBE (i.e. not both available).
def NoNDD_Or_NoMOVBE : Predicate<"!Subtarget->hasNDD() || !Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
Expand Down
32 changes: 17 additions & 15 deletions llvm/lib/Target/X86/X86InstrRAOINT.td
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@ def X86rao_xor : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
def X86rao_and : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

multiclass RAOINT_BASE<string OpcodeStr> {
let Predicates = [HasRAOINT] in
def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
!strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
Sched<[WriteALURMW]>;
// Defines the 32-bit and 64-bit records for the RAO-INT operation named `m`.
// Both use opcode 0xfc and the mnemonic "a" # m (e.g. "aadd"), built through
// BinOpMR_M with Xi32 / Xi64. Each record's Pattern selects the SDNode named
// "X86rao_" # m applied to an address and a GR32 / GR64 source register.
// `suffix` is pasted onto both record names and defaults to the empty string.
// NOTE(review): nothing in this multiclass itself restricts 64mr to 64-bit
// mode; confirm that BinOpMR_M/Xi64 or the Predicates applied at the defm
// sites provide that restriction where it is needed.
multiclass RaoInt<string m, string suffix = ""> {
let Pattern = [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)] in
def 32mr#suffix : BinOpMR_M<0xfc, "a" # m, Xi32>;
let Pattern = [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)] in
def 64mr#suffix : BinOpMR_M<0xfc, "a" # m, Xi64>;
}

let Predicates = [HasRAOINT, In64BitMode] in
def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
!strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
Sched<[WriteALURMW]>, REX_W;
// Unsuffixed RAO-INT records (AADD32mr/64mr, AAND..., AOR..., AXOR...), tagged
// T8 and gated on HasRAOINT and NoEGPR. The four operations share one opcode
// and differ only in the extra prefix class: none for AADD, PD for AAND,
// XD for AOR, XS for AXOR.
let Predicates = [HasRAOINT, NoEGPR] in {
defm AADD : RaoInt<"add">, T8;
defm AAND : RaoInt<"and">, T8, PD;
defm AOR : RaoInt<"or" >, T8, XD;
defm AXOR : RaoInt<"xor">, T8, XS;
}

defm AADD : RAOINT_BASE<"add">, T8;
defm AAND : RAOINT_BASE<"and">, T8, PD;
defm AOR : RAOINT_BASE<"or" >, T8, XD;
defm AXOR : RAOINT_BASE<"xor">, T8, XS;
// "_EVEX"-suffixed RAO-INT records (e.g. AADD32mr_EVEX), tagged EVEX and T_MAP4
// and gated on HasRAOINT, HasEGPR and In64BitMode. The prefix classes mirror
// the unsuffixed forms: none for AADD, PD for AAND, XD for AOR, XS for AXOR.
let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
defm AADD : RaoInt<"add", "_EVEX">, EVEX, T_MAP4;
defm AAND : RaoInt<"and", "_EVEX">, EVEX, T_MAP4, PD;
defm AOR : RaoInt<"or", "_EVEX">, EVEX, T_MAP4, XD;
defm AXOR : RaoInt<"xor", "_EVEX">, EVEX, T_MAP4, XS;
}
214 changes: 189 additions & 25 deletions llvm/test/CodeGen/X86/movbe.ll
Original file line number Diff line number Diff line change
@@ -1,66 +1,230 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM

; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd,+movbe --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd --show-mc-encoding < %s | FileCheck %s -check-prefix=NOMOVBE
declare i16 @llvm.bswap.i16(i16) nounwind readnone
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone

define void @test1(ptr nocapture %x, i16 %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movbew %si, (%rdi)
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test1:
; SLM: # %bb.0:
; SLM-NEXT: movbew %si, (%rdi)
; SLM-NEXT: retq
;
; EGPR-LABEL: test1:
; EGPR: # %bb.0:
; EGPR-NEXT: movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test1:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: rolw $8, %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc6,0x08]
; NOMOVBE-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i16 @llvm.bswap.i16(i16 %y)
store i16 %bswap, ptr %x, align 2
ret void
; CHECK-LABEL: test1:
; CHECK: movbew %si, (%rdi)
; SLM-LABEL: test1:
; SLM: movbew %si, (%rdi)
}

define i16 @test2(ptr %x) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movbew (%rdi), %ax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test2:
; SLM: # %bb.0:
; SLM-NEXT: movbew (%rdi), %ax
; SLM-NEXT: retq
;
; EGPR-LABEL: test2:
; EGPR: # %bb.0:
; EGPR-NEXT: movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test2:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: rolw $8, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x08]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%load = load i16, ptr %x, align 2
%bswap = call i16 @llvm.bswap.i16(i16 %load)
ret i16 %bswap
; CHECK-LABEL: test2:
; CHECK: movbew (%rdi), %ax
; SLM-LABEL: test2:
; SLM: movbew (%rdi), %ax
}

define void @test3(ptr nocapture %x, i32 %y) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movbel %esi, (%rdi)
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test3:
; SLM: # %bb.0:
; SLM-NEXT: movbel %esi, (%rdi)
; SLM-NEXT: retq
;
; EGPR-LABEL: test3:
; EGPR: # %bb.0:
; EGPR-NEXT: movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test3:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: bswapl %esi # encoding: [0x0f,0xce]
; NOMOVBE-NEXT: movl %esi, (%rdi) # encoding: [0x89,0x37]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i32 @llvm.bswap.i32(i32 %y)
store i32 %bswap, ptr %x, align 4
ret void
; CHECK-LABEL: test3:
; CHECK: movbel %esi, (%rdi)
; SLM-LABEL: test3:
; SLM: movbel %esi, (%rdi)
}

define i32 @test4(ptr %x) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movbel (%rdi), %eax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test4:
; SLM: # %bb.0:
; SLM-NEXT: movbel (%rdi), %eax
; SLM-NEXT: retq
;
; EGPR-LABEL: test4:
; EGPR: # %bb.0:
; EGPR-NEXT: movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test4:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; NOMOVBE-NEXT: bswapl %eax # encoding: [0x0f,0xc8]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%load = load i32, ptr %x, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %load)
ret i32 %bswap
; CHECK-LABEL: test4:
; CHECK: movbel (%rdi), %eax
; SLM-LABEL: test4:
; SLM: movbel (%rdi), %eax
}

define void @test5(ptr %x, i64 %y) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: movbeq %rsi, (%rdi)
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test5:
; SLM: # %bb.0:
; SLM-NEXT: movbeq %rsi, (%rdi)
; SLM-NEXT: retq
;
; EGPR-LABEL: test5:
; EGPR: # %bb.0:
; EGPR-NEXT: movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test5:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: bswapq %rsi # encoding: [0x48,0x0f,0xce]
; NOMOVBE-NEXT: movq %rsi, (%rdi) # encoding: [0x48,0x89,0x37]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i64 @llvm.bswap.i64(i64 %y)
store i64 %bswap, ptr %x, align 8
ret void
; CHECK-LABEL: test5:
; CHECK: movbeq %rsi, (%rdi)
; SLM-LABEL: test5:
; SLM: movbeq %rsi, (%rdi)
}

define i64 @test6(ptr %x) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: movbeq (%rdi), %rax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test6:
; SLM: # %bb.0:
; SLM-NEXT: movbeq (%rdi), %rax
; SLM-NEXT: retq
;
; EGPR-LABEL: test6:
; EGPR: # %bb.0:
; EGPR-NEXT: movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test6:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
; NOMOVBE-NEXT: bswapq %rax # encoding: [0x48,0x0f,0xc8]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%load = load i64, ptr %x, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %load)
ret i64 %bswap
; CHECK-LABEL: test6:
; CHECK: movbeq (%rdi), %rax
; SLM-LABEL: test6:
; SLM: movbeq (%rdi), %rax
}

define i64 @test7(i64 %x) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test7:
; SLM: # %bb.0:
; SLM-NEXT: movq %rdi, %rax
; SLM-NEXT: bswapq %rax
; SLM-NEXT: retq
;
; EGPR-LABEL: test7:
; EGPR: # %bb.0:
; EGPR-NEXT: movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test7:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; NOMOVBE-NEXT: bswapq %rax # encoding: [0x48,0x0f,0xc8]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i64 @llvm.bswap.i64(i64 %x)
ret i64 %bswap
}
Loading

0 comments on commit 02d5680

Please sign in to comment.