Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5299,28 +5299,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}

multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[]> {
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 32-bit SIMD/FPR
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
[]> {
[(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 64-bit SIMD/FPR
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
[]> {
[(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}

// single-precision to 64-bit SIMD/FPR
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
[]> {
[(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
}
Expand Down Expand Up @@ -7949,6 +7950,21 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
}
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalarFCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpN> {
let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to set FastISelShouldIgnore? Can we add a comment here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another question: I cannot find a description of these instructions on developer.arm.com, i.e. the forms where the conversion's input and output have the same size.
This one:
https://developer.arm.com/documentation/ddi0602/2025-06/SIMD-FP-Instructions/FCVTAU--vector---Floating-point-convert-to-unsigned-integer--rounding-to-nearest-with-ties-to-away--vector--?lang=en
only has the half-to-half form.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added it because it fails with FastISel. I didn't really investigate why it fails, as I thought FastISel is not really important.

They are under the "Scalar single-precision and double-precision" section on that webpage.

def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
[(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
}
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
[(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
}
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
Expand Down
251 changes: 230 additions & 21 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td

Large diffs are not rendered by default.

66 changes: 45 additions & 21 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,9 +568,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu:
// Force FPR register bank for half types, as those types otherwise
// don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
return true;
default:
break;
}
Expand Down Expand Up @@ -849,25 +847,29 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_FPTOSI_SAT:
case TargetOpcode::G_FPTOUI_SAT: {
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
if (DstType.isVector())
break;
if (DstType == LLT::scalar(16)) {
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
break;
}
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}
case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND: {
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](const MachineInstr &UseMI) {
return onlyUsesFP(UseMI, MRI, TRI) ||
prefersFPUse(UseMI, MRI, TRI);
}))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
else
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}

case TargetOpcode::G_FCMP: {
// If the result is a vector, it must use a FPR.
AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
Expand Down Expand Up @@ -1143,6 +1145,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::aarch64_neon_fcvtas:
case Intrinsic::aarch64_neon_fcvtau:
case Intrinsic::aarch64_neon_fcvtzs:
case Intrinsic::aarch64_neon_fcvtzu:
case Intrinsic::aarch64_neon_fcvtms:
case Intrinsic::aarch64_neon_fcvtmu:
case Intrinsic::aarch64_neon_fcvtns:
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu: {
OpRegBankIdx[2] = PMI_FirstFPR;
if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](const MachineInstr &UseMI) {
return onlyUsesFP(UseMI, MRI, TRI) ||
prefersFPUse(UseMI, MRI, TRI);
}))
OpRegBankIdx[0] = PMI_FirstFPR;
else
OpRegBankIdx[0] = PMI_FirstGPR;
break;
}
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
case Intrinsic::aarch64_neon_vcvtfp2fxs:
Expand Down Expand Up @@ -1179,12 +1209,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND: {
// Source is always floating point and destination is always integer.
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}
}

// Finally construct the computed mapping.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ body: |
; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]]
; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32)
%0:_(s32) = COPY $w0
%2:_(s32) = COPY $w1
%3:_(s32) = COPY $w2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ body: |
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND %fpr(s64)
; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND %fpr(s64)
; CHECK-NEXT: $d0 = COPY %llround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%fpr:_(s64) = COPY $d0
Expand All @@ -35,7 +35,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND [[COPY]](s64)
; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND [[COPY]](s64)
; CHECK-NEXT: $d0 = COPY %llround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%gpr:_(s64) = COPY $x0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ body: |
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
; CHECK-NEXT: %lround:gpr(s64) = G_LROUND %fpr(s64)
; CHECK-NEXT: %lround:fpr(s64) = G_LROUND %fpr(s64)
; CHECK-NEXT: $d0 = COPY %lround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%fpr:_(s64) = COPY $d0
Expand All @@ -35,7 +35,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
; CHECK-NEXT: %lround:gpr(s64) = G_LROUND [[COPY]](s64)
; CHECK-NEXT: %lround:fpr(s64) = G_LROUND [[COPY]](s64)
; CHECK-NEXT: $d0 = COPY %lround(s64)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%gpr:_(s64) = COPY $x0
Expand Down
Loading
Loading