Skip to content

Commit fed870e

Browse files
committed
[AArch64] Give a higher cost for more expensive SVE FCMP instructions
1 parent 65f60fd commit fed870e

File tree

4 files changed

+603
-268
lines changed

4 files changed

+603
-268
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4409,6 +4409,32 @@ AArch64TTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
44094409
return 1;
44104410
}
44114411

4412+
/// Check whether Opcode1 has less throughput according to the scheduling
4413+
/// model than Opcode2.
4414+
bool AArch64TTIImpl::hasKnownLowerThroughputFromSchedulingModel(
4415+
unsigned Opcode1, unsigned Opcode2) const {
4416+
const MCSchedModel &Sched = ST->getSchedModel();
4417+
const TargetInstrInfo *TII = ST->getInstrInfo();
4418+
if (!Sched.hasInstrSchedModel())
4419+
return false;
4420+
4421+
const MCSchedClassDesc *SCD1 =
4422+
Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
4423+
const MCSchedClassDesc *SCD2 =
4424+
Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());
4425+
// We cannot handle variant scheduling classes without an MI. If we need to
4426+
// support them for any of the instructions we query the information of we
4427+
// might need to add a way to resolve them without a MI or not use the
4428+
// scheduling info.
4429+
assert(!SCD1->isVariant() && !SCD2->isVariant() &&
4430+
"Cannot handle variant scheduling classes without an MI");
4431+
if (!SCD1->isValid() || !SCD2->isValid())
4432+
return false;
4433+
4434+
return MCSchedModel::getReciprocalThroughput(*ST, *SCD1) >
4435+
MCSchedModel::getReciprocalThroughput(*ST, *SCD2);
4436+
}
4437+
44124438
InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
44134439
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
44144440
TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
@@ -4506,6 +4532,12 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
45064532
(VecPred == FCmpInst::FCMP_ONE || VecPred == FCmpInst::FCMP_UEQ))
45074533
Factor = 3; // fcmxx+fcmyy+or
45084534

4535+
if (isa<ScalableVectorType>(ValTy) &&
4536+
CostKind == TTI::TCK_RecipThroughput &&
4537+
hasKnownLowerThroughputFromSchedulingModel(AArch64::FCMEQ_PPzZZ_S,
4538+
AArch64::FCMEQv4f32))
4539+
Factor *= 2;
4540+
45094541
return Factor * (CostKind == TTI::TCK_Latency ? 2 : LT.first);
45104542
}
45114543

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
174174

175175
bool prefersVectorizedAddressing() const override;
176176

177+
/// Returns true if the scheduling model gives \p Opcode1 a lower throughput
178+
/// (greater reciprocal throughput) than \p Opcode2; false if that is unknown.
179+
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1,
180+
unsigned Opcode2) const;
181+
177182
InstructionCost
178183
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
179184
unsigned AddressSpace,

llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ define <vscale x 32 x i1> @cmp_nxv32i1() {
5858
; Check fcmp for legal FP vectors
5959
define void @cmp_legal_fp() #0 {
6060
; CHECK-LABEL: 'cmp_legal_fp'
61-
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %1 = fcmp oge <vscale x 2 x double> undef, undef
62-
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %2 = fcmp oge <vscale x 4 x float> undef, undef
63-
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %3 = fcmp oge <vscale x 8 x half> undef, undef
64-
; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %4 = fcmp oge <vscale x 8 x bfloat> undef, undef
61+
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %1 = fcmp oge <vscale x 2 x double> undef, undef
62+
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %2 = fcmp oge <vscale x 4 x float> undef, undef
63+
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %3 = fcmp oge <vscale x 8 x half> undef, undef
64+
; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %4 = fcmp oge <vscale x 8 x bfloat> undef, undef
6565
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
6666
;
6767
%1 = fcmp oge <vscale x 2 x double> undef, undef
@@ -74,7 +74,7 @@ define void @cmp_legal_fp() #0 {
7474
; Check fcmp for an illegal FP vector
7575
define <vscale x 16 x i1> @cmp_nxv16f16() {
7676
; CHECK-LABEL: 'cmp_nxv16f16'
77-
; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = fcmp oge <vscale x 16 x half> undef, undef
77+
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %res = fcmp oge <vscale x 16 x half> undef, undef
7878
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 16 x i1> %res
7979
;
8080
%res = fcmp oge <vscale x 16 x half> undef, undef

0 commit comments

Comments
 (0)