Skip to content

Commit

Permalink
Disable predicate promotion to scalar for fused EU in VC
Browse files Browse the repository at this point in the history
Due to the fused EU hardware restrictions, VC should promote predicate
operations into vector operations instead of scalar ones.
  • Loading branch information
vmustya authored and igcbot committed Jul 19, 2024
1 parent bed3ead commit 4a09bac
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 7 deletions.
29 changes: 22 additions & 7 deletions IGC/VectorCompiler/lib/GenXCodeGen/GenXPromotePredicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,22 @@ SPDX-License-Identifier: MIT
///
//===----------------------------------------------------------------------===//


#include "GenX.h"
#include "GenXSubtarget.h"
#include "GenXTargetMachine.h"
#include "GenXUtil.h"

#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

#include "llvmWrapper/IR/DerivedTypes.h"

#define DEBUG_TYPE "GENX_PROMOTE_PREDICATE"
#define DEBUG_TYPE "genx-promote-predicate"

using namespace llvm;
using namespace genx;
Expand All @@ -48,6 +52,7 @@ class GenXPromotePredicate : public FunctionPass {
// Per-function entry point of the pass; defined out of line as
// GenXPromotePredicate::runOnFunction.
bool runOnFunction(Function &F) override;
// Returns the fixed name string of this pass.
StringRef getPassName() const override {
  return "GenXPromotePredicate";
}
// Declares what this pass preserves and what it depends on.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  // The pass is declared as preserving the CFG.
  AU.setPreservesCFG();
  // TargetPassConfig is required: runOnFunction goes through it to reach the
  // GenX target machine and its subtarget.
  AU.addRequired<TargetPassConfig>();
}
};
Expand All @@ -61,6 +66,7 @@ void initializeGenXPromotePredicatePass(PassRegistry &);
}
// Pass registration. "GenXPromotePredicate" is used both as the command-line
// argument and as the descriptive name. The two trailing `false` values are
// the macro's CFG-only and is-analysis flags (per LLVM's PassSupport.h).
// TargetPassConfig is listed as a dependency so that it is initialized
// before this pass.
INITIALIZE_PASS_BEGIN(GenXPromotePredicate, "GenXPromotePredicate",
"GenXPromotePredicate", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(GenXPromotePredicate, "GenXPromotePredicate",
"GenXPromotePredicate", false, false)

Expand Down Expand Up @@ -138,8 +144,9 @@ static Value *promoteInstToScalar(Instruction *Inst) {

// Promote one predicate instruction to GRF: promote all of its operands and
// the instruction itself, then sink the result back to a predicate.
static Value *promoteInst(Instruction *Inst) {
if (auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Inst->getType())) {
static Value *promoteInst(Instruction *Inst, bool AllowScalarPromotion) {
if (auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Inst->getType());
VTy && AllowScalarPromotion) {
IGC_ASSERT(VTy->isIntOrIntVectorTy(1));
auto Width = VTy->getNumElements();

Expand Down Expand Up @@ -220,7 +227,8 @@ static void foldBitcast(BitCastInst *Cast) {
class PredicateWeb {
public:
template <class InputIt>
PredicateWeb(InputIt first, InputIt last) : Web(first, last) {}
PredicateWeb(InputIt First, InputIt Last, bool AllowScalar)
: Web(First, Last), AllowScalarPromotion(AllowScalar) {}
void print(llvm::raw_ostream &O) const {
for (auto Inst : Web)
O << *Inst << '\n';
Expand All @@ -236,7 +244,7 @@ class PredicateWeb {
// Do promotion.
SmallVector<Instruction *, 8> Worklist;
for (auto *Inst : Web) {
auto *PromotedInst = promoteInst(Inst);
auto *PromotedInst = promoteInst(Inst, AllowScalarPromotion);

if (isa<TruncInst>(PromotedInst) || isa<BitCastInst>(PromotedInst))
Worklist.push_back(cast<Instruction>(PromotedInst));
Expand All @@ -254,6 +262,7 @@ class PredicateWeb {

private:
SmallPtrSet<Instruction *, 16> Web;
bool AllowScalarPromotion;
};

// Name string "pred.index".
// NOTE(review): presumably a metadata name used to attach an index to
// predicate instructions; its uses are not visible in this excerpt, so confirm.
constexpr const char IdxMDName[] = "pred.index";
Expand All @@ -273,6 +282,11 @@ struct Comparator {
};

bool GenXPromotePredicate::runOnFunction(Function &F) {
auto &ST = getAnalysis<TargetPassConfig>()
.getTM<GenXTargetMachine>()
.getGenXSubtarget();
bool AllowScalarPromotion = !ST.hasFusedEU();

// Put every predicate instruction into its own equivalence class.
long Idx = 0;
llvm::EquivalenceClasses<Instruction *, Comparator> PredicateWebs;
Expand Down Expand Up @@ -303,7 +317,8 @@ bool GenXPromotePredicate::runOnFunction(Function &F) {
for (auto I = PredicateWebs.begin(), E = PredicateWebs.end(); I != E; ++I) {
if (!I->isLeader())
continue;
PredicateWeb Web(PredicateWebs.member_begin(I), PredicateWebs.member_end());
PredicateWeb Web(PredicateWebs.member_begin(I), PredicateWebs.member_end(),
AllowScalarPromotion);
LLVM_DEBUG(dbgs() << "Predicate web:\n"; Web.dump());
++NumCollectedPredicateWebs;
if (!Web.isBeneficialToPromote())
Expand Down
17 changes: 17 additions & 0 deletions IGC/VectorCompiler/test/PromotePredicate/ispc-example.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
; RUN: %opt %use_old_pass_manager% -GenXPromotePredicate -march=genx64 -mtriple=spir64-unknown-unknown \
; RUN: -mcpu=Gen9 -logical-ops-threshold=2 -S < %s | FileCheck %s

; RUN: %opt %use_old_pass_manager% -GenXPromotePredicate -march=genx64 -mtriple=spir64-unknown-unknown \
; RUN: -mcpu=XeHPG -logical-ops-threshold=2 -S < %s | FileCheck --check-prefix=FUSED %s

; RUN: %opt %use_old_pass_manager% -GenXPromotePredicate -march=genx64 -mtriple=spir64-unknown-unknown \
; RUN: -mcpu=XeHPC -logical-ops-threshold=2 -S < %s | FileCheck %s

; CHECK-LABEL: f_f
; CHECK-DAG: [[LESSEQUAL_A_LOAD_widened:%.*]] = bitcast <8 x i1> %lessequal_a_load_ to i8
; CHECK-DAG: [[EQUAL_A_LOAD5_widened:%.*]] = bitcast <8 x i1> %equal_a_load5_ to i8
Expand All @@ -21,6 +27,17 @@
; CHECK-DAG: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8f32(<8 x i1> [[NEG_RETURNED_LANES]], i32 0, <8 x i64> %new_offsets.i.i34, <8 x float> zeroinitializer)
; CHECK-DAG: icmp eq i8 [[LOGICAL_AND_promoted]], -1

; FUSED-LABEL: f_f
; FUSED-DAG: [[LESSEQUAL_A_LOAD_widened:%.*]] = sext <8 x i1> %lessequal_a_load_ to <8 x i16>
; FUSED-DAG: [[EQUAL_A_LOAD5_widened:%.*]] = sext <8 x i1> %equal_a_load5_ to <8 x i16>
; FUSED-DAG: [[LOGICAL_AND_promoted:%.*]] = and <8 x i16> [[LESSEQUAL_A_LOAD_widened]], [[EQUAL_A_LOAD5_widened]]
; FUSED-DAG: [[LOGICAL_AND:%.*]] = icmp ne <8 x i16> [[LOGICAL_AND_promoted]], zeroinitializer
; FUSED-DAG: call i1 @llvm.genx.any.v8i1(<8 x i1> [[LOGICAL_AND]])
; FUSED-DAG: [[RETURNED_LANES_MEMORY_0_promoted:%.*]] = phi <8 x i16> [ [[LOGICAL_AND_promoted]], %safe_if_run_true.safe_if_after_true_crit_edge ], [ zeroinitializer, %allocas.safe_if_after_true_crit_edge ]
; FUSED-DAG: [[NEG_RETURNED_LANES_promoted:%.*]] = xor <8 x i16> [[RETURNED_LANES_MEMORY_0_promoted]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; FUSED-DAG: [[NEG_RETURNED_LANES:%.*]] = icmp ne <8 x i16> [[NEG_RETURNED_LANES_promoted]], zeroinitializer
; FUSED-DAG: call void @llvm.genx.svm.scatter.v8i1.v8i64.v8f32(<8 x i1> [[NEG_RETURNED_LANES]], i32 0, <8 x i64> %new_offsets.i.i34, <8 x float> zeroinitializer)

declare void @llvm.genx.svm.scatter.v8i1.v8i64.v8f32(<8 x i1>, i32, <8 x i64>, <8 x float>)
declare i1 @llvm.genx.any.v8i1(<8 x i1>)
declare <8 x float> @llvm.genx.svm.block.ld.unaligned.v8f32.i64(i64)
Expand Down

0 comments on commit 4a09bac

Please sign in to comment.