Skip to content

Commit e9587a3

Browse files
bokrzesi authored and igcbot committed
[LLVM16][StatelessToStateful] DeterminePointerAlignment algorithm fix v4
The DeterminePointerAlignment algorithm analyzes the alignment of load/store instructions. Before this fix, it walked over all loads/stores and picked the highest alignment. The problem with this approach was that some loads/stores are guarded by control flow, which in practice means that some of them may never be executed. We could accidentally pick up their alignment, which caused a mismatch. Such a case occurred here, in "__devicelib_memcpy", which uses various memcpy strategies based on sizes: https://github.com/intel/llvm/blob/ee397f94cfc0034a24cb06221f90863a0203326f/libdevice/fallback-cstring.cpp#L47
```
22: ; preds = %19
  %23 = getelementptr inbounds i8, ptr addrspace(4) %1, i64 %20
  %24 = load i8, ptr addrspace(4) %23, align 1 here
  %25 = getelementptr inbounds i8, ptr addrspace(4) %0, i64 %20
  store i8 %24, ptr addrspace(4) %25, align 1 here
  %26 = add nuw i64 %20, 1, !spirv.Decorations !15
  br label %19
27: ; preds = %13
  %28 = icmp eq i64 %16, 0
  br i1 %28, label %29, label %58
29: ; preds = %27
  %30 = and i64 %2, 3
  %31 = lshr i64 %2, 2
  br label %32
32: ; preds = %35, %29
  %33 = phi i64 [ 0, %29 ], [ %41, %35 ]
  %34 = icmp ult i64 %33, %31
  br i1 %34, label %35, label %42
35: ; preds = %32
  %36 = bitcast ptr addrspace(4) %1 to ptr addrspace(4)
  %37 = getelementptr inbounds i32, ptr addrspace(4) %36, i64 %33
  %38 = load i32, ptr addrspace(4) %37, align 4 here
  %39 = bitcast ptr addrspace(4) %0 to ptr addrspace(4)
  %40 = getelementptr inbounds i32, ptr addrspace(4) %39, i64 %33
  store i32 %38, ptr addrspace(4) %40, align 4 here
  %41 = add nuw nsw i64 %33, 1, !spirv.Decorations !17
  br label %32
```
1 parent f3d9419 commit e9587a3

File tree

13 files changed

+335
-32
lines changed

13 files changed

+335
-32
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.cpp

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ SPDX-License-Identifier: MIT
2323
#include "common/LLVMWarningsPop.hpp"
2424
#include <string>
2525
#include "Probe/Assertion.h"
26+
#include "PointersSettings.h"
2627

2728
using namespace llvm;
2829
using namespace IGC;
@@ -520,23 +521,23 @@ bool StatelessToStateful::pointerIsFromKernelArgument(Value &ptr) {
520521
return false;
521522
}
522523

523-
static alignment_t determinePointerAlignment(Value *Ptr, const DataLayout &DL, AssumptionCache *AC,
524-
Instruction *InsertionPt) {
525-
alignment_t BestAlign = 1;
524+
static Align determinePointerAlignmentTypedPointers(Value *ptr, const DataLayout &DL, AssumptionCache *AC,
525+
Instruction *insertionPt) {
526+
Align BestAlign = Align(1);
526527

527528
// 1) Examine uses: look for loads/stores (which may carry explicit
528529
// alignment) or a GEP that reveals an ABI alignment from its element
529530
// type.
530-
for (User *U : Ptr->users()) {
531+
for (User *U : ptr->users()) {
531532
if (auto *LI = dyn_cast<LoadInst>(U)) {
532533
// Load has an explicit alignment.
533-
alignment_t LdAlign = LI->getAlign().value();
534+
Align LdAlign = LI->getAlign();
534535
if (LdAlign > BestAlign)
535536
BestAlign = LdAlign;
536537
} else if (auto *SI = dyn_cast<StoreInst>(U)) {
537538
// Store sets alignment only if the pointer we store into is Ptr.
538-
if (SI->getPointerOperand() == Ptr) {
539-
alignment_t StAlign = SI->getAlign().value();
539+
if (SI->getPointerOperand() == ptr) {
540+
Align StAlign = SI->getAlign();
540541
if (StAlign > BestAlign)
541542
BestAlign = StAlign;
542543
}
@@ -545,7 +546,7 @@ static alignment_t determinePointerAlignment(Value *Ptr, const DataLayout &DL, A
545546
// alignment.
546547
Type *BaseTy = GEP->getSourceElementType();
547548
if (BaseTy && BaseTy->isSized()) {
548-
alignment_t GEPAlign = DL.getABITypeAlign(BaseTy).value();
549+
Align GEPAlign = DL.getABITypeAlign(BaseTy);
549550
if (GEPAlign > BestAlign)
550551
BestAlign = GEPAlign;
551552
}
@@ -554,10 +555,10 @@ static alignment_t determinePointerAlignment(Value *Ptr, const DataLayout &DL, A
554555

555556
// 2) If this pointer is actually a function parameter, see if it has an
556557
// alignment attribute.
557-
if (auto *Arg = dyn_cast<Argument>(Ptr)) {
558+
if (auto *Arg = dyn_cast<Argument>(ptr)) {
558559
if (Arg->hasAttribute(llvm::Attribute::Alignment)) {
559560
if (MaybeAlign ArgAlign = Arg->getParamAlign()) {
560-
alignment_t ArgAlignOrOne = ArgAlign.valueOrOne().value();
561+
Align ArgAlignOrOne = ArgAlign.valueOrOne();
561562
if (ArgAlignOrOne > BestAlign)
562563
BestAlign = ArgAlignOrOne;
563564
}
@@ -566,15 +567,43 @@ static alignment_t determinePointerAlignment(Value *Ptr, const DataLayout &DL, A
566567

567568
// 3) Fallback: use LLVM's built-in assumption-based alignment analysis
568569
// (based on, among other things, llvm.assume intrinsics).
569-
Align Known = getKnownAlignment(Ptr, DL, InsertionPt, AC);
570+
Align Known = getKnownAlignment(ptr, DL, insertionPt, AC);
570571
if (Known > BestAlign)
571-
BestAlign = Known.value();
572+
BestAlign = Known;
572573

573574
return BestAlign;
574575
}
575576

576-
bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(Function *F, Value *V, Value *&offset,
577-
unsigned int &argNumber, bool ignoreSyncBuffer) {
577+
static bool determinePointerAlignment(const KernelArg *arg, Value *base, const DataLayout &DL, Value *V,
578+
AssumptionCache *AC, Instruction *insertionPt,
579+
std::optional<llvm::Align> originalInstructionAlignment) {
580+
if (arg->isImplicitArg())
581+
return true;
582+
583+
auto desiredAlignmentLevel = 4;
584+
585+
// The intent of getKnownAlignment below is to check if any llvm.assume intrinsic provides
586+
// a hint about the base pointer alignment
587+
Align knownAlignment = getKnownAlignment(base, DL, insertionPt, AC);
588+
589+
if (knownAlignment >= desiredAlignmentLevel)
590+
return true;
591+
592+
if (AreOpaquePointersEnabled()) {
593+
if (originalInstructionAlignment.has_value() &&
594+
IGC::isStatefulAddrSpace(base->getType()->getPointerAddressSpace())) {
595+
knownAlignment = originalInstructionAlignment.value();
596+
}
597+
} else {
598+
knownAlignment = determinePointerAlignmentTypedPointers(base, DL, AC, insertionPt);
599+
}
600+
601+
return knownAlignment >= desiredAlignmentLevel;
602+
}
603+
604+
bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(
605+
Function *F, Value *V, Value *&offset, unsigned int &argNumber, bool ignoreSyncBuffer,
606+
std::optional<llvm::Align> OriginalInstructionAlignment) {
578607
const DataLayout *DL = &F->getParent()->getDataLayout();
579608

580609
AssumptionCache *AC = getAC(F);
@@ -626,9 +655,8 @@ bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(Function *F,
626655
// guaranteed to be DW-aligned.)
627656
//
628657
// Note that implicit arg is always aligned.
629-
bool isAlignedPointee = arg->isImplicitArg()
630-
? true
631-
: determinePointerAlignment(base, *DL, AC, F->getEntryBlock().getFirstNonPHI()) >= 4;
658+
auto insertionPt = F->getEntryBlock().getFirstNonPHI();
659+
bool isAlignedPointee = determinePointerAlignment(arg, base, *DL, V, AC, insertionPt, OriginalInstructionAlignment);
632660

633661
// If m_hasBufferOffsetArg is true, the offset argument is added to
634662
// the final offset to make it definitely positive. Thus skip checking
@@ -657,6 +685,7 @@ bool StatelessToStateful::pointerIsPositiveOffsetFromKernelArgument(Function *F,
657685
updateArgInfo(arg, gepProducesPositivePointer);
658686
}
659687
}
688+
660689
if ((m_hasBufferOffsetArg || (gepProducesPositivePointer && isAlignedPointee)) &&
661690
getOffsetFromGEP(F, GEPs, argNumber, arg->isImplicitArg(), offset)) {
662691
return true;
@@ -922,12 +951,14 @@ void StatelessToStateful::promote() {
922951
resAllocMD->uavsNumType += m_promotionMap.size();
923952
}
924953

925-
void StatelessToStateful::addToPromotionMap(Instruction &I, Value *Ptr) {
954+
void StatelessToStateful::addToPromotionMap(Instruction &I, Value *Ptr,
955+
std::optional<llvm::Align> OriginalInstructionAlignment = std::nullopt) {
926956
Value *offset = nullptr;
927957
unsigned baseArgNumber = 0;
928958

929-
bool isPromotable = m_promotionMap.size() < maxPromotionCount &&
930-
pointerIsPositiveOffsetFromKernelArgument(m_F, Ptr, offset, baseArgNumber, true);
959+
bool isPromotable =
960+
m_promotionMap.size() < maxPromotionCount &&
961+
pointerIsPositiveOffsetFromKernelArgument(m_F, Ptr, offset, baseArgNumber, true, OriginalInstructionAlignment);
931962

932963
if (isPromotable) {
933964
InstructionInfo II(&I, Ptr, offset);
@@ -1026,7 +1057,7 @@ void StatelessToStateful::visitCallInst(CallInst &I) {
10261057

10271058
void StatelessToStateful::visitLoadInst(LoadInst &I) {
10281059
Value *ptr = I.getPointerOperand();
1029-
addToPromotionMap(I, ptr);
1060+
addToPromotionMap(I, ptr, I.getAlign());
10301061

10311062
// check if there's non-kernel-arg load/store
10321063
if (IGC_IS_FLAG_ENABLED(DumpHasNonKernelArgLdSt) && ptr != nullptr && !pointerIsFromKernelArgument(*ptr)) {
@@ -1038,7 +1069,7 @@ void StatelessToStateful::visitLoadInst(LoadInst &I) {
10381069

10391070
void StatelessToStateful::visitStoreInst(StoreInst &I) {
10401071
Value *ptr = I.getPointerOperand();
1041-
addToPromotionMap(I, ptr);
1072+
addToPromotionMap(I, ptr, I.getAlign());
10421073

10431074
if (IGC_IS_FLAG_ENABLED(DumpHasNonKernelArgLdSt) && ptr != nullptr && !pointerIsFromKernelArgument(*ptr)) {
10441075
ModuleMetaData *modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ class StatelessToStateful : public llvm::ModulePass, public llvm::InstVisitor<St
8282
bool getModuleUsesBindless();
8383

8484
void findPromotableInstructions();
85-
void addToPromotionMap(llvm::Instruction &I, llvm::Value *Ptr);
85+
void addToPromotionMap(llvm::Instruction &I, llvm::Value *Ptr,
86+
std::optional<llvm::Align> OriginalInstructionAlignment);
8687

8788
void promote();
8889
void promoteInstruction(InstructionInfo &InstInfo);
@@ -106,7 +107,8 @@ class StatelessToStateful : public llvm::ModulePass, public llvm::InstVisitor<St
106107
// ignoreSyncBuffer - when set to true, return false directly if V is from the implicit kernel
107108
// argument "sync buffer". sync buffer must be stateless access in ZEBinary path so cannot be promoted.
108109
bool pointerIsPositiveOffsetFromKernelArgument(llvm::Function *F, llvm::Value *V, llvm::Value *&offset,
109-
unsigned int &argNumber, bool ignoreSyncBuffer);
110+
unsigned int &argNumber, bool ignoreSyncBuffer,
111+
std::optional<llvm::Align> OriginalInstructionAlignment);
110112

111113
// Check if the given pointer value can be traced back to any kernel argument.
112114
// return the kernel argument if found, otherwise return nullptr.

IGC/Compiler/tests/StatelessToStateful/Bindful/buffer_image-ro-buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
2525

2626
%spirv.Image._void_1_0_0_0_0_0_0 = type opaque
2727

28-
define spir_kernel void @test(i32 addrspace(1)* %srcA, %spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)* %img, i32 addrspace(1)* %dst) {
28+
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, %spirv.Image._void_1_0_0_0_0_0_0 addrspace(1)* %img, i32 addrspace(1)* align 4 %dst) {
2929
entry:
3030
; BTI for image has been assigned by OCLBIConverter
3131
%color = call <4 x i32> @llvm.genx.GenISA.ldptr.v4i32.p196608f32.p196608f32(i32 0, i32 0, i32 0, i32 0, float addrspace(196608)* undef, float addrspace(196608)* null, i32 0, i32 0, i32 0)

IGC/Compiler/tests/StatelessToStateful/Bindful/buffer_image-wo_buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
2020

2121
%spirv.Image._void_1_0_0_0_0_0_1 = type opaque
2222

23-
define spir_kernel void @test(i32 addrspace(1)* %srcA, %spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)* %img, i32 addrspace(1)* %dst) {
23+
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, %spirv.Image._void_1_0_0_0_0_0_1 addrspace(1)* %img, i32 addrspace(1)* align 4 %dst) {
2424
entry:
2525
; BTI for image has been assigned by OCLBIConverter
2626
call void @llvm.genx.GenISA.typedwrite.p131072f32(float addrspace(131072)* null, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)

IGC/Compiler/tests/StatelessToStateful/Bindful/buffer_scalar_buffer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32"
1919
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
2020

21-
define spir_kernel void @test(i32 addrspace(1)* %srcA, i32 %srcB, i32 addrspace(1)* %dst) {
21+
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, i32 %srcB, i32 addrspace(1)* align 4 %dst) {
2222
entry:
2323
; CHECK: %[[VAL_A:[0-9]+]] = load i32, i32 addrspace(131072)* %{{.*}}, align 4
2424
%ptrA = getelementptr inbounds i32, i32 addrspace(1)* %srcA, i64 1

IGC/Compiler/tests/StatelessToStateful/Bindful/has_non_kernel_arg_LdSt.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
; CHECK: CheckModuleDebugify: PASS
2020

2121

22-
define spir_kernel void @func_b(i32 %n, i32 addrspace(1)* %r, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
22+
define spir_kernel void @func_b(i32 %n, i32 addrspace(1)* align 4 %r, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
2323
; CHECK-LABEL: @func_b(
2424
; CHECK-NEXT: entry:
2525
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[R:%.*]], i32 16, !dbg [[DBG104:![0-9]+]]

IGC/Compiler/tests/StatelessToStateful/Bindful/hoist_loads_typed_pointers.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
; RUN: igc_opt --typed-pointers %s -S -o - -igc-stateless-to-stateful-resolution | FileCheck %s
1111

12-
define spir_kernel void @func_with_phinode_1(i1 %n, i32 addrspace(1)* %r, <4 x i32> addrspace(1)* %otp, i64 %idx1, i64 %idx2, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
12+
define spir_kernel void @func_with_phinode_1(i1 %n, i32 addrspace(1)* align 4 %r, <4 x i32> addrspace(1)* %otp, i64 %idx1, i64 %idx2, <8 x i32> %r0, <8 x i32> %payloadHeader, i8* %privateBase, i8 addrspace(1)* %s2, i8 addrspace(1)* %s3, i32 %s4, i32 %s5, i32 %bufferOffset) #0 {
1313
bb1:
1414
%add.ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %r, i64 16
1515
%add.ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %r, i64 128

IGC/Compiler/tests/StatelessToStateful/Bindful/simd_block_read.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32"
1717
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
1818

19-
define spir_kernel void @test(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
19+
define spir_kernel void @test(i32 addrspace(1)* align 4 %in, i32 addrspace(1)* align 4 %out) {
2020
entry:
2121
%ptrIn = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 1
2222
; CHECK: %[[V:[0-9]+]] = call i32 @llvm.genx.GenISA.simdBlockRead.i32.p131072i32(i32 addrspace(131072)* %{{.*}})

IGC/Compiler/tests/StatelessToStateful/Bindful/simd_block_write.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32"
1717
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
1818

19-
define spir_kernel void @test(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
19+
define spir_kernel void @test(i32 addrspace(1)* align 4 %in, i32 addrspace(1)* align 4 %out) {
2020
entry:
2121
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
2222
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 1

IGC/Compiler/tests/StatelessToStateful/Bindful/unused_buffer_argument.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
; CHECK: target datalayout = {{.*}}-p131072:32:32:32-p131073:32:32:32-p131074:32:32:32"
2121
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
2222

23-
define spir_kernel void @test(i32 addrspace(1)* %srcA, i32 addrspace(1)* %srcB, i32 addrspace(1)* %srcC, i32 addrspace(1)* %dst) {
23+
define spir_kernel void @test(i32 addrspace(1)* align 4 %srcA, i32 addrspace(1)* align 4 %srcB, i32 addrspace(1)* align 4 %srcC, i32 addrspace(1)* align 4 %dst) {
2424
entry:
2525
%ptrA = getelementptr inbounds i32, i32 addrspace(1)* %srcA, i64 1
2626
; CHECK: %[[VAL_A:[0-9]+]] = load i32, i32 addrspace(131072)* %{{.*}}, align 4

0 commit comments

Comments
 (0)