Skip to content

Commit

Permalink
Translate genx block2d VC intrinsics into internal ones
Browse files Browse the repository at this point in the history
.
  • Loading branch information
vmustya authored and igcbot committed Jul 16, 2024
1 parent f58188f commit 3ad9e4a
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 11 deletions.
55 changes: 44 additions & 11 deletions IGC/VectorCompiler/lib/GenXOpts/CMTrans/GenXTranslateIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class GenXTranslateIntrinsics final
Value *translateLscAtomic(CallInst &I) const;
Value *translateLscLoadStore(CallInst &I) const;
Value *translateLscLoadStoreBlock2D(CallInst &I) const;
Value *translateLscLoadStore2DDesc(CallInst &I) const;
Value *translateLscTyped(CallInst &I) const;
};
} // namespace
Expand Down Expand Up @@ -149,6 +150,13 @@ void GenXTranslateIntrinsics::visitCallInst(CallInst &I) const {
case GenXIntrinsic::genx_lsc_store2d_stateless:
NewI = translateLscLoadStoreBlock2D(I);
break;
case GenXIntrinsic::genx_lsc_load_2d_ugm_desc:
case GenXIntrinsic::genx_lsc_load_2d_ugm_desc_transpose:
case GenXIntrinsic::genx_lsc_load_2d_ugm_desc_vnni:
case GenXIntrinsic::genx_lsc_prefetch_2d_ugm_desc:
case GenXIntrinsic::genx_lsc_store_2d_ugm_desc:
NewI = translateLscLoadStore2DDesc(I);
break;
case GenXIntrinsic::genx_lsc_load_merge_quad_typed_bti:
case GenXIntrinsic::genx_lsc_prefetch_quad_typed_bti:
case GenXIntrinsic::genx_lsc_store_quad_typed_bti:
Expand Down Expand Up @@ -581,6 +589,41 @@ GenXTranslateIntrinsics::translateLscLoadStoreBlock2D(CallInst &I) const {
return NewI;
}

// Rewrites a genx 2D block load/prefetch/store call that carries a descriptor
// operand into a call to the corresponding vc-internal intrinsic. The operands
// of \p I are forwarded as-is and the new call is created right before \p I.
// Returns the new call; \p I itself is neither replaced nor erased here.
Value *GenXTranslateIntrinsics::translateLscLoadStore2DDesc(CallInst &I) const {
  const auto SrcID = GenXIntrinsic::getGenXIntrinsicID(&I);
  LLVM_DEBUG(dbgs() << "Translate: " << I << "\n");

  // One-to-one mapping from the genx intrinsic ID to the internal one.
  auto DstID = vc::InternalIntrinsic::not_any_intrinsic;
  switch (SrcID) {
  default:
    // Only the five IDs listed below are expected to be dispatched here.
    IGC_ASSERT_UNREACHABLE();
  case GenXIntrinsic::genx_lsc_load_2d_ugm_desc:
    DstID = vc::InternalIntrinsic::lsc_load_2d_ugm_desc;
    break;
  case GenXIntrinsic::genx_lsc_load_2d_ugm_desc_transpose:
    DstID = vc::InternalIntrinsic::lsc_load_2d_ugm_desc_transpose;
    break;
  case GenXIntrinsic::genx_lsc_load_2d_ugm_desc_vnni:
    DstID = vc::InternalIntrinsic::lsc_load_2d_ugm_desc_vnni;
    break;
  case GenXIntrinsic::genx_lsc_prefetch_2d_ugm_desc:
    DstID = vc::InternalIntrinsic::lsc_prefetch_2d_ugm_desc;
    break;
  case GenXIntrinsic::genx_lsc_store_2d_ugm_desc:
    DstID = vc::InternalIntrinsic::lsc_store_2d_ugm_desc;
    break;
  }

  // The operand list is reused both to resolve the declaration of the
  // internal intrinsic and as the argument list of the new call.
  SmallVector<Value *, 10> Operands(I.args());
  auto *Decl = vc::getAnyDeclarationForArgs(I.getModule(), DstID, I.getType(),
                                            Operands);

  IRBuilder<> IRB(&I);
  auto *Replacement = IRB.CreateCall(Decl, Operands);
  LLVM_DEBUG(dbgs() << "New intrinsic generated: " << *Replacement);

  return Replacement;
}

Value *GenXTranslateIntrinsics::translateLscTyped(CallInst &I) const {
auto IID = GenXIntrinsic::getGenXIntrinsicID(&I);
LLVM_DEBUG(dbgs() << "Translate: " << I << "\n");
Expand All @@ -603,18 +646,8 @@ Value *GenXTranslateIntrinsics::translateLscTyped(CallInst &I) const {
break;
}

SmallVector<Type *, 3> Types;

if (vc::InternalIntrinsic::isOverloadedRet(NewIID))
Types.push_back(I.getType());

for (unsigned Idx = 0; Idx < I.arg_size(); Idx++)
if (vc::InternalIntrinsic::isOverloadedArg(NewIID, Idx))
Types.push_back(I.getArgOperand(Idx)->getType());

auto *Func = vc::InternalIntrinsic::getInternalDeclaration(M, NewIID, Types);

SmallVector<Value *, 10> Args(I.args());
auto *Func = vc::getAnyDeclarationForArgs(M, NewIID, I.getType(), Args);
auto *NewI = Builder.CreateCall(Func, Args);
LLVM_DEBUG(dbgs() << "New intrinsic generated: " << *NewI);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2024 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================

; RUN: %opt %use_old_pass_manager% -GenXTranslateIntrinsics -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s

declare <16 x i32> @llvm.genx.lsc.load.2d.ugm.desc.v16i32.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <16 x i32>)
declare <32 x i16> @llvm.genx.lsc.load.2d.ugm.desc.v32i16.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <32 x i16>)
declare <16 x i32> @llvm.genx.lsc.load.2d.ugm.desc.transpose.v16i32.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <16 x i32>)
declare <64 x i8> @llvm.genx.lsc.load.2d.ugm.desc.vnni.v64i8.v2i8(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <64 x i8>)

declare void @llvm.genx.lsc.prefetch.2d.ugm.desc.v2i8.i64(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, i64)

declare void @llvm.genx.lsc.store.2d.ugm.desc.v2i8.v16i32(i1, <2 x i8>, i8, i16, i16, <16 x i32>, i32, i32, <16 x i32>)

; Exercises GenXTranslateIntrinsics on each of the genx.lsc.*.2d.ugm.desc
; intrinsics declared above. Every FileCheck directive below expects the same
; call to be re-emitted under the llvm.vc.internal.* name with unchanged
; operands and an unchanged result name.
define void @test(i64 %base, i32 %width, i32 %height, i32 %pitch, i32 %x, i32 %y) {
; Assemble the <16 x i32> descriptor operand: lanes 0-1 receive the two i32
; halves of %base (all other shuffle indices select from the undef operand),
; then lanes 2..6 are filled with width, height, pitch, x and y in that order.
%vbase = bitcast i64 %base to <2 x i32>
%1 = shufflevector <2 x i32> %vbase, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
%2 = insertelement <16 x i32> %1, i32 %width, i32 2
%3 = insertelement <16 x i32> %2, i32 %height, i32 3
%4 = insertelement <16 x i32> %3, i32 %pitch, i32 4
%5 = insertelement <16 x i32> %4, i32 %x, i32 5
%desc = insertelement <16 x i32> %5, i32 %y, i32 6

; Lane 7 of the descriptor is rewritten before each call. The trailing comment
; on each insertelement gives the block shape that constant presumably encodes
; (TODO confirm the exact bit layout against the intrinsic definition).

; Plain load, <16 x i32> result.
%desc.1 = insertelement <16 x i32> %desc, i32 263, i32 7 ; 8x2
; CHECK: %load = call <16 x i32> @llvm.vc.internal.lsc.load.2d.ugm.desc.v16i32.v2i8(i1 true, <2 x i8> <i8 1, i8 2>, i8 1, i16 8, i16 2, <16 x i32> %desc.1, i32 0, i32 0, <16 x i32> undef)
%load = call <16 x i32> @llvm.genx.lsc.load.2d.ugm.desc.v16i32.v2i8(i1 true, <2 x i8> <i8 1, i8 2>, i8 1, i16 8, i16 2, <16 x i32> %desc.1, i32 0, i32 0, <16 x i32> undef)

; Plain load again, this time with a <32 x i16> result and the third operand set to 2.
%desc.2 = insertelement <16 x i32> %desc.1, i32 65799, i32 7 ; 2x8x2
; CHECK: %load.a2 = call <32 x i16> @llvm.vc.internal.lsc.load.2d.ugm.desc.v32i16.v2i8(i1 true, <2 x i8> <i8 2, i8 1>, i8 2, i16 8, i16 2, <16 x i32> %desc.2, i32 0, i32 0, <32 x i16> undef)
%load.a2 = call <32 x i16> @llvm.genx.lsc.load.2d.ugm.desc.v32i16.v2i8(i1 true, <2 x i8> <i8 2, i8 1>, i8 2, i16 8, i16 2, <16 x i32> %desc.2, i32 0, i32 0, <32 x i16> undef)

; Transpose variant of the load.
%desc.3 = insertelement <16 x i32> %desc.2, i32 1793, i32 7 ; 1x2x8
; CHECK: %load.t = call <16 x i32> @llvm.vc.internal.lsc.load.2d.ugm.desc.transpose.v16i32.v2i8(i1 true, <2 x i8> <i8 5, i8 1>, i8 1, i16 2, i16 8, <16 x i32> %desc.3, i32 0, i32 0, <16 x i32> undef)
%load.t = call <16 x i32> @llvm.genx.lsc.load.2d.ugm.desc.transpose.v16i32.v2i8(i1 true, <2 x i8> <i8 5, i8 1>, i8 1, i16 2, i16 8, <16 x i32> %desc.3, i32 0, i32 0, <16 x i32> undef)

; VNNI variant of the load, <64 x i8> result.
%desc.4 = insertelement <16 x i32> %desc.3, i32 3843, i32 7 ; 1x4x16
; CHECK: %load.v = call <64 x i8> @llvm.vc.internal.lsc.load.2d.ugm.desc.vnni.v64i8.v2i8(i1 true, <2 x i8> <i8 5, i8 2>, i8 1, i16 4, i16 16, <16 x i32> %desc.4, i32 0, i32 0, <64 x i8> undef)
%load.v = call <64 x i8> @llvm.genx.lsc.load.2d.ugm.desc.vnni.v64i8.v2i8(i1 true, <2 x i8> <i8 5, i8 2>, i8 1, i16 4, i16 16, <16 x i32> %desc.4, i32 0, i32 0, <64 x i8> undef)

; Prefetch variant: returns void and takes an i64 undef as its last operand.
%desc.5 = insertelement <16 x i32> %desc.4, i32 263, i32 7 ; 1x8x2
; CHECK: call void @llvm.vc.internal.lsc.prefetch.2d.ugm.desc.v2i8.i64(i1 true, <2 x i8> <i8 1, i8 2>, i8 1, i16 8, i16 2, <16 x i32> %desc.5, i32 0, i32 0, i64 undef)
call void @llvm.genx.lsc.prefetch.2d.ugm.desc.v2i8.i64(i1 true, <2 x i8> <i8 1, i8 2>, i8 1, i16 8, i16 2, <16 x i32> %desc.5, i32 0, i32 0, i64 undef)

; Store variant: reuses %desc.5 and passes the earlier %load result as its last operand.
; CHECK: call void @llvm.vc.internal.lsc.store.2d.ugm.desc.v2i8.v16i32(i1 true, <2 x i8> <i8 4, i8 3>, i8 1, i16 8, i16 2, <16 x i32> %desc.5, i32 0, i32 0, <16 x i32> %load)
call void @llvm.genx.lsc.store.2d.ugm.desc.v2i8.v16i32(i1 true, <2 x i8> <i8 4, i8 3>, i8 1, i16 8, i16 2, <16 x i32> %desc.5, i32 0, i32 0, <16 x i32> %load)
ret void
}

0 comments on commit 3ad9e4a

Please sign in to comment.