Skip to content

Commit

Permalink
Pass to convert 2D block intrinsics into constituents.
Browse files Browse the repository at this point in the history
The 2D block IO intrinsics are opaque to the compiler and identical
payloads cannot be optimized by the compiler. Intrinsics have been
introduced to overcome this problem by allowing the 2D intrinsics to be
broken up into constituent stages (payload, set X and Y relative to
payload, IO operation).

This PR adds a new pass, `Decompose2DBlockFuncs`, which converts the 2D
IO intrinsics into the constituents, allowing subsequent passes to
optimize the code. At the current stage, the payload intrinsics can be
hoisted above loops by LICM, and can be merged by EarlyCSE and others.

The `LSC2DBlockSetAddrPayloadField` intrinsics have a mode in which the
IO location in memory is specified exactly (relative to the payload),
and another mode in which the intrinsic increments the value. When a
payload is used by the `LSC2DBlockSetAddrPayloadField` intrinsic in the
accumulator mode, the payload cannot be hoisted. Because of this, I have
given the `LSC2DBlockCreateAddrPayload` default attributes of
`writeonly`, which is conservative for when the SetAddr intrinsic is in
accumulator mode.

The `LSC2DBlockSetAddrPayloadField` intrinsic added by the
`Decompose2DBlockFuncs` is always in the mode in which it explicitly
specifies the memory location. Since we have this guarantee, the related
payload is described as not accessing memory, since the dependencies are
all properly accounted for by the data dependency; this allows the
desired optimizations to function.

The 2D block intrinsics are not decomposed when they do not fall within
a loop, or if the payload would have loop-dependent parameters.
  • Loading branch information
aaronkintel authored and igcbot committed Jul 17, 2024
1 parent 71165b1 commit 82a6156
Show file tree
Hide file tree
Showing 15 changed files with 726 additions and 4 deletions.
7 changes: 7 additions & 0 deletions IGC/AdaptorOCL/UnifyIROCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ SPDX-License-Identifier: MIT
#include "Compiler/Optimizer/OpenCLPasses/BfloatFuncs/BfloatFuncsResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/DpasFuncs/DpasFuncsResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/LSCFuncs/LSCFuncsResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/Decompose2DBlockFuncs/Decompose2DBlockFuncs.hpp"
#include "Compiler/Optimizer/OpenCLPasses/NamedBarriers/NamedBarriersResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/JointMatrixFuncsResolutionPass/JointMatrixFuncsResolutionPass.h"
#include "Compiler/Optimizer/OpenCLPasses/RayTracing/ResolveOCLRaytracingBuiltins.hpp"
Expand Down Expand Up @@ -583,6 +584,12 @@ static void CommonOCLBasedPasses(OpenCLProgramContext* pContext)
// Break down the intrinsics into smaller operations (eg. fmuladd to fmul add)
mpm.add(new BreakdownIntrinsicPass());

// Break down 2D block intrinsics. Should be before a call to LICM. Mostly
// useful when LICM is enabled, so we will consider only that case
if (IGC_IS_FLAG_ENABLED(allowLICM)) {
mpm.add(createDecompose2DBlockFuncsPass());
}

{
if(IGC_IS_FLAG_ENABLED(EnableConstantPromotion))
{
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ SPDX-License-Identifier: MIT
#include "Compiler/Optimizer/OpenCLPasses/NamedBarriers/NamedBarriersResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/StackOverflowDetection/StackOverflowDetection.hpp"
#include "Compiler/Optimizer/OpenCLPasses/LSCFuncs/LSCFuncsResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/Decompose2DBlockFuncs/Decompose2DBlockFuncs.hpp"
#include "Compiler/CISACodeGen/GenerateFrequencyData.hpp"
#include "AdaptorCommon/RayTracing/RTStackFormat.h"
#include "DeSSA.hpp"
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ void initializeTransformUnmaskedFunctionsPassPass(llvm::PassRegistry&);
void initializeIndirectCallOptimizationPass(llvm::PassRegistry&);
void initializePromoteInt8TypePass(llvm::PassRegistry&);
void initializeDpasFuncsResolutionPass(llvm::PassRegistry&);
void initializeDecompose2DBlockFuncsPass(llvm::PassRegistry&);
void initializeLSCFuncsResolutionPass(llvm::PassRegistry&);
void initializeConvertMSAAPayloadTo16BitPass(llvm::PassRegistry&);
void initializeInterfaceOptimizationPass(llvm::PassRegistry&);
Expand Down
4 changes: 4 additions & 0 deletions IGC/Compiler/Optimizer/OpenCLPasses/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_subdirectory(KernelArgs)
add_subdirectory(KernelFunctionCloning)
add_subdirectory(BufferBoundsChecking)
add_subdirectory(LSCFuncs)
add_subdirectory(Decompose2DBlockFuncs)
add_subdirectory(LocalBuffers)
add_subdirectory(NamedBarriers)
add_subdirectory(NontemporalLoadsAndStoresInAssert)
Expand Down Expand Up @@ -66,6 +67,7 @@ set(IGC_BUILD__SRC__Optimizer_OpenCLPasses_All
${IGC_BUILD__SRC__OpenCLPasses_BreakConstantExpr}
${IGC_BUILD__SRC__OpenCLPasses_BreakdownIntrinsic}
${IGC_BUILD__SRC__OpenCLPasses_CorrectlyRoundedDivSqrt}
${IGC_BUILD__SRC__OpenCLPasses_Decompose2DBlockFuncs}
${IGC_BUILD__SRC__OpenCLPasses_DeviceEnqueueFuncs}
${IGC_BUILD__SRC__OpenCLPasses_DisableLoopUnrollOnRetry}
${IGC_BUILD__SRC__OpenCLPasses_DpasFuncs}
Expand Down Expand Up @@ -137,6 +139,7 @@ set(IGC_BUILD__HDR__Optimizer_OpenCLPasses_All
${IGC_BUILD__HDR__OpenCLPasses_KernelFunctionCloning}
${IGC_BUILD__HDR__OpenCLPasses_BufferBoundsChecking}
${IGC_BUILD__HDR__OpenCLPasses_LSCFuncs}
# Headers exported to this scope by the Decompose2DBlockFuncs subdirectory.
${IGC_BUILD__HDR__OpenCLPasses_Decompose2DBlockFuncs}
${IGC_BUILD__HDR__OpenCLPasses_LocalBuffers}
${IGC_BUILD__HDR__OpenCLPasses_NamedBarriers}
${IGC_BUILD__HDR__OpenCLPasses_NontemporalLoadsAndStoresInAssert}
Expand Down Expand Up @@ -192,6 +195,7 @@ set(IGC_BUILD_Compiler_OpenCLPasses_Groups
Compiler__OpenCLPasses_KernelFunctionCloning
Compiler__OpenCLPasses_BufferBoundsChecking
Compiler__OpenCLPasses_LSCFuncs
# Group name passed to igc_sg_register by the Decompose2DBlockFuncs subdirectory.
Compiler__OpenCLPasses_Decompose2DBlockFuncs
Compiler__OpenCLPasses_LocalBuffers
Compiler__OpenCLPasses_NamedBarriers
Compiler__OpenCLPasses_NontemporalLoadsAndStoresInAssert
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

# Build description for the Decompose2DBlockFuncs pass: lists its source and
# header, re-exports both lists to the parent directory scope, and hands the
# files to igc_sg_register.

# Add this directory to the include search path.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")

# Directory-local file lists.
set(IGC_BUILD__SRC__Decompose2DBlockFuncs
    "${CMAKE_CURRENT_SOURCE_DIR}/Decompose2DBlockFuncs.cpp"
  )
set(IGC_BUILD__HDR__Decompose2DBlockFuncs
    "${CMAKE_CURRENT_SOURCE_DIR}/Decompose2DBlockFuncs.hpp"
  )

# Publish the same lists to the parent scope under the OpenCLPasses-prefixed
# names.
set(IGC_BUILD__SRC__OpenCLPasses_Decompose2DBlockFuncs ${IGC_BUILD__SRC__Decompose2DBlockFuncs} PARENT_SCOPE)
set(IGC_BUILD__HDR__OpenCLPasses_Decompose2DBlockFuncs ${IGC_BUILD__HDR__Decompose2DBlockFuncs} PARENT_SCOPE)

# NOTE(review): igc_sg_register is a project helper not shown here; presumably
# it registers these files as a source group under the given name -- confirm.
igc_sg_register(
    Compiler__OpenCLPasses_Decompose2DBlockFuncs
    "Decompose2DBlockFuncs"
    FILES
      ${IGC_BUILD__SRC__Decompose2DBlockFuncs}
      ${IGC_BUILD__HDR__Decompose2DBlockFuncs}
  )
Loading

0 comments on commit 82a6156

Please sign in to comment.