Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion include/triton/Dialect/Triton/IR/TritonOpInterfaces.td
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def DotOpInterface : OpInterface<"DotOpInterface"> {
auto aTy = cast<ShapedType>($_op.getA().getType());
auto bTy = cast<ShapedType>($_op.getB().getType());
auto cTy = cast<ShapedType>($_op->getOperand(2).getType());
auto dTy = cast<ShapedType>($_op.getD().getType());
auto aShape = aTy.getShape();
auto bShape = bTy.getShape();
auto cShape = cTy.getShape();
Expand Down
1 change: 0 additions & 1 deletion lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,6 @@ struct MapElementwiseOpConversion
}

auto &scalarOp = op.getScalarOp();
Region &parent = *rewriter.getBlock()->getParent();

auto nOutputs = op.getNumResults();
SmallVector<Value> scalarOutputs(nOutputs * nElems);
Expand Down
1 change: 0 additions & 1 deletion lib/Dialect/Gluon/Transforms/ResolveAutoEncodings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class GluonResolveAutoEncodingsPass
using BaseT::BaseT;

void runOnOperation() override {
MLIRContext *context = &getContext();
ModuleOp m = getOperation();

// Do layout inference
Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/Triton/IR/Ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ static SmallVector<T> repeatInterleave(const SmallVectorImpl<T> &vs,
SmallVector<T> result;
result.reserve(vs.size() * nRepeat);
for (auto v : vs)
for (auto _ : llvm::seq(nRepeat))
for (int i = 0; i < nRepeat; ++i)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:(

result.push_back(v);
return result;
}
Expand Down
3 changes: 0 additions & 3 deletions lib/Dialect/TritonGPU/IR/Dialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4356,9 +4356,6 @@ bool triton::gpu::areLayoutsEquivalent(ArrayRef<int64_t> shape,
}

bool triton::gpu::isInnermostContiguous(MemDescType type, unsigned numElems) {
Attribute enc = type.getEncoding();
MLIRContext *ctx = enc.getContext();

LinearLayout actual = toLinearLayout(type);

// Flatten actual outs in reverse order to produce a row-major flattening
Expand Down
2 changes: 0 additions & 2 deletions lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,6 @@ LinearLayout chooseScaledWmmaScaleLayout(
MLIRContext *ctx, int dotOperandIdx, ArrayRef<int64_t> dotOperandShape,
unsigned wmmaMDim, unsigned wmmaNDim, bool isTransposed,
unsigned scaleFactor, LinearLayout ctaLayout, CGAEncodingAttr cgaLayout) {
using basisT = std::vector<std::vector<int32_t>>;
unsigned rank = dotOperandShape.size();
bool hasBatchDim = rank == 3;
auto outDimNames = standardOutDimNames(ctx, rank);
Expand Down Expand Up @@ -1568,7 +1567,6 @@ LinearLayout chooseScaledMfmaScaleLayout(MLIRContext *ctx, int dotOperandIdx,
unsigned mfmaMDim,
ArrayRef<unsigned> tilesPerWarp,
ArrayRef<unsigned> warpsPerCTA) {
using basisT = std::vector<std::vector<int32_t>>;
unsigned rank = dotOperandShape.size();
auto order = mlir::triton::gpu::getMatrixOrder(rank, /*rowMajor=*/true);
auto standardOutDims = standardOutDimNames(ctx, rank);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ class CombineTensorSelectAndIfPass
CombineTensorSelectAndIfPass> {
public:
void runOnOperation() override {
MLIRContext *context = &getContext();
ModuleOp m = getOperation();
canonicalizeSelectUsersInSCFIf(m);

Expand Down
3 changes: 0 additions & 3 deletions lib/Dialect/TritonGPU/Transforms/FuseNestedLoops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -709,9 +709,6 @@ static void fuseOneLevel(LoopNestNode *parent, mlir::DominanceInfo &domInfo) {
Value curI = fused.getRegionIterArg(1);
Value i;

auto lenInnersIt =
ValueRange(fused.getRegionIterArgs()).begin() + lenInnersStartIdx;

ArrayRef<BlockArgument> ivars = fused.getRegionIterArgs().slice(ivarStartIdx);
auto bodyOutsIt =
ValueRange(fused.getRegionIterArgs()).begin() + innerOutsStartIdx;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ class TMEMAllocWithUnusedInit

LogicalResult matchAndRewrite(triton::nvidia_gpu::TMEMAllocOp op,
PatternRewriter &rewriter) const override {
MLIRContext *ctx = op.getContext();
if (op.getSrc() == nullptr)
return failure();
SmallVector<Operation *> users(op.getResult().getUsers().begin(),
Expand Down
6 changes: 2 additions & 4 deletions lib/Dialect/TritonGPU/Transforms/OptimizeThreadLocality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,6 @@ class TritonGPUOptimizeThreadLocalityPass
auto srcEncoding = srcType.getEncoding();
assert(isa<triton::gpu::BlockedEncodingAttr>(srcEncoding) &&
"Thread locality optimization only supports blocked encoding");
auto elemsPerThread =
triton::gpu::getElemsPerThread(srcType)[reduce.getAxis()];
auto rank = srcShape.size();
// create new layouts
auto blocked3d = getThreadLocalityOptimizedEncoding(reduce);
Expand Down Expand Up @@ -354,8 +352,8 @@ class TritonGPUOptimizeThreadLocalityPass
// create new accum update
auto newUpdate = createUpdate(builder, newLoop, newReduce, oldUpdate);
// create new yield
auto newYield = createYield(builder, newLoop, oldYield,
newUpdate->getResult(0), blockArgNum);
createYield(builder, newLoop, oldYield, newUpdate->getResult(0),
blockArgNum);
// create post loop reduction on the original reduce axis
auto newReduce2 = createPostLoopReduce(builder, newLoop, reduce);
// add convert_layout to get back to original layout, the result layout
Expand Down
5 changes: 0 additions & 5 deletions lib/Dialect/TritonGPU/Transforms/Pipeliner/LowerLoops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ void createTMAAsyncCopy(

Operation *firstUse = getFirstUseOfPipelinedOp({loadOp}, forOp, schedule);
assert(firstUse && "LoadOp has no users");
Attribute sharedMemorySpace =
ttg::SharedMemorySpaceAttr::get(forOp.getContext());

builder.setInsertionPoint(loadOp);
builder.setStageCluster(schedule[loadOp]);
Expand Down Expand Up @@ -957,9 +955,6 @@ void multibufferTensorMemory(scf::ForOp forOp, CoarseSchedule &schedule,

scf::ForOp lowerMMA(ttng::MMAv5OpInterface mma, scf::ForOp forOp,
CoarseSchedule &schedule) {
auto isLoadToBePipelined = [&](Operation *op) {
return schedule[mma].first > schedule[op].first;
};
Value alloc = mma.getAccumulator();

int mmaSelfLatency = getSelfLatencyFromAttr(mma.getOperation());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,10 @@ std::unique_ptr<Graph> buildGraph(Operation *region) {

// init iter args
{
size_t idx = 0;
for (auto operand : forOp.getInitArgs()) {
for (size_t idx = 0; idx < forOp.getInitArgs().size(); ++idx) {
auto iter_arg_node = node->getDefines()[idx + 1];
operands[std::make_pair(op, idx + 3)] =
InputPort(iter_arg_node, 0);
idx++;
}
}

Expand Down
66 changes: 27 additions & 39 deletions lib/Dialect/TritonInstrument/IR/FunctionBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ void FunctionBuilder::createSetWaitingCall(ImplicitLocOpBuilder &b, Value mbar,
createCallToCachedFunction(
b, "set_waiting", args,
/*assertInfo=*/std::nullopt, {barriersType, waitingType},
[barriersType, waitingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
[waitingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value baseThread = entryBlock->getArgument(2);
Expand Down Expand Up @@ -537,7 +537,7 @@ void FunctionBuilder::createClearWaitingCall(ImplicitLocOpBuilder &b,
createCallToCachedFunction(
b, "clear_waiting", args,
/*assertInfo=*/std::nullopt, {barriersType, waitingType},
[barriersType, waitingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
[waitingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value baseThread = entryBlock->getArgument(2);
Expand Down Expand Up @@ -785,8 +785,7 @@ void FunctionBuilder::createVerifyBarrierCanInitCall(ImplicitLocOpBuilder &b,
createCallToCachedFunction(
b, "verify_barrier_can_init", args, assertInfo,
{barriersType, barrierStatesType},
[barriersType, barrierStatesType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[barrierStatesType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -841,8 +840,7 @@ void FunctionBuilder::createVerifyBarrierInitializedCall(
createCallToCachedFunction(
b, "verify_barrier_initialized", args, assertInfo,
{barriersType, barrierStatesType},
[barriersType, barrierStatesType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[barrierStatesType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -899,8 +897,7 @@ void FunctionBuilder::createInitBarrierStateCall(ImplicitLocOpBuilder &b,
createCallToCachedFunction(
b, "init_barrier_state", args,
/*assertInfo=*/std::nullopt, {barriersType, barrierStatesType},
[barriersType, barrierStatesType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[barrierStatesType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value count = entryBlock->getArgument(2);
Expand Down Expand Up @@ -975,8 +972,8 @@ void FunctionBuilder::createInvalidateBarrierStateCall(ImplicitLocOpBuilder &b,
b, "invalidate_barrier_state", args,
/*assertInfo=*/std::nullopt,
{barriersType, barrierStatesType, waitingType},
[barriersType, barrierStatesType, waitingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[barrierStatesType, waitingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1058,8 +1055,7 @@ void FunctionBuilder::createVerifyBarrierArriveCall(
createCallToCachedFunction(
b, "verify_barrier_arrive", args, assertInfo,
{barriersType, barrierStatesType},
[barriersType, barrierStatesType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[barrierStatesType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value count = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1176,8 +1172,7 @@ void FunctionBuilder::createUpdateBarrierStateCall(
createCallToCachedFunction(
b, "update_barrier_state", args,
/*assertInfo=*/std::nullopt, {barriersType, barrierStatesType},
[barriersType, barrierStatesType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[barrierStatesType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value count = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1307,8 +1302,7 @@ void FunctionBuilder::createSetWriteVisibilityCall(
b, "set_write_visibility", args,
/*assertInfo=*/std::nullopt,
{buffersType, writeVisibilityType, (uint64_t)memType},
[buffersType, writeVisibilityType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[writeVisibilityType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value bufOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1368,8 +1362,7 @@ void FunctionBuilder::createSetReadVisibilityCall(
b, "set_read_visibility", args,
/*assertInfo=*/std::nullopt,
{buffersType, readVisibilityType, (uint64_t)memType},
[buffersType, readVisibilityType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[readVisibilityType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value bufOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1434,8 +1427,7 @@ void FunctionBuilder::createClearWriteTrackingCall(ImplicitLocOpBuilder &b,
b, "clear_write_tracking", args,
/*assertInfo=*/std::nullopt,
{buffersType, writeTrackingType, (uint64_t)memType},
[buffersType, writeTrackingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[writeTrackingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value bufOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1491,8 +1483,7 @@ void FunctionBuilder::createClearReadVisibilityCall(ImplicitLocOpBuilder &b,
b, "clear_read_visibility", args,
/*assertInfo=*/std::nullopt,
{buffersType, readVisibilityType, (uint64_t)memType},
[buffersType, readVisibilityType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[readVisibilityType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value bufOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1549,8 +1540,7 @@ void FunctionBuilder::createClearReadTrackingCall(ImplicitLocOpBuilder &b,
b, "clear_read_tracking", args,
/*assertInfo=*/std::nullopt,
{buffersType, readTrackingType, (uint64_t)memType},
[buffersType, readTrackingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[readTrackingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value bufOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1614,8 +1604,8 @@ void FunctionBuilder::createTrackVisibleWritesCall(ImplicitLocOpBuilder &b,
b, "track_visible_writes", args,
/*assertInfo=*/std::nullopt,
{barriersType, writeVisibilityType, writeTrackingType, (uint64_t)memType},
[barriersType, writeVisibilityType,
writeTrackingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
[writeVisibilityType, writeTrackingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1699,8 +1689,8 @@ void FunctionBuilder::createTrackVisibleReadsCall(ImplicitLocOpBuilder &b,
b, "track_visible_reads", args,
/*assertInfo=*/std::nullopt,
{barriersType, readVisibilityType, readTrackingType, (uint64_t)memType},
[barriersType, readVisibilityType,
readTrackingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
[readVisibilityType, readTrackingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1791,7 +1781,7 @@ void FunctionBuilder::createTrackBarrierWriteForBufferCall(
/*assertInfo=*/std::nullopt,
{barriersType, buffersType, writeTrackingType, barrierWriteRecipientsType,
(uint64_t)memType, (uint64_t)diagonalEffectRecipientCTAs},
[barriersType, buffersType, writeTrackingType, barrierWriteRecipientsType,
[writeTrackingType, barrierWriteRecipientsType,
diagonalEffectRecipientCTAs](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Expand Down Expand Up @@ -1909,8 +1899,8 @@ void FunctionBuilder::createClearBarrierWriteTrackingCall(
/*assertInfo=*/std::nullopt,
{barriersType, writeTrackingType, barrierWriteRecipientsType,
(uint64_t)memType},
[barriersType, writeTrackingType, barrierWriteRecipientsType](
ImplicitLocOpBuilder &fb, Block *entryBlock) {
[writeTrackingType, barrierWriteRecipientsType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -1989,8 +1979,7 @@ void FunctionBuilder::createClearBarrierReadTrackingCall(
b, "clear_barrier_read_tracking", args,
/*assertInfo=*/std::nullopt,
{barriersType, readTrackingType, (uint64_t)memType},
[barriersType, readTrackingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[readTrackingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -2063,9 +2052,8 @@ void FunctionBuilder::createTransferVisibleWritesCall(
/*assertInfo=*/std::nullopt,
{barriersType, writeVisibilityType, writeTrackingType,
barrierWriteRecipientsType, (uint64_t)memType},
[barriersType, writeVisibilityType, writeTrackingType,
barrierWriteRecipientsType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
[writeVisibilityType, writeTrackingType, barrierWriteRecipientsType](
ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -2170,8 +2158,8 @@ void FunctionBuilder::createTransferVisibleReadsCall(
b, "transfer_visible_reads", args,
/*assertInfo=*/std::nullopt,
{barriersType, readVisibilityType, readTrackingType, (uint64_t)memType},
[barriersType, readVisibilityType,
readTrackingType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
[readVisibilityType, readTrackingType](ImplicitLocOpBuilder &fb,
Block *entryBlock) {
Value mbarOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down Expand Up @@ -2627,7 +2615,7 @@ void FunctionBuilder::createStageAccessForCommitCall(
createCallToCachedFunction(
b, "stage_access_for_commit", args,
/*assertInfo=*/std::nullopt, {buffersType, commitsType},
[buffersType, commitsType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
[commitsType](ImplicitLocOpBuilder &fb, Block *entryBlock) {
Value bufOffset = entryBlock->getArgument(0);
Value lengthVal = entryBlock->getArgument(1);
Value pred = entryBlock->getArgument(2);
Expand Down
3 changes: 1 addition & 2 deletions lib/Dialect/TritonNvidiaGPU/Transforms/InterleaveTMem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ findBufferAccessMemdescSubview(Operation *subview) {
src = indexOp.getSrc();
shape = to_vector(indexOp.getType().getShape());
offsets = {indexOp.getIndex()};
for (auto i : llvm::seq(std::max<int>(0, shape.size() - 1)))
for (int i = 0, e = std::max<int>(0, shape.size() - 1); i < e; ++i)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely worse, in my opinion. You could add `[[maybe_unused]]` to the declaration of `i` if you want the build to pass without the unused-variable error.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I apologise, I didn't realize people would have a particular preference. I've generally leaned away from using llvm::seq because it's a less common pattern and can be slightly less efficient (e.g. https://godbolt.org/z/6sEhaP7c6).

That said, neither of those is a particularly compelling reason, so if you'd like I can put up a PR to change these back.

offsets.push_back(arith::ConstantIntOp::create(builder, loc, 0, 32));
} else {
auto subsliceOp = cast<ttg::MemDescSubsliceOp>(subview);
Expand Down Expand Up @@ -261,7 +261,6 @@ struct TritonNvidiaGPUInterleaveTMemPass
TritonNvidiaGPUInterleaveTMemPass>::TritonNvidiaGPUInterleaveTMemPassBase;

void runOnOperation() override {
MLIRContext *context = &getContext();
ModuleOp m = getOperation();
SmallVector<std::pair<Operation *, Value>> opsToSink;
m.walk([&](Operation *op) {
Expand Down
1 change: 0 additions & 1 deletion lib/Dialect/TritonNvidiaGPU/Transforms/MMALowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ struct TCGen5MMAScaleSharedToTmemConversion

LogicalResult matchAndRewrite(TCGen5MMAScaledOp op,
PatternRewriter &rewriter) const override {
MLIRContext *context = op->getContext();
auto aScaleType = op.getAScale().getType();
auto bScaleType = op.getBScale().getType();
if (aScaleType.getShape() != aScaleType.getAllocShape() ||
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ class TritonNvidiaGPUOptimizeDescriptorEncodingPass
using BaseT::BaseT;

void runOnOperation() override {
MLIRContext *context = &getContext();
ModuleOp m = getOperation();
NvidiaGPUAssignDescriptorMemoryLayouts assignMemoryLayouts;
assignMemoryLayouts.assignMemoryLayouts(m);
Expand Down
Loading
Loading