Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/llvm-hash.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b5cc222d7429fe6f18c787f633d5262fac2e676f
b74e588e1f460eb48ceb1a30cf8ac870b7537dcc
11 changes: 5 additions & 6 deletions lib/Conversion/TritonToTritonGPU/TritonGPUConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,19 @@ TritonGPUTypeConverter::TritonGPUTypeConverter(MLIRContext *context,
// This will create newArg, and map(origArg, newArg)
addArgumentMaterialization([&](OpBuilder &builder,
RankedTensorType tensorType, ValueRange inputs,
Location loc) -> std::optional<Value> {
Location loc) -> Value {
llvm_unreachable("Argument rematerialization should not happen in Triton "
"-> TritonGPU conversion");
return std::nullopt;
return {};
});

// If the origValue still has live user(s), use this to
// convert origValue to newValue
addSourceMaterialization([&](OpBuilder &builder, RankedTensorType tensorType,
ValueRange inputs,
Location loc) -> std::optional<Value> {
ValueRange inputs, Location loc) -> Value {
llvm_unreachable("Source rematerialization should not happen in Triton -> "
"TritonGPU Conversion");
return std::nullopt;
return {};
});

// This will be called when (desiredType != newOperandType)
Expand All @@ -79,7 +78,7 @@ TritonGPUTypeConverter::TritonGPUTypeConverter(MLIRContext *context,
ValueRange inputs, Location loc) {
auto cast =
builder.create<triton::gpu::ConvertLayoutOp>(loc, tensorType, inputs);
return std::optional<Value>(cast.getResult());
return cast.getResult();
});
}

Expand Down
2 changes: 1 addition & 1 deletion test/TritonGPU/amd/amd-convert-buffer-ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 :
%1 = arith.muli %0, %c1024_i32 : i32
%sub = arith.subi %1, %c128_i32 : i32
%cmp = arith.cmpi sgt, %sub, %c0_i32 : i32
"llvm.intr.assume"(%cmp) : (i1) -> ()
llvm.intr.assume %cmp : i1
%2 = tt.splat %sub : i32 -> tensor<1024xi32, #blocked>
%3 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked>
// CHECK: %[[offset:.*]] = arith.addi
Expand Down