diff --git a/include/triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h b/include/triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h index a5f65e77a6d4..5203ffff9d8a 100644 --- a/include/triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h +++ b/include/triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h @@ -88,6 +88,12 @@ class ElementwiseOpConversionBase : public ConvertOpToLLVMPattern { // encoding not available return resultVals; Attribute baseEncoding = encoding; + if (isa(baseEncoding)) + // TODO: this logic seems incorrect for mfma layout. Skip for now. + // We saw mismatches for some flash-attention tests on AMD backend. + // Note that this logic works for sliced layout whose parent is + // mfma layout. Therefore, this is not combined with the following check. + return resultVals; while (auto sliced = dyn_cast(baseEncoding)) baseEncoding = sliced.getParent(); if (isa(baseEncoding)) {