diff --git a/python/src/gluon_ir.cc b/python/src/gluon_ir.cc index 30a9f15baa59..fa17eb1d6d2c 100644 --- a/python/src/gluon_ir.cc +++ b/python/src/gluon_ir.cc @@ -286,10 +286,16 @@ py::object layoutToGluon(Attribute layout) { auto kOffset = mlir::StringAttr::get(ctx, "offset"); auto kBlock = mlir::StringAttr::get(ctx, "block"); const auto &ll = paddedShared.getLinearComponent(); + auto outDims = llvm::to_vector(ll.getOutDimNames()); + + auto ofstBases = ll.getBases().lookup(kOffset); + auto ofstLL = triton::LinearLayout({{kOffset, ofstBases}}, outDims); + auto blkLL = divideLeft(ll, ofstLL); + assert(blkLL.has_value()); + auto blkBases = blkLL->getBases().lookup(kBlock); auto shape = toStdVector(ll.getOutDimSizes()); - return layouts.PaddedSharedLayout(intervalPaddingPairs, - ll.getBases().lookup(kOffset), - ll.getBases().lookup(kBlock), shape); + return layouts.PaddedSharedLayout(intervalPaddingPairs, ofstBases, blkBases, + shape); } else if (auto partitioned = dyn_cast(layout)) { py::object partitionLayout = diff --git a/python/test/gluon/test_frontend.py b/python/test/gluon/test_frontend.py index ca93e1d838f8..62f619253a7f 100644 --- a/python/test/gluon/test_frontend.py +++ b/python/test/gluon/test_frontend.py @@ -41,6 +41,7 @@ HIP_TARGET_GFX1250 = GPUTarget("hip", "gfx1250", 32) ALL_TARGETS = [AMPERE_TARGET, HOPPER_TARGET, BLACKWELL_TARGET, HIP_TARGET_RDNA4] +ALL_MULTICTA_TARGETS = [HOPPER_TARGET, BLACKWELL_TARGET, HIP_TARGET_GFX1250] def anonymize_ir(ir): @@ -3350,6 +3351,18 @@ def infer_layout_for_padded_shared_kernel(): ttgl.static_assert(reshaped.type.layout == ref_layout) +@gluon.jit +def test_convert_padded_shared_with_multicta_kernel(): + shape: ttgl.constexpr = [512, 128] + initial_order: ttgl.constexpr = [0, 1] + layout: ttgl.constexpr = ttgl.PaddedSharedLayout.with_identity_for(interval_padding_pairs=[[256, 16]], + cga_layout=[[0, 1]], shape=shape, + order=initial_order) + smem = ttgl.allocate_shared_memory(ttgl.int32, shape, layout) + reshaped = 
smem.permute((0, 1)) + ttgl.static_assert(reshaped.layout.cga_layout[0] == ttgl.constexpr([0, 1])) + + +@pytest.mark.parametrize("target", ALL_TARGETS) def test_infer_layout_for_padded_shared(target): # This test is used to test the conversion to gluon object PaddedSharedLayout from PaddedSharedEncodingAttr. @@ -3370,6 +3383,13 @@ def test_infer_layout_for_padded_shared(target): """) +@pytest.mark.parametrize("target", ALL_MULTICTA_TARGETS) +def test_convert_padded_shared_with_multicta(target): + # Make sure layoutToGluon() handles the CGA layout correctly when + # converting a PaddedSharedEncodingAttr to a PaddedSharedLayout object. + run_parser(test_convert_padded_shared_with_multicta_kernel, *make_args(num_ctas=2), target=target) + + @filecheck_test @gluon.jit def test_layout_zeros():