diff --git a/lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp b/lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp index e9e77333b0c8..376b9a8938fb 100644 --- a/lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp +++ b/lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp @@ -510,7 +510,7 @@ LinearLayout sharedToLinearLayoutNoLeadingOffset(ArrayRef shape, int vec = shared.getVec(); int perPhase = shared.getPerPhase(); int maxPhase = shared.getMaxPhase(); - bases2D.push_back({row, vec * ((row / perPhase) % maxPhase)}); + bases2D.push_back({row, (vec * ((row / perPhase) % maxPhase)) % numCols}); } LinearLayout ctaLayout = LinearLayout({{S("offset"), bases2D}}, {rowDimName, colDimName}); diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py index 57a375fb2a08..f26bffd6906f 100644 --- a/python/test/unit/language/test_core.py +++ b/python/test/unit/language/test_core.py @@ -4822,11 +4822,6 @@ def compute_scratch_buffer_shape(src_layout, dst_layout, shape): @pytest.mark.parametrize("interm_layout", intermediate_layouts) @pytest.mark.parametrize("dst_layout", layouts) def test_convert2d(M, N, src_layout, interm_layout, dst_layout, dtype, device): - if (M == 1 or N == 1) and interm_layout: - # TODO(jlebar): These OOB accesses don't even hit an assert in the - # compiler, and some of them return the wrong result instead of - # crashing! - pytest.skip("Out of bound access when maxPhase > 1") if str(src_layout) == str(dst_layout): pytest.skip() if is_hip(): diff --git a/unittest/Dialect/TritonGPU/LinearLayoutConversionsTest.cpp b/unittest/Dialect/TritonGPU/LinearLayoutConversionsTest.cpp index 3a6e03fc8eaa..0d12f4bcfc7a 100644 --- a/unittest/Dialect/TritonGPU/LinearLayoutConversionsTest.cpp +++ b/unittest/Dialect/TritonGPU/LinearLayoutConversionsTest.cpp @@ -729,5 +729,14 @@ TEST_F(LinearLayoutConversionsTest, LeadingOffset_8x64_1_8_32b) { /*requireSurjective=*/false)); } +TEST_F(LinearLayoutConversionsTest, Shared1DSwizzle) { + EXPECT_EQ(toLinearLayout( + {64, 1}, shared(2, 2, 4, false, {1, 1}, {1, 1}, {1, 0}, {1, 0}), + /*elemBitWidth=*/16), + LinearLayout::identity1D(64, S("offset"), S("dim0")) * + LinearLayout::identity1D(1, S("offset"), S("dim1")) * + LinearLayout::identity1D(1, S("block"), S("dim0"))); +} + } // anonymous namespace } // namespace mlir::triton::gpu