From 6f1b39277ac53b4ac0157de1049f2409647928ec Mon Sep 17 00:00:00 2001 From: root Date: Thu, 30 Apr 2026 16:08:21 +0000 Subject: [PATCH 1/3] Enable in_thread_transpose by default for gfx1201 --- third_party/amd/backend/compiler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/amd/backend/compiler.py b/third_party/amd/backend/compiler.py index b65836a52810..ca4acf32f67b 100644 --- a/third_party/amd/backend/compiler.py +++ b/third_party/amd/backend/compiler.py @@ -25,7 +25,8 @@ def is_pingpong_schedule_enabled(arch, use_async_copy): def is_in_thread_transpose_enabled(arch): - return (arch == "gfx942") if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose + return (arch in ("gfx942", + "gfx1201")) if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose def is_async_copy_enabled(arch): From 6530a81f71598528cacf612ea9ae4338356cd08e Mon Sep 17 00:00:00 2001 From: root Date: Fri, 1 May 2026 13:21:13 +0000 Subject: [PATCH 2/3] Enable for all GFX12 --- third_party/amd/backend/compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/amd/backend/compiler.py b/third_party/amd/backend/compiler.py index ca4acf32f67b..b61b98452add 100644 --- a/third_party/amd/backend/compiler.py +++ b/third_party/amd/backend/compiler.py @@ -25,8 +25,8 @@ def is_pingpong_schedule_enabled(arch, use_async_copy): def is_in_thread_transpose_enabled(arch): - return (arch in ("gfx942", - "gfx1201")) if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose + return (arch == "gfx942" + or "gfx120" in arch) if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose def is_async_copy_enabled(arch): From b3197171f6a130a4ed93351c23b46dae81323aef Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 1 May 2026 12:06:38 -0700 Subject: [PATCH 3/3] Format a bit --- third_party/amd/backend/compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/amd/backend/compiler.py b/third_party/amd/backend/compiler.py index b61b98452add..c7db2a9027bd 100644 --- a/third_party/amd/backend/compiler.py +++ b/third_party/amd/backend/compiler.py @@ -25,8 +25,8 @@ def is_pingpong_schedule_enabled(arch, use_async_copy): def is_in_thread_transpose_enabled(arch): - return (arch == "gfx942" - or "gfx120" in arch) if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose + return (arch == "gfx942" or "gfx120" in arch) \ + if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose def is_async_copy_enabled(arch):