[3rdparty] Bump cutlass_fpA_intB_gemm to fix SM90 build (#18291)

MasterJH5574 · MasterJH5574 · commit d8e3e1a0a565 · 2025-09-09T09:02:26.000-04:00
This PR fixes an SM90 build issue when CUTLASS is enabled.
The build failed because a source file included a CUTLASS header file
that has been removed since CUTLASS 4. Simply removing the header
fixes the build issue.
diff --git a/3rdparty/cutlass_fpA_intB_gemm b/3rdparty/cutlass_fpA_intB_gemm
@@ -1 +1 @@
-Subproject commit c633ae800283627a62e69e064d05a28ff13d380a
+Subproject commit 6ad91366619e20129c5f77d02c82098d13b287a5
diff --git a/tests/python/codegen/test_gpu_codegen_allreduce.py b/tests/python/codegen/test_gpu_codegen_allreduce.py
@@ -14,13 +14,14 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import numpy as np
+import pytest
+import tvm_ffi
+
 import tvm
 import tvm.testing
-import numpy as np
 from tvm.script import tir as T
 
-import pytest
-
 
 @T.prim_func
 def reduce(a: T.handle, b: T.handle, d1: T.int32, d2: T.int32, d3: T.int32) -> None:
@@ -96,7 +97,11 @@ def optional_metal_compile_callback(define_metal_compile_callback):
 
         @tvm.register_global_func(name, override=True)
         def compile_metal(src, target):
-            return tvm.contrib.xcode.compile_metal(src, sdk="macosx")
+            from tvm.contrib.xcode import (  # pylint: disable=import-outside-toplevel
+                compile_metal,
+            )
+
+            return compile_metal(src, sdk="macosx")
 
     yield