Commit 9828698 (message: "wip")
1 parent: 5d01fd1

File tree: 2 files changed (+14 -4 lines)

python/tvm/contrib/cutlass/gen_tensor_op.py

Lines changed: 0 additions & 3 deletions
@@ -765,9 +765,6 @@ def get_batch_on_arg(arg_name, arg_shape):
             and int(annotations["arch"]) >= 80
         )
 
-        print(int(attrs["head_dim"]) <= 256, int(attrs["head_dim"]) % 8 == 0, int(attrs["head_dim"]) == int(attrs["head_dim_value"]),int(annotations["arch"]) >= 80, annotations["ret_dtype"] == "float16", "bias" not in attrs, int(annotations["arch"]) >= 80)
-
-
         if use_flash:
             headers.append("flash.h")
             attrs["is_causal"] = int(annotations["custom_mask_type"]) > 0

tests/python/relax/test_codegen_cutlass.py

Lines changed: 14 additions & 1 deletion
@@ -2005,10 +2005,23 @@ def main(
                 R.output(lv6_1)
             return lv6_1
 
+    q_np = np.random.randn(4, 16, 32, 8).astype("float16")
+    k_np = np.random.randn(4, 16, 1, 8).astype("float16")
+    v_np = np.random.randn(4, 16, 1, 8).astype("float16")
+    args = [q_np, k_np, v_np]
+    ref = build_and_run(Module, args, "llvm", legalize=True)
+    print(ref)
+
+    return
+
     Module["main"] = rewrite_attention(Module["main"])
     mod = partition_for_cutlass(Module)
     codegen_pass = relax.transform.RunCodegen({"cutlass": {"sm": 80, "find_first_valid": True}})
-    print(codegen_pass(mod))
+    mod = codegen_pass(mod)
+
+    out = build_and_run(Module, args, "cuda")
+    print(ref)
+
 
 if __name__ == "__main__":
     # tvm.testing.main()
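
The test hunk computes an LLVM reference for small fp16 q/k/v inputs and then, as WIP scaffolding, returns early; the CUDA branch below it currently ends with a second print(ref) rather than a comparison. A minimal sketch of the check the finished test would presumably perform once the early return is removed (the helper name and tolerances are assumptions, not from the diff):

import numpy as np

def check_against_reference(out, ref, rtol=1e-2, atol=1e-2):
    # fp16 attention kernels accumulate rounding error, so cross-backend
    # checks typically use loose tolerances.
    np.testing.assert_allclose(out, ref, rtol=rtol, atol=atol)

It would be used in place of the trailing print(ref), e.g. check_against_reference(out, ref) after out = build_and_run(Module, args, "cuda").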
