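wip: add multi-query attention rewrite test for CUTLASS partitioning (prints the partitioned module; no assertions yet)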

masahi · masahi · commit ab3572d852e2 · 2023-09-26T07:38:15.000Z
diff --git a/tests/python/relax/test_codegen_cutlass.py b/tests/python/relax/test_codegen_cutlass.py
@@ -1945,5 +1945,40 @@ def main(
     tvm.testing.assert_allclose(out, ref, rtol=1e-2, atol=1e-2)
 
 
+def test_attention_rewrite_multi_query():  # WIP: partitions the module below for CUTLASS and prints it; nothing is asserted yet.
+    @I.ir_module
+    class Module:
+        @R.function
+        def main(
+            q: R.Tensor((4, 16, 32, 8), dtype="float16"),
+            k_single: R.Tensor((4, 16, 1, 8), dtype="float16"),  # size 1 on axis 2, where q has 32
+            v_single: R.Tensor((4, 16, 1, 8), dtype="float16"),  # same shape as k_single
+        ) -> R.Tensor((4, 16, 32, 8), dtype="float16"):
+            with R.dataflow():
+                k = R.repeat(k_single, 32, axis=2)  # expand axis 2 from 1 to 32 so k matches q's shape
+                v = R.repeat(v_single, 32, axis=2)  # same expansion for v
+                # For q, k and v: swap axes 1 and 2, then merge the two leading axes (4 * 32 = 128).
+                lv = R.permute_dims(q, axes=[0, 2, 1, 3])
+                lv1 = R.reshape(lv, R.shape([128, 16, 8]))
+                lv2 = R.permute_dims(k, axes=[0, 2, 1, 3])
+                lv3 = R.reshape(lv2, R.shape([128, 16, 8]))
+                lv4 = R.permute_dims(v, axes=[0, 2, 1, 3])
+                lv5 = R.reshape(lv4, R.shape([128, 16, 8]))
+                # Attention core: q @ k^T, scale by the 0.5 constant, softmax over the last axis, then @ v.
+                lv6 = R.permute_dims(lv3, axes=[0, 2, 1])  # swap the last two axes of the reshaped k
+                lv7 = R.matmul(lv1, lv6, out_dtype="float16")
+                lv3_1 = R.const(0.5, "float16")
+                lv8 = R.multiply(lv7, lv3_1)
+                lv11 = R.nn.softmax(lv8, axis=2)
+                lv12 = R.matmul(lv11, lv5, out_dtype="float16")
+                lv13 = R.reshape(lv12, R.shape([4, 32, 16, 8]))  # split the merged 128 axis back into (4, 32)
+                lv6_1 = R.permute_dims(lv13, axes=[0, 2, 1, 3])  # swap axes 1 and 2 back to the declared (4, 16, 32, 8)
+                R.output(lv6_1)
+            return lv6_1
+    # NOTE(review): presumably the partitioned module should be compared against an expected module -- TODO replace the print with an assertion.
+    mod = partition_for_cutlass(Module)
+    print(mod)
+
 if __name__ == "__main__":
-    tvm.testing.main()
+    # tvm.testing.main()
+    test_attention_rewrite_multi_query()  # NOTE(review): WIP shortcut -- runs only this test in place of tvm.testing.main(); restore before merging.