
Commit 7e9ffab

Squashed commit of the following:
e8c4405  cpplint
7275684  Squashed commit of the following:
3eba93a  Squashed commit of the following:
928668b  Reworking GetTensorizeloopmapping
a80e639  fixed bad merge
776c04b  Squashed commit of the following:
f499e60  Squashed commit of the following:
dcb628d  Squashed commit of the following:
dd956ec  add conv2d relay test
7291e47  add dense and bmm test
a957dde  conv2d topi test working
6d53c50  add mattr kind
3761bd7  update dot prod intrin
e781ee1  black
b2208a7  cleanup
f8bc306  [ROCM] Support dp4a on AMDGPU by sdot4 intrinsic
0225f2b  share op strategy between cuda and rocm
762c7e8  fixed rocm batch_matmul strategy for mixed i8i8i32
ce53e8d  add rocm sdot4 TIR intrin
f4562b9  rocm sdot4 works
6cc6280  more wip
0602f4a  Squashed commit of the following:
65b8bcf  [WIP] adding DP4A support to rocm
4f8f308  Squashed commit of the following:
1711be3  fixed condition for real
8a48fb5  Revert "Skip applying sch_rule when both ann and sch_rule are defined" (reverts commit 4915c6a)
daea033  [Metaschedule] Support rocm and spirv
eb0cae2  dp4a works
4915c6a  Skip applying sch_rule when both ann and sch_rule are defined
7b3d71c  fixed intrin description
7666cd7  add DP4A intrin
7086bdb  works
db34397  more hack to tensorize loop mapping to make resnet50 e2e work
2409674  wip support pad + qnn.conv2d folding
613cb7e  hack to tensorize loop mapping to make conv2d work
9e4f9df  wrap tensorize with try/catch
d4b496d  revert change in task_scheduler.cc
476129b  try / catch in ThreadedApply
d8226ff  filter out invalid candidate
2632899  try graceful exit in parallel_for_dynamic
9d6741c  [QNN] Fix broadcast for invalid axis
6ccde09  refactor rewrite_tensorize
2ce2066  allow missing schedule_rule in post order apply
3a69353  refactor rewrite_tensorize
43e0b2f  rewrite_vnni -> rewrite_tensorize
823797e  VNNI -> WithIntrin
4284a47  introduce TileForIntrin
b87ef32  move TilingwithTensorIntrin to auto_tensorize.cc
2fc118b  clean up headers
d8b2aa3  clean up using namespace
eb05d25  refactored init
5e6b0a0  compiled
2b8c430  wip MultiLevelTiling refactor
7c21a9f  function doc string not supported by tvmscript
40f9742  update vnni intrin name
4814f82  Merge branch 'tir-tensor-intrin' into auto-tensorize-vnni
07bbb38  more lint fix
15e60b4  black
7a757fe  pylint
9a3e508  simplify import
d8e43ec  use vectorlow/high in arm intrin
625cd27  fixed offset factor
69e72b6  Add ARM intrin
1351fde  use buffer syntax sugar
0ced85f  rename vnni.py to x86.py
38a5aca  add VNNI unittest
88b763e  refactored existing test using VNNI intrin
711a007  [TIR] Add VNNI dot product intrinsic for TIR
e0c5eb8  merge fix
b171748  Merge branch 'tir-tensor-intrin' into auto-tensorize-vnni
71fe3bd  move tensor intrin under tir
0c51bad  remove log
fed910e  more revert
7150aff  revert stmt_functor change
155107b  refactored RewriteVNNI a bit
ca15255  add RewriteVNNI
dc9f71d  vectorized init loop
fcc31ee  tensorize worked
2b53437  TilingwithTensorIntrin works
86baa31  Ported auto-tensorization code
82e152a  more lint fix
88d9bdd  black
31fe7eb  pylint
7876754  simplify import
56f2e9a  use vectorlow/high in arm intrin
995cc8d  fixed offset factor
86bbd49  Add ARM intrin
120fd96  use buffer syntax sugar
0f0682d  rename vnni.py to x86.py
f88c31e  add VNNI unittest
6cc8009  refactored existing test using VNNI intrin
11a29c7  [TIR] Add VNNI dot product intrinsic for TIR
e370ed4  [Hexagon] Less aggressive adb state clean up (apache#10909) (Chris Sullivan)
         * Only remove port forwarding applied in a session to avoid affecting global adb state.
         * Send SIGINT to attempt to allow remote server to cleanup and unbind port in deconstruction
         * Only attempt to forward ports not in use by adb or the system.
ce8f83e  [hexagon] 'add_hvx' test to explore HVX usage. (apache#10604) (Christian Convey)
         Add a unit test named 'add_hvx' to explore how various scheduling choices,
         tensor sizes, etc. impact efficient usage of Hexagon HVX units.
0602f4a  Squashed commit of the following:
65b8bcf  [WIP] adding DP4A support to rocm
4f8f308  Squashed commit of the following:
1711be3  fixed condition for real
8a48fb5  Revert "Skip applying sch_rule when both ann and sch_rule are defined" (reverts commit 4915c6a)
daea033  [Metaschedule] Support rocm and spirv
eb0cae2  dp4a works
4915c6a  Skip applying sch_rule when both ann and sch_rule are defined
7b3d71c  fixed intrin description
7666cd7  add DP4A intrin
7086bdb  works
db34397  more hack to tensorize loop mapping to make resnet50 e2e work
2409674  wip support pad + qnn.conv2d folding
613cb7e  hack to tensorize loop mapping to make conv2d work
9e4f9df  wrap tensorize with try/catch
d4b496d  revert change in task_scheduler.cc
476129b  try / catch in ThreadedApply
d8226ff  filter out invalid candidate
2632899  try graceful exit in parallel_for_dynamic
9d6741c  [QNN] Fix broadcast for invalid axis
6ccde09  refactor rewrite_tensorize
2ce2066  allow missing schedule_rule in post order apply
3a69353  refactor rewrite_tensorize
43e0b2f  rewrite_vnni -> rewrite_tensorize
823797e  VNNI -> WithIntrin
4284a47  introduce TileForIntrin
b87ef32  move TilingwithTensorIntrin to auto_tensorize.cc
2fc118b  clean up headers
d8b2aa3  clean up using namespace
eb05d25  refactored init
5e6b0a0  compiled
2b8c430  wip MultiLevelTiling refactor
7c21a9f  function doc string not supported by tvmscript
40f9742  update vnni intrin name
4814f82  Merge branch 'tir-tensor-intrin' into auto-tensorize-vnni
07bbb38  more lint fix
15e60b4  black
7a757fe  pylint
9a3e508  simplify import
d8e43ec  use vectorlow/high in arm intrin
625cd27  fixed offset factor
69e72b6  Add ARM intrin
1351fde  use buffer syntax sugar
0ced85f  rename vnni.py to x86.py
38a5aca  add VNNI unittest
88b763e  refactored existing test using VNNI intrin
711a007  [TIR] Add VNNI dot product intrinsic for TIR
e0c5eb8  merge fix
b171748  Merge branch 'tir-tensor-intrin' into auto-tensorize-vnni
71fe3bd  move tensor intrin under tir
0c51bad  remove log
fed910e  more revert
7150aff  revert stmt_functor change
155107b  refactored RewriteVNNI a bit
ca15255  add RewriteVNNI
dc9f71d  vectorized init loop
fcc31ee  tensorize worked
2b53437  TilingwithTensorIntrin works
86baa31  Ported auto-tensorization code
82e152a  more lint fix
88d9bdd  black
31fe7eb  pylint
7876754  simplify import
56f2e9a  use vectorlow/high in arm intrin
995cc8d  fixed offset factor
86bbd49  Add ARM intrin
120fd96  use buffer syntax sugar
0f0682d  rename vnni.py to x86.py
f88c31e  add VNNI unittest
6cc8009  refactored existing test using VNNI intrin
11a29c7  [TIR] Add VNNI dot product intrinsic for TIR
88cbe5e  add doc
e8155a5  add tests
70ac9c1  [Metaschedule] Add TilingwithTensorIntrin
1 parent 0070b6c commit 7e9ffab

File tree

include/tvm/meta_schedule/schedule_rule.h
include/tvm/tir/stmt.h
python/tvm/meta_schedule/postproc/__init__.py
python/tvm/meta_schedule/postproc/rewrite_tensorize.py
python/tvm/meta_schedule/schedule_rule/__init__.py
python/tvm/meta_schedule/schedule_rule/multi_level_tiling.py
src/meta_schedule/postproc/rewrite_tensorize.cc
src/meta_schedule/schedule_rule/multi_level_tiling.cc
src/meta_schedule/schedule_rule/multi_level_tiling.h
src/meta_schedule/schedule_rule/multi_level_tiling_with_intrin.cc

10 files changed: +295 −23 lines

include/tvm/meta_schedule/schedule_rule.h

Lines changed: 10 additions & 0 deletions
@@ -150,6 +150,16 @@ class ScheduleRule : public runtime::ObjectRef {
                                               Optional<Array<Integer>> vector_load_lens,     //
                                               Optional<Map<String, ObjectRef>> reuse_read,   //
                                               Optional<Map<String, ObjectRef>> reuse_write);
+
+  TVM_DLL static ScheduleRule MultiLevelTilingWithIntrin(
+      String intrin_name,                           //
+      String structure,                             //
+      Optional<Array<String>> tile_binds,           //
+      Optional<Integer> max_innermost_factor,       //
+      Optional<Array<Integer>> vector_load_lens,    //
+      Optional<Map<String, ObjectRef>> reuse_read,  //
+      Optional<Map<String, ObjectRef>> reuse_write);
+
   /*!
    * \brief Create a rule: add-rfactor to some blocks if needed
    * \param max_jobs_per_core The maximum number of jobs to be launched per CPU core. It sets the

include/tvm/tir/stmt.h

Lines changed: 5 additions & 0 deletions
@@ -1509,6 +1509,11 @@ constexpr const char* meta_schedule_unroll_explicit = "meta_schedule.unroll_expl
 /*! \brief Mark auto-unroll setting on the block. */
 constexpr const char* meta_schedule_unroll_implicit = "meta_schedule.unroll_implicit";
 
+/*!
+ * \brief Mark that the block should be further rewritten using tensorization.
+ */
+constexpr const char* meta_schedule_auto_tensorize = "meta_schedule.auto_tensorize";
+
 /*!
  * \brief Check if attr_key is a pragma key extension
  * \param attr_key The attr key to be compared
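The new constant is the annotation key through which the schedule rule and the postprocessor communicate: a rule marks a block with the name of the tensor intrinsic to apply, and RewriteTensorize (added below) later removes the mark and performs the actual tensorization. A minimal sketch of that round trip from the Python side, using a toy dot-product workload; the intrinsic name "my_dot_product_intrin" is a placeholder for whatever TensorIntrin has actually been registered:

import tvm
from tvm import te, tir

# A toy int8 dot-product PrimFunc, just to have a block to annotate.
A = te.placeholder((16, 4), name="A", dtype="uint8")
B = te.placeholder((16, 4), name="B", dtype="int8")
k = te.reduce_axis((0, 4), name="k")
C = te.compute(
    (16, 16),
    lambda i, j: te.sum(A[i, k].astype("int32") * B[j, k].astype("int32"), axis=k),
    name="C",
)
sch = tir.Schedule(te.create_prim_func([A, B, C]))
block = sch.get_block("C")

# What a schedule rule does: leave a marker naming the intrinsic to apply later.
sch.annotate(block, ann_key="meta_schedule.auto_tensorize", ann_val="my_dot_product_intrin")

# What the postprocessor does afterwards: drop the marker, then tensorize.
sch.unannotate(block, ann_key="meta_schedule.auto_tensorize")
# sch.tensorize(block, "my_dot_product_intrin") would follow here, once the
# loops around the block have been tiled to match the intrinsic's description.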

python/tvm/meta_schedule/postproc/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -22,3 +22,4 @@
 from .rewrite_reduction_block import RewriteReductionBlock
 from .rewrite_unbound_block import RewriteUnboundBlock
 from .verify_gpu_code import VerifyGPUCode
+from .rewrite_tensorize import RewriteTensorize
python/tvm/meta_schedule/postproc/rewrite_tensorize.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""A postprocessor that tensorize related components."""
+
+from tvm._ffi.registry import register_object
+from .. import _ffi_api
+from .postproc import Postproc
+import tvm.tir.tensor_intrin
+
+
+@register_object("meta_schedule.RewriteTensorize")
+class RewriteTensorize(Postproc):
+    """A postprocessor that tensorize related components."""
+
+    def __init__(self, vectorize_init_loop=False) -> None:
+        self.__init_handle_by_constructor__(
+            _ffi_api.PostprocRewriteTensorize,  # type: ignore # pylint: disable=no-member
+            vectorize_init_loop
+        )
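As a rough usage sketch (outside a full tuning run), the postprocessor can be applied directly to a schedule whose blocks already carry the "meta_schedule.auto_tensorize" annotation, for example one produced by the MultiLevelTilingWithIntrin rule further down this page; `sch` here is assumed to be such a tir.Schedule:

from tvm.meta_schedule.postproc import RewriteTensorize

# vectorize_init_loop=True additionally vectorizes the loop of "init" blocks,
# which have no tensor intrinsic of their own.
postproc = RewriteTensorize(vectorize_init_loop=True)

# `sch` is assumed to be an annotated tir.Schedule (see the schedule rule below).
if postproc.apply(sch):
    print(sch.mod.script())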

python/tvm/meta_schedule/schedule_rule/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
 from .add_rfactor import AddRFactor
 from .auto_inline import AutoInline
 from .cross_thread_reduction import CrossThreadReduction
-from .multi_level_tiling import MultiLevelTiling, ReuseType
+from .multi_level_tiling import MultiLevelTiling, MultiLevelTilingWithIntrin, ReuseType
 from .parallel_vectorize_unroll import ParallelizeVectorizeUnroll
 from .random_compute_location import RandomComputeLocation
 from .schedule_rule import PyScheduleRule, ScheduleRule

python/tvm/meta_schedule/schedule_rule/multi_level_tiling.py

Lines changed: 47 additions & 0 deletions
@@ -82,3 +82,50 @@ def __init__(
             reuse_read.as_dict() if reuse_read is not None else None,
             reuse_write.as_dict() if reuse_write is not None else None,
         )
+
+
+@register_object("meta_schedule.MultiLevelTilingWithIntrin")
+class MultiLevelTilingWithIntrin(ScheduleRule):
+    """Multi-level tiling with reuse.
+
+    Parameters
+    ----------
+    structure : str
+        The tiling structure. Recommended:
+        - 'SSRSRS' on CPU
+        - 'SSSRRSRS' on GPU
+    tile_bind : Optional[List[str]]
+        For each level of tiles, which thread axis it is bound to. Recommended:
+        - None on CPU
+        - [blockIdx.x, vthread.x, threadIdx.x] on GPU
+    max_innermost_factor : Optional[int]
+        The maximum size of the innermost factor. None means no limit
+    vector_load_lens : Optional[List[int]]
+        The length of vector lane in vectorized cooperative fetching.
+        None means disable vectorization
+    reuse_read : Optional[ReuseType]
+        Data reuse configuration for reading. None means no reuse.
+    reuse_write : Optional[ReuseType]
+        Data reuse configuration for writing. None means no reuse.
+    """
+
+    def __init__(
+        self,
+        intrin_name: str,
+        structure: str,
+        tile_binds: Optional[List[str]] = None,
+        max_innermost_factor: Optional[int] = None,
+        vector_load_lens: Optional[List[int]] = None,
+        reuse_read: Optional[ReuseType] = None,
+        reuse_write: Optional[ReuseType] = None,
+    ) -> None:
+        self.__init_handle_by_constructor__(
+            _ffi_api.ScheduleRuleMultiLevelTilingWithIntrin,  # type: ignore # pylint: disable=no-member
+            intrin_name,
+            structure,
+            tile_binds,
+            max_innermost_factor,
+            vector_load_lens,
+            reuse_read.as_dict() if reuse_read is not None else None,
+            reuse_write.as_dict() if reuse_write is not None else None,
+        )
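A short construction sketch, assuming a dot-product TensorIntrin has already been registered; "my_dot_product_intrin" is a placeholder name (the C++ factory checks that it resolves to a registered intrinsic), and the reuse_write setting mirrors the usual CPU defaults of MultiLevelTiling:

from tvm.meta_schedule.schedule_rule import MultiLevelTilingWithIntrin, ReuseType

rule = MultiLevelTilingWithIntrin(
    intrin_name="my_dot_product_intrin",  # placeholder: must name a registered TensorIntrin
    structure="SSRSRS",                   # CPU-style tiling structure
    tile_binds=None,                      # no thread binding on CPU
    max_innermost_factor=64,
    vector_load_lens=None,
    reuse_read=None,
    reuse_write=ReuseType(req="may", levels=[1, 2], scope="global"),
)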
src/meta_schedule/postproc/rewrite_tensorize.cc

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <algorithm>
+
+#include "../utils.h"
+#include "tvm/runtime/container/base.h"
+
+namespace tvm {
+namespace meta_schedule {
+
+using tir::BlockRV;
+using tir::LoopRV;
+
+void ApplyTensorization(const tir::Schedule& sch, const String& func_name,
+                        const tir::PrimFuncNode* func, bool vectorize_init_loop) {
+  std::vector<std::pair<std::string, std::function<void(tir::BlockRV)>>> jobs;
+
+  tir::PostOrderVisit(func->body, [=, &jobs](const ObjectRef& obj) -> bool {
+    if (const auto* block = obj.as<tir::BlockNode>()) {
+      tir::StmtSRef block_sref = sch->GetSRef(block);
+      if (Optional<String> intrin_name =
+              tir::GetAnn<String>(block_sref, tir::attr::meta_schedule_auto_tensorize)) {
+        std::string block_name = block_sref->StmtAs<tir::BlockNode>()->name_hint;
+        if (block_name.find("init") == std::string::npos) {
+          jobs.emplace_back(block_name, [sch, intrin_name](tir::BlockRV block) {
+            try {
+              sch->Tensorize(block, intrin_name.value());
+            } catch (const std::exception& e) {
+              LOG(WARNING) << "Tensorize failed with error " << e.what();
+            }
+          });
+        } else if (vectorize_init_loop) {
+          jobs.emplace_back(block_name, [sch](tir::BlockRV block) {
+            Array<BlockRV> child_blocks = sch->GetChildBlocks(block);
+            ICHECK(child_blocks.size() == 1);
+            Array<LoopRV> init_loops = sch->GetLoops(child_blocks[0]);
+            ICHECK(init_loops.size() == 1);
+            sch->Vectorize(init_loops[0]);
+          });
+        }
+      }
+    }
+    return true;
+  });
+
+  for (auto kv : jobs) {
+    tir::BlockRV block = sch->GetBlock(kv.first, func_name);
+    sch->Unannotate(block, tir::attr::meta_schedule_auto_tensorize);
+    kv.second(block);
+  }
+}
+
+class RewriteTensorizeNode : public PostprocNode {
+ public:
+  void InitializeWithTuneContext(const TuneContext& context) final {}
+
+  bool Apply(const tir::Schedule& sch) final;
+
+  void VisitAttrs(tvm::AttrVisitor* v) {}
+
+  bool vectorize_init_loop = false;
+
+  static constexpr const char* _type_key = "meta_schedule.RewriteTensorize";
+  TVM_DECLARE_FINAL_OBJECT_INFO(RewriteTensorizeNode, PostprocNode);
+};
+
+bool RewriteTensorizeNode::Apply(const tir::Schedule& sch) {
+  for (const auto& kv : sch->mod()->functions) {
+    GlobalVar g_var = kv.first;
+    BaseFunc base_func = kv.second;
+    if (const tir::PrimFuncNode* prim_func = base_func.as<tir::PrimFuncNode>()) {
+      ApplyTensorization(sch, g_var->name_hint, prim_func, vectorize_init_loop);
+    }
+  }
+  return true;
+}
+
+Postproc RewriteTensorize(bool vectorize_init_loop) {
+  ObjectPtr<RewriteTensorizeNode> n = make_object<RewriteTensorizeNode>();
+  n->vectorize_init_loop = vectorize_init_loop;
+  return Postproc(n);
+}
+
+TVM_REGISTER_NODE_TYPE(RewriteTensorizeNode);
+TVM_REGISTER_GLOBAL("meta_schedule.PostprocRewriteTensorize").set_body_typed(RewriteTensorize);
+
+} // namespace meta_schedule
+} // namespace tvm
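To see how the pieces fit together, a rough end-to-end sketch: the schedule rule tiles and annotates, and this postprocessor then rewrites the annotation into actual Tensorize/Vectorize calls. `mod` and "my_dot_product_intrin" are placeholders, and the TuneContext/PostOrderApply wiring reflects the meta_schedule API of roughly this point in TVM's history (in a real tuning run the search strategy applies the postprocessors itself):

import tvm
from tvm.meta_schedule import TuneContext
from tvm.meta_schedule.postproc import RewriteTensorize
from tvm.meta_schedule.schedule_rule import MultiLevelTilingWithIntrin
from tvm.meta_schedule.space_generator import PostOrderApply

postproc = RewriteTensorize(vectorize_init_loop=True)
ctx = TuneContext(
    mod=mod,  # placeholder: an IRModule containing e.g. an int8 dense PrimFunc
    target=tvm.target.Target("llvm -mcpu=cascadelake"),
    task_name="auto-tensorize-sketch",
    sch_rules=[MultiLevelTilingWithIntrin("my_dot_product_intrin", structure="SSRSRS")],
    postprocs=[postproc],
)

space_gen = PostOrderApply()
space_gen.initialize_with_tune_context(ctx)
for sch in space_gen.generate_design_space(mod):
    # The rule left "meta_schedule.auto_tensorize" annotations behind; the
    # postprocessor turns them into tensorized (and vectorized init) blocks.
    postproc.apply(sch)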

src/meta_schedule/schedule_rule/multi_level_tiling.cc

Lines changed: 4 additions & 22 deletions
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "../utils.h"
+#include "tvm/meta_schedule/schedule_rule.h"
 
 namespace tvm {
 namespace tir {
@@ -260,28 +261,9 @@ ScheduleRule ScheduleRule::MultiLevelTiling(String structure, Optional<Array<Str
                                              Optional<Array<Integer>> vector_load_lens,
                                              Optional<Map<String, ObjectRef>> reuse_read,
                                              Optional<Map<String, ObjectRef>> reuse_write) {
-  ObjectPtr<MultiLevelTilingNode> n = make_object<MultiLevelTilingNode>();
-  n->structure = structure;
-  n->tile_binds = tile_binds.value_or({});
-  n->max_innermost_factor = max_innermost_factor.value_or(Integer(-1))->value;
-  n->vector_load_lens = vector_load_lens.defined()
-                            ? support::AsVector<Integer, int>(vector_load_lens.value())
-                            : std::vector<int>();
-  n->reuse_read_ = reuse_read.defined() ? ReuseConfig(reuse_read.value()) : ReuseConfig();
-  n->reuse_write_ = reuse_write.defined() ? ReuseConfig(reuse_write.value()) : ReuseConfig();
-  for (int i = 0, len = structure.size(); i < len; ++i) {
-    char c = structure.data()[i];
-    if (c == 'S') {
-      n->s_indices_.push_back(i);
-    } else if (c == 'R') {
-      n->r_indices_.push_back(i);
-    } else {
-      LOG(FATAL) << "ValueError: Invalid tiling structure: " << structure;
-    }
-  }
-  n->thread_warp_size_ = -1;
-  n->max_threads_per_block_ = -1;
-  return ScheduleRule(n);
+  auto node = MultiLevelTilingInitCommon<MultiLevelTilingNode>(
+      structure, tile_binds, max_innermost_factor, vector_load_lens, reuse_read, reuse_write);
+  return ScheduleRule(node);
 }
 
 TVM_REGISTER_NODE_TYPE(MultiLevelTilingNode);

src/meta_schedule/schedule_rule/multi_level_tiling.h

Lines changed: 30 additions & 0 deletions
@@ -181,6 +181,36 @@ class MultiLevelTilingNode : public ScheduleRuleNode {
   TVM_DECLARE_BASE_OBJECT_INFO(MultiLevelTilingNode, ScheduleRuleNode);
 };
 
+template <typename NodeType>
+ObjectPtr<NodeType> MultiLevelTilingInitCommon(String structure, Optional<Array<String>> tile_binds,
+                                               Optional<Integer> max_innermost_factor,
+                                               Optional<Array<Integer>> vector_load_lens,
+                                               Optional<Map<String, ObjectRef>> reuse_read,
+                                               Optional<Map<String, ObjectRef>> reuse_write) {
+  ObjectPtr<NodeType> n = make_object<NodeType>();
+  n->structure = structure;
+  n->tile_binds = tile_binds.value_or({});
+  n->max_innermost_factor = max_innermost_factor.value_or(Integer(-1))->value;
+  n->vector_load_lens = vector_load_lens.defined()
+                            ? support::AsVector<Integer, int>(vector_load_lens.value())
+                            : std::vector<int>();
+  n->reuse_read_ = reuse_read.defined() ? ReuseConfig(reuse_read.value()) : ReuseConfig();
+  n->reuse_write_ = reuse_write.defined() ? ReuseConfig(reuse_write.value()) : ReuseConfig();
+  for (int i = 0, len = structure.size(); i < len; ++i) {
+    char c = structure.data()[i];
+    if (c == 'S') {
+      n->s_indices_.push_back(i);
+    } else if (c == 'R') {
+      n->r_indices_.push_back(i);
+    } else {
+      LOG(FATAL) << "ValueError: Invalid tiling structure: " << structure;
+    }
+  }
+  n->thread_warp_size_ = -1;
+  n->max_threads_per_block_ = -1;
+  return n;
+}
+
 } // namespace meta_schedule
 } // namespace tvm

src/meta_schedule/schedule_rule/multi_level_tiling_with_intrin.cc

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "../utils.h"
+#include "auto_tensorize.h"
+#include "multi_level_tiling.h"
+
+namespace tvm {
+namespace meta_schedule {
+
+class MultiLevelTilingWithIntrinNode : public MultiLevelTilingNode {
+ protected:
+  virtual std::vector<State> ApplySubRules(std::vector<State> states) {
+    states = SubRule(std::move(states), [&](State state) {
+      state.block_rv = TileForIntrin(state.sch, state.block_rv, intrin_name);
+      return std::vector<State>(1, state);
+    });
+    return MultiLevelTilingNode::ApplySubRules(states);
+  }
+
+ public:
+  String intrin_name;
+
+  static constexpr const char* _type_key = "meta_schedule.MultiLevelTilingWithIntrin";
+  TVM_DECLARE_FINAL_OBJECT_INFO(MultiLevelTilingWithIntrinNode, MultiLevelTilingNode);
+};
+
+ScheduleRule ScheduleRule::MultiLevelTilingWithIntrin(
+    String intrin_name, String structure, Optional<Array<String>> tile_binds,
+    Optional<Integer> max_innermost_factor, Optional<Array<Integer>> vector_load_lens,
+    Optional<Map<String, ObjectRef>> reuse_read, Optional<Map<String, ObjectRef>> reuse_write) {
+  ICHECK(tir::TensorIntrin::Get(intrin_name).defined());
+  auto node = MultiLevelTilingInitCommon<MultiLevelTilingWithIntrinNode>(
+      structure, tile_binds, max_innermost_factor, vector_load_lens, reuse_read, reuse_write);
+  node->intrin_name = intrin_name;
+  return ScheduleRule(node);
+}
+
+TVM_REGISTER_NODE_TYPE(MultiLevelTilingWithIntrinNode);
+TVM_REGISTER_GLOBAL("meta_schedule.ScheduleRuleMultiLevelTilingWithIntrin")
+    .set_body_typed(ScheduleRule::MultiLevelTilingWithIntrin);
+
+} // namespace meta_schedule
+} // namespace tvm
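Note the ICHECK in the factory: the intrinsic must be registered with tir.TensorIntrin before the rule is constructed. A hedged sketch of that prerequisite; `dot_product_desc` and `dot_product_impl` stand in for TVMScript prim_funcs describing the computation and its hardware implementation (as in the VNNI/DP4A intrinsics referenced by the squashed commits above):

from tvm.tir import TensorIntrin
from tvm.meta_schedule.schedule_rule import MultiLevelTilingWithIntrin

# Placeholder desc/impl PrimFuncs; in practice these come from
# tvm.tir.tensor_intrin (e.g. the x86 VNNI or ROCm DP4A definitions).
TensorIntrin.register("my_dot_product_intrin", dot_product_desc, dot_product_impl)

# Only after registration does construction succeed (the ICHECK above passes).
rule = MultiLevelTilingWithIntrin("my_dot_product_intrin", structure="SSRSRS")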

0 commit comments
