From 53da9a17f648adc76aa74d76a7d8eb37ceeef5fd Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 17 Apr 2025 09:28:13 -0700 Subject: [PATCH 1/6] Hack to disable transpose scheduler --- csrc/scheduler/scheduler_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/scheduler/scheduler_types.h b/csrc/scheduler/scheduler_types.h index 29c0fbee20e..ef4ae83f5e0 100644 --- a/csrc/scheduler/scheduler_types.h +++ b/csrc/scheduler/scheduler_types.h @@ -69,7 +69,7 @@ constexpr std::array all_heuristics_in_priority_order = { SchedulerType::Matmul, SchedulerType::Reduction, SchedulerType::Resize, - SchedulerType::Transpose, + // SchedulerType::Transpose, SchedulerType::PointWise, SchedulerType::InnerPersistent, SchedulerType::OuterPersistent, From 8dfba340ed1d231cfdc6e6b5028afc984fd67d82 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 17 Apr 2025 11:15:28 -0700 Subject: [PATCH 2/6] WIP --- csrc/scheduler/pointwise.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/csrc/scheduler/pointwise.cpp b/csrc/scheduler/pointwise.cpp index 862a6a336a2..084b3edb485 100644 --- a/csrc/scheduler/pointwise.cpp +++ b/csrc/scheduler/pointwise.cpp @@ -1040,6 +1040,10 @@ void schedulePointwise(Fusion* fusion, const PointwiseParams* pparams) { auto output = entry.second; inner_most_tensors.erase(output); } + for (auto idx_sel : ir_utils::getOpsOfType<IndexSelectOp>(fusion)) { + inner_most_tensors.erase(idx_sel->output(0)); + } + inlineMost(inner_most_tensors); scheduler_utils::promoteProducerMemoryTypes(fusion, cached_inputs); From eda1c1377549e6bc98eea5df4886f0ba8ca6998b Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 17 Apr 2025 11:16:58 -0700 Subject: [PATCH 3/6] WIP --- csrc/scheduler/pointwise.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/scheduler/pointwise.cpp b/csrc/scheduler/pointwise.cpp index 084b3edb485..7b646999ca8 100644 --- a/csrc/scheduler/pointwise.cpp +++ b/csrc/scheduler/pointwise.cpp @@ -1041,7 +1041,7 @@ void 
schedulePointwise(Fusion* fusion, const PointwiseParams* pparams) { inner_most_tensors.erase(output); } for (auto idx_sel : ir_utils::getOpsOfType<IndexSelectOp>(fusion)) { - inner_most_tensors.erase(idx_sel->output(0)); + inner_most_tensors.erase(idx_sel->output(0)->as<TensorView>()); } inlineMost(inner_most_tensors); From 612d9058017429848f2f5b92d1376cc58411be82 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 17 Apr 2025 11:30:04 -0700 Subject: [PATCH 4/6] removing unwanted changes --- csrc/scheduler/scheduler_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/scheduler/scheduler_types.h b/csrc/scheduler/scheduler_types.h index ef4ae83f5e0..29c0fbee20e 100644 --- a/csrc/scheduler/scheduler_types.h +++ b/csrc/scheduler/scheduler_types.h @@ -69,7 +69,7 @@ constexpr std::array all_heuristics_in_priority_order = { SchedulerType::Matmul, SchedulerType::Reduction, SchedulerType::Resize, - // SchedulerType::Transpose, + SchedulerType::Transpose, SchedulerType::PointWise, SchedulerType::InnerPersistent, SchedulerType::OuterPersistent, From 4e4d2b9ae9707cf6ee16271039d1c55dfbb1bf4c Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Fri, 18 Apr 2025 10:25:04 -0700 Subject: [PATCH 5/6] adding comment --- csrc/scheduler/pointwise.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csrc/scheduler/pointwise.cpp b/csrc/scheduler/pointwise.cpp index 7b646999ca8..73903831fdf 100644 --- a/csrc/scheduler/pointwise.cpp +++ b/csrc/scheduler/pointwise.cpp @@ -1040,6 +1040,8 @@ void schedulePointwise(Fusion* fusion, const PointwiseParams* pparams) { auto output = entry.second; inner_most_tensors.erase(output); } + // IndexSelectOp supports vectorized load on lookupTv. It should be treated + // the same as a cached input and excluded from inner_most_tensors. 
for (auto idx_sel : ir_utils::getOpsOfType<IndexSelectOp>(fusion)) { inner_most_tensors.erase(idx_sel->output(0)->as<TensorView>()); } From d8b270f8fd68fa07000d913859cb3964b2bf52c1 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 30 Apr 2025 02:47:16 -0700 Subject: [PATCH 6/6] adjusting comment --- csrc/scheduler/pointwise.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/csrc/scheduler/pointwise.cpp b/csrc/scheduler/pointwise.cpp index 73903831fdf..a9d72e29540 100644 --- a/csrc/scheduler/pointwise.cpp +++ b/csrc/scheduler/pointwise.cpp @@ -1040,8 +1040,10 @@ void schedulePointwise(Fusion* fusion, const PointwiseParams* pparams) { auto output = entry.second; inner_most_tensors.erase(output); } - // IndexSelectOp supports vectorized load on lookupTv. It should be treated - // the same as a cached input and excluded from inner_most_tensors. + // IndexSelectOp reads lookup tv without cache. Because pointwise scheduler + // doesn't use ParallelType::Unroll, we need to exclude consumer of fusion + // inputs to be inlineMost. This allows us to aggregate the allocation of + // manual unroll ID and its inner ID. for (auto idx_sel : ir_utils::getOpsOfType<IndexSelectOp>(fusion)) { inner_most_tensors.erase(idx_sel->output(0)->as<TensorView>()); }