Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
0c4cf86
[CK_TILE] Add GetName functions for Gemm Kernels
aledudek Jan 2, 2025
ff5115b
[CK_TILE] Add GetName for grouped gemm
aledudek Jan 3, 2025
8adaf41
[CK_TILE] Add GetName for gemm - review changes
aledudek Jan 8, 2025
67ab389
Merge branch 'develop' into gemm_getname
aledudek Jan 8, 2025
67e305e
Merge branch 'develop' into getname_gemm
aledudek Jan 9, 2025
c32aa3f
Merge branch 'develop' into getname_gemm
aledudek Jan 14, 2025
08aad5e
[CK_TILE] Print also gemm problem pipeline and shape
aledudek Jan 14, 2025
56e6326
[CK_TILE] Print also GemmPipelineScheduler
aledudek Jan 22, 2025
8866825
Merge branch 'develop' into getname_gemm
aledudek Jan 22, 2025
615fd8f
[CK_TILE] GetName - fixed Scheduler <<operator visibility
aledudek Jan 28, 2025
6e7ac86
Merge branch 'develop' into getname_gemm
aledudek Jan 28, 2025
9258ed7
[CK_TILE] GetName info adjustments
aledudek Jan 30, 2025
5522165
Merge branch 'develop' into getname_gemm
aledudek Jan 30, 2025
75a3989
[CK_TILE] GetName post-merge fix
aledudek Jan 30, 2025
3416bef
[CK_TILE] GetName - add general concat function
aledudek Jan 31, 2025
afa81a7
[CK_TILE] GetName - small adjustments, format change
aledudek Feb 4, 2025
98b3516
Merge branch 'develop' into getname_gemm
aledudek Feb 4, 2025
d9c6a64
post merge develop fix
aledudek Feb 4, 2025
711b2dd
Remove commented code
aledudek Feb 4, 2025
800cf89
Merge from internal (#1857)
illsilin Feb 4, 2025
2bef550
restore cron trigger (#1863)
illsilin Feb 5, 2025
5bb041b
add vectorloads on non-k dim for memory pipelines (#1856)
jakpiase Feb 6, 2025
feb656d
Support for dtypes (fp8, bf8, bf16 and fp16) for the ck_tile/03_gemm …
kylasa Feb 6, 2025
932071a
Extract prec_str and add separator to concat
aledudek Feb 6, 2025
510abd9
Merge branch 'develop' into getname_gemm
aledudek Feb 6, 2025
0f063f3
GetName add
aledudek Feb 6, 2025
b5d201d
CK Tile - small fix to hotloop scheduler & KPack value. (#1867)
aosewski Feb 7, 2025
ce51797
Merge branch 'develop' into getname_gemm
aledudek Feb 7, 2025
8976171
Merge branch 'develop' into getname_gemm
aledudek Feb 7, 2025
f4607cf
Merge branch 'getname_gemm' of https://github.com/ROCm/composable_ker…
aledudek Feb 7, 2025
fa3afaf
Resolve merge issues
aledudek Feb 7, 2025
7857894
Merge branch 'develop' into getname_gemm
aledudek Feb 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion example/ck_tile/03_gemm/gemm_basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ float gemm_calc(const ck_tile::GemmHostArgs& args, const ck_tile::stream_config&

if(s.log_level_ > 0)
{
std::cout << "Launching kernel with args:"
std::cout << "Launching kernel: " << Kernel::GetName()
Comment thread
aledudek marked this conversation as resolved.
Outdated
<< " shape: " << CodegenGemmShape::GetName()
<< " problem: " << CodegenPipelineProblem::GetName()
<< " pipeline: " << CodegenGemmPipeline::GetName() << " with args:"
<< " grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}"
<< ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z << "}"
<< std::endl;
Expand Down
2 changes: 1 addition & 1 deletion example/ck_tile/16_batched_gemm/batched_gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ float batched_gemm(const ck_tile::BatchedGemmHostArgs& args, const ck_tile::stre

if(s.log_level_ > 0)
{
std::cout << "Launching kernel with args:"
std::cout << "Launching kernel: " << Kernel::GetName() << " with args:"
<< " grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}"
<< ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z << "}"
<< std::endl;
Expand Down
4 changes: 2 additions & 2 deletions example/ck_tile/17_grouped_gemm/grouped_gemm.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.

#include <hip/hip_runtime.h>

Expand Down Expand Up @@ -124,7 +124,7 @@ float grouped_gemm(const std::vector<grouped_gemm_kargs>& gemm_descs,

if(s.log_level_ > 0)
{
std::cout << "Launching kernel with args:"
std::cout << "Launching kernel: " << GroupedGemmKernel::GetName() << " with args:"
<< " grid: {" << grids.x << ", " << grids.y << ", " << grids.z << "}"
<< ", blocks: {" << blocks.x << ", " << blocks.y << ", " << blocks.z << "}"
<< std::endl;
Expand Down
1 change: 1 addition & 0 deletions include/ck_tile/ops/add_rmsnorm2d_rdquant.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
#include "ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_three_pass.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@

#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
22 changes: 22 additions & 0 deletions include/ck_tile/ops/common/utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <iostream>
#include <string>

#include "ck_tile/core.hpp"

namespace ck_tile {
Comment thread
aledudek marked this conversation as resolved.

// clang-format off
template <typename T> struct t2s;
template <> struct t2s<float> { static constexpr const char * name = "fp32"; };
Comment thread
aledudek marked this conversation as resolved.
Outdated
template <> struct t2s<fp16_t> { static constexpr const char * name = "fp16"; };
template <> struct t2s<bf16_t> { static constexpr const char * name = "bf16"; };
template <> struct t2s<fp8_t> { static constexpr const char * name = "fp8"; };
template <> struct t2s<bf8_t> { static constexpr const char * name = "bf8"; };
template <> struct t2s<int8_t> { static constexpr const char * name = "int8"; };
// clang-format on
} // namespace ck_tile
1 change: 1 addition & 0 deletions include/ck_tile/ops/elementwise.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
#include "ck_tile/ops/elementwise/unary_element_wise_operation.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/epilogue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
#include "ck_tile/ops/epilogue/dynamic_quant_epilogue.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/flatmm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
#include "ck_tile/ops/flatmm/block/flatmm_uk_config.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/fmha.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@
#include "ck_tile/ops/fmha/pipeline/tile_fmha_traits.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/fused_moe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
#include "ck_tile/ops/fused_moe/pipeline/moe_sorting_problem.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,4 @@
#include "ck_tile/ops/gemm/warp/warp_gemm_impl.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
26 changes: 25 additions & 1 deletion include/ck_tile/ops/gemm/kernel/batched_gemm_kernel.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

Expand Down Expand Up @@ -57,6 +57,30 @@ struct BatchedGemmKernel : public GemmKernel<TilePartitioner_, GemmPipeline_, Ep
using BLayout = typename Base::BLayout;
using CLayout = typename Base::CLayout;

// Returns a descriptive name for this batched GEMM kernel instantiation:
//   "gemm_batched_<prec>_<M>x<N>x<K>_<VecA>x<VecB>x<VecC>_<PadM>x<PadN>x<PadK>"
// where <prec> is the A data type tag, followed by "_<B tag>" only when the
// A and B data types differ. All numeric fields come from GemmPipeline.
CK_TILE_HOST static std::string GetName()
{
    using Pipeline = GemmPipeline;

    // Formats three values as "AxBxC" using std::to_string on each.
    const auto join_x = [](auto first, auto second, auto third) {
        return std::to_string(first) + "x" + std::to_string(second) + "x" +
               std::to_string(third);
    };

    // NOTE(review): the sibling kernels look up the namespace-scope t2s<>
    // unqualified, while this one goes through Base -- confirm that Base
    // really exposes a member template named t2s.
    std::string precision = Base::template t2s<ADataType>::name;
    if(!std::is_same_v<ADataType, BDataType>)
    {
        precision += "_";
        precision += Base::template t2s<BDataType>::name;
    }

    std::string name = "gemm_batched_";
    name += precision;
    name += "_";
    name += join_x(Pipeline::kMPerBlock, Pipeline::kNPerBlock, Pipeline::kKPerBlock);
    name += "_";
    name += join_x(Pipeline::VectorSizeA, Pipeline::VectorSizeB, Pipeline::VectorSizeC);
    name += "_";
    name += join_x(Pipeline::kPadM, Pipeline::kPadN, Pipeline::kPadK);
    return name;
}

struct BatchedGemmKernelArgs : GemmKernelArgs
{
index_t batch_stride_A;
Expand Down
20 changes: 20 additions & 0 deletions include/ck_tile/ops/gemm/kernel/gemm_kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,26 @@ struct GemmKernel
static constexpr auto I1 = number<1>();
static constexpr auto I2 = number<2>();

// Returns a descriptive name for this GEMM kernel instantiation:
//   "gemm_<prec>_<GemmPipeline::GetName()>"
// where <prec> is the A data type tag, followed by "_<B tag>" only when the
// A and B data types differ.
CK_TILE_HOST static std::string GetName()
{
    std::string name = "gemm_";

    // Precision part: one tag when A and B share a type, "A_B" otherwise.
    name += t2s<ADataType>::name;
    if(!std::is_same_v<ADataType, BDataType>)
    {
        name += "_";
        name += t2s<BDataType>::name;
    }

    name += "_";
    name += GemmPipeline::GetName();
    return name;
}

__host__ static constexpr auto GridSize(index_t M, index_t N, index_t KBatch)
{
return TilePartitioner::GridSize(M, N, KBatch);
Expand Down
24 changes: 24 additions & 0 deletions include/ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,30 @@ struct GroupedGemmKernel : public GemmKernel<TilePartitioner_, GemmPipeline_, Ep
}
};

// Returns a descriptive name for this grouped GEMM kernel instantiation:
//   "gemm_grouped_<prec>_<M>x<N>x<K>_<VecA>x<VecB>x<VecC>_<PadM>x<PadN>x<PadK>"
// where <prec> is the A data type tag, followed by "_<B tag>" only when the
// A and B data types differ. All numeric fields come from GemmPipeline.
CK_TILE_HOST static std::string GetName()
{
    using Pipeline = GemmPipeline;

    // Formats three values as "AxBxC" using std::to_string on each.
    const auto join_x = [](auto first, auto second, auto third) {
        return std::to_string(first) + "x" + std::to_string(second) + "x" +
               std::to_string(third);
    };

    std::string precision = t2s<ADataType>::name;
    if(!std::is_same_v<ADataType, BDataType>)
    {
        precision += "_";
        precision += t2s<BDataType>::name;
    }

    std::string name = "gemm_grouped_";
    name += precision;
    name += "_";
    name += join_x(Pipeline::kMPerBlock, Pipeline::kNPerBlock, Pipeline::kKPerBlock);
    name += "_";
    name += join_x(Pipeline::VectorSizeA, Pipeline::VectorSizeB, Pipeline::VectorSizeC);
    name += "_";
    name += join_x(Pipeline::kPadM, Pipeline::kPadN, Pipeline::kPadK);
    return name;
}

__host__ static auto GetWorkSpaceSize(const std::vector<GroupedGemmHostArgs>& gemm_descs)
-> std::size_t
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ struct GemmPipelineAgBgCrCompV3 : public BaseGemmPipelineAgBgCrCompV3<Problem>

using Base::PrefetchStages;

// Returns a descriptive name for this pipeline instantiation:
//   "pipeline_AgBgCrCompV3_<BlockSize>_<VecA>x<VecB>x<VecC>_<PadM>x<PadN>x<PadK>"
// Every field is rendered with std::to_string.
CK_TILE_HOST static std::string GetName()
{
    std::string name = "pipeline_AgBgCrCompV3_";

    name += std::to_string(BlockSize);
    name += "_";

    name += std::to_string(VectorSizeA);
    name += "x";
    name += std::to_string(VectorSizeB);
    name += "x";
    name += std::to_string(VectorSizeC);
    name += "_";

    name += std::to_string(kPadM);
    name += "x";
    name += std::to_string(kPadN);
    name += "x";
    name += std::to_string(kPadK);

    return name;
}

CK_TILE_HOST_DEVICE static constexpr index_t GetSmemSize()
{
return Policy::template GetSmemSize<Problem>();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

Expand Down Expand Up @@ -126,6 +126,20 @@ struct GemmPipelineAgBgCrMem : public BaseGemmPipelineAgBgCrMem<Problem>
static constexpr auto TailNum = Problem::TailNum;
static constexpr auto Scheduler = Problem::Scheduler;

// Returns a descriptive name for this pipeline instantiation:
//   "pipeline_AgBgCrMem_<M>x<N>x<K>_<VecA>x<VecB>x<VecC>_<PadM>x<PadN>x<PadK>"
// Every field is rendered with std::to_string.
//
// The prefix previously read "pipeline_AgBgCrMe_" (truncated); it now mirrors
// the struct name GemmPipelineAgBgCrMem, as the other pipelines' names do.
CK_TILE_HOST static std::string GetName()
{
    const std::string tile_dims = std::to_string(MPerBlock) + "x" + std::to_string(NPerBlock) +
                                  "x" + std::to_string(KPerBlock);
    const std::string vector_sizes = std::to_string(VectorSizeA) + "x" +
                                     std::to_string(VectorSizeB) + "x" +
                                     std::to_string(VectorSizeC);
    const std::string padding =
        std::to_string(kPadM) + "x" + std::to_string(kPadN) + "x" + std::to_string(kPadK);

    return std::string("pipeline_AgBgCrMem_") + tile_dims + "_" + vector_sizes + "_" + padding;
}

using Base::PrefetchStages;

CK_TILE_HOST_DEVICE static constexpr index_t GetSmemSize()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <ostream>
#include <sstream>

#include "ck_tile/core.hpp"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,34 @@ struct GemmPipelineAGmemBGmemCRegV1
static constexpr bool kPadN = Problem::kPadN;
static constexpr bool kPadK = Problem::kPadK;

<<<<<<< HEAD
Comment thread
aledudek marked this conversation as resolved.
Outdated
CK_TILE_HOST static std::string GetName()
{
#define _TS_ std::to_string
// clang-format off
using _SS_ = std::string;

return _SS_("pipeline_AGmemBGmemCRegV1_") +
_TS_(kMPerBlock) + "x" + _TS_(kNPerBlock) + "x" + _TS_(kKPerBlock) + "x" + _TS_(BlockSize) + "_" +
_TS_(VectorSizeA) + "x" + _TS_(VectorSizeB) + "x" + _TS_(VectorSizeC) + "_" +
_TS_(kPadM) + "x" + _TS_(kPadN) + "x" + _TS_(kPadK);
#undef _TS_
// clang-format on
}

CK_TILE_HOST_DEVICE static constexpr index_t GetStaticLdsSize()
Comment thread
aledudek marked this conversation as resolved.
Outdated
{
return integer_divide_ceil(
sizeof(ADataType) *
Policy::template MakeALdsBlockDescriptor<Problem>().get_element_space_size(),
16) *
Comment thread
aledudek marked this conversation as resolved.
Outdated
16 +
sizeof(BDataType) *
Policy::template MakeBLdsBlockDescriptor<Problem>().get_element_space_size();
}

=======
>>>>>>> develop
CK_TILE_HOST_DEVICE static constexpr index_t GetSmemSize()
{
return Policy::template GetSmemSize<Problem>();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

Expand All @@ -25,6 +25,18 @@ struct GemmPipelineAGmemBGmemCRegV2
static constexpr index_t kNPerBlock = BlockGemmShape::kN;
static constexpr index_t kKPerBlock = BlockGemmShape::kK;

// Returns a descriptive name for this pipeline instantiation:
//   "pipeline_AGmemBGmemCRegV2_<M>x<N>x<K>x<BlockSize>"
// Every field is rendered with std::to_string.
CK_TILE_HOST static std::string GetName()
{
    std::string name = "pipeline_AGmemBGmemCRegV2_";

    name += std::to_string(kMPerBlock);
    name += "x";
    name += std::to_string(kNPerBlock);
    name += "x";
    name += std::to_string(kKPerBlock);
    name += "x";
    name += std::to_string(kBlockSize);

    return name;
}

CK_TILE_HOST_DEVICE static constexpr index_t GetStaticLdsSize()
{
return integer_divide_ceil(
Expand Down
24 changes: 21 additions & 3 deletions include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

Expand Down Expand Up @@ -33,9 +33,27 @@ struct GemmPipelineProblemBase
static constexpr bool kPadN = Traits::kPadN;
static constexpr bool kPadK = Traits::kPadK;

static constexpr auto Scheduler = GemmPipelineScheduler::Default;

static constexpr auto Scheduler = GemmPipelineScheduler::Default;
static constexpr index_t VectorLoadSize = Traits::_VectorSize;

// Returns a descriptive name for this GEMM problem instantiation:
//   "gemm_problem_<VectorLoadSize>x<BlockSize>_<PadM>x<PadN>x<PadK>_<Scheduler>"
// Numeric fields are rendered with std::to_string (the bool padding flags
// appear as 0 or 1); the scheduler is rendered through its operator<<.
CK_TILE_HOST static std::string GetName()
{
    // Make the global-namespace operator<< overloads visible to the
    // insertion below.
    // NOTE(review): presumably the GemmPipelineScheduler inserter is declared
    // at global scope -- confirm.
    using ::operator<<;

    // A fresh stream per call. The previous `thread_local` stream was never
    // cleared, so every subsequent call on the same thread appended the
    // scheduler text again and returned an ever-growing name.
    std::ostringstream scheduler_text;
    scheduler_text << Scheduler;

    return std::string("gemm_problem_") +
           std::to_string(VectorLoadSize) + "x" + std::to_string(kBlockSize) + "_" +
           std::to_string(kPadM) + "x" + std::to_string(kPadN) + "x" + std::to_string(kPadK) +
           "_" + scheduler_text.str();
}

CK_TILE_HOST_DEVICE static constexpr auto GetAlignmentA()
{
if constexpr(std::is_same_v<ALayout, ck_tile::tensor_layout::gemm::ColumnMajor>)
Expand Down
16 changes: 15 additions & 1 deletion include/ck_tile/ops/gemm/pipeline/tile_gemm_shape.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

Expand All @@ -19,6 +19,20 @@ struct TileGemmShape
static constexpr index_t kM = BlockTile::at(number<0>{});
static constexpr index_t kN = BlockTile::at(number<1>{});
static constexpr index_t kK = BlockTile::at(number<2>{});

// Returns a descriptive name for this tile shape:
//   "tile_gemm_shape_<kM>x<kN>x<kK>x<NumWarps>_<BW0>x<BW1>x<BW2>_<WT0>x<WT1>x<WT2>"
// where BW* / WT* are elements 0..2 of BlockWarps / WarpTile. Every field is
// rendered with std::to_string.
CK_TILE_HOST static std::string GetName()
{
    const std::string block_tile = std::to_string(kM) + "x" + std::to_string(kN) + "x" +
                                   std::to_string(kK) + "x" + std::to_string(NumWarps);

    const std::string block_warps = std::to_string(BlockWarps::at(number<0>{})) + "x" +
                                    std::to_string(BlockWarps::at(number<1>{})) + "x" +
                                    std::to_string(BlockWarps::at(number<2>{}));

    const std::string warp_tile = std::to_string(WarpTile::at(number<0>{})) + "x" +
                                  std::to_string(WarpTile::at(number<1>{})) + "x" +
                                  std::to_string(WarpTile::at(number<2>{}));

    return std::string("tile_gemm_shape_") + block_tile + "_" + block_warps + "_" + warp_tile;
}
};

} // namespace ck_tile
1 change: 1 addition & 0 deletions include/ck_tile/ops/image_to_column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
#include "ck_tile/ops/image_to_column/pipeline/tile_image_to_column_shape.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/layernorm2d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
#include "ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_traits.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
1 change: 1 addition & 0 deletions include/ck_tile/ops/permute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
#include "ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp"
#include "ck_tile/ops/common/generic_2d_block_shape.hpp"
#include "ck_tile/ops/common/tensor_layout.hpp"
#include "ck_tile/ops/common/utils.hpp"
Loading