From 739e9f3a7c7cb8100a9f29bd1b39e848cc2017bf Mon Sep 17 00:00:00 2001 From: Bartlomiej Kocot Date: Tue, 29 Oct 2024 17:01:56 +0000 Subject: [PATCH 1/4] Remove virtual destructors from unary ops --- .../element/unary_element_wise_operation.hpp | 98 ++++++++++++++++--- 1 file changed, 83 insertions(+), 15 deletions(-) diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp index 712b8861835..e67185c4687 100644 --- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp +++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp @@ -13,10 +13,12 @@ namespace ck { namespace tensor_operation { namespace element_wise { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" struct UnaryOpBase { public: - __host__ __device__ virtual ~UnaryOpBase() = default; + __host__ __device__ ~UnaryOpBase() = default; __host__ __device__ UnaryOpBase() = default; __host__ __device__ UnaryOpBase(const UnaryOpBase&) = default; @@ -50,8 +52,12 @@ struct PassThroughPack2 constexpr const static bool is_pack2_invocable = true; }; -struct PassThrough : public UnaryOpBase +struct PassThrough final : public UnaryOpBase { + PassThrough() = default; + PassThrough(const PassThrough&) = default; + PassThrough(PassThrough&&) = default; + __host__ __device__ ~PassThrough() {} __host__ __device__ inline void operator()(float& y, const float& x) const final { y = x; } @@ -409,8 +415,14 @@ struct UnarySquare }; }; -struct UnaryAbs : public UnaryOpBase +struct UnaryAbs final : public UnaryOpBase { + UnaryAbs() = default; + UnaryAbs(const UnaryAbs&) = default; + UnaryAbs(UnaryAbs&&) = default; + + __host__ __device__ ~UnaryAbs() {} + __host__ __device__ inline void operator()(float& y, const float& x) const final { y = ck::math::abs(x); @@ -459,8 +471,14 @@ struct UnarySqrt }; }; -struct Relu : public UnaryOpBase +struct Relu final : public UnaryOpBase { + Relu() = default; + Relu(const Relu&) = default; + Relu(Relu&&) = default; + + __host__ __device__ ~Relu() {} + __host__ __device__ inline void operator()(float& y, const float& x) const final { y = x > 0 ? x : 0; @@ -633,8 +651,12 @@ struct Gelu } }; -struct Sigmoid : public UnaryOpBase +struct Sigmoid final : public UnaryOpBase { + Sigmoid() = default; + Sigmoid(const Sigmoid&) = default; + Sigmoid(Sigmoid&&) = default; + __host__ __device__ ~Sigmoid() {} __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -688,8 +710,13 @@ struct Silu }; }; -struct TanH : public UnaryOpBase +struct TanH final : public UnaryOpBase { + TanH() = default; + TanH(const TanH&) = default; + TanH(TanH&&) = default; + __host__ __device__ ~TanH() {} + __host__ __device__ inline void operator()(float& y, const float& x) const final { y = ck::math::tanh(x); @@ -959,8 +986,12 @@ struct Rcp }; }; -struct Swish : public UnaryOpBase +struct Swish final : public UnaryOpBase { + Swish(const Swish&) = default; + Swish(Swish&&) = default; + __host__ __device__ ~Swish() {} + __host__ __device__ Swish(float beta = 1.0f) : beta_(beta) {} __host__ __device__ float get_beta() const { return beta_; } @@ -1019,8 +1050,12 @@ struct Swish : public UnaryOpBase } }; -struct SoftRelu : public UnaryOpBase +struct SoftRelu final : public UnaryOpBase { + SoftRelu(const SoftRelu&) = default; + SoftRelu(SoftRelu&&) = default; + __host__ __device__ ~SoftRelu() {} + __host__ __device__ SoftRelu(float alpha = 1.0f) : alpha_(alpha) {} __host__ __device__ float get_alpha() const { return alpha_; } @@ -1070,8 +1105,12 @@ struct SoftRelu : public UnaryOpBase } }; -struct Power : public UnaryOpBase +struct Power final : public UnaryOpBase { + Power(const Power&) = default; + Power(Power&&) = default; + __host__ __device__ ~Power() {} + __host__ __device__ Power(float alpha = 0.f, float beta = 1.f, float gamma = 2.f) : alpha_(alpha), beta_(beta), gamma_(gamma) { @@ -1148,8 +1187,12 @@ struct Power : public UnaryOpBase } }; -struct ClippedRelu : public UnaryOpBase +struct ClippedRelu final : public UnaryOpBase { + ClippedRelu(const ClippedRelu&) = default; + ClippedRelu(ClippedRelu&&) = default; + __host__ __device__ ~ClippedRelu() {} + __host__ __device__ ClippedRelu(float alpha = 0.f, float beta = 1.f) : alpha_(alpha), beta_(beta) { @@ -1205,8 +1248,11 @@ struct ClippedRelu : public UnaryOpBase } }; -struct LeakyRelu : public UnaryOpBase +struct LeakyRelu final : public UnaryOpBase { + LeakyRelu(const LeakyRelu&) = default; + LeakyRelu(LeakyRelu&&) = default; + __host__ __device__ ~LeakyRelu() {} __host__ __device__ LeakyRelu(float alpha = 0.f) : alpha_(alpha) {} @@ -1250,8 +1296,11 @@ struct LeakyRelu : public UnaryOpBase } }; -struct Elu : public UnaryOpBase +struct Elu final : public UnaryOpBase { + Elu(const Elu&) = default; + Elu(Elu&&) = default; + __host__ __device__ ~Elu() {} __host__ __device__ Elu(float alpha = 1.f) : alpha_(alpha) {} @@ -1296,8 +1345,11 @@ struct Elu : public UnaryOpBase } }; -struct Logistic : public UnaryOpBase +struct Logistic final : public UnaryOpBase { + Logistic(const Logistic&) = default; + Logistic(Logistic&&) = default; + __host__ __device__ ~Logistic() {} __host__ __device__ Logistic(float alpha = 1.0f) : alpha_(alpha) {} @@ -1631,8 +1683,23 @@ struct DynamicUnaryOp __host__ __device__ ~DynamicUnaryOp() { - if(unary_op_ptr_) - delete unary_op_ptr_; + switch(unary_op_type_) + { + case(UnaryOpType::Swish): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::Sigmoid): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::PassThrough): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::Logistic): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::TanH): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::Relu): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::SoftRelu): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::UnaryAbs): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::Power): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::ClippedRelu): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::LeakyRelu): delete static_cast(unary_op_ptr_); break; + case(UnaryOpType::Elu): delete static_cast(unary_op_ptr_); break; + + default: break; + } } __device__ void InitUnaryOpPtrOnDevice() @@ -1721,6 +1788,7 @@ struct DynamicUnaryOp float beta; float gamma; }; +#pragma clang diagnostic pop } // namespace element_wise } // namespace tensor_operation From 5216b83230bcd9450e6c95f4e923bb758474e412 Mon Sep 17 00:00:00 2001 From: Bartlomiej Kocot Date: Tue, 29 Oct 2024 23:49:32 +0000 Subject: [PATCH 2/4] Fixes --- .../element/unary_element_wise_operation.hpp | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp index e67185c4687..361e7e497f8 100644 --- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp +++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp @@ -20,11 +20,11 @@ struct UnaryOpBase public: __host__ __device__ ~UnaryOpBase() = default; - __host__ __device__ UnaryOpBase() = default; - __host__ __device__ UnaryOpBase(const UnaryOpBase&) = default; - __host__ __device__ UnaryOpBase& operator=(const UnaryOpBase&) = default; - __host__ __device__ UnaryOpBase(UnaryOpBase&&) = default; - __host__ __device__ UnaryOpBase& operator=(UnaryOpBase&&) = default; + __host__ __device__ constexpr UnaryOpBase() = default; + __host__ __device__ constexpr UnaryOpBase(const UnaryOpBase&) = default; + __host__ __device__ constexpr UnaryOpBase& operator=(const UnaryOpBase&) = default; + __host__ __device__ constexpr UnaryOpBase(UnaryOpBase&&) = default; + __host__ __device__ constexpr UnaryOpBase& operator=(UnaryOpBase&&) = default; __host__ __device__ virtual inline void operator()(float& y, const float& x) const = 0; @@ -54,10 +54,10 @@ struct PassThroughPack2 struct PassThrough final : public UnaryOpBase { - PassThrough() = default; - PassThrough(const PassThrough&) = default; - PassThrough(PassThrough&&) = default; - __host__ __device__ ~PassThrough() {} + __host__ __device__ constexpr PassThrough() = default; + __host__ __device__ constexpr PassThrough(const PassThrough&) = default; + __host__ __device__ constexpr PassThrough(PassThrough&&) = default; + __host__ __device__ ~PassThrough() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { y = x; } @@ -417,11 +417,11 @@ struct UnarySquare struct UnaryAbs final : public UnaryOpBase { - UnaryAbs() = default; - UnaryAbs(const UnaryAbs&) = default; - UnaryAbs(UnaryAbs&&) = default; + __host__ __device__ constexpr UnaryAbs() = default; + __host__ __device__ constexpr UnaryAbs(const UnaryAbs&) = default; + __host__ __device__ constexpr UnaryAbs(UnaryAbs&&) = default; - __host__ __device__ ~UnaryAbs() {} + __host__ __device__ ~UnaryAbs() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -473,11 +473,11 @@ struct UnarySqrt struct Relu final : public UnaryOpBase { - Relu() = default; - Relu(const Relu&) = default; - Relu(Relu&&) = default; + __host__ __device__ constexpr Relu() = default; + __host__ __device__ constexpr Relu(const Relu&) = default; + __host__ __device__ constexpr Relu(Relu&&) = default; - __host__ __device__ ~Relu() {} + __host__ __device__ ~Relu() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -653,10 +653,10 @@ struct Gelu struct Sigmoid final : public UnaryOpBase { - Sigmoid() = default; - Sigmoid(const Sigmoid&) = default; - Sigmoid(Sigmoid&&) = default; - __host__ __device__ ~Sigmoid() {} + __host__ __device__ constexpr Sigmoid() = default; + __host__ __device__ constexpr Sigmoid(const Sigmoid&) = default; + __host__ __device__ constexpr Sigmoid(Sigmoid&&) = default; + __host__ __device__ ~Sigmoid() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -712,10 +712,10 @@ struct Silu struct TanH final : public UnaryOpBase { - TanH() = default; - TanH(const TanH&) = default; - TanH(TanH&&) = default; - __host__ __device__ ~TanH() {} + __host__ __device__ constexpr TanH() = default; + __host__ __device__ constexpr TanH(const TanH&) = default; + __host__ __device__ constexpr TanH(TanH&&) = default; + __host__ __device__ ~TanH() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -988,9 +988,9 @@ struct Rcp struct Swish final : public UnaryOpBase { - Swish(const Swish&) = default; - Swish(Swish&&) = default; - __host__ __device__ ~Swish() {} + __host__ __device__ constexpr Swish(const Swish&) = default; + __host__ __device__ constexpr Swish(Swish&&) = default; + __host__ __device__ ~Swish() = default; __host__ __device__ Swish(float beta = 1.0f) : beta_(beta) {} @@ -1052,9 +1052,9 @@ struct Swish final : public UnaryOpBase struct SoftRelu final : public UnaryOpBase { - SoftRelu(const SoftRelu&) = default; - SoftRelu(SoftRelu&&) = default; - __host__ __device__ ~SoftRelu() {} + __host__ __device__ constexpr SoftRelu(const SoftRelu&) = default; + __host__ __device__ constexpr SoftRelu(SoftRelu&&) = default; + __host__ __device__ ~SoftRelu() = default; __host__ __device__ SoftRelu(float alpha = 1.0f) : alpha_(alpha) {} @@ -1107,9 +1107,9 @@ struct SoftRelu final : public UnaryOpBase struct Power final : public UnaryOpBase { - Power(const Power&) = default; - Power(Power&&) = default; - __host__ __device__ ~Power() {} + __host__ __device__ constexpr Power(const Power&) = default; + __host__ __device__ constexpr Power(Power&&) = default; + __host__ __device__ ~Power() = default; __host__ __device__ Power(float alpha = 0.f, float beta = 1.f, float gamma = 2.f) : alpha_(alpha), beta_(beta), gamma_(gamma) @@ -1189,9 +1189,9 @@ struct Power final : public UnaryOpBase struct ClippedRelu final : public UnaryOpBase { - ClippedRelu(const ClippedRelu&) = default; - ClippedRelu(ClippedRelu&&) = default; - __host__ __device__ ~ClippedRelu() {} + __host__ __device__ constexpr ClippedRelu(const ClippedRelu&) = default; + __host__ __device__ constexpr ClippedRelu(ClippedRelu&&) = default; + __host__ __device__ ~ClippedRelu() = default; __host__ __device__ ClippedRelu(float alpha = 0.f, float beta = 1.f) : alpha_(alpha), beta_(beta) @@ -1250,9 +1250,9 @@ struct ClippedRelu final : public UnaryOpBase struct LeakyRelu final : public UnaryOpBase { - LeakyRelu(const LeakyRelu&) = default; - LeakyRelu(LeakyRelu&&) = default; - __host__ __device__ ~LeakyRelu() {} + __host__ __device__ constexpr LeakyRelu(const LeakyRelu&) = default; + __host__ __device__ constexpr LeakyRelu(LeakyRelu&&) = default; + __host__ __device__ ~LeakyRelu() = default; __host__ __device__ LeakyRelu(float alpha = 0.f) : alpha_(alpha) {} @@ -1298,9 +1298,9 @@ struct LeakyRelu final : public UnaryOpBase struct Elu final : public UnaryOpBase { - Elu(const Elu&) = default; - Elu(Elu&&) = default; - __host__ __device__ ~Elu() {} + __host__ __device__ constexpr Elu(const Elu&) = default; + __host__ __device__ constexpr Elu(Elu&&) = default; + __host__ __device__ ~Elu() = default; __host__ __device__ Elu(float alpha = 1.f) : alpha_(alpha) {} @@ -1347,9 +1347,9 @@ struct Elu final : public UnaryOpBase struct Logistic final : public UnaryOpBase { - Logistic(const Logistic&) = default; - Logistic(Logistic&&) = default; - __host__ __device__ ~Logistic() {} + __host__ __device__ constexpr Logistic(const Logistic&) = default; + __host__ __device__ constexpr Logistic(Logistic&&) = default; + __host__ __device__ ~Logistic() = default; __host__ __device__ Logistic(float alpha = 1.0f) : alpha_(alpha) {} From a9d32470785e7fffb39d242140950ceb2c3b4ddb Mon Sep 17 00:00:00 2001 From: Bartlomiej Kocot Date: Wed, 30 Oct 2024 09:51:56 +0000 Subject: [PATCH 3/4] Fixes --- .../element/unary_element_wise_operation.hpp | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp index 361e7e497f8..39b81ca5730 100644 --- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp +++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp @@ -22,9 +22,9 @@ struct UnaryOpBase __host__ __device__ constexpr UnaryOpBase() = default; __host__ __device__ constexpr UnaryOpBase(const UnaryOpBase&) = default; - __host__ __device__ constexpr UnaryOpBase& operator=(const UnaryOpBase&) = default; - __host__ __device__ constexpr UnaryOpBase(UnaryOpBase&&) = default; - __host__ __device__ constexpr UnaryOpBase& operator=(UnaryOpBase&&) = default; + __host__ __device__ constexpr UnaryOpBase(UnaryOpBase&&) = default; + __host__ __device__ UnaryOpBase& operator=(const UnaryOpBase&) = default; + __host__ __device__ UnaryOpBase& operator=(UnaryOpBase&&) = default; __host__ __device__ virtual inline void operator()(float& y, const float& x) const = 0; @@ -57,7 +57,9 @@ struct PassThrough final : public UnaryOpBase __host__ __device__ constexpr PassThrough() = default; __host__ __device__ constexpr PassThrough(const PassThrough&) = default; __host__ __device__ constexpr PassThrough(PassThrough&&) = default; - __host__ __device__ ~PassThrough() = default; + __host__ __device__ PassThrough& operator=(const PassThrough&) = default; + __host__ __device__ PassThrough& operator=(PassThrough&&) = default; + __host__ __device__ ~PassThrough() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { y = x; } @@ -420,8 +422,9 @@ struct UnaryAbs final : public UnaryOpBase __host__ __device__ constexpr UnaryAbs() = default; __host__ __device__ constexpr UnaryAbs(const UnaryAbs&) = default; __host__ __device__ constexpr UnaryAbs(UnaryAbs&&) = default; - - __host__ __device__ ~UnaryAbs() = default; + __host__ __device__ UnaryAbs& operator=(const UnaryAbs&) = default; + __host__ __device__ UnaryAbs& operator=(UnaryAbs&&) = default; + __host__ __device__ ~UnaryAbs() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -476,8 +479,9 @@ struct Relu final : public UnaryOpBase __host__ __device__ constexpr Relu() = default; __host__ __device__ constexpr Relu(const Relu&) = default; __host__ __device__ constexpr Relu(Relu&&) = default; - - __host__ __device__ ~Relu() = default; + __host__ __device__ Relu& operator=(const Relu&) = default; + __host__ __device__ Relu& operator=(Relu&&) = default; + __host__ __device__ ~Relu() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -656,7 +660,9 @@ struct Sigmoid final : public UnaryOpBase __host__ __device__ constexpr Sigmoid() = default; __host__ __device__ constexpr Sigmoid(const Sigmoid&) = default; __host__ __device__ constexpr Sigmoid(Sigmoid&&) = default; - __host__ __device__ ~Sigmoid() = default; + __host__ __device__ Sigmoid& operator=(const Sigmoid&) = default; + __host__ __device__ Sigmoid& operator=(Sigmoid&&) = default; + __host__ __device__ ~Sigmoid() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { @@ -715,7 +721,9 @@ struct TanH final : public UnaryOpBase __host__ __device__ constexpr TanH() = default; __host__ __device__ constexpr TanH(const TanH&) = default; __host__ __device__ constexpr TanH(TanH&&) = default; - __host__ __device__ ~TanH() = default; + __host__ __device__ TanH& operator=(const TanH&) = default; + __host__ __device__ TanH& operator=(TanH&&) = default; + __host__ __device__ ~TanH() = default; __host__ __device__ inline void operator()(float& y, const float& x) const final { From 589f8ad6bdb6fdeae9da6605123649840fcf4637 Mon Sep 17 00:00:00 2001 From: Bartlomiej Kocot Date: Wed, 30 Oct 2024 10:34:42 +0000 Subject: [PATCH 4/4] clang format fixes --- include/ck_tile/core/numeric/math.hpp | 2 +- .../host/reference/reference_elementwise.hpp | 2 +- .../host/reference/reference_permute.hpp | 2 +- .../host/reference/reference_rmsnorm2d_fwd.hpp | 2 +- .../kernel/add_rmsnorm2d_rdquant_fwd_shape.hpp | 2 +- ..._rmsnorm2d_rdquant_fwd_pipeline_problem.hpp | 2 +- .../ops/fmha/pipeline/tile_fmha_shape.hpp | 2 +- .../pipeline/generic_petmute_problem.hpp | 2 +- .../ck_tile/ops/reduce/block/block_reduce.hpp | 18 +++++++++--------- .../rmsnorm2d/kernel/rmsnorm2d_fwd_shape.hpp | 2 +- .../rmsnorm2d_fwd_pipeline_problem.hpp | 2 +- .../ops/welford/block/block_welford.hpp | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/ck_tile/core/numeric/math.hpp b/include/ck_tile/core/numeric/math.hpp index 0faf1aa043a..6bdcb509b09 100644 --- a/include/ck_tile/core/numeric/math.hpp +++ b/include/ck_tile/core/numeric/math.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/host/reference/reference_elementwise.hpp b/include/ck_tile/host/reference/reference_elementwise.hpp index 809049fa640..65303279b87 100644 --- a/include/ck_tile/host/reference/reference_elementwise.hpp +++ b/include/ck_tile/host/reference/reference_elementwise.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/host/reference/reference_permute.hpp b/include/ck_tile/host/reference/reference_permute.hpp index 1c82483407c..14ed4f815e4 100644 --- a/include/ck_tile/host/reference/reference_permute.hpp +++ b/include/ck_tile/host/reference/reference_permute.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp b/include/ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp index db6e92f4c08..b14e25a85b3 100644 --- a/include/ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp +++ b/include/ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/add_rmsnorm2d_rdquant/kernel/add_rmsnorm2d_rdquant_fwd_shape.hpp b/include/ck_tile/ops/add_rmsnorm2d_rdquant/kernel/add_rmsnorm2d_rdquant_fwd_shape.hpp index a17c53c73f6..4bc7db434ea 100644 --- a/include/ck_tile/ops/add_rmsnorm2d_rdquant/kernel/add_rmsnorm2d_rdquant_fwd_shape.hpp +++ b/include/ck_tile/ops/add_rmsnorm2d_rdquant/kernel/add_rmsnorm2d_rdquant_fwd_shape.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_problem.hpp b/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_problem.hpp index 106e5086bea..2e64060038e 100644 --- a/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_problem.hpp +++ b/include/ck_tile/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_problem.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp b/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp index 570754b22e1..bb33b5f0216 100644 --- a/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp +++ b/include/ck_tile/ops/fmha/pipeline/tile_fmha_shape.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp b/include/ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp index e504ed74721..17f18acb5e2 100644 --- a/include/ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp +++ b/include/ck_tile/ops/permute/pipeline/generic_petmute_problem.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/reduce/block/block_reduce.hpp b/include/ck_tile/ops/reduce/block/block_reduce.hpp index d9df949cf95..fa3007d1e49 100644 --- a/include/ck_tile/ops/reduce/block/block_reduce.hpp +++ b/include/ck_tile/ops/reduce/block/block_reduce.hpp @@ -16,8 +16,8 @@ namespace ck_tile { // synchronize reduce result (cross lane reduction and broadcast on replicated dimension) template CK_TILE_DEVICE void block_tile_reduce_sync(AccDistributedTensor_& acc_tensor, - const ReduceFunc& reduce_func, - bool_constant = {}) + const ReduceFunc& reduce_func, + bool_constant = {}) { using Dstr = typename AccDistributedTensor_::StaticTileDistribution; using DstrEncode = typename Dstr::DstrEncode; @@ -116,7 +116,7 @@ CK_TILE_DEVICE void block_tile_reduce_sync(AccDistributedTensor_& acc_tensor, */ template CK_TILE_DEVICE void block_tile_reduce_xor_sync(AccDistributedTensor_& acc_tensor, - const ReduceFunc& reduce_func) + const ReduceFunc& reduce_func) { using Dstr = typename AccDistributedTensor_::StaticTileDistribution; using DstrEncode = typename Dstr::DstrEncode; @@ -175,9 +175,9 @@ template CK_TILE_DEVICE void block_tile_reduce(AccDistributedTensor_& acc_tensor, - const InDistributedTensor_& in_tensor, - sequence, - const ReduceFunc& reduce_func) + const InDistributedTensor_& in_tensor, + sequence, + const ReduceFunc& reduce_func) { constexpr auto I0 = number<0>{}; constexpr auto I1 = number<1>{}; @@ -250,9 +250,9 @@ template CK_TILE_DEVICE auto block_tile_reduce(const InDistributedTensor_& in_tensor, - sequence in_reduce_dims, - const ReduceFunc& reduce_func, - const InDataType_& reduce_init) + sequence in_reduce_dims, + const ReduceFunc& reduce_func, + const InDataType_& reduce_init) { using InDataType = typename InDistributedTensor_::DataType; using AccDataType = remove_cvref_t; diff --git a/include/ck_tile/ops/rmsnorm2d/kernel/rmsnorm2d_fwd_shape.hpp b/include/ck_tile/ops/rmsnorm2d/kernel/rmsnorm2d_fwd_shape.hpp index fb484a10694..fc4b9f470c0 100644 --- a/include/ck_tile/ops/rmsnorm2d/kernel/rmsnorm2d_fwd_shape.hpp +++ b/include/ck_tile/ops/rmsnorm2d/kernel/rmsnorm2d_fwd_shape.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_problem.hpp b/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_problem.hpp index 87cab346317..2820e181332 100644 --- a/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_problem.hpp +++ b/include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_problem.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once diff --git a/include/ck_tile/ops/welford/block/block_welford.hpp b/include/ck_tile/ops/welford/block/block_welford.hpp index 623e1e16d84..ce73c183e16 100644 --- a/include/ck_tile/ops/welford/block/block_welford.hpp +++ b/include/ck_tile/ops/welford/block/block_welford.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved. #pragma once