ROCm · asroy · Jul 30, 2021 · Jul 30, 2021
diff --git a/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp b/composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
@@ -33,13 +33,11 @@ struct DynamicTensorDescriptor
 
     __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
     {
-        constexpr auto all_low_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   LowerDimensionIdss{});
+        constexpr auto all_low_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});
 
-        constexpr auto all_up_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   UpperDimensionIdss{});
+        constexpr auto all_up_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});
 
         constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
 
@@ -347,22 +345,22 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
     constexpr auto up_dim_numbers_scan = merge_sequences(
         Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));
 
-    constexpr auto up_dim_hidden_idss =
-        generate_tuple([ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
+    constexpr auto up_dim_hidden_idss = generate_tuple(
+        [ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
             return
                 typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
                                                  old_hidden_dim_number + up_dim_numbers_scan[i + 1],
                                                  1>::type{};
         },
-                       Number<num_new_transform>{});
+        Number<num_new_transform>{});
 
     // new visible dimension's hidden ids
-    constexpr auto unordered_new_visible_dim_hidden_ids =
-        unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
+    constexpr auto unordered_new_visible_dim_hidden_ids = unpack(
+        [](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
 
-    constexpr auto new_visible_dim_unordered2ordered =
-        unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
-               NewUpperDimensionNewVisibleIdss{});
+    constexpr auto new_visible_dim_unordered2ordered = unpack(
+        [](auto... xs) constexpr { return merge_sequences(xs...); },
+        NewUpperDimensionNewVisibleIdss{});
 
     constexpr auto new_visible_dim_hidden_ids =
         unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);

diff --git a/composable_kernel/include/tensor_description/tensor_adaptor.hpp b/composable_kernel/include/tensor_description/tensor_adaptor.hpp
@@ -106,13 +106,13 @@ struct TensorAdaptor
 
     __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
     {
-        constexpr auto all_low_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   LowerDimensionHiddenIdss{});
+        constexpr auto all_low_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
+            LowerDimensionHiddenIdss{});
 
-        constexpr auto all_up_dim_ids =
-            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-                   UpperDimensionHiddenIdss{});
+        constexpr auto all_up_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
+            UpperDimensionHiddenIdss{});
 
         constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
 
@@ -418,13 +418,11 @@ __host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transf
                   "wrong!");
 
     // sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
-    constexpr auto all_low_dim_old_top_ids =
-        unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-               LowerDimensionOldTopIdss{});
+    constexpr auto all_low_dim_old_top_ids = unpack(
+        [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionOldTopIdss{});
 
-    constexpr auto all_up_dim_new_top_ids =
-        unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
-               UpperDimensionNewTopIdss{});
+    constexpr auto all_up_dim_new_top_ids = unpack(
+        [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{});
 
     static_assert(is_valid_sequence_map<decltype(all_low_dim_old_top_ids)>::value &&
                       is_valid_sequence_map<decltype(all_up_dim_new_top_ids)>::value,

diff --git a/composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp b/composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
@@ -152,7 +152,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3
 
         static_for<0, EPerBlock, EPerThreadLoop>{}([&](auto e_begin) {
             static_for<0, KPerThread, KPerThreadSubC>{}([&](auto k_begin) {
-
                 a_thread_copy_.Run(a_block_mtx,
                                    make_tuple(e_begin, k_begin),
                                    a_block_buf,

diff --git a/composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp b/composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
@@ -87,7 +87,6 @@ struct ThreadwiseGemmDlops_km0m1_kn0n1_m0m1n0n1
                 static_for<0, TM1, 1>{}([&](auto tm1) {
                     static_for<0, TN0, 1>{}([&](auto tn0) {
                         static_for<0, TN1, 1>{}([&](auto tn1) {
-
                             constexpr index_t a_offset =
                                 AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(
                                     a_origin_idx + make_multi_index(tk, tm0, tm1));
@@ -192,7 +191,6 @@ struct ThreadwiseContractionDlops_A_TK0_TM0_TM1_TK1_B_TK0_TN0_TN1_TK1_C_TM0_TM1_
                 static_for<0, TM1, 1>{}([&](auto tm1) {
                     static_for<0, TN0, 1>{}([&](auto tn0) {
                         static_for<0, TN1, 1>{}([&](auto tn1) {
-
                             vector_type<FloatA, TK1> a_vec;
                             vector_type<FloatB, TK1> b_vec;
 

diff --git a/composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp b/composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
@@ -136,7 +136,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3
                 {
                     static_for<0, H, 1>{}([&](auto h) {
                         static_for<0, W, 1>{}([&](auto w) {
-
                             constexpr index_t b_offset =
                                 BDesc{}.CalculateOffset(b_origin_idx + make_tuple(e, 0, h, w));
 

diff --git a/composable_kernel/include/utility/data_type_enum.hpp b/composable_kernel/include/utility/data_type_enum.hpp
@@ -4,7 +4,8 @@
 namespace ck {
 
 // this enumerate should be synchronized with include/miopen.h
-typedef enum {
+typedef enum
+{
     Half     = 0,
     Float    = 1,
     Int32    = 2,

diff --git a/external/half/include/half.hpp b/external/half/include/half.hpp
@@ -2399,11 +2399,11 @@ unsigned int erf(unsigned int arg)
 template <std::float_round_style R, bool L>
 unsigned int gamma(unsigned int arg)
 {
-    /*			static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
-			double t = arg + 4.65, s = p[0];
-			for(unsigned int i=0; i<5; ++i)
-				s += p[i+1] / (arg+i);
-			return std::log(s) + (arg-0.5)*std::log(t) - t;
+    /*			static const double p[] ={ 2.50662827563479526904, 225.525584619175212544,
+       -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837,
+       0.0114684895434781459556 }; double t = arg + 4.65, s = p[0]; for(unsigned int i=0; i<5; ++i)
+                                s += p[i+1] / (arg+i);
+                        return std::log(s) + (arg-0.5)*std::log(t) - t;
 */ static const f31 pi(0xC90FDAA2, 1),
         lbe(0xB8AA3B29, 0);
     unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
@@ -2506,7 +2506,7 @@ unsigned int gamma(unsigned int arg)
 
 template <typename, typename, std::float_round_style>
 struct half_caster;
-}
+} // namespace detail
 
 /// Half-precision floating-point type.
 /// This class implements an IEEE-conformant half-precision floating-point type with the usual

diff --git a/host/host_tensor/include/host_tensor.hpp b/host/host_tensor/include/host_tensor.hpp
@@ -39,7 +39,8 @@ std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
     return os;
 }
 
-typedef enum {
+typedef enum
+{
     Half  = 0,
     Float = 1,
 } DataType_t;
@@ -227,27 +228,23 @@ struct Tensor
     {
         switch(mDesc.GetNumOfDimension())
         {
-        case 1:
-        {
+        case 1: {
             auto f = [&](auto i) { (*this)(i) = g(i); };
             make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
             break;
         }
-        case 2:
-        {
+        case 2: {
             auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
             make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
             break;
         }
-        case 3:
-        {
+        case 3: {
             auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
             make_ParallelTensorFunctor(
                 f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
             break;
         }
-        case 4:
-        {
+        case 4: {
             auto f = [&](auto i0, auto i1, auto i2, auto i3) {
                 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
             };

diff --git a/host/online_compilation/hip_utility/kernel_cache.cpp b/host/online_compilation/hip_utility/kernel_cache.cpp
@@ -145,9 +145,7 @@ void KernelCache::ClearKernels(const std::string& algorithm, const std::string&
     }
     const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
     auto&& v                                      = this->kernel_map[key];
-    if(!v.empty())
-    {
-    }
+    if(!v.empty()) {}
     v.clear();
 }
 

diff --git a/host/online_compilation/hip_utility/logger.cpp b/host/online_compilation/hip_utility/logger.cpp
@@ -40,4 +40,4 @@ ostream& fdt_log(LogLevel level, const char* header, const char* content)
 ostream& fdt_log() { return (cerr); };
 
 void fdt_log_flush() { cerr << endl; }
-};
+}; // namespace olCompile