Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,11 @@ struct DynamicTensorDescriptor

__host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
{
constexpr auto all_low_dim_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
LowerDimensionIdss{});
constexpr auto all_low_dim_ids = unpack(
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});

constexpr auto all_up_dim_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
UpperDimensionIdss{});
constexpr auto all_up_dim_ids = unpack(
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});

constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);

Expand Down Expand Up @@ -347,22 +345,22 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
constexpr auto up_dim_numbers_scan = merge_sequences(
Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));

constexpr auto up_dim_hidden_idss =
generate_tuple([ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
constexpr auto up_dim_hidden_idss = generate_tuple(
[ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
return
typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
old_hidden_dim_number + up_dim_numbers_scan[i + 1],
1>::type{};
},
Number<num_new_transform>{});
Number<num_new_transform>{});

// new visible dimension's hidden ids
constexpr auto unordered_new_visible_dim_hidden_ids =
unpack([](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);
constexpr auto unordered_new_visible_dim_hidden_ids = unpack(
[](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);

constexpr auto new_visible_dim_unordered2ordered =
unpack([](auto... xs) constexpr { return merge_sequences(xs...); },
NewUpperDimensionNewVisibleIdss{});
constexpr auto new_visible_dim_unordered2ordered = unpack(
[](auto... xs) constexpr { return merge_sequences(xs...); },
NewUpperDimensionNewVisibleIdss{});

constexpr auto new_visible_dim_hidden_ids =
unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
Expand Down
22 changes: 10 additions & 12 deletions composable_kernel/include/tensor_description/tensor_adaptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,13 @@ struct TensorAdaptor

__host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
{
constexpr auto all_low_dim_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
LowerDimensionHiddenIdss{});
constexpr auto all_low_dim_ids = unpack(
[](auto&&... xs) constexpr { return merge_sequences(xs...); },
LowerDimensionHiddenIdss{});

constexpr auto all_up_dim_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
UpperDimensionHiddenIdss{});
constexpr auto all_up_dim_ids = unpack(
[](auto&&... xs) constexpr { return merge_sequences(xs...); },
UpperDimensionHiddenIdss{});

constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);

Expand Down Expand Up @@ -418,13 +418,11 @@ __host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transf
"wrong!");

// sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
constexpr auto all_low_dim_old_top_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
LowerDimensionOldTopIdss{});
constexpr auto all_low_dim_old_top_ids = unpack(
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionOldTopIdss{});

constexpr auto all_up_dim_new_top_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
UpperDimensionNewTopIdss{});
constexpr auto all_up_dim_new_top_ids = unpack(
[](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{});

static_assert(is_valid_sequence_map<decltype(all_low_dim_old_top_ids)>::value &&
is_valid_sequence_map<decltype(all_up_dim_new_top_ids)>::value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3

static_for<0, EPerBlock, EPerThreadLoop>{}([&](auto e_begin) {
static_for<0, KPerThread, KPerThreadSubC>{}([&](auto k_begin) {

a_thread_copy_.Run(a_block_mtx,
make_tuple(e_begin, k_begin),
a_block_buf,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ struct ThreadwiseGemmDlops_km0m1_kn0n1_m0m1n0n1
static_for<0, TM1, 1>{}([&](auto tm1) {
static_for<0, TN0, 1>{}([&](auto tn0) {
static_for<0, TN1, 1>{}([&](auto tn1) {

constexpr index_t a_offset =
AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(
a_origin_idx + make_multi_index(tk, tm0, tm1));
Expand Down Expand Up @@ -192,7 +191,6 @@ struct ThreadwiseContractionDlops_A_TK0_TM0_TM1_TK1_B_TK0_TN0_TN1_TK1_C_TM0_TM1_
static_for<0, TM1, 1>{}([&](auto tm1) {
static_for<0, TN0, 1>{}([&](auto tn0) {
static_for<0, TN1, 1>{}([&](auto tn1) {

vector_type<FloatA, TK1> a_vec;
vector_type<FloatB, TK1> b_vec;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3
{
static_for<0, H, 1>{}([&](auto h) {
static_for<0, W, 1>{}([&](auto w) {

constexpr index_t b_offset =
BDesc{}.CalculateOffset(b_origin_idx + make_tuple(e, 0, h, w));

Expand Down
3 changes: 2 additions & 1 deletion composable_kernel/include/utility/data_type_enum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
namespace ck {

// this enumeration should be kept in sync with include/miopen.h
typedef enum {
typedef enum
{
Half = 0,
Float = 1,
Int32 = 2,
Expand Down
12 changes: 6 additions & 6 deletions external/half/include/half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2399,11 +2399,11 @@ unsigned int erf(unsigned int arg)
template <std::float_round_style R, bool L>
unsigned int gamma(unsigned int arg)
{
/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
double t = arg + 4.65, s = p[0];
for(unsigned int i=0; i<5; ++i)
s += p[i+1] / (arg+i);
return std::log(s) + (arg-0.5)*std::log(t) - t;
/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544,
-268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837,
0.0114684895434781459556 }; double t = arg + 4.65, s = p[0]; for(unsigned int i=0; i<5; ++i)
s += p[i+1] / (arg+i);
return std::log(s) + (arg-0.5)*std::log(t) - t;
*/ static const f31 pi(0xC90FDAA2, 1),
lbe(0xB8AA3B29, 0);
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
Expand Down Expand Up @@ -2506,7 +2506,7 @@ unsigned int gamma(unsigned int arg)

template <typename, typename, std::float_round_style>
struct half_caster;
}
} // namespace detail

/// Half-precision floating-point type.
/// This class implements an IEEE-conformant half-precision floating-point type with the usual
Expand Down
15 changes: 6 additions & 9 deletions host/host_tensor/include/host_tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
return os;
}

typedef enum {
typedef enum
{
Half = 0,
Float = 1,
} DataType_t;
Expand Down Expand Up @@ -227,27 +228,23 @@ struct Tensor
{
switch(mDesc.GetNumOfDimension())
{
case 1:
{
case 1: {
auto f = [&](auto i) { (*this)(i) = g(i); };
make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
break;
}
case 2:
{
case 2: {
auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
break;
}
case 3:
{
case 3: {
auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
make_ParallelTensorFunctor(
f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
break;
}
case 4:
{
case 4: {
auto f = [&](auto i0, auto i1, auto i2, auto i3) {
(*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
};
Expand Down
4 changes: 1 addition & 3 deletions host/online_compilation/hip_utility/kernel_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,7 @@ void KernelCache::ClearKernels(const std::string& algorithm, const std::string&
}
const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
auto&& v = this->kernel_map[key];
if(!v.empty())
{
}
if(!v.empty()) {}
v.clear();
}

Expand Down
2 changes: 1 addition & 1 deletion host/online_compilation/hip_utility/logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ ostream& fdt_log(LogLevel level, const char* header, const char* content)
// Argument-free overload: hands back the cerr stream by reference so the
// caller can write to it directly with operator<<.
ostream& fdt_log()
{
    return cerr;
}

// Ends the current line on cerr and flushes the stream (endl writes a
// newline and then flushes).
void fdt_log_flush()
{
    cerr << endl;
}
};
}; // namespace olCompile