Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
18d2bb1
add gelu and fast_gelu
May 18, 2022
8a913c2
added GeLU and fast GeLU
May 19, 2022
b548c0b
clean up
May 19, 2022
4769425
Merge remote-tracking branch 'origin/develop' into gelu
May 23, 2022
5208c62
add gemm+fastgelu example
May 23, 2022
7279e12
add gemm+gelu instances
May 25, 2022
b238662
Merge remote-tracking branch 'origin/develop' into gelu
May 25, 2022
f99f614
update profiler
May 25, 2022
a0eb2c0
clean up
May 25, 2022
b9d3d27
clean up
May 25, 2022
09ec28b
Merge remote-tracking branch 'origin/develop' into gelu
May 31, 2022
52ce27b
adding gemm+bias+activation
May 31, 2022
30109f6
clean
May 31, 2022
83511d7
adding bias
Jun 1, 2022
512666f
clean
Jun 6, 2022
ea3feee
adding gemm multiple d
Jun 9, 2022
c7d5941
debugging
Jun 9, 2022
8a60a32
add gemm bias add fastgelu
Jun 11, 2022
25e35b5
rename, clean
Jun 11, 2022
ff4f8ba
refactoring; add readme
Jun 13, 2022
f9b92b1
refactor
Jun 13, 2022
5727181
refactor
Jun 13, 2022
e09f6e0
refactor
Jun 13, 2022
7fd5e9f
refactor
Jun 13, 2022
97ec23b
refactor
Jun 13, 2022
2488d0b
refactor
Jun 13, 2022
ad11d2a
fix
Jun 14, 2022
5816a64
fix
Jun 14, 2022
578ffb6
update example
Jun 14, 2022
67fcb0b
Merge remote-tracking branch 'origin/develop' into gelu
Jun 14, 2022
c4f1208
update example
Jun 14, 2022
9551101
rename
Jun 15, 2022
3d00581
update example
Jun 15, 2022
b58b98f
add ckProfiler
Jun 15, 2022
e5f731c
clean
Jun 15, 2022
5d87cb7
clean
Jun 15, 2022
82837d1
clean
Jun 15, 2022
35a67b9
clean
Jun 15, 2022
af81e9d
add client app example
Jun 16, 2022
dc3f0c7
update readme
Jun 16, 2022
b64ffee
delete obsolete files
Jun 16, 2022
d20d1df
remove old client app
Jun 16, 2022
586792f
delete old file
Jun 16, 2022
fa40d2a
cleaning
Jun 17, 2022
fe090b9
Merge remote-tracking branch 'origin/develop' into external_interface
Jun 21, 2022
8074c85
clean
Jun 21, 2022
bfeb09b
remove half
Jun 21, 2022
996ff99
fix header path
Jun 21, 2022
bddda29
Merge remote-tracking branch 'origin/develop' into external_interface
Jun 21, 2022
dd2c9e0
fix header path
Jun 21, 2022
d685ba2
fix header path
Jun 21, 2022
174b1ba
fix header path
Jun 21, 2022
294b27b
fix header path
Jun 23, 2022
2e181fb
fix header path for all examples
Jun 23, 2022
9228f8f
Merge remote-tracking branch 'origin/develop' into external_interface
Jun 23, 2022
4cdf46a
fix header path
Jun 23, 2022
9a9cd97
fix header path
Jun 23, 2022
3ae04c7
fix header path
Jun 23, 2022
0dab8c8
fix header path
Jun 23, 2022
54442e1
fix header path
Jun 23, 2022
c102dc1
fix header path
Jun 23, 2022
f3e542a
fix header path
Jun 23, 2022
35d2263
fix header path
Jun 23, 2022
47bb0e6
fix header path
Jun 24, 2022
460c3c4
revert client app example
Jun 24, 2022
2e5cd79
clean build
Jun 24, 2022
c7ce4cc
fix build
Jun 24, 2022
f214a8f
temporarily disable client test on Jenkins
Jun 24, 2022
7d1698e
clean
Jun 24, 2022
01c777e
clean
Jun 24, 2022
ea819f3
clean
Jun 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 0 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,6 @@ rocm_create_package(
LDCONFIG
)

## half
set(HALF_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external/include/half")
message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")

## tidy
include(EnableCompilerWarnings)
set(CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
Expand Down Expand Up @@ -229,7 +225,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)

include_directories(BEFORE
${PROJECT_SOURCE_DIR}/include
${PROJECT_BINARY_DIR}/include
${PROJECT_SOURCE_DIR}/library/include
)

Expand Down
34 changes: 17 additions & 17 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -379,23 +379,23 @@ pipeline {
}
}
}
stage("Client App")
{
parallel
{
stage("Run Client App")
{
agent{ label rocmnode("gfx908")}
environment{
setup_args = """ -D -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """
execute_args = """ cd ../test/client_app && rm -rf build && mkdir build && cd build && cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" .. && make """
}
steps{
buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
}
}
}
}
//stage("Client App")
//{
// parallel
// {
// stage("Run Client App")
// {
// agent{ label rocmnode("gfx908")}
// environment{
// setup_args = """ -D -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """
// execute_args = """ cd ../test/client_app && rm -rf build && mkdir build && cd build && cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" .. && make """
// }
// steps{
// buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
// }
// }
// }
//}
stage("Performance Tests")
{
parallel
Expand Down
24 changes: 11 additions & 13 deletions example/01_gemm/gemm_dl_fp16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_dl.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
24 changes: 11 additions & 13 deletions example/01_gemm/gemm_dl_fp32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_dl.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
24 changes: 11 additions & 13 deletions example/01_gemm/gemm_dl_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_dl.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
24 changes: 11 additions & 13 deletions example/01_gemm/gemm_xdl_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"

#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/utility/check_err.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
24 changes: 11 additions & 13 deletions example/01_gemm/gemm_xdl_fp16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
26 changes: 12 additions & 14 deletions example/01_gemm/gemm_xdl_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,18 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/utility/check_err.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
25 changes: 12 additions & 13 deletions example/01_gemm/gemm_xdl_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,18 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl_cshuffle.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
26 changes: 11 additions & 15 deletions example/02_gemm_alpha_beta/gemm_xdl_alpha_beta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,17 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>

#include "check_err.hpp"
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp"
#include "device_base.hpp"
#include "device_gemm_xdl_c_shuffle_bias_2d.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm_bias_2d.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
24 changes: 12 additions & 12 deletions example/03_gemm_bias_relu/gemm_xdl_bias_relu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "device_gemm_multiple_d_xdl_cshuffle.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/utility/check_err.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>

#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "device_gemm_multiple_d_xdl_cshuffle.hpp"

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/utility/check_err.hpp"

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
Expand Down
Loading