Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion projects/rocprim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ else()
)
else()
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
TARGETS "gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201"
)
endif()
set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Use the appropriate CMake directive:
* ``BUILD_BENCHMARK``: Set to ``ON`` to build benchmarking tests. ``OFF`` by default.
* ``BENCHMARK_CONFIG_TUNING``: Set to ``ON`` to find the best kernel configuration parameters for benchmarking. Turning this on might increase compilation time significantly. ``OFF`` by default.
* ``BENCHMARK_USE_AMDSMI``: Set to ``ON`` to let benchmarks use AMD SMI to output more GPU statistics. ``OFF`` by default.
* ``AMDGPU_TARGETS``: Set this to build the library, examples, tests, and benchmarks for specific architecture targets. When not set, the examples, tests, and benchmarks are built for gfx803, gfx900:xnack-, gfx906:xnack-, gfx908:xnack-, gfx90a:xnack-, gfx90a:xnack+, gfx942, gfx950, gfx1030, gfx1100, gfx1101, gfx1102, gfx1151, gfx1200, and gfx1201 architectures. The list of targets must be separated by a semicolon (``;``).
* ``AMDGPU_TARGETS``: Set this to build the library, examples, tests, and benchmarks for specific architecture targets. When not set, the examples, tests, and benchmarks are built for gfx803, gfx900:xnack-, gfx906:xnack-, gfx908:xnack-, gfx90a:xnack-, gfx90a:xnack+, gfx942, gfx950, gfx1030, gfx1100, gfx1101, gfx1102, gfx1151, gfx1152, gfx1153, gfx1200, and gfx1201 architectures. The list of targets must be separated by a semicolon (``;``).
* ``AMDGPU_TEST_TARGETS``: Set this to build tests for a subset of the architectures specified by ``AMDGPU_TARGETS``. When set, copies of the same test will be generated for each of the architectures listed. These tests can be run using ``ctest -R "TARGET_ARCHITECTURE"``. The list of targets must be separated by a semicolon (``;``).
* ``USE_SYSTEM_LIB``: Set to ``ON`` to use the installed ``ROCm`` libraries when building the tests. ``OFF`` by default. For this option to take effect, ``BUILD_TEST`` must be ``ON``.
* ``ONLY_INSTALL``: Set to ``ON`` to ignore any example, test, or benchmark build instructions. ``OFF`` by default.
Expand Down
2 changes: 2 additions & 0 deletions projects/rocprim/rocprim/include/rocprim/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@
__builtin_amdgcn_processor_is("gfx1100") || __builtin_amdgcn_processor_is("gfx1101") \
|| __builtin_amdgcn_processor_is("gfx1102") \
|| __builtin_amdgcn_processor_is("gfx1103") \
|| __builtin_amdgcn_processor_is("gfx1152") \
|| __builtin_amdgcn_processor_is("gfx1153") \
|| __builtin_amdgcn_processor_is("gfx11-generic")
#define IS_RDNA2() \
__builtin_amdgcn_processor_is("gfx1030") || __builtin_amdgcn_processor_is("gfx1031") \
Expand Down
118 changes: 53 additions & 65 deletions projects/rocprim/rocprim/include/rocprim/device/config_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define ROCPRIM_DEVICE_CONFIG_TYPES_HPP_

#include <algorithm>
#include <array>
#include <atomic>
#include <limits>
#include <optional>
Expand Down Expand Up @@ -172,6 +173,8 @@ enum class target_arch : unsigned int
gfx1030 = 1030,
gfx1100 = 1100,
gfx1102 = 1102,
gfx1152 = 1152,
gfx1153 = 1153,
gfx1200 = 1200,
gfx1201 = 1201,
unknown = std::numeric_limits<unsigned int>::max(),
Expand Down Expand Up @@ -203,46 +206,38 @@ constexpr bool prefix_equals(const char* lhs, const char* rhs, std::size_t n)
return i == n && *lhs == '\0';
}

constexpr const char* target_names[] = {"gfx803",
"gfx900",
"gfx906",
"gfx908",
"gfx90a",
"gfx942",
"gfx950",
"gfx1030",
"gfx1100",
"gfx1102",
"gfx1200",
"gfx1201"};

constexpr target_arch target_architectures[] = {
target_arch::gfx803,
target_arch::gfx900,
target_arch::gfx906,
target_arch::gfx908,
target_arch::gfx90a,
target_arch::gfx942,
target_arch::gfx950,
target_arch::gfx1030,
target_arch::gfx1100,
target_arch::gfx1102,
target_arch::gfx1200,
target_arch::gfx1201,
// Associates a target_arch enumerator with a C-string name for it.
// Kept as a plain aggregate so it can be brace-initialized in constant expressions;
// the member order (arch first, then name) is what such initializers rely on.
struct target_arch_descriptor
{
// The architecture enumerator.
target_arch arch;
// Name string for `arch`; stored as a non-owning pointer.
const char *arch_name;
};

// X(ID) expands to a descriptor holding target_arch::ID together with the string "ID"
// (produced by the preprocessor's stringizing operator), so each enumerator and its
// name are spelled exactly once.
#define X(ID) target_arch_descriptor{target_arch::ID, #ID}
// Compile-time table of architecture descriptors. It is scanned to map a name to a
// target_arch and indexed to visit every listed architecture. The element type and
// count are deduced from the initializer list.
constexpr auto target_arch_descriptors = std::array{
X(gfx803),
X(gfx900),
X(gfx906),
X(gfx908),
X(gfx90a),
X(gfx942),
X(gfx950),
X(gfx1030),
X(gfx1100),
X(gfx1102),
X(gfx1152),
X(gfx1153),
X(gfx1200),
X(gfx1201),
};
// X is a helper for the table above only; undefine it so the short name does not leak.
#undef X

constexpr target_arch get_target_arch_from_name(const char* const arch_name, const std::size_t n)
{
static_assert(sizeof(target_names) / sizeof(target_names[0])
== sizeof(target_architectures) / sizeof(target_architectures[0]),
"target_names and target_architectures should have the same number of elements");
constexpr auto num_architectures = sizeof(target_names) / sizeof(target_names[0]);

for(unsigned int i = 0; i < num_architectures; ++i)
for (const auto& desc : target_arch_descriptors)
{
if(prefix_equals(target_names[i], arch_name, n))
if(prefix_equals(desc.arch_name, arch_name, n))
{
return target_architectures[i];
return desc.arch;
}
}
return target_arch::unknown;
Expand All @@ -251,14 +246,14 @@ constexpr target_arch get_target_arch_from_name(const char* const arch_name, con
template<class F, std::size_t... Is>
constexpr void for_each_arch_impl(F&& f, std::index_sequence<Is...>)
{
(f(std::integral_constant<target_arch, target_architectures[Is]>{}), ...);
(f(std::integral_constant<target_arch, target_arch_descriptors[Is].arch>{}), ...);
}

template<class F>
constexpr void for_each_arch(F&& f)
{
for_each_arch_impl(std::forward<F>(f),
std::make_index_sequence<std::size(target_architectures)>{});
std::make_index_sequence<std::size(target_arch_descriptors)>{});
}

constexpr arch::wavefront::target arch_wavefront_size(const target_arch target_arch)
Expand All @@ -276,6 +271,8 @@ constexpr arch::wavefront::target arch_wavefront_size(const target_arch target_a
case target_arch::gfx1030: return arch::wavefront::target::size32;
case target_arch::gfx1100: return arch::wavefront::target::size32;
case target_arch::gfx1102: return arch::wavefront::target::size32;
case target_arch::gfx1152: return arch::wavefront::target::size32;
case target_arch::gfx1153: return arch::wavefront::target::size32;
case target_arch::gfx1200: return arch::wavefront::target::size32;
case target_arch::gfx1201: return arch::wavefront::target::size32;

Expand Down Expand Up @@ -493,35 +490,26 @@ auto dispatch_target_arch([[maybe_unused]] const target_arch target_arch)
{
switch(target_arch)
{

case target_arch::unknown:
return Config::template architecture_config<target_arch::unknown>::params;
case target_arch::gfx803:
return Config::template architecture_config<target_arch::gfx803>::params;
case target_arch::gfx900:
return Config::template architecture_config<target_arch::gfx900>::params;
case target_arch::gfx906:
return Config::template architecture_config<target_arch::gfx906>::params;
case target_arch::gfx908:
return Config::template architecture_config<target_arch::gfx908>::params;
case target_arch::gfx90a:
return Config::template architecture_config<target_arch::gfx90a>::params;
case target_arch::gfx942:
return Config::template architecture_config<target_arch::gfx942>::params;
case target_arch::gfx950:
return Config::template architecture_config<target_arch::gfx950>::params;
case target_arch::gfx1030:
return Config::template architecture_config<target_arch::gfx1030>::params;
case target_arch::gfx1100:
return Config::template architecture_config<target_arch::gfx1100>::params;
case target_arch::gfx1102:
return Config::template architecture_config<target_arch::gfx1102>::params;
case target_arch::gfx1200:
return Config::template architecture_config<target_arch::gfx1200>::params;
case target_arch::gfx1201:
return Config::template architecture_config<target_arch::gfx1201>::params;
case target_arch::invalid:
assert(false && "Invalid target architecture selected at runtime.");
case target_arch::invalid:
assert(false && "Invalid target architecture selected at runtime.");
break;
#define X(ID) case target_arch::ID: return Config::template architecture_config<target_arch::ID>::params
X(unknown);
X(gfx803);
X(gfx900);
X(gfx906);
X(gfx908);
X(gfx90a);
X(gfx942);
X(gfx950);
X(gfx1030);
X(gfx1100);
X(gfx1102);
X(gfx1152);
X(gfx1153);
X(gfx1200);
X(gfx1201);
#undef X
}
}
return Config::template architecture_config<target_arch::unknown>::params;
Expand Down