Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")

# Determine if CXX Compiler is hcc, hip-clang or other
execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--version" OUTPUT_VARIABLE CXX_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_STRIP_TRAILING_WHITESPACE)
string(REGEX MATCH "[A-Za-z]* ?clang version" TMP_CXX_VERSION ${CXX_OUTPUT})
string(REGEX MATCH "[A-Za-z]+" CXX_VERSION_STRING ${TMP_CXX_VERSION})
if(CXX_VERSION_STRING MATCHES "HCC")
set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler")
elseif(CXX_VERSION_STRING MATCHES "clang")
set(HIP_COMPILER "clang" CACHE STRING "HIP Compiler")
else()
message(FATAL_ERROR "CXX Compiler version ${CXX_VERSION_STRING} unsupported.")
endif()
message(STATUS "HIP Compiler: " ${HIP_COMPILER})

# Build options
option(BUILD_TEST "Build tests (requires googletest)" ON)
option(BUILD_BENCHMARK "Build benchmarks" OFF)
Expand Down
2 changes: 2 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ add_rocprim_benchmark_hip(benchmark_hip_warp_sort.cpp)
add_rocprim_benchmark_hip(benchmark_hip_device_memory.cpp)

# rocPRIM HC benchmarks
if(HIP_COMPILER STREQUAL "HCC")
add_rocprim_benchmark_hc(benchmark_hc_block_discontinuity.cpp)
add_rocprim_benchmark_hc(benchmark_hc_block_exchange.cpp)
add_rocprim_benchmark_hc(benchmark_hc_block_histogram.cpp)
Expand All @@ -95,3 +96,4 @@ add_rocprim_benchmark_hc(benchmark_hc_device_select.cpp)
add_rocprim_benchmark_hc(benchmark_hc_device_transform.cpp)
add_rocprim_benchmark_hc(benchmark_hc_warp_scan.cpp)
add_rocprim_benchmark_hc(benchmark_hc_warp_sort.cpp)
endif()
6 changes: 3 additions & 3 deletions cmake/VerifyCompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ if(HIP_PLATFORM STREQUAL "nvcc")
include(cmake/SetupNVCC.cmake)
message(STATUS "rocPRIM does not support NVCC. Only hipCUB will be available.")
elseif(HIP_PLATFORM STREQUAL "hcc")
if(NOT (CMAKE_CXX_COMPILER MATCHES ".*/hcc$"))
message(FATAL_ERROR "On ROCm platform 'hcc' must be used as C++ compiler.")
if(NOT (CMAKE_CXX_COMPILER MATCHES ".*/hcc$" OR CMAKE_CXX_COMPILER MATCHES ".*/hipcc$"))
message(FATAL_ERROR "On ROCm platform 'hcc' or 'clang' must be used as C++ compiler.")
else()
# Workaround until hcc & hip cmake modules fixes symlink logic in their config files.
# (Thanks to rocBLAS devs for finding workaround for this problem.)
Expand All @@ -44,5 +44,5 @@ elseif(HIP_PLATFORM STREQUAL "hcc")
find_package(hip REQUIRED CONFIG PATHS /opt/rocm)
endif()
else()
message(FATAL_ERROR "HIP_PLATFORM must be 'hcc' (AMD ROCm platform) or `nvcc` (NVIDIA CUDA platform).")
message(FATAL_ERROR "HIP_PLATFORM must be 'hcc' or 'clang' (AMD ROCm platform) or `nvcc` (NVIDIA CUDA platform).")
endif()
6 changes: 3 additions & 3 deletions hipcub/include/hipcub/rocprim/util_ptx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ auto unsigned_bit_extract(UnsignedBits source,
-> typename std::enable_if<sizeof(UnsignedBits) == 8, unsigned int>::type
{
#ifdef __HIP_PLATFORM_HCC__
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__HIP_PLATFORM_HCC__ is also defined on clang with HIP backend?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like it, I ran into an error for hc::__bitextract_u64 without this change

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those are HSA functions. In HIP-Clang they are just in global namespace?

Copy link
Copy Markdown
Contributor

@jszuppe jszuppe Aug 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does __bitextract_u64 work when using hcc? Because it does not work on ROCm 1.8.2.

Copy link
Copy Markdown
Contributor

@jszuppe jszuppe Aug 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can do this:

    #ifdef __HIP_PLATFORM_HCC__        
        #ifdef __HCC__
        using ::hc::__bitextract_u64;
        #endif
        return __bitextract_u64(source, bit_start, num_bits);
    #else
        return (source << (64 - bit_start - num_bits)) >> (64 - num_bits);
    #endif // __HIP_PLATFORM_HCC__

or

    #ifdef __HCC__ // hcc
        return ::hc::__bitextract_u64(source, bit_start, num_bits);
    #elif defined(__HIP_PLATFORM_HCC__) // clang with support for HIP
        return __bitextract_u64(source, bit_start, num_bits);
    #else // nvcc
        return (source << (64 - bit_start - num_bits)) >> (64 - num_bits);
    #endif // __HIP_PLATFORM_HCC__

In 2nd option hipCUB (HIP code) won't depend on hc:: even when compiled with hcc. I guess hip-clang does not define __HCC__.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, thanks I will change that

return ::hc::__bitextract_u64(source, bit_start, num_bits);
return __bitextract_u64(source, bit_start, num_bits);
#else
return (source << (64 - bit_start - num_bits)) >> (64 - num_bits);
#endif // __HIP_PLATFORM_HCC__
Expand All @@ -197,7 +197,7 @@ auto unsigned_bit_extract(UnsignedBits source,
-> typename std::enable_if<sizeof(UnsignedBits) < 8, unsigned int>::type
{
#ifdef __HIP_PLATFORM_HCC__
return ::hc::__bitextract_u32(source, bit_start, num_bits);
return __bitextract_u32(source, bit_start, num_bits);
#else
return (static_cast<unsigned int>(source) << (32 - bit_start - num_bits)) >> (32 - num_bits);
#endif // __HIP_PLATFORM_HCC__
Expand Down Expand Up @@ -228,7 +228,7 @@ void BFI(unsigned int &ret,
unsigned int num_bits)
{
#ifdef __HIP_PLATFORM_HCC__
ret = ::hc::__bitinsert_u32(x, y, bit_start, num_bits);
ret = __bitinsert_u32(x, y, bit_start, num_bits);
#else
x <<= bit_start;
unsigned int MASK_X = ((1 << num_bits) - 1) << bit_start;
Expand Down
8 changes: 8 additions & 0 deletions rocprim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,20 @@ target_include_directories(rocprim
# This target allows using only HC interface, links only
# against HC/HSA library, doesn't require HIP
add_library(rocprim_hc INTERFACE)
if(HIP_COMPILER STREQUAL "HCC")
target_link_libraries(rocprim_hc
INTERFACE
rocprim
hcc::hccrt
hcc::hc_am
)
elseif(HIP_COMPILER STREQUAL "clang")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it mean that clang also supports HC C++ API?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HIP-Clang shouldn't support HC C++ API. Does that mean I should remove rocprim_hc entirely for HIP-Clang?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I can check what's the best way to do that.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can safely disable it for for HIP-Clang. You disabled HC tests/benchmarks/examples anyway, I think the only other place rocprim_hc target is used is in rocm_install_targets, so add it there only if HCC is used as compiler and that's it.

target_link_libraries(rocprim_hc
INTERFACE
rocprim
)
endif()

target_compile_definitions(rocprim_hc
INTERFACE
ROCPRIM_HC_API=1
Expand Down
3 changes: 2 additions & 1 deletion rocprim/include/rocprim/device/device_scan_by_key_hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,8 @@ hipError_t exclusive_scan_by_key(void * temporary_storage,
)
)
),
[initial_value, key_compare_op](const ::rocprim::tuple<input_type, key_type, key_type>& t)
[initial_value, key_compare_op] ROCPRIM_HOST_DEVICE
(const ::rocprim::tuple<input_type, key_type, key_type>& t)
-> ::rocprim::tuple<input_type, key_type>
{
if(!key_compare_op(::rocprim::get<1>(t), ::rocprim::get<2>(t)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,8 @@ hipError_t segmented_exclusive_scan(void * temporary_storage,
head_flags
)
),
[initial_value](const ::rocprim::tuple<input_type, flag_type>& t)
[initial_value] ROCPRIM_HOST_DEVICE
(const ::rocprim::tuple<input_type, flag_type>& t)
-> ::rocprim::tuple<input_type, flag_type>
{
if(::rocprim::get<1>(t))
Expand Down
10 changes: 5 additions & 5 deletions rocprim/include/rocprim/intrinsics/atomic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace detail
#ifdef ROCPRIM_HC_API
return hc::atomic_fetch_add(address, value);
#else
return ::atomicAdd(address, value);
return atomicAdd(address, value);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why making sure we're calling atomicAdd from global namespace would cause problems?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the atomic functions in HIP were static, but I notice that isn't true

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is that relevant?

#endif
}

Expand All @@ -43,7 +43,7 @@ namespace detail
#ifdef ROCPRIM_HC_API
return hc::atomic_fetch_add(address, value);
#else
return ::atomicAdd(address, value);
return atomicAdd(address, value);
#endif
}

Expand All @@ -53,7 +53,7 @@ namespace detail
#ifdef ROCPRIM_HC_API
return hc::atomic_fetch_add(address, value);
#else
return ::atomicAdd(address, value);
return atomicAdd(address, value);
#endif
}

Expand All @@ -63,7 +63,7 @@ namespace detail
#ifdef ROCPRIM_HC_API
return hc::atomic_fetch_add(reinterpret_cast<uint64_t*>(address), static_cast<uint64_t>(value));
#else
return ::atomicAdd(address, value);
return atomicAdd(address, value);
#endif
}

Expand All @@ -73,7 +73,7 @@ namespace detail
#ifdef ROCPRIM_HC_API
return hc::__atomic_wrapinc(address, value);
#else
return ::atomicInc(address, value);
return atomicInc(address, value);
#endif
}
}
Expand Down
9 changes: 4 additions & 5 deletions rocprim/include/rocprim/intrinsics/thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ unsigned int lane_id()
#ifdef ROCPRIM_HC_API
return hc::__lane_id();
#else // HIP
// TODO: Find HIP function for that
return hc::__lane_id();
return __lane_id();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does HIP in ROCm 1.8.2 have this function? If not then I am not sure if can merge it to develop.

Actually we have branch on our private repo which replaces HC intrinsics with HIP functions (in some places intrinsics just weren't in HIP, in some they didn't work).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function seems to exist on HIP branch roc-1.8.x, I'm not sure if that is specifically ROCm 1.8.2.

Do you remember which intrinsic were missing or didn't work?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • __shfl* functions - they still don't work correctly on ROCm 1.8.2. From what I see changes you made in Add hipclang amdgcn functions hip#515 are still only in master. I don't know if those functions work correctly in hip-clang. If they don't that may be a reason for many failures.
  • __bitextract_u{32,64}, __bitinsert_u32 - in worst case scenario those can be replaced with some math like it's done for nvcc,
  • __lane_id, __amdgcn_mbcnt_{lo,hi} - those are present in ROCm 1.8.2

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does rocPRIM have its own rocm 1.8.x branch which can be used on ROCm 1.8.2, and develop used on the master of HIP?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, but we can do it the other way around: have a special branch for changes required for hip-clang and HIP master.

#endif
}

Expand Down Expand Up @@ -262,19 +261,19 @@ namespace detail
ROCPRIM_DEVICE inline
void memory_fence_system(void)
{
::__threadfence_system();
__threadfence_system();
}

ROCPRIM_DEVICE inline
void memory_fence_block(void)
{
::__threadfence_block();
__threadfence_block();
}

ROCPRIM_DEVICE inline
void memory_fence_device(void)
{
::__threadfence();
__threadfence();
}
#else
// __threadfence_system()
Expand Down
9 changes: 4 additions & 5 deletions rocprim/include/rocprim/intrinsics/warp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,9 @@ unsigned int masked_bit_count(unsigned long long x, unsigned int add = 0)
c = hc::__amdgcn_mbcnt_hi(static_cast<int>(x >> 32), c);
return c;
#else // HIP
// TODO: Use HIP function(s)
int c;
c = hc::__amdgcn_mbcnt_lo(static_cast<int>(x), add);
c = hc::__amdgcn_mbcnt_hi(static_cast<int>(x >> 32), c);
c = __mbcnt_lo(static_cast<int>(x), add);
c = __mbcnt_hi(static_cast<int>(x >> 32), c);
return c;
#endif
}
Expand All @@ -73,7 +72,7 @@ int warp_any(int predicate)
#ifdef ROCPRIM_HC_API
return hc::__any(predicate);
#else // ROCPRIM_HIP_API
return ::__any(predicate);
return __any(predicate);
#endif
}

Expand All @@ -83,7 +82,7 @@ int warp_all(int predicate)
#ifdef ROCPRIM_HC_API
return hc::__all(predicate);
#else // ROCPRIM_HIP_API
return ::__all(predicate);
return __all(predicate);
#endif
}

Expand Down
8 changes: 4 additions & 4 deletions rocprim/include/rocprim/intrinsics/warp_shuffle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ T warp_shuffle(T input, const int src_lane, const int width = warp_size())
input,
[=](int v) -> int
{
#if defined(ROCPRIM_HC_API) || defined(__HIP_PLATFORM_HCC__)
#if defined(ROCPRIM_HC_API)
return hc::__shfl(v, src_lane, width);
#else
return __shfl(v, src_lane, width);
Expand Down Expand Up @@ -131,7 +131,7 @@ T warp_shuffle_up(T input, const unsigned int delta, const int width = warp_size
input,
[=](int v) -> int
{
#if defined(ROCPRIM_HC_API) || defined(__HIP_PLATFORM_HCC__)
#if defined(ROCPRIM_HC_API)
return hc::__shfl_up(v, delta, width);
#else
return __shfl_up(v, delta, width);
Expand Down Expand Up @@ -160,7 +160,7 @@ T warp_shuffle_down(T input, const unsigned int delta, const int width = warp_si
input,
[=](int v) -> int
{
#if defined(ROCPRIM_HC_API) || defined(__HIP_PLATFORM_HCC__)
#if defined(ROCPRIM_HC_API)
return hc::__shfl_down(v, delta, width);
#else
return __shfl_down(v, delta, width);
Expand Down Expand Up @@ -188,7 +188,7 @@ T warp_shuffle_xor(T input, const int lane_mask, const int width = warp_size())
input,
[=](int v) -> int
{
#if defined(ROCPRIM_HC_API) || defined(__HIP_PLATFORM_HCC__)
#if defined(ROCPRIM_HC_API)
return hc::__shfl_xor(v, lane_mask, width);
#else
return __shfl_xor(v, lane_mask, width);
Expand Down
2 changes: 2 additions & 0 deletions rocprim/include/rocprim/iterator/zip_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,14 @@ struct decrement_iterator
template<class Difference>
struct advance_iterator
{
ROCPRIM_HOST_DEVICE inline
advance_iterator(Difference distance)
: distance_(distance)
{
}

template<class Iterator>
ROCPRIM_HOST_DEVICE inline
void operator()(Iterator& it)
{
using it_distance_type = typename std::iterator_traits<Iterator>::difference_type;
Expand Down
4 changes: 2 additions & 2 deletions rocprim/include/rocprim/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,12 @@ struct empty_type
#ifdef ROCPRIM_HC_API
using half = ::hc::half;
#else // HIP
using half = ::__half;
using half = __half;
#endif

END_ROCPRIM_NAMESPACE

/// @}
// end of group utilsmodule

#endif // ROCPRIM_TYPES_HPP_
#endif // ROCPRIM_TYPES_HPP_
2 changes: 1 addition & 1 deletion rocprim/include/rocprim/types/tuple.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ namespace detail
using is_final = std::integral_constant<bool, __is_final(T)>;
#else
template<class T>
struct is_final : std::false_type;
struct is_final : std::false_type
{
};
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ unsigned int last_in_warp_segment(Flag flag)
// Make sure last item in logical warp is marked as a tail
warp_flags |= ballot_type(1) << (WarpSize - 1U);
// Calculate logical lane id of the last valid value in the segment
return hc::__lastbit_u32_u64(warp_flags);
return __lastbit_u32_u64(warp_flags);
}

} // end namespace detail
Expand Down
23 changes: 16 additions & 7 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,19 @@ function(add_hc_test TEST_NAME TEST_SOURCES)
PUBLIC
${GTEST_INCLUDE_DIRS}
)
target_link_libraries(${TEST_TARGET}
PRIVATE
hcc::hccrt
hcc::hc_am
${GTEST_BOTH_LIBRARIES}
)
if(HIP_COMPILER STREQUAL "HCC")
target_link_libraries(${TEST_TARGET}
PRIVATE
hcc::hccrt
hcc::hc_am
${GTEST_BOTH_LIBRARIES}
)
elseif(HIP_COMPILER STREQUAL "clang")
target_link_libraries(${TEST_TARGET}
PRIVATE
${GTEST_BOTH_LIBRARIES}
)
endif()
foreach(amdgpu_target ${AMDGPU_TARGETS})
target_link_libraries(${TEST_TARGET}
PRIVATE
Expand All @@ -87,7 +94,9 @@ endfunction()
# HC and HIP tests without using rocPRIM
if(HIP_PLATFORM STREQUAL "hcc")
add_hip_test("hc.device_api" test_hip_api.cpp)
add_hc_test ("hip.device_api" test_hc_api.cpp)
if(HIP_COMPILER STREQUAL "HCC")
add_hc_test ("hip.device_api" test_hc_api.cpp)
endif()
endif()

# rocPRIM test (run only on ROCm/hcc)
Expand Down
3 changes: 2 additions & 1 deletion test/rocprim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ endfunction()
#

# HCP basic test, which also checks if there are no linkage problems when there are multiple sources
if(HIP_COMPILER STREQUAL "HCC")
add_rocprim_test_hc("rocprim.hc.basic_test" "test_hc_basic.cpp;detail/get_rocprim_version_hc.cpp")

add_rocprim_test_hc("rocprim.hc.arg_index_iterator" test_hc_arg_index_iterator.cpp)
Expand Down Expand Up @@ -98,7 +99,7 @@ add_rocprim_test_hc("rocprim.hc.warp_reduce" test_hc_warp_reduce.cpp)
add_rocprim_test_hc("rocprim.hc.warp_scan" test_hc_warp_scan.cpp)
add_rocprim_test_hc("rocprim.hc.warp_sort" test_hc_warp_sort.cpp)
add_rocprim_test_hc("rocprim.hc.zip_iterator" test_hc_zip_iterator.cpp)

endif()
#
# rocPRIM HIP API tests
#
Expand Down
2 changes: 0 additions & 2 deletions test/rocprim/test_hip_block_exchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@

// Google Test
#include <gtest/gtest.h>
// HC API
#include <hcc/hc.hpp>
// rocPRIM API
#include <rocprim/rocprim.hpp>

Expand Down
Loading