Skip to content

Commit

Permalink
separate intrinsics into core
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed Jan 20, 2025
1 parent 8b88101 commit 1b9534b
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 80 deletions.
3 changes: 1 addition & 2 deletions common/unified/components/range_minimum_query_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@

#include <limits>

#include <ginkgo/core/base/intrinsics.hpp>

#include "common/unified/base/kernel_launch.hpp"
#include "core/base/intrinsics.hpp"
#include "core/components/bit_packed_storage.hpp"
#include "core/components/range_minimum_query.hpp"

Expand Down
93 changes: 93 additions & 0 deletions core/base/intrinsics.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_CORE_BASE_INTRINSICS_HPP_
#define GKO_CORE_BASE_INTRINSICS_HPP_


#include <ginkgo/core/base/types.hpp>

// MSVC needs different intrinsics
#ifdef _MSC_VER
#include <intrin.h>

#pragma intrinsic(_BitScanForward, _BitScanForward64, _BitScanReverse, \
_BitScanReverse64)
#endif


namespace gko {
namespace detail {


/**
 * Computes the position of the most significant set bit of a 32 bit bitmask.
 * Positions are counted from the least significant bit, which has index 0.
 *
 * NOTE(review): the branches below do not agree on the result for
 * `bitmask == 0` (GCC documents `__builtin_clz(0)` as undefined), so callers
 * presumably have to pass a non-zero bitmask - confirm against the callers.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the highest bit set in `bitmask`
 */
GKO_ATTRIBUTES GKO_INLINE int find_highest_bit(uint32 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // CUDA/HIP device compilation: derive the position from the number of
    // leading zeros
    const int leading_zeros = __clz(static_cast<unsigned>(bitmask));
    return 31 - leading_zeros;
#elif defined(_MSC_VER)
    // MSVC: the intrinsic stores the bit position through its first argument
    unsigned long position{};
    _BitScanReverse(&position, bitmask);
    return static_cast<int>(position);
#else
    const int leading_zeros = __builtin_clz(bitmask);
    return 31 - leading_zeros;
#endif
}


/**
 * Computes the position of the most significant set bit of a 64 bit bitmask.
 * Positions are counted from the least significant bit, which has index 0.
 *
 * NOTE(review): the branches below do not agree on the result for
 * `bitmask == 0` (GCC documents `__builtin_clzll(0)` as undefined), so callers
 * presumably have to pass a non-zero bitmask - confirm against the callers.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the highest bit set in `bitmask`
 */
GKO_ATTRIBUTES GKO_INLINE int find_highest_bit(uint64 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // CUDA/HIP device compilation: derive the position from the number of
    // leading zeros
    const int leading_zeros = __clzll(static_cast<unsigned long long>(bitmask));
    return 63 - leading_zeros;
#elif defined(_MSC_VER)
    // MSVC: the intrinsic stores the bit position through its first argument
    unsigned long position{};
    _BitScanReverse64(&position, bitmask);
    return static_cast<int>(position);
#else
    const int leading_zeros = __builtin_clzll(bitmask);
    return 63 - leading_zeros;
#endif
}


/**
 * Returns the index of the lowest bit set in this bitmask.
 * The least significant bit has index 0.
 *
 * If no bit is set, the result is -1 in every branch: the `ffs`-style
 * intrinsics return 0 for a zero input, and the MSVC branch checks the
 * return value of `_BitScanForward` to report the same value.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the lowest bit set in `bitmask`, or -1 if
 *         `bitmask` is zero
 */
GKO_ATTRIBUTES GKO_INLINE int find_lowest_bit(uint32 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // __ffs is 1-based and yields 0 for an empty bitmask
    return __ffs(static_cast<unsigned>(bitmask)) - 1;
#elif defined(_MSC_VER)
    unsigned long index{};
    // _BitScanForward returns 0 when no bit is set; in that case the index
    // is not meaningful, so mirror the -1 produced by the other branches
    // instead of returning 0 (which would look like "bit 0 is set")
    if (_BitScanForward(&index, bitmask) == 0) {
        return -1;
    }
    return static_cast<int>(index);
#else
    // __builtin_ffs is 1-based and yields 0 for an empty bitmask
    return __builtin_ffs(bitmask) - 1;
#endif
}


/**
 * Returns the index of the lowest bit set in this 64 bit bitmask.
 * The least significant bit has index 0.
 *
 * If no bit is set, the result is -1 in every branch: the `ffs`-style
 * intrinsics return 0 for a zero input, and the MSVC branch checks the
 * return value of `_BitScanForward64` to report the same value.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the lowest bit set in `bitmask`, or -1 if
 *         `bitmask` is zero
 */
GKO_ATTRIBUTES GKO_INLINE int find_lowest_bit(uint64 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // __ffsll is 1-based and yields 0 for an empty bitmask
    return __ffsll(static_cast<unsigned long long>(bitmask)) - 1;
#elif defined(_MSC_VER)
    unsigned long index{};
    // _BitScanForward64 returns 0 when no bit is set; in that case the index
    // is not meaningful, so mirror the -1 produced by the other branches
    // instead of returning 0 (which would look like "bit 0 is set")
    if (_BitScanForward64(&index, bitmask) == 0) {
        return -1;
    }
    return static_cast<int>(index);
#else
    // __builtin_ffsll is 1-based and yields 0 for an empty bitmask
    return __builtin_ffsll(bitmask) - 1;
#endif
}


} // namespace detail
} // namespace gko

#endif // GKO_CORE_BASE_INTRINSICS_HPP_
3 changes: 2 additions & 1 deletion core/components/bit_packed_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
#include <limits>
#include <utility>

#include <ginkgo/core/base/intrinsics.hpp>
#include <ginkgo/core/base/types.hpp>

#include "core/base/intrinsics.hpp"

namespace gko {


Expand Down
2 changes: 1 addition & 1 deletion core/components/range_minimum_query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
#include <utility>

#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/intrinsics.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>

#include "core/base/index_range.hpp"
#include "core/base/intrinsics.hpp"
#include "core/components/bit_packed_storage.hpp"

namespace gko {
Expand Down
74 changes: 0 additions & 74 deletions include/ginkgo/core/base/intrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@

#include <ginkgo/core/base/types.hpp>

// MSVC needs different intrinsics
#ifdef _MSC_VER
#include <intrin.h>

#pragma intrinsic(_BitScanForward, _BitScanForward64, _BitScanReverse, \
_BitScanReverse64)
#endif


namespace gko {
namespace detail {
Expand Down Expand Up @@ -49,72 +41,6 @@ GKO_ATTRIBUTES GKO_INLINE int popcount(uint64 bitmask)
}


/**
 * Computes the position of the most significant set bit of a 32 bit bitmask.
 * Positions are counted from the least significant bit, which has index 0.
 *
 * NOTE(review): the branches below do not agree on the result for
 * `bitmask == 0` (GCC documents `__builtin_clz(0)` as undefined), so callers
 * presumably have to pass a non-zero bitmask - confirm against the callers.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the highest bit set in `bitmask`
 */
GKO_ATTRIBUTES GKO_INLINE int find_highest_bit(uint32 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // CUDA/HIP device compilation: derive the position from the number of
    // leading zeros
    const int leading_zeros = __clz(static_cast<unsigned>(bitmask));
    return 31 - leading_zeros;
#elif defined(_MSC_VER)
    // MSVC: the intrinsic stores the bit position through its first argument
    unsigned long position{};
    _BitScanReverse(&position, bitmask);
    return static_cast<int>(position);
#else
    const int leading_zeros = __builtin_clz(bitmask);
    return 31 - leading_zeros;
#endif
}


/**
 * Computes the position of the most significant set bit of a 64 bit bitmask.
 * Positions are counted from the least significant bit, which has index 0.
 *
 * NOTE(review): the branches below do not agree on the result for
 * `bitmask == 0` (GCC documents `__builtin_clzll(0)` as undefined), so callers
 * presumably have to pass a non-zero bitmask - confirm against the callers.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the highest bit set in `bitmask`
 */
GKO_ATTRIBUTES GKO_INLINE int find_highest_bit(uint64 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // CUDA/HIP device compilation: derive the position from the number of
    // leading zeros
    const int leading_zeros = __clzll(static_cast<unsigned long long>(bitmask));
    return 63 - leading_zeros;
#elif defined(_MSC_VER)
    // MSVC: the intrinsic stores the bit position through its first argument
    unsigned long position{};
    _BitScanReverse64(&position, bitmask);
    return static_cast<int>(position);
#else
    const int leading_zeros = __builtin_clzll(bitmask);
    return 63 - leading_zeros;
#endif
}


/**
 * Returns the index of the lowest bit set in this bitmask.
 * The least significant bit has index 0.
 *
 * If no bit is set, the result is -1 in every branch: the `ffs`-style
 * intrinsics return 0 for a zero input, and the MSVC branch checks the
 * return value of `_BitScanForward` to report the same value.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the lowest bit set in `bitmask`, or -1 if
 *         `bitmask` is zero
 */
GKO_ATTRIBUTES GKO_INLINE int find_lowest_bit(uint32 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // __ffs is 1-based and yields 0 for an empty bitmask
    return __ffs(static_cast<unsigned>(bitmask)) - 1;
#elif defined(_MSC_VER)
    unsigned long index{};
    // _BitScanForward returns 0 when no bit is set; in that case the index
    // is not meaningful, so mirror the -1 produced by the other branches
    // instead of returning 0 (which would look like "bit 0 is set")
    if (_BitScanForward(&index, bitmask) == 0) {
        return -1;
    }
    return static_cast<int>(index);
#else
    // __builtin_ffs is 1-based and yields 0 for an empty bitmask
    return __builtin_ffs(bitmask) - 1;
#endif
}


/**
 * Returns the index of the lowest bit set in this 64 bit bitmask.
 * The least significant bit has index 0.
 *
 * If no bit is set, the result is -1 in every branch: the `ffs`-style
 * intrinsics return 0 for a zero input, and the MSVC branch checks the
 * return value of `_BitScanForward64` to report the same value.
 *
 * @param bitmask  the bitmask to inspect
 *
 * @return the index of the lowest bit set in `bitmask`, or -1 if
 *         `bitmask` is zero
 */
GKO_ATTRIBUTES GKO_INLINE int find_lowest_bit(uint64 bitmask)
{
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
    // __ffsll is 1-based and yields 0 for an empty bitmask
    return __ffsll(static_cast<unsigned long long>(bitmask)) - 1;
#elif defined(_MSC_VER)
    unsigned long index{};
    // _BitScanForward64 returns 0 when no bit is set; in that case the index
    // is not meaningful, so mirror the -1 produced by the other branches
    // instead of returning 0 (which would look like "bit 0 is set")
    if (_BitScanForward64(&index, bitmask) == 0) {
        return -1;
    }
    return static_cast<int>(index);
#else
    // __builtin_ffsll is 1-based and yields 0 for an empty bitmask
    return __builtin_ffsll(bitmask) - 1;
#endif
}


} // namespace detail
} // namespace gko

Expand Down
3 changes: 1 addition & 2 deletions reference/components/range_minimum_query_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@

#include <limits>

#include <ginkgo/core/base/intrinsics.hpp>

#include "core/base/intrinsics.hpp"
#include "core/components/bit_packed_storage.hpp"
#include "core/components/range_minimum_query.hpp"

Expand Down
2 changes: 2 additions & 0 deletions test/base/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include "core/base/intrinsics.hpp"

#include <memory>

#include <gtest/gtest.h>
Expand Down

0 comments on commit 1b9534b

Please sign in to comment.