Skip to content

Commit

Permalink
ARCH/X86: Use UCS function to count leading zeros
Browse files Browse the repository at this point in the history
  • Loading branch information
tvegas1 committed Feb 25, 2025
1 parent 9a42220 commit 6e8a6f0
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 2 deletions.
18 changes: 18 additions & 0 deletions config/m4/compiler.m4
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,24 @@ if $CC --version 2>&1 | grep -q Intel; then
[AC_LANG_SOURCE([[int main(int argc, char **argv){return 0;}]])])
fi

#
# Check actual lzcnt support (at least nvc 24.9 fails to link, even if it
# defines __LZCNT__).
# The probe is linked, not only compiled, and its operands are taken from argc
# so that the compiler cannot fold the intrinsic calls into constants and
# thereby skip the code path that is being probed.
# On success, -mlzcnt is prepended to BASE_CFLAGS and HAVE_LZCNT is defined.
# CFLAGS is restored to its saved value afterwards.
#
SAVE_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -mlzcnt"
AC_MSG_CHECKING([if lzcnt is supported])
AC_LINK_IFELSE([AC_LANG_SOURCE([[
#include <x86intrin.h>
int main(int argc, char **argv) {
    return (int)_lzcnt_u32((unsigned)argc) |
           (int)_lzcnt_u64((unsigned long long)argc);
}
]])],
    [AC_MSG_RESULT([yes])
     BASE_CFLAGS="-mlzcnt $BASE_CFLAGS"
     AC_DEFINE([HAVE_LZCNT], 1, [LZCNT Intrinsic support])],
    [AC_MSG_RESULT([no])])
CFLAGS="$SAVE_CFLAGS"


#
# Set C++ optimization/debug flags to be the same as for C
Expand Down
15 changes: 13 additions & 2 deletions src/ucs/arch/bitops.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ BEGIN_C_DECLS
#endif


#if defined(HAVE_LZCNT)
# include <x86intrin.h>
#endif


#define ucs_ilog2(_n) \
( \
__builtin_constant_p(_n) ? ( \
Expand Down Expand Up @@ -121,10 +126,16 @@ BEGIN_C_DECLS
((sizeof(_n) <= 4) ? __builtin_ctz((uint32_t)(_n)) : __builtin_ctzl(_n))

/* Returns the number of leading 0-bits in _n, as an int.
 * Operands of up to 4 bytes are counted as 32-bit values, wider operands as
 * 64-bit values. If _n is 0, the result is that width (32 or 64); both
 * implementations below agree on this.
 * _n may be evaluated more than once, so it must not have side effects.
 */
#if defined(HAVE_LZCNT)
/* LZCNT is defined for a zero operand: it yields the operand size. */
#define ucs_count_leading_zero_bits(_n) \
    ((int)((sizeof(_n) <= 4) ? _lzcnt_u32((uint32_t)(_n)) : \
                               _lzcnt_u64((uint64_t)(_n))))
#else
/* __builtin_clz*() is undefined for 0, so that case is handled explicitly.
 * __builtin_clzll is used for wide operands so the count stays correct where
 * 'long' is only 32 bits wide.
 */
#define ucs_count_leading_zero_bits(_n) \
    ((sizeof(_n) <= 4) ? \
     (((uint32_t)(_n) != 0) ? __builtin_clz((uint32_t)(_n)) : 32) : \
     (((uint64_t)(_n) != 0) ? __builtin_clzll((uint64_t)(_n)) : 64))
#endif

/* Returns the number of bits lower than 'bit_index' that are set in 'mask'
* For example: ucs_bitmap2idx(mask=0xF0, idx=6) returns 2
Expand Down
20 changes: 20 additions & 0 deletions test/gtest/ucs/test_bitops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,26 @@ UCS_TEST_F(test_bitops, is_equal) {
test_bitops::check_bitwise_equality(buffer1, buffer2, indices, 0);
}

template <typename T> void test_clz()
{
constexpr int bits = sizeof(T) * 8;
T v = 1;

for (int i = bits - 1; v != 0; v <<= 1, --i) {
ASSERT_EQ(i, ucs_count_leading_zero_bits(v));
}

ASSERT_EQ(bits, ucs_count_leading_zero_bits(v));
}

/* Runs the leading-zero-count check for 32-bit and 64-bit operands, both
 * unsigned and signed.
 */
UCS_TEST_F(test_bitops, clz)
{
    test_clz<uint32_t>();
    test_clz<uint64_t>();
    test_clz<int32_t>();
    test_clz<int64_t>();
}

template<typename Type> void test_mask()
{
Type expected = 0;
Expand Down

0 comments on commit 6e8a6f0

Please sign in to comment.