Skip to content

Commit

Permalink
removing movemask from platform (facebook#2302)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebook#2302

stepping stone for simd::contains.

We want to do less things in simd platform if possbile, so moving out non essential things.

Reviewed By: Gownta

Differential Revision: D63388617

fbshipit-source-id: 49bf59d8c04edd4e05eac3ce7e542956a7f5de72
  • Loading branch information
DenisYaroshevskiy authored and facebook-github-bot committed Oct 2, 2024
1 parent 7ff79c0 commit 19095b7
Show file tree
Hide file tree
Showing 11 changed files with 254 additions and 95 deletions.
10 changes: 10 additions & 0 deletions folly/algorithm/simd/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@ load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")

oncall("fbcode_entropy_wardens_folly")

cpp_library(
name = "ignore",
headers = ["Ignore.h"],
exported_deps = [
"//folly/lang:bits",
],
)

cpp_library(
name = "movemask",
headers = ["Movemask.h"],
exported_deps = [
":ignore",
"//folly:portability",
"//folly/lang:bits",
],
)

Expand Down
2 changes: 1 addition & 1 deletion folly/algorithm/simd/FindFixed.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ std::optional<std::size_t> findSplitFirstRegister(

template <typename Scalar, typename Reg>
std::optional<std::size_t> firstTrue(Reg reg) {
auto [bits, bitsPerElement] = folly::movemask<Scalar>(reg);
auto [bits, bitsPerElement] = folly::simd::movemask<Scalar>(reg);
if (bits) {
return std::countr_zero(bits) / bitsPerElement();
}
Expand Down
52 changes: 52 additions & 0 deletions folly/algorithm/simd/Ignore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <folly/lang/Bits.h>

#include <type_traits>

namespace folly::simd {

/**
* ignore(_none/_extrema)
*
* tag types to be used in some simd operations.
*
* They are used to indicate to the function that
* some of the elements in are garbage.
*
* ignore_none indicates that the whole register is used.
* ignore_extrema.first, .last show how many elements are out of the data.
*
* Example:
* register: [true, true, false, false, false, false, false, true]
* indexes [0, 1, 2, 3, 4, 5, 6, 7 ]
*
* ignore_extema{.first = 1, .last = 2}
* means that elements with indexes 0, 6, and 7 will be ignored
* (w/e that means for an operation)
*/

struct ignore_extrema {
int first = 0;
int last = 0;
};

struct ignore_none {};

} // namespace folly::simd
87 changes: 70 additions & 17 deletions folly/algorithm/simd/Movemask.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#pragma once

#include <folly/Portability.h>
#include <folly/algorithm/simd/Ignore.h>
#include <folly/lang/Bits.h>

#include <cstdint>
#include <type_traits>
Expand All @@ -33,27 +35,45 @@
FOLLY_PUSH_WARNING
FOLLY_GCC_DISABLE_WARNING("-Wignored-attributes")

namespace folly {
namespace folly::simd {

/*
/**
* movemask
* movemask_fn
*
* This is a low level utility used for simd search algorithms.
* At the moment used in folly::findFixed and folly::split.
*
* Logical extension of _mm_movemask_epi8 for different types
* This is a logical extension of _mm_movemask_epi8 for different types
* for both x86 and arm.
*
* Interface looks like this:
* folly::movemask<-scalar type->(nativeRegister)
* Main interface looks like this:
* folly::simd::movemask<scalar_type>(simdRegister)
* -> std::pair<Bits, BitsPerElement>;
*
* scalar type - type of element in the simdRegister
*
* Bits - unsigned integral, containing the bitmask (first is lowest bit).
* BitsPerElement - std::integral_constant with number of bits per element
* BitsPerElement - std::integral_constant with number of bits per element.
*
* There are also overloads taking `ignore`
*
* folly::simd::movemask<T>(nativeRegister, ignore_extrema)
* folly::simd::movemask<T>(nativeRegister, ignore_none)
*
* Example:
* These are there if not all the native register contains valid results,
* and some need to be ignored (zeroed out)
*
* std::optional<std::uint32_t> firstTrueUint16(auto simdRegister) {
* auto [bits, bitsPerElement] =
* folly::movemask<std::uint16_t>(simdRegister);
* Example: find in 8 shorts on arm.
*
* std::optional<std::uint32_t> findUint16(
* std::span<const std::uint16_t> haystack,
* std::uint16_t needle) {
* uint16x8_t loaded = vld1q_u16(arr.data());
* uint16x8_t simdNeedle = vdupq_n_u16(needle);
* uint16x8_t test = vceqq_u16(loaded, simdNeedle);
*
* auto [bits, bitsPerElement] = folly::simd::movemask<std::uint16_t>(test);
* if (!bits) {
* return std::nullopt;
* }
Expand All @@ -63,15 +83,47 @@ namespace folly {
* Arm implementation is based on:
* https://github.com/jfalcou/eve/blob/a2e2cf539e36e9a3326800194ad5206a8ef3f5b7/include/eve/detail/function/simd/arm/neon/movemask.hpp#L48
*
*/
**/

template <typename Scalar>
struct movemask_fn {
template <typename Reg>
auto operator()(Reg reg) const;

template <typename Reg, typename Ignore>
FOLLY_ERASE auto operator()(Reg reg, Ignore ignore) const {
auto [bits, bitsPerElement] = operator()(reg);

if constexpr (std::is_same_v<Ignore, ignore_none>) {
return std::pair{bits, bitsPerElement};
} else {
static constexpr int kCardinal = sizeof(Reg) / sizeof(Scalar);

int bitsToKeep = (kCardinal - ignore.last) * bitsPerElement;

bits =
clear_n_least_significant_bits(bits, ignore.first * bitsPerElement);
bits = clear_n_most_significant_bits(bits, sizeof(bits) * 8 - bitsToKeep);
return std::pair{bits, bitsPerElement};
}
}
};

template <typename Scalar>
inline constexpr movemask_fn<Scalar> movemask;

#if FOLLY_X64

template <typename Scalar, typename Reg>
auto movemask(Reg reg) {
template <typename Scalar>
template <typename Reg>
auto movemask_fn<Scalar>::operator()(Reg reg) const {
std::integral_constant<std::uint32_t, sizeof(Scalar) == 2 ? 2 : 1>
bitsPerElement;
auto mmask = static_cast<std::uint32_t>([&] {

using uint_t = std::
conditional_t<std::is_same_v<Reg, __m128i>, std::uint16_t, std::uint32_t>;

auto mmask = static_cast<uint_t>([&] {
if constexpr (std::is_same_v<Reg, __m128i>) {
if constexpr (sizeof(Scalar) <= 2) {
return _mm_movemask_epi8(reg);
Expand Down Expand Up @@ -123,8 +175,9 @@ uint64x1_t asUint64x1Aarch64(Reg reg) {

} // namespace detail

template <typename Scalar, typename Reg>
auto movemask(Reg reg) {
template <typename Scalar>
template <typename Reg>
auto movemask_fn<Scalar>::operator()(Reg reg) const {
if constexpr (std::is_same_v<Reg, uint64x2_t>) {
return movemask<std::uint32_t>(vmovn_u64(reg));
} else if constexpr (std::is_same_v<Reg, uint32x4_t>) {
Expand All @@ -142,6 +195,6 @@ auto movemask(Reg reg) {

#endif

} // namespace folly
} // namespace folly::simd

FOLLY_POP_WARNING
5 changes: 3 additions & 2 deletions folly/algorithm/simd/detail/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ cpp_library(
name = "simd_char_platform",
headers = ["SimdCharPlatform.h"],
exported_deps = [
":simd_for_each",
"//folly:portability",
"//folly/algorithm/simd:ignore",
"//folly/algorithm/simd:movemask",
"//folly/lang:bits",
],
Expand All @@ -41,9 +41,10 @@ cpp_library(
name = "simd_for_each",
headers = ["SimdForEach.h"],
exported_deps = [
":unroll_utils",
"//folly:c_portability",
"//folly:traits",
"//folly/algorithm/simd/detail:unroll_utils",
"//folly/algorithm/simd:ignore",
],
)

Expand Down
51 changes: 6 additions & 45 deletions folly/algorithm/simd/detail/SimdCharPlatform.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
#pragma once

#include <folly/Portability.h>
#include <folly/algorithm/simd/Ignore.h>
#include <folly/algorithm/simd/Movemask.h>
#include <folly/algorithm/simd/detail/SimdForEach.h>
#include <folly/algorithm/simd/detail/SimdCharPlatform.h>
#include <folly/lang/Bits.h>

#include <array>
Expand Down Expand Up @@ -48,12 +49,9 @@ namespace simd::detail {
* Nested types:
* - reg_t - type of a simd register (__m128i)
* - logical_t - type of a simd logical register (matches reg_t so far)
* - mmask_t - type of an integer bitmask we can get from logical (similar to
* _mm_movemask_epi8).
*
* Nested constants:
* - kCardinal - number of elements in a register
* - kMmaskBitsPerElement - number of bits per element in a mmask_t
*
* loads:
* - loadu(const char*, ignore_none)
Expand All @@ -68,47 +66,16 @@ namespace simd::detail {
* - le_unsigned(reg_t, char) - by lane less than or equal to char.
*
* logical ops:
* - movemask - take a bitmask
* - any(logical_t, ignore) - return true if any the lanes are true
* - logical_or(logical_t, logical_t) - by lane logical or
*
* mmask ops:
* - clear(mmask, ignore) - sets ignored bits to 0
*
*/

#if FOLLY_X64 || FOLLY_AARCH64

template <typename Platform>
struct SimdCharPlatformCommon : Platform {
using logical_t = typename Platform::logical_t;
using movemask_result_t =
decltype(folly::movemask<std::uint8_t>(logical_t{}));
using mmask_t = typename movemask_result_t::first_type;
static constexpr std::uint32_t kMmaskBitsPerElement =
typename movemask_result_t::second_type{}();

template <typename Uint>
FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static Uint setLowerNBits(int n) {
if (sizeof(Uint) == 8 && n == 64) {
return static_cast<Uint>(-1);
}
return static_cast<Uint>((std::uint64_t{1} << n) - 1);
}

FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static mmask_t clear(
mmask_t mmask, ignore_extrema ignore) {
mmask_t clearFirst =
~setLowerNBits<mmask_t>(ignore.first * kMmaskBitsPerElement);
mmask_t clearLast = setLowerNBits<mmask_t>(
(Platform::kCardinal - ignore.last) * kMmaskBitsPerElement);
return mmask & clearFirst & clearLast;
}

FOLLY_NODISCARD FOLLY_ALWAYS_INLINE static mmask_t clear(
mmask_t mmask, ignore_none) {
return mmask;
}

// These are aligned loads but there is no point in generating
// aligned load instructions, so we call loadu.
Expand All @@ -122,18 +89,12 @@ struct SimdCharPlatformCommon : Platform {
return Platform::unsafeLoadu(ptr, ignore_none{});
}

FOLLY_ALWAYS_INLINE
static mmask_t movemask(logical_t log) {
return folly::movemask<std::uint8_t>(log).first;
}

using Platform::any;

FOLLY_ALWAYS_INLINE
static bool any(typename Platform::logical_t log, ignore_extrema ignore) {
auto mmask = movemask(log);
mmask = clear(mmask, ignore);
return mmask;
std::pair mmask = movemask<std::uint8_t>(log, ignore);
return mmask.first;
}

static auto toArray(typename Platform::reg_t x) {
Expand Down Expand Up @@ -186,7 +147,7 @@ struct SimdCharSse2PlatformSpecific {

FOLLY_ALWAYS_INLINE
static bool any(logical_t log, ignore_none) {
return folly::movemask<std::uint8_t>(log).first;
return movemask<std::uint8_t>(log).first;
}
};

Expand Down Expand Up @@ -234,7 +195,7 @@ struct SimdCharAvx2PlatformSpecific {

FOLLY_ALWAYS_INLINE
static bool any(logical_t log, ignore_none) {
return folly::movemask<std::uint8_t>(log).first;
return simd::movemask<std::uint8_t>(log).first;
}
};

Expand Down
18 changes: 1 addition & 17 deletions folly/algorithm/simd/detail/SimdForEach.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <folly/CPortability.h>
#include <folly/Traits.h>
#include <folly/algorithm/simd/Ignore.h>
#include <folly/algorithm/simd/detail/UnrollUtils.h>

#include <array>
Expand All @@ -35,23 +36,6 @@ namespace simd::detail {
// to mess that up.
//

/**
* ignore(_none/_extrema)
*
* Tag types for handling the tails.
* ignore_none indicates that the whole register is used.
* ignore_extrema.first, .last show how many elements are out of the data.
*
* For example 3 elements, starting from the second for an 8 element register
* will be ignore_extrema{.first = 1, .last = 4}
*/
struct ignore_extrema {
int first = 0;
int last = 0;
};

struct ignore_none {};

/**
* simdForEachAligning<unrolling>(cardinal, f, l, delegate);
*
Expand Down
2 changes: 1 addition & 1 deletion folly/algorithm/simd/detail/test/SimdAnyOfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ TEST(SimdAnyOfSimple, Ignore) {
buffer.fill(' ');
for (auto& c : buffer) {
c = 'a';
anySpacesTest({&c, 1}, false);
ASSERT_NO_FATAL_FAILURE(anySpacesTest({&c, 1}, false));
c = ' ';
}
}
Expand Down
Loading

0 comments on commit 19095b7

Please sign in to comment.