-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[math] Use dynamic dispatch for highway SIMD
- Loading branch information
1 parent
0053d9d
commit 6e02f33
Showing
15 changed files
with
610 additions
and
793 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#include "drake/common/hwy_dynamic.h" | ||
|
||
#include <mutex> | ||
#include <vector> | ||
|
||
#include "drake/common/drake_assert.h" | ||
#include "drake/common/never_destroyed.h" | ||
|
||
namespace drake { | ||
namespace internal { | ||
namespace { | ||
|
||
struct Globals { | ||
mutable std::mutex mutex; | ||
std::vector<void (*)()> resets; | ||
}; | ||
|
||
Globals& get_singleton() { | ||
static never_destroyed<Globals> storage; | ||
return storage.access(); | ||
} | ||
|
||
} // namespace | ||
|
||
void HwyDynamicRegisterResetFunction(void (*reset)()) { | ||
DRAKE_DEMAND(reset != nullptr); | ||
auto& singleton = get_singleton(); | ||
std::lock_guard<std::mutex> guard(singleton.mutex); | ||
singleton.resets.push_back(reset); | ||
} | ||
|
||
void HwyDynamicReset() { | ||
const auto& singleton = get_singleton(); | ||
// No mutex guard here; this function is documented as not thread-safe. | ||
for (const auto& reset : singleton.resets) { | ||
reset(); | ||
} | ||
} | ||
|
||
} // namespace internal | ||
} // namespace drake |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#pragma once | ||
|
||
namespace drake { | ||
namespace internal { | ||
|
||
/* This file provides two singleton-like functions whose purpose is to assist | ||
with unit tests that want to probe code that uses "hwy/highway.h" for SIMD. */ | ||
|
||
/* Anywhere in Drake that uses Highway for dynamic CPU dispatch should call this
helper function to register a reset handler to clear the latched CPU detection.
(Most developers never need to worry about this, because it's automatic when you
use the tools in `hwy_dynamic_impl.h`.) This function is safe to call from
multiple threads concurrently.
@param reset The handler to register; must not be null. */
void HwyDynamicRegisterResetFunction(void (*reset)());
|
||
/* (For testing only) Un-latches the CPU target detection by invoking every
reset handler that was registered via HwyDynamicRegisterResetFunction(), so that
a single test program can exercise several different CPU targets.
@warning This function is NOT thread-safe; only call it from single-threaded
tests. */
void HwyDynamicReset();
|
||
} // namespace internal | ||
} // namespace drake |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#pragma once | ||
|
||
#include <atomic> | ||
#include <utility> | ||
|
||
#include "drake/common/hwy_dynamic.h" | ||
|
||
// This file should only ever be included from `*.cc` implementation files, | ||
// and we always want its code to be private to that file, so we'll use an | ||
// anonymous namespace in a header file, for simplicity. | ||
namespace { // NOLINT(build/namespaces) | ||
|
||
/* LateBoundFunction is a small wrapper class around a C-style raw function | ||
pointer. (It doesn't support std::function objects.) | ||
In a given process, the first time our Call() function is called, it will | ||
latch-initialize the wrapped function pointer as follows: | ||
- call the ChooseFunctor to select a which function pointer to use, | ||
- memorize that answer for next time; | ||
- call the selected function pointer and return its result. | ||
After the selected function pointer is memorized, future calls to Call() will | ||
invoke it efficiently (with no conditional branching). | ||
Note that the memorization is thread-safe, but not limited to at-most-once | ||
semantics. Multiple threads might concurrently select and memorize the choice, | ||
the ChooseFunctor might be called multiple times. | ||
In support of unit testing, selections can be unlatched via HwyDynamicReset() | ||
in "drake/common/hwy_dynamic.h". | ||
@tparam ChooseFunctor thread-safe functor that selects the function pointer to | ||
be wrapped. */ | ||
template <typename ChooseFunctor> | ||
class LateBoundFunction { | ||
public: | ||
/* A function pointer type, determined by what the ChooseFunctor returns. */ | ||
using FunctionPointer = decltype(ChooseFunctor()()); | ||
|
||
/* Calls this trampoline. The first time we're called, we'll choose the best | ||
target function and memorize it; subsequent calls will directly call into the | ||
chosen target without any conditional checks. */ | ||
template <typename... Args> | ||
__attribute__((always_inline)) static decltype(auto) Call(Args... args) { | ||
auto impl = function_.load(std::memory_order::relaxed); | ||
return impl(std::forward<Args>(args)...); | ||
} | ||
|
||
private: | ||
template <typename... Args> | ||
__attribute__((cold)) static decltype(auto) ChooseThenCall(Args... args) { | ||
drake::internal::HwyDynamicRegisterResetFunction(&Reset); | ||
hwy::GetChosenTarget().Update(hwy::SupportedTargets()); | ||
auto impl = ChooseFunctor()(); | ||
function_.store(impl, std::memory_order::relaxed); | ||
return impl(args...); | ||
} | ||
|
||
/* (For testing only) Clears the latched target detection. This allows for | ||
testing multiple different targets from the same test program. This function | ||
is NOT thread-safe; only use this in single-threaded tests. */ | ||
__attribute__((cold)) static void Reset() { | ||
// The memory order here doesn't really matter; this must only ever be | ||
// called in a single-threaded context. Anyway we'll use still use relaxed | ||
// to match the rest of this file. | ||
function_.store(&LateBoundFunction::ChooseThenCall, | ||
std::memory_order::relaxed); | ||
} | ||
|
||
// Static globals must be trivially destructible. | ||
static_assert(std::is_trivially_destructible_v<std::atomic<FunctionPointer>>); | ||
|
||
// All operations on this pointer must use memory_order::relaxed, which is | ||
// zero-cost on the platforms we care about. Note that the default value for | ||
// this variable is provided out-of-line below; it does NOT default to null. | ||
static std::atomic<FunctionPointer> function_; | ||
}; | ||
|
||
template <typename ChooseFunctor> | ||
std::atomic<decltype(ChooseFunctor()())> | ||
LateBoundFunction<ChooseFunctor>::function_ = | ||
&LateBoundFunction::ChooseThenCall; | ||
|
||
} // namespace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.