Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement P0952R2: 'A new specification for std::generate_canonical' #4740

Merged
merged 31 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
13d7ffa
Implement P0952 "A new specification for `std::generate_canonical`"
MattStephanson Jun 20, 2024
adca300
Add/update WP number
MattStephanson Jun 20, 2024
bcfe68b
Update yvals_core.h
MattStephanson Jun 20, 2024
b36e308
Remove outdated test of internal helper function
MattStephanson Jun 20, 2024
cef0522
various cleanups
MattStephanson Jun 23, 2024
56541ca
Add example from LWG2524
MattStephanson Jun 24, 2024
f95de6d
tr1 unpleasantness
MattStephanson Jun 24, 2024
422ebe2
Drop `std::` qualification for `conditional_t`.
StephanTLavavej Jun 25, 2024
422c537
Style: Drop unnecessary parens.
StephanTLavavej Jun 25, 2024
5a432d5
List this paper under "KNOWN UPSTREAM"; also cite P0952R2 specifically.
StephanTLavavej Jun 25, 2024
f069279
Rewrap/detach/unwrap comments.
StephanTLavavej Jun 25, 2024
d549bc3
Style: `while (true)` => `for (;;)`
StephanTLavavej Jun 25, 2024
ffabec6
Initialize `_Xx` and `_Limit` instead of suppressing warnings.
StephanTLavavej Jun 25, 2024
4923a9d
After `if constexpr` `return`, use `else` to avoid dead code.
StephanTLavavej Jun 25, 2024
c658217
Pass `_Rd` to `_Nrand_for_tr1` as a template argument.
StephanTLavavej Jun 25, 2024
70bc9e7
Add const.
StephanTLavavej Jun 25, 2024
8eafb32
Comment `_Rx_is_pow2`.
StephanTLavavej Jun 26, 2024
b176e25
Reorder `_Urbg_gen_can_params` members to follow init order, optimizi…
StephanTLavavej Jun 26, 2024
0ae65e7
Style: `_Urbg_gen_can_params _Params` => `auto _Params`
StephanTLavavej Jun 26, 2024
fa6cf54
Naming clarity: `_Result_int_type` => `_Result_uint_type`
StephanTLavavej Jun 26, 2024
19f6012
Style: `static_cast<_Sx_type>(1)` => `_Sx_type{1}`
StephanTLavavej Jun 26, 2024
89d8b9f
Initialize `_Real _Val{0};` to avoid any potential for warnings.
StephanTLavavej Jun 26, 2024
a71eecc
We shouldn't need to suppress warning C4293 "shift count negative or …
StephanTLavavej Jun 26, 2024
39a4d09
Pre-existing: `_Digits` is either 24 (float) or 53 (double), drop `_M…
StephanTLavavej Jun 26, 2024
bd3b156
Update comment to say TRANSITION and mention `subtract_with_carry_01`.
StephanTLavavej Jun 26, 2024
17736b2
Style: Flip integral and non-integral tr1 cases.
StephanTLavavej Jun 26, 2024
c736511
`_STL_INTERNAL_STATIC_ASSERT` that k is at least 1.
StephanTLavavej Jun 26, 2024
c35d5ce
Test: Use range-for.
StephanTLavavej Jun 26, 2024
1b4869d
Test: Include `<cmath>`, but we don't need `<utility>`.
StephanTLavavej Jun 26, 2024
7107006
Revert "Pass `_Rd` to `_Nrand_for_tr1` as a template argument."
StephanTLavavej Jun 27, 2024
720c471
Work around /clr:pure test timeouts in GH_001017_discrete_distributio…
StephanTLavavej Jun 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 154 additions & 36 deletions stl/inc/random
Original file line number Diff line number Diff line change
Expand Up @@ -261,30 +261,37 @@ private:
vector<result_type> _Myvec;
};

_NODISCARD constexpr int _Generate_canonical_iterations(const int _Bits, const uint64_t _Gmin, const uint64_t _Gmax) {
// For a URBG type `G` with range == `(G::max() - G::min()) + 1`, returns the number of calls to generate at least
// _Bits bits of entropy. Specifically, max(1, ceil(_Bits / log2(range))).

_STL_INTERNAL_CHECK(0 <= _Bits && _Bits <= 64);
struct _Urbg_gen_can_params {
int _Kx = 0; // $k$ in N4981 [rand.util.canonical]/1.4
uint64_t _Xx = 0; // $x$ in N4981 [rand.util.canonical]/1.5
int _Smax_bits = 0; // number of bits in $R^k$
float _Scale = 0.0f; // $r^{-d}$, always representable as a float
bool _Rx_is_pow2 = false;
};

if (_Bits == 0 || (_Gmax == UINT64_MAX && _Gmin == 0)) {
return 1;
_NODISCARD constexpr _Urbg_gen_can_params _Generate_canonical_params(size_t _Bits, uint64_t _Rm1) {
const auto _Rx = _Unsigned128{_Rm1} + 1; // $R$ in N4981 [rand.util.canonical]/1.2
const auto _Target = (_Unsigned128{1} << _Bits); // $r^d$
_Unsigned128 _Product = 1;
_Urbg_gen_can_params _Result;
_Result._Rx_is_pow2 = (_Rx & (_Rx - 1)) == 0;
_Result._Kx = 0;
while (_Product < _Target) {
_Product *= _Rx;
++_Result._Kx;
}

const auto _Range = (_Gmax - _Gmin) + 1;
const auto _Target = ~uint64_t{0} >> (64 - _Bits);
uint64_t _Prod = 1;
int _Ceil = 0;
while (_Prod <= _Target) {
++_Ceil;
if (_Prod > UINT64_MAX / _Range) {
break;
}

_Prod *= _Range;
_Result._Xx = static_cast<uint64_t>(_Product / _Target);
_Result._Scale = 1.0f / static_cast<float>(1ULL << _Bits);
_Result._Smax_bits = 0;
// $S \in [0, _Product)$
--_Product;
while (_Product > 0) {
++_Result._Smax_bits;
_Product >>= 1;
}

return _Ceil;
return _Result;
}

_EXPORT_STD template <class _Real, size_t _Bits, class _Gen>
Expand All @@ -294,27 +301,110 @@ _NODISCARD _Real generate_canonical(_Gen& _Gx) { // build a floating-point value
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
constexpr auto _Minbits = static_cast<int>(_Digits < _Bits ? _Digits : _Bits);

constexpr auto _Gxmin = static_cast<_Real>((_Gen::min)());
constexpr auto _Gxmax = static_cast<_Real>((_Gen::max)());
constexpr auto _Rx = (_Gxmax - _Gxmin) + _Real{1};
if constexpr (_Minbits == 0) {
return _Real{0};
}

using _Result_int_type = std::conditional_t<_Minbits <= 32, uint32_t, uint64_t>;

constexpr auto _Gxmin = (_Gen::min)();
constexpr auto _Gxmax = (_Gen::max)();
constexpr _Urbg_gen_can_params _Params = _Generate_canonical_params(_Minbits, _Gxmax - _Gxmin);

using _Sx_type = std::conditional_t<_Params._Smax_bits <= 32, uint32_t,
std::conditional_t<_Params._Smax_bits <= 64, uint64_t, _Unsigned128>>;
_Sx_type _Sx;

if constexpr (_Params._Rx_is_pow2) {
// Always needs only one attempt. Multiplications can be replaced with shift/add. Optimize k=1 case.
if constexpr (_Params._Kx == 1) {
_Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
} else if constexpr (_Params._Kx > 1) {
constexpr int _Rx_bits = _Params._Smax_bits / _Params._Kx;
_Sx = 0;
int _Shift = 0;
for (int _Idx = 0; _Idx < _Params._Kx; ++_Idx) {
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) << _Shift;
_Shift += _Rx_bits;
}
}

_Sx >>= (_Params._Smax_bits - _Minbits);
return static_cast<_Real>(static_cast<_Result_int_type>(_Sx)) * static_cast<_Real>(_Params._Scale);
}

constexpr auto _Rx = _Sx_type{_Gxmax - _Gxmin} + 1u;
constexpr _Sx_type _Limit = static_cast<_Sx_type>(_Params._Xx) * (static_cast<_Sx_type>(1) << _Minbits);

do {
// unroll first two iterations to avoid unnecessary multiplications
_Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
if constexpr (_Params._Kx == 2) {
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
} else if constexpr (_Params._Kx > 2) {
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
_Sx_type _Factor = _Rx;
for (int _Idx = 2; _Idx < _Params._Kx; ++_Idx) {
_Factor *= _Rx;
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Factor;
}
}
} while (_Sx >= _Limit);

return static_cast<_Real>(static_cast<_Result_int_type>(_Sx / _Params._Xx)) * static_cast<_Real>(_Params._Scale);
}

#pragma warning(push)
#pragma warning(disable : 4701) // potentially uninitialized local variable [_Xx and _Limit]
template <class _Uint_type, class _Gen, class _Real>
bool _Nrand_for_tr1(
const uint64_t _Rd, const uint64_t _Rx, _Gen& _Gx, _Real& _Val, uint64_t& _Sx_init, uint64_t& _Factor_init) {
// Minimally-constexpr generate_canonical algorithm. Will save work and exit if _Factor would overflow.
_Uint_type _Sx = _Sx_init;
_Uint_type _Factor = _Factor_init;
const auto _Gxmin = (_Gx.min)();
const auto _Overflow_limit = UINT64_MAX / _Rx;

_Uint_type _Xx;
_Uint_type _Limit;
bool _Init = false;
while (true) {
while (_Factor < _Rd) {
if constexpr (is_same_v<_Uint_type, uint64_t>) {
if (_Factor > _Overflow_limit) {
_Sx_init = _Sx;
_Factor_init = _Factor;
return true;
}
}

constexpr int _Kx = _Generate_canonical_iterations(_Minbits, (_Gen::min)(), (_Gen::max)());
_Sx += (_Gx() - _Gxmin) * _Factor;
_Factor *= _Rx;
}

_Real _Ans{0};
_Real _Factor{1};
if (!_Init) {
_Init = true;
_Xx = _Factor / _Rd;
_Limit = _Xx * _Rd;
}

for (int _Idx = 0; _Idx < _Kx; ++_Idx) { // add in another set of bits
_Ans += (static_cast<_Real>(_Gx()) - _Gxmin) * _Factor;
_Factor *= _Rx;
if (_Sx < _Limit) {
break;
} else {
_Sx = 0;
_Factor = 1;
}
}

return _Ans / _Factor;
_Val = static_cast<_Real>(static_cast<uint64_t>(_Sx / _Xx));
return false;
}
#pragma warning(pop)

template <class _Gen, class = void>
struct _Has_static_min_max : false_type {};

// This checks a requirement of N4950 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
// This checks a requirement of N4981 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
// to implement the whole concept - we just need to distinguish Standard machinery from tr1 machinery.
template <class _Gen>
struct _Has_static_min_max<_Gen, void_t<decltype(bool_constant<(_Gen::min)() < (_Gen::max)()>::value)>> : true_type {};
Expand All @@ -329,7 +419,8 @@ _NODISCARD _Real _Nrand_impl(_Gen& _Gx) { // build a floating-point value from r

if constexpr (_Has_static_min_max<_Gen>::value && _Minbits <= 64) {
return _STD generate_canonical<_Real, _Minbits>(_Gx);
} else { // TRANSITION, for tr1 machinery only; Standard machinery can call generate_canonical directly
} else if constexpr (!is_integral_v<typename _Gen::result_type>) {
// the pre-P0952R2 algorithm
const _Real _Gxmin = static_cast<_Real>((_Gx.min)());
const _Real _Gxmax = static_cast<_Real>((_Gx.max)());
const _Real _Rx = (_Gxmax - _Gxmin) + _Real{1};
Expand All @@ -346,6 +437,32 @@ _NODISCARD _Real _Nrand_impl(_Gen& _Gx) { // build a floating-point value from r
}

return _Ans / _Factor;
} else {
// TRANSITION, for integral tr1 machinery only; Standard machinery can call generate_canonical directly
const auto _Gxmax = (_Gx.max)();
const auto _Gxmin = (_Gx.min)();
_Real _Val;

if (_Gxmax == UINT64_MAX && _Gxmin == 0) {
// special case when uint64_t can't hold the full URBG range
#pragma warning(push)
#pragma warning(disable : 4293) // shift count negative or too big
_Val = static_cast<_Real>(static_cast<uint64_t>(_Gx()) >> (64 - _Minbits));
#pragma warning(pop)
} else {
constexpr auto _Rd = 1ULL << _Minbits;
const auto _Rx = static_cast<uint64_t>(_Gxmax - _Gxmin) + 1;
uint64_t _Sx = 0;
uint64_t _Factor = 1;

// Try with 64 bits first, upgrade to 128 if necessary.
const bool _Would_overflow = _Nrand_for_tr1<uint64_t>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
if (_Would_overflow) {
_Nrand_for_tr1<_Unsigned128>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
}
}

return _STD ldexp(_Val, -static_cast<int>(_Minbits));
}
}

Expand Down Expand Up @@ -1968,8 +2085,8 @@ public:
explicit _Rng_from_urng_v2(_Urng& _Func) noexcept : _Ref(_Func) {}

_Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index)
// From Daniel Lemire, "Fast Random Integer Generation in an Interval", ACM Trans. Model. Comput. Simul. 29 (1),
// 2019.
// From Daniel Lemire, "Fast Random Integer Generation in an Interval", ACM Trans. Model. Comput. Simul. 29
// (1), 2019.
//
// Algorithm 5 <-> This Code:
// m <-> _Product
Expand Down Expand Up @@ -2925,8 +3042,8 @@ public:

private:
template <class _Engine>
result_type _Eval(
_Engine& _Eng, const param_type& _Par0) const { // Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
result_type _Eval(_Engine& _Eng,
const param_type& _Par0) const { // Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
_Ty _Res;
if (_Par0._Tx < 25) { // generate directly
_Res = 0;
Expand All @@ -2938,7 +3055,8 @@ private:

return _Res;
} else if (_Par0._Mean < 1.0) {
// Events are rare, use waiting time method (Luc Devroye, Non-Uniform Random Variate Generation, p. 525).
// Events are rare, use waiting time method (Luc Devroye, Non-Uniform Random Variate Generation, p.
// 525).
const _Ty1 _Rand = _NRAND(_Eng, _Ty1);

// The exit condition is log(1 - _Rand)/t < log(1-p), which is equivalent to _Rand > 1 - (1-p)^t. If
Expand Down
1 change: 1 addition & 0 deletions stl/inc/yvals_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
// P0883R2 Fixing Atomic Initialization
// P0935R0 Eradicating Unnecessarily Explicit Default Constructors
// P0941R2 Feature-Test Macros
// P0952R2 A New Specification For generate_canonical()
// P0972R0 noexcept For <chrono> zero(), min(), max()
// P1065R2 constexpr INVOKE
// (the std::invoke function only; other components like bind and reference_wrapper are C++20 only)
Expand Down
3 changes: 3 additions & 0 deletions tests/libcxx/expected_results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,9 @@ std/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp
# test4() constructs a negative_binomial_distribution from (40, 1); [rand.dist.bern.negbin] says p == 1 generates undefined probabilities.
std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp FAIL

# libc++ hasn't implemented P0952, which changes the generate_canonical algorithm.
std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp FAIL

# Assertion failed: (std::lerp(T(2.3), T(2.3), inf) == T(2.3))
# Asserts `(std::lerp(T(2.3), T(2.3), inf) == T(2.3))` and `std::isnan(std::lerp(T( 0), T( 0), inf))`
# They shouldn't behave differently. Both of them should probably return NaN.
Expand Down
2 changes: 1 addition & 1 deletion tests/std/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ tests\GH_001850_clog_tied_to_cout
tests\GH_001858_iostream_exception
tests\GH_001912_random_distribution_operator_const
tests\GH_001914_cached_position
tests\GH_001964_constexpr_generate_canonical
tests\GH_002030_asan_annotate_string
tests\GH_002030_asan_annotate_vector
tests\GH_002039_byte_is_not_trivially_swappable
Expand Down Expand Up @@ -511,6 +510,7 @@ tests\P0898R3_identity
tests\P0912R5_coroutine
tests\P0919R3_heterogeneous_unordered_lookup
tests\P0943R6_stdatomic_h
tests\P0952R2_new_generate_canonical
tests\P0966R1_string_reserve_should_not_shrink
tests\P0980R1_constexpr_strings
tests\P1004R2_constexpr_vector
Expand Down
64 changes: 0 additions & 64 deletions tests/std/tests/GH_001964_constexpr_generate_canonical/test.cpp

This file was deleted.

Loading