Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement P0952R2: 'A new specification for std::generate_canonical' #4740

Merged
merged 31 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
13d7ffa
Implement P0952 "A new specification for `std::generate_canonical`"
MattStephanson Jun 20, 2024
adca300
Add/update WP number
MattStephanson Jun 20, 2024
bcfe68b
Update yvals_core.h
MattStephanson Jun 20, 2024
b36e308
Remove outdated test of internal helper function
MattStephanson Jun 20, 2024
cef0522
various cleanups
MattStephanson Jun 23, 2024
56541ca
Add example from LWG2524
MattStephanson Jun 24, 2024
f95de6d
tr1 unpleasantness
MattStephanson Jun 24, 2024
422ebe2
Drop `std::` qualification for `conditional_t`.
StephanTLavavej Jun 25, 2024
422c537
Style: Drop unnecessary parens.
StephanTLavavej Jun 25, 2024
5a432d5
List this paper under "KNOWN UPSTREAM"; also cite P0952R2 specifically.
StephanTLavavej Jun 25, 2024
f069279
Rewrap/detach/unwrap comments.
StephanTLavavej Jun 25, 2024
d549bc3
Style: `while (true)` => `for (;;)`
StephanTLavavej Jun 25, 2024
ffabec6
Initialize `_Xx` and `_Limit` instead of suppressing warnings.
StephanTLavavej Jun 25, 2024
4923a9d
After `if constexpr` `return`, use `else` to avoid dead code.
StephanTLavavej Jun 25, 2024
c658217
Pass `_Rd` to `_Nrand_for_tr1` as a template argument.
StephanTLavavej Jun 25, 2024
70bc9e7
Add const.
StephanTLavavej Jun 25, 2024
8eafb32
Comment `_Rx_is_pow2`.
StephanTLavavej Jun 26, 2024
b176e25
Reorder `_Urbg_gen_can_params` members to follow init order, optimizi…
StephanTLavavej Jun 26, 2024
0ae65e7
Style: `_Urbg_gen_can_params _Params` => `auto _Params`
StephanTLavavej Jun 26, 2024
fa6cf54
Naming clarity: `_Result_int_type` => `_Result_uint_type`
StephanTLavavej Jun 26, 2024
19f6012
Style: `static_cast<_Sx_type>(1)` => `_Sx_type{1}`
StephanTLavavej Jun 26, 2024
89d8b9f
Initialize `_Real _Val{0};` to avoid any potential for warnings.
StephanTLavavej Jun 26, 2024
a71eecc
We shouldn't need to suppress warning C4293 "shift count negative or …
StephanTLavavej Jun 26, 2024
39a4d09
Pre-existing: `_Digits` is either 24 (float) or 53 (double), drop `_M…
StephanTLavavej Jun 26, 2024
bd3b156
Update comment to say TRANSITION and mention `subtract_with_carry_01`.
StephanTLavavej Jun 26, 2024
17736b2
Style: Flip integral and non-integral tr1 cases.
StephanTLavavej Jun 26, 2024
c736511
`_STL_INTERNAL_STATIC_ASSERT` that k is at least 1.
StephanTLavavej Jun 26, 2024
c35d5ce
Test: Use range-for.
StephanTLavavej Jun 26, 2024
1b4869d
Test: Include `<cmath>`, but we don't need `<utility>`.
StephanTLavavej Jun 26, 2024
7107006
Revert "Pass `_Rd` to `_Nrand_for_tr1` as a template argument."
StephanTLavavej Jun 27, 2024
720c471
Work around /clr:pure test timeouts in GH_001017_discrete_distributio…
StephanTLavavej Jun 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 153 additions & 41 deletions stl/inc/random
MattStephanson marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -261,30 +261,37 @@ private:
vector<result_type> _Myvec;
};

_NODISCARD constexpr int _Generate_canonical_iterations(const int _Bits, const uint64_t _Gmin, const uint64_t _Gmax) {
// For a URBG type `G` with range == `(G::max() - G::min()) + 1`, returns the number of calls to generate at least
// _Bits bits of entropy. Specifically, max(1, ceil(_Bits / log2(range))).

_STL_INTERNAL_CHECK(0 <= _Bits && _Bits <= 64);
struct _Urbg_gen_can_params {
bool _Rx_is_pow2 = false; // $R$ (in N4981 [rand.util.canonical]/1.2) is a power of 2
int _Kx = 0; // $k$ in N4981 [rand.util.canonical]/1.4
uint64_t _Xx = 0; // $x$ in N4981 [rand.util.canonical]/1.5
float _Scale = 0.0f; // $r^{-d}$, always representable as a float
int _Smax_bits = 0; // number of bits in $R^k$
};

if (_Bits == 0 || (_Gmax == UINT64_MAX && _Gmin == 0)) {
return 1;
_NODISCARD constexpr _Urbg_gen_can_params _Generate_canonical_params(const size_t _Bits, const uint64_t _Rm1) {
const auto _Rx = _Unsigned128{_Rm1} + 1; // $R$ in N4981 [rand.util.canonical]/1.2
const auto _Target = _Unsigned128{1} << _Bits; // $r^d$
_Unsigned128 _Product = 1;
_Urbg_gen_can_params _Result;
_Result._Rx_is_pow2 = (_Rx & (_Rx - 1)) == 0;
_Result._Kx = 0;
while (_Product < _Target) {
_Product *= _Rx;
++_Result._Kx;
}

const auto _Range = (_Gmax - _Gmin) + 1;
const auto _Target = ~uint64_t{0} >> (64 - _Bits);
uint64_t _Prod = 1;
int _Ceil = 0;
while (_Prod <= _Target) {
++_Ceil;
if (_Prod > UINT64_MAX / _Range) {
break;
}

_Prod *= _Range;
_Result._Xx = static_cast<uint64_t>(_Product / _Target);
_Result._Scale = 1.0f / static_cast<float>(1ULL << _Bits);
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
_Result._Smax_bits = 0;
// $S \in [0, _Product)$
--_Product;
while (_Product > 0) {
++_Result._Smax_bits;
_Product >>= 1;
}

return _Ceil;
return _Result;
}

_EXPORT_STD template <class _Real, size_t _Bits, class _Gen>
Expand All @@ -294,27 +301,110 @@ _NODISCARD _Real generate_canonical(_Gen& _Gx) { // build a floating-point value
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
constexpr auto _Minbits = static_cast<int>(_Digits < _Bits ? _Digits : _Bits);

constexpr auto _Gxmin = static_cast<_Real>((_Gen::min)());
constexpr auto _Gxmax = static_cast<_Real>((_Gen::max)());
constexpr auto _Rx = (_Gxmax - _Gxmin) + _Real{1};
if constexpr (_Minbits == 0) {
return _Real{0};
} else {
using _Result_uint_type = conditional_t<_Minbits <= 32, uint32_t, uint64_t>;

constexpr auto _Gxmin = (_Gen::min)();
constexpr auto _Gxmax = (_Gen::max)();
constexpr auto _Params = _Generate_canonical_params(_Minbits, _Gxmax - _Gxmin);

using _Sx_type = conditional_t<_Params._Smax_bits <= 32, uint32_t,
conditional_t<_Params._Smax_bits <= 64, uint64_t, _Unsigned128>>;
_Sx_type _Sx;

if constexpr (_Params._Rx_is_pow2) {
// Always needs only one attempt. Multiplications can be replaced with shift/add. Optimize k=1 case.
if constexpr (_Params._Kx == 1) {
_Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
} else if constexpr (_Params._Kx > 1) {
constexpr int _Rx_bits = _Params._Smax_bits / _Params._Kx;
_Sx = 0;
int _Shift = 0;
for (int _Idx = 0; _Idx < _Params._Kx; ++_Idx) {
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) << _Shift;
_Shift += _Rx_bits;
}
} else {
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected k
}

constexpr int _Kx = _Generate_canonical_iterations(_Minbits, (_Gen::min)(), (_Gen::max)());
_Sx >>= _Params._Smax_bits - _Minbits;
return static_cast<_Real>(static_cast<_Result_uint_type>(_Sx)) * static_cast<_Real>(_Params._Scale);
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
} else {
constexpr auto _Rx = _Sx_type{_Gxmax - _Gxmin} + 1u;
constexpr _Sx_type _Limit = static_cast<_Sx_type>(_Params._Xx) * (_Sx_type{1} << _Minbits);

do {
// unroll first two iterations to avoid unnecessary multiplications
_Sx = static_cast<_Sx_type>(_Gx() - _Gxmin);
if constexpr (_Params._Kx == 2) {
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
} else if constexpr (_Params._Kx > 2) {
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Rx;
_Sx_type _Factor = _Rx;
for (int _Idx = 2; _Idx < _Params._Kx; ++_Idx) {
_Factor *= _Rx;
_Sx += static_cast<_Sx_type>(_Gx() - _Gxmin) * _Factor;
}
}
} while (_Sx >= _Limit);

_Real _Ans{0};
_Real _Factor{1};
return static_cast<_Real>(static_cast<_Result_uint_type>(_Sx / _Params._Xx))
* static_cast<_Real>(_Params._Scale);
}
}
}

for (int _Idx = 0; _Idx < _Kx; ++_Idx) { // add in another set of bits
_Ans += (static_cast<_Real>(_Gx()) - _Gxmin) * _Factor;
_Factor *= _Rx;
template <class _Uint_type, class _Gen, class _Real>
bool _Nrand_for_tr1(
const uint64_t _Rd, const uint64_t _Rx, _Gen& _Gx, _Real& _Val, uint64_t& _Sx_init, uint64_t& _Factor_init) {
// Minimally-constexpr generate_canonical algorithm. Will save work and exit if _Factor would overflow.
_Uint_type _Sx = _Sx_init;
_Uint_type _Factor = _Factor_init;
const auto _Gxmin = (_Gx.min)();
const auto _Overflow_limit = UINT64_MAX / _Rx;

_Uint_type _Xx = 0;
_Uint_type _Limit = 0;
bool _Init = false;
for (;;) {
while (_Factor < _Rd) {
if constexpr (is_same_v<_Uint_type, uint64_t>) {
if (_Factor > _Overflow_limit) {
_Sx_init = _Sx;
_Factor_init = _Factor;
return true;
}
}

_Sx += (_Gx() - _Gxmin) * _Factor;
_Factor *= _Rx;
}

if (!_Init) {
_Init = true;
_Xx = _Factor / _Rd;
_Limit = _Xx * _Rd;
}

if (_Sx < _Limit) {
break;
} else {
_Sx = 0;
_Factor = 1;
}
}

return _Ans / _Factor;
_Val = static_cast<_Real>(static_cast<uint64_t>(_Sx / _Xx));
return false;
}

template <class _Gen, class = void>
struct _Has_static_min_max : false_type {};

// This checks a requirement of N4950 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
// This checks a requirement of N4981 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt
// to implement the whole concept - we just need to distinguish Standard machinery from tr1 machinery.
template <class _Gen>
struct _Has_static_min_max<_Gen, void_t<decltype(bool_constant<(_Gen::min)() < (_Gen::max)()>::value)>> : true_type {};
Expand All @@ -323,18 +413,40 @@ template <class _Real, class _Gen>
_NODISCARD _Real _Nrand_impl(_Gen& _Gx) { // build a floating-point value from random sequence
_RNG_REQUIRE_REALTYPE(_Nrand_impl, _Real);

constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);
constexpr auto _Bits = ~size_t{0};
constexpr auto _Minbits = _Digits < _Bits ? _Digits : _Bits;
constexpr auto _Digits = static_cast<size_t>(numeric_limits<_Real>::digits);

if constexpr (_Has_static_min_max<_Gen>::value) {
return _STD generate_canonical<_Real, _Digits>(_Gx);
} else if constexpr (is_integral_v<typename _Gen::result_type>) {
// TRANSITION, for integral tr1 machinery only; Standard machinery can call generate_canonical directly
const auto _Gxmax = (_Gx.max)();
const auto _Gxmin = (_Gx.min)();
_Real _Val{0};

if (_Gxmax == UINT64_MAX && _Gxmin == 0) {
// special case when uint64_t can't hold the full URBG range
_Val = static_cast<_Real>(static_cast<uint64_t>(_Gx()) >> (64 - _Digits));
} else {
constexpr auto _Rd = 1ULL << _Digits;
const auto _Rx = static_cast<uint64_t>(_Gxmax - _Gxmin) + 1;
uint64_t _Sx = 0;
uint64_t _Factor = 1;

// Try with 64 bits first, upgrade to 128 if necessary.
const bool _Would_overflow = _Nrand_for_tr1<uint64_t>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
if (_Would_overflow) {
_Nrand_for_tr1<_Unsigned128>(_Rd, _Rx, _Gx, _Val, _Sx, _Factor);
}
}

if constexpr (_Has_static_min_max<_Gen>::value && _Minbits <= 64) {
return _STD generate_canonical<_Real, _Minbits>(_Gx);
} else { // TRANSITION, for tr1 machinery only; Standard machinery can call generate_canonical directly
return _STD ldexp(_Val, -static_cast<int>(_Digits));
} else {
// TRANSITION, the pre-P0952R2 algorithm, for non-integral tr1 machinery only (e.g. subtract_with_carry_01)
const _Real _Gxmin = static_cast<_Real>((_Gx.min)());
const _Real _Gxmax = static_cast<_Real>((_Gx.max)());
const _Real _Rx = (_Gxmax - _Gxmin) + _Real{1};

const int _Ceil = static_cast<int>(_STD ceil(static_cast<_Real>(_Minbits) / _STD log2(_Rx)));
const int _Ceil = static_cast<int>(_STD ceil(static_cast<_Real>(_Digits) / _STD log2(_Rx)));
const int _Kx = _Ceil < 1 ? 1 : _Ceil;

_Real _Ans{0};
Expand Down Expand Up @@ -1968,8 +2080,8 @@ public:
explicit _Rng_from_urng_v2(_Urng& _Func) noexcept : _Ref(_Func) {}

_Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index)
// From Daniel Lemire, "Fast Random Integer Generation in an Interval", ACM Trans. Model. Comput. Simul. 29 (1),
// 2019.
// From Daniel Lemire, "Fast Random Integer Generation in an Interval",
// ACM Trans. Model. Comput. Simul. 29 (1), 2019.
//
// Algorithm 5 <-> This Code:
// m <-> _Product
Expand Down Expand Up @@ -2925,8 +3037,8 @@ public:

private:
template <class _Engine>
result_type _Eval(
_Engine& _Eng, const param_type& _Par0) const { // Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
result_type _Eval(_Engine& _Eng, const param_type& _Par0) const {
// Press et al., Numerical Recipes in C, 2nd ed., pp 295-296.
_Ty _Res;
if (_Par0._Tx < 25) { // generate directly
_Res = 0;
Expand Down
1 change: 1 addition & 0 deletions stl/inc/yvals_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
// P0883R2 Fixing Atomic Initialization
// P0935R0 Eradicating Unnecessarily Explicit Default Constructors
// P0941R2 Feature-Test Macros
// P0952R2 A New Specification For generate_canonical()
// P0972R0 noexcept For <chrono> zero(), min(), max()
// P1065R2 constexpr INVOKE
// (the std::invoke function only; other components like bind and reference_wrapper are C++20 only)
Expand Down
3 changes: 3 additions & 0 deletions tests/libcxx/expected_results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cp
# Bogus test believes that copyability of array<T, 0> must be the same as array<T, 1>
std/containers/sequences/array/array.cons/implicit_copy.pass.cpp FAIL

# libc++ hasn't implemented P0952R2, which changes the generate_canonical algorithm.
std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp FAIL

# Test expects __cpp_lib_chrono to have the old value 201611L for P0505R0; we define the C++20 value 201907L for P1466R3.
std/language.support/support.limits/support.limits.general/chrono.version.compile.pass.cpp FAIL

Expand Down
2 changes: 1 addition & 1 deletion tests/std/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ tests\GH_001850_clog_tied_to_cout
tests\GH_001858_iostream_exception
tests\GH_001912_random_distribution_operator_const
tests\GH_001914_cached_position
tests\GH_001964_constexpr_generate_canonical
tests\GH_002030_asan_annotate_string
tests\GH_002030_asan_annotate_vector
tests\GH_002039_byte_is_not_trivially_swappable
Expand Down Expand Up @@ -511,6 +510,7 @@ tests\P0898R3_identity
tests\P0912R5_coroutine
tests\P0919R3_heterogeneous_unordered_lookup
tests\P0943R6_stdatomic_h
tests\P0952R2_new_generate_canonical
tests\P0966R1_string_reserve_should_not_shrink
tests\P0980R1_constexpr_strings
tests\P1004R2_constexpr_vector
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

RUNALL_INCLUDE ..\usual_matrix.lst
# Skip /clr:pure configurations; they pass but take an extremely long time to run, risking test timeouts.
RUNALL_INCLUDE ..\impure_matrix.lst
64 changes: 0 additions & 64 deletions tests/std/tests/GH_001964_constexpr_generate_canonical/test.cpp

This file was deleted.

Loading