Optimize RNG use in std::sample selection sampling

2016-10-21  Eelis van der Weegen  <eelis@eelis.net>

	* include/bits/stl_algo.h (__gen_two_uniform_ints): Move logic out
	of shuffle into new function.
	(shuffle): Call __gen_two_uniform_ints.
	(__sample<ForwardIterator, OutputIterator, Cat, Size, URBG>): Use
	__gen_two_uniform_ints and perform two samples at a time.

From-SVN: r241414
This commit is contained in:
Eelis van der Weegen 2016-10-21 15:55:07 +00:00 committed by Jonathan Wakely
parent 58f3096372
commit 9d8ff2f63b
2 changed files with 91 additions and 8 deletions

View File

@ -1,3 +1,11 @@
2016-10-21 Eelis van der Weegen <eelis@eelis.net>
* include/bits/stl_algo.h (__gen_two_uniform_ints): Move logic out
of shuffle into new function.
(shuffle): Call __gen_two_uniform_ints.
(__sample<ForwardIterator, OutputIterator, Cat, Size, URBG>): Use
__gen_two_uniform_ints and perform two samples at a time.
2016-10-21 Jonathan Wakely <jwakely@redhat.com>
* include/Makefile.am: Add <bits/refwrap.h> and <bits/std_function.h>.

View File

@ -3740,6 +3740,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // C++14
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
/**
* @brief Generate two uniformly distributed integers using a
* single distribution invocation.
* @param __b0 The upper bound for the first integer.
* @param __b1 The upper bound for the second integer.
* @param __g A UniformRandomBitGenerator.
* @return A pair (i, j) with i and j uniformly distributed
* over [0, __b0) and [0, __b1), respectively.
*
* Requires: __b0 * __b1 <= __g.max() - __g.min().
*
* Using uniform_int_distribution with a range that is very
* small relative to the range of the generator ends up wasting
* potentially expensively generated randomness, since
* uniform_int_distribution does not store leftover randomness
* between invocations.
*
* If we know we want two integers in ranges that are sufficiently
* small, we can compose the ranges, use a single distribution
* invocation, and significantly reduce the waste.
*/
template<typename _IntType, typename _UniformRandomBitGenerator>
pair<_IntType, _IntType>
__gen_two_uniform_ints(_IntType __b0, _IntType __b1,
_UniformRandomBitGenerator&& __g)
{
_IntType __x
= uniform_int_distribution<_IntType>{0, (__b0 * __b1) - 1}(__g);
return std::make_pair(__x / __b1, __x % __b1);
}
/**
* @brief Shuffle the elements of a sequence using a uniform random
* number generator.
@ -3773,8 +3804,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
typedef typename __distr_type::param_type __p_type;
typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
typedef typename std::common_type<typename _Gen::result_type, __ud_type>::type __uc_type;
typedef typename remove_reference<_UniformRandomNumberGenerator>::type
_Gen;
typedef typename common_type<typename _Gen::result_type, __ud_type>::type
__uc_type;
const __uc_type __urngrange = __g.max() - __g.min();
const __uc_type __urange = __uc_type(__last - __first);
@ -3801,13 +3834,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
while (__i != __last)
{
const __uc_type __swap_range = __uc_type(__i - __first) + 1;
const __uc_type __comp_range = __swap_range * (__swap_range + 1);
std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1};
const __uc_type __pospos = __d(__g);
const pair<__uc_type, __uc_type> __pospos =
__gen_two_uniform_ints(__swap_range, __swap_range + 1, __g);
std::iter_swap(__i++, __first + (__pospos % __swap_range));
std::iter_swap(__i++, __first + (__pospos / __swap_range));
std::iter_swap(__i++, __first + __pospos.first);
std::iter_swap(__i++, __first + __pospos.second);
}
return;
@ -5695,9 +5727,52 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
{
using __distrib_type = uniform_int_distribution<_Size>;
using __param_type = typename __distrib_type::param_type;
using _USize = make_unsigned_t<_Size>;
using _Gen = remove_reference_t<_UniformRandomBitGenerator>;
using __uc_type = common_type_t<typename _Gen::result_type, _USize>;
__distrib_type __d{};
_Size __unsampled_sz = std::distance(__first, __last);
for (__n = std::min(__n, __unsampled_sz); __n != 0; ++__first)
__n = std::min(__n, __unsampled_sz);
// If possible, we use __gen_two_uniform_ints to efficiently produce
// two random numbers using a single distribution invocation:
const __uc_type __urngrange = __g.max() - __g.min();
if (__urngrange / __uc_type(__unsampled_sz) >= __uc_type(__unsampled_sz))
// I.e. (__urngrange >= __unsampled_sz * __unsampled_sz) but without
// wrapping issues.
{
while (__n != 0 && __unsampled_sz >= 2)
{
const pair<_Size, _Size> p =
__gen_two_uniform_ints(__unsampled_sz, __unsampled_sz - 1, __g);
--__unsampled_sz;
if (p.first < __n)
{
*__out++ = *__first;
--__n;
}
++__first;
if (__n == 0) break;
--__unsampled_sz;
if (p.second < __n)
{
*__out++ = *__first;
--__n;
}
++__first;
}
}
// The loop above is otherwise equivalent to this one-at-a-time version:
for (; __n != 0; ++__first)
if (__d(__g, __param_type{0, --__unsampled_sz}) < __n)
{
*__out++ = *__first;