multiway_merge.h: Simple formatting and uglification fixes.
2009-11-06 Paolo Carlini <paolo.carlini@oracle.com> * include/parallel/multiway_merge.h: Simple formatting and uglification fixes. * include/parallel/find_selectors.h: Likewise. * include/parallel/losertree.h: Likewise. * include/parallel/list_partition.h: Likewise. * include/parallel/for_each.h: Likewise. * include/parallel/multiseq_selection.h: Likewise. * include/parallel/workstealing.h: Likewise. * include/parallel/par_loop.h: Likewise. * include/parallel/numeric: Likewise. * include/parallel/quicksort.h: Likewise. * include/parallel/equally_split.h: Likewise. * include/parallel/omp_loop_static.h: Likewise. * include/parallel/random_shuffle.h: Likewise. * include/parallel/balanced_quicksort.h: Likewise. * include/parallel/tags.h: Likewise. * include/parallel/set_operations.h: Likewise. * include/parallel/merge.h: Likewise. * include/parallel/unique_copy.h: Likewise. * include/parallel/multiway_mergesort.h: Likewise. * include/parallel/search.h: Likewise. * include/parallel/partition.h: Likewise. * include/parallel/partial_sum.h: Likewise. * include/parallel/find.h: Likewise. * include/parallel/queue.h: Likewise. * include/parallel/omp_loop.h: Likewise. * include/parallel/checkers.h: Likewise. * include/parallel/sort.h: Likewise. From-SVN: r153966
This commit is contained in:
parent
b169fe9de8
commit
77d16198fc
@ -1,3 +1,34 @@
|
||||
2009-11-06 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
* include/parallel/multiway_merge.h: Simple formatting and
|
||||
uglification fixes.
|
||||
* include/parallel/find_selectors.h: Likewise.
|
||||
* include/parallel/losertree.h: Likewise.
|
||||
* include/parallel/list_partition.h: Likewise.
|
||||
* include/parallel/for_each.h: Likewise.
|
||||
* include/parallel/multiseq_selection.h: Likewise.
|
||||
* include/parallel/workstealing.h: Likewise.
|
||||
* include/parallel/par_loop.h: Likewise.
|
||||
* include/parallel/numeric: Likewise.
|
||||
* include/parallel/quicksort.h: Likewise.
|
||||
* include/parallel/equally_split.h: Likewise.
|
||||
* include/parallel/omp_loop_static.h: Likewise.
|
||||
* include/parallel/random_shuffle.h: Likewise.
|
||||
* include/parallel/balanced_quicksort.h: Likewise.
|
||||
* include/parallel/tags.h: Likewise.
|
||||
* include/parallel/set_operations.h: Likewise.
|
||||
* include/parallel/merge.h: Likewise.
|
||||
* include/parallel/unique_copy.h: Likewise.
|
||||
* include/parallel/multiway_mergesort.h: Likewise.
|
||||
* include/parallel/search.h: Likewise.
|
||||
* include/parallel/partition.h: Likewise.
|
||||
* include/parallel/partial_sum.h: Likewise.
|
||||
* include/parallel/find.h: Likewise.
|
||||
* include/parallel/queue.h: Likewise.
|
||||
* include/parallel/omp_loop.h: Likewise.
|
||||
* include/parallel/checkers.h: Likewise.
|
||||
* include/parallel/sort.h: Likewise.
|
||||
|
||||
2009-11-06 Jonathan Wakely <jwakely.gcc@gmail.com>
|
||||
|
||||
PR libstdc++/41949
|
||||
|
@ -57,436 +57,435 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Information local to one thread in the parallel quicksort run. */
|
||||
template<typename _RAIter>
|
||||
struct _QSBThreadLocal
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
/** @brief Information local to one thread in the parallel quicksort run. */
|
||||
template<typename _RAIter>
|
||||
struct _QSBThreadLocal
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
/** @brief Continuous part of the sequence, described by an
|
||||
iterator pair. */
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
/** @brief Continuous part of the sequence, described by an
|
||||
iterator pair. */
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
|
||||
/** @brief Initial piece to work on. */
|
||||
_Piece _M_initial;
|
||||
/** @brief Initial piece to work on. */
|
||||
_Piece _M_initial;
|
||||
|
||||
/** @brief Work-stealing queue. */
|
||||
_RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
|
||||
/** @brief Work-stealing queue. */
|
||||
_RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
|
||||
|
||||
/** @brief Number of threads involved in this algorithm. */
|
||||
_ThreadIndex _M_num_threads;
|
||||
/** @brief Number of threads involved in this algorithm. */
|
||||
_ThreadIndex _M_num_threads;
|
||||
|
||||
/** @brief Pointer to a counter of elements left over to sort. */
|
||||
volatile _DifferenceType* _M_elements_leftover;
|
||||
/** @brief Pointer to a counter of elements left over to sort. */
|
||||
volatile _DifferenceType* _M_elements_leftover;
|
||||
|
||||
/** @brief The complete sequence to sort. */
|
||||
_Piece _M_global;
|
||||
/** @brief The complete sequence to sort. */
|
||||
_Piece _M_global;
|
||||
|
||||
/** @brief Constructor.
|
||||
* @param __queue_size size of the work-stealing queue. */
|
||||
_QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
|
||||
};
|
||||
/** @brief Constructor.
|
||||
* @param __queue_size size of the work-stealing queue. */
|
||||
_QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
|
||||
};
|
||||
|
||||
/** @brief Balanced quicksort divide step.
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
* @pre @c (__end-__begin)>=1 */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__qsb_divide(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
|
||||
/** @brief Balanced quicksort divide step.
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
* @pre @c (__end-__begin)>=1 */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__qsb_divide(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
|
||||
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_RAIter __pivot_pos =
|
||||
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
|
||||
__end - 1, __comp);
|
||||
_RAIter __pivot_pos =
|
||||
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
|
||||
__end - 1, __comp);
|
||||
|
||||
#if defined(_GLIBCXX_ASSERTIONS)
|
||||
// Must be in between somewhere.
|
||||
_DifferenceType __n = __end - __begin;
|
||||
// Must be in between somewhere.
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
(!__comp(*__pivot_pos, *__begin) &&
|
||||
!__comp(*(__begin + __n / 2), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *__begin) &&
|
||||
!__comp(*(__end - 1), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
|
||||
!__comp(*__begin, *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
|
||||
!__comp(*(__end - 1), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
|
||||
!__comp(*__begin, *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
|
||||
!__comp(*(__begin + __n / 2), *__pivot_pos)));
|
||||
_GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
|
||||
&& !__comp(*(__begin + __n / 2),
|
||||
*__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *__begin)
|
||||
&& !__comp(*(__end - 1), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__begin + __n / 2))
|
||||
&& !__comp(*__begin, *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__begin + __n / 2))
|
||||
&& !__comp(*(__end - 1), *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__end - 1))
|
||||
&& !__comp(*__begin, *__pivot_pos))
|
||||
|| (!__comp(*__pivot_pos, *(__end - 1))
|
||||
&& !__comp(*(__begin + __n / 2),
|
||||
*__pivot_pos)));
|
||||
#endif
|
||||
|
||||
// Swap pivot value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
// Swap pivot value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, returning __end - __begin - 1 in the worst case.
|
||||
_DifferenceType __split_pos = __parallel_partition(
|
||||
__begin, __end - 1, __pred, __num_threads);
|
||||
// Divide, returning __end - __begin - 1 in the worst case.
|
||||
_DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
|
||||
__pred,
|
||||
__num_threads);
|
||||
|
||||
// Swap back pivot to middle.
|
||||
std::swap(*(__begin + __split_pos), *__pivot_pos);
|
||||
__pivot_pos = __begin + __split_pos;
|
||||
// Swap back pivot to middle.
|
||||
std::swap(*(__begin + __split_pos), *__pivot_pos);
|
||||
__pivot_pos = __begin + __split_pos;
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_RAIter __r;
|
||||
for (__r = __begin; __r != __pivot_pos; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
|
||||
for (; __r != __end; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
|
||||
_RAIter __r;
|
||||
for (__r = __begin; __r != __pivot_pos; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
|
||||
for (; __r != __end; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
|
||||
#endif
|
||||
|
||||
return __split_pos;
|
||||
}
|
||||
|
||||
/** @brief Quicksort conquer step.
|
||||
* @param __tls Array of thread-local storages.
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __iam Number of the thread processing this function.
|
||||
* @param __num_threads
|
||||
* Number of threads that are allowed to work on this part. */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
|
||||
_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __iam, _ThreadIndex __num_threads,
|
||||
bool __parent_wait)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
if (__num_threads <= 1 || __n <= 1)
|
||||
{
|
||||
__tls[__iam]->_M_initial.first = __begin;
|
||||
__tls[__iam]->_M_initial.second = __end;
|
||||
|
||||
__qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Divide step.
|
||||
_DifferenceType __split_pos =
|
||||
__qsb_divide(__begin, __end, __comp, __num_threads);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
|
||||
__split_pos < (__end - __begin));
|
||||
#endif
|
||||
|
||||
_ThreadIndex __num_threads_leftside =
|
||||
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>(
|
||||
__num_threads - 1, __split_pos * __num_threads / __n));
|
||||
|
||||
# pragma omp atomic
|
||||
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
|
||||
|
||||
// Conquer step.
|
||||
# pragma omp parallel num_threads(2)
|
||||
{
|
||||
bool __wait;
|
||||
if(omp_get_num_threads() < 2)
|
||||
__wait = false;
|
||||
else
|
||||
__wait = __parent_wait;
|
||||
|
||||
# pragma omp sections
|
||||
{
|
||||
# pragma omp section
|
||||
{
|
||||
__qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
|
||||
__iam,
|
||||
__num_threads_leftside,
|
||||
__wait);
|
||||
__wait = __parent_wait;
|
||||
}
|
||||
// The pivot_pos is left in place, to ensure termination.
|
||||
# pragma omp section
|
||||
{
|
||||
__qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
|
||||
__iam + __num_threads_leftside,
|
||||
__num_threads - __num_threads_leftside,
|
||||
__wait);
|
||||
__wait = __parent_wait;
|
||||
}
|
||||
}
|
||||
return __split_pos;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Quicksort step doing load-balanced local sort.
|
||||
* @param __tls Array of thread-local storages.
|
||||
* @param __comp Comparator.
|
||||
* @param __iam Number of the thread processing this function.
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
|
||||
_Compare& __comp, int __iam, bool __wait)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
/** @brief Quicksort conquer step.
|
||||
* @param __tls Array of thread-local storages.
|
||||
* @param __begin Begin iterator of subsequence.
|
||||
* @param __end End iterator of subsequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __iam Number of the thread processing this function.
|
||||
* @param __num_threads
|
||||
* Number of threads that are allowed to work on this part. */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
|
||||
_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __iam, _ThreadIndex __num_threads,
|
||||
bool __parent_wait)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_DifferenceType __base_case_n =
|
||||
_Settings::get().sort_qsb_base_case_maximal_n;
|
||||
if (__base_case_n < 2)
|
||||
__base_case_n = 2;
|
||||
_ThreadIndex __num_threads = __tl._M_num_threads;
|
||||
if (__num_threads <= 1 || __n <= 1)
|
||||
{
|
||||
__tls[__iam]->_M_initial.first = __begin;
|
||||
__tls[__iam]->_M_initial.second = __end;
|
||||
|
||||
// Every thread has its own random number generator.
|
||||
_RandomNumber __rng(__iam + 1);
|
||||
__qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
|
||||
|
||||
_Piece __current = __tl._M_initial;
|
||||
return;
|
||||
}
|
||||
|
||||
// Divide step.
|
||||
_DifferenceType __split_pos =
|
||||
__qsb_divide(__begin, __end, __comp, __num_threads);
|
||||
|
||||
_DifferenceType __elements_done = 0;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_DifferenceType __total_elements_done = 0;
|
||||
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
|
||||
__split_pos < (__end - __begin));
|
||||
#endif
|
||||
|
||||
for (;;)
|
||||
_ThreadIndex
|
||||
__num_threads_leftside = std::max<_ThreadIndex>
|
||||
(1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
|
||||
* __num_threads / __n));
|
||||
|
||||
# pragma omp atomic
|
||||
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
|
||||
|
||||
// Conquer step.
|
||||
# pragma omp parallel num_threads(2)
|
||||
{
|
||||
// Invariant: __current must be a valid (maybe empty) range.
|
||||
_RAIter __begin = __current.first, __end = __current.second;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
bool __wait;
|
||||
if(omp_get_num_threads() < 2)
|
||||
__wait = false;
|
||||
else
|
||||
__wait = __parent_wait;
|
||||
|
||||
if (__n > __base_case_n)
|
||||
{
|
||||
// Divide.
|
||||
_RAIter __pivot_pos = __begin + __rng(__n);
|
||||
# pragma omp sections
|
||||
{
|
||||
# pragma omp section
|
||||
{
|
||||
__qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
|
||||
__iam, __num_threads_leftside, __wait);
|
||||
__wait = __parent_wait;
|
||||
}
|
||||
// The pivot_pos is left in place, to ensure termination.
|
||||
# pragma omp section
|
||||
{
|
||||
__qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
|
||||
__iam + __num_threads_leftside,
|
||||
__num_threads - __num_threads_leftside, __wait);
|
||||
__wait = __parent_wait;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Swap __pivot_pos value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
/**
|
||||
* @brief Quicksort step doing load-balanced local sort.
|
||||
* @param __tls Array of thread-local storages.
|
||||
* @param __comp Comparator.
|
||||
* @param __iam Number of the thread processing this function.
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
|
||||
_Compare& __comp, int __iam, bool __wait)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
|
||||
__gnu_parallel::binder2nd
|
||||
<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
|
||||
|
||||
// Divide, leave pivot unchanged in last place.
|
||||
_RAIter __split_pos1, __split_pos2;
|
||||
__split_pos1 =
|
||||
__gnu_sequential::partition(__begin, __end - 1, __pred);
|
||||
_DifferenceType
|
||||
__base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
|
||||
if (__base_case_n < 2)
|
||||
__base_case_n = 2;
|
||||
_ThreadIndex __num_threads = __tl._M_num_threads;
|
||||
|
||||
// Left side: < __pivot_pos; right side: >= __pivot_pos.
|
||||
// Every thread has its own random number generator.
|
||||
_RandomNumber __rng(__iam + 1);
|
||||
|
||||
_Piece __current = __tl._M_initial;
|
||||
|
||||
_DifferenceType __elements_done = 0;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
|
||||
&& __split_pos1 < __end);
|
||||
#endif
|
||||
// Swap pivot back to middle.
|
||||
if (__split_pos1 != __pivot_pos)
|
||||
std::swap(*__split_pos1, *__pivot_pos);
|
||||
__pivot_pos = __split_pos1;
|
||||
|
||||
// In case all elements are equal, __split_pos1 == __begin.
|
||||
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|
||||
|| (__end - __split_pos1) < (__n >> 7))
|
||||
{
|
||||
// Very unequal split, one part smaller than one 128th
|
||||
// elements not strictly larger than the pivot.
|
||||
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
|
||||
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
|
||||
__pred(__gnu_parallel::__binder1st
|
||||
<_Compare, _ValueType, _ValueType, bool>(
|
||||
__comp, *__pivot_pos));
|
||||
|
||||
// Find other end of pivot-equal range.
|
||||
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
|
||||
__end, __pred);
|
||||
}
|
||||
else
|
||||
// Only skip the pivot.
|
||||
__split_pos2 = __split_pos1 + 1;
|
||||
|
||||
// Elements equal to pivot are done.
|
||||
__elements_done += (__split_pos2 - __split_pos1);
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
__total_elements_done += (__split_pos2 - __split_pos1);
|
||||
#endif
|
||||
// Always push larger part onto stack.
|
||||
if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
|
||||
{
|
||||
// Right side larger.
|
||||
if ((__split_pos2) != __end)
|
||||
__tl._M_leftover_parts.push_front(
|
||||
std::make_pair(__split_pos2, __end));
|
||||
|
||||
//__current.first = __begin; //already set anyway
|
||||
__current.second = __split_pos1;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Left side larger.
|
||||
if (__begin != __split_pos1)
|
||||
__tl._M_leftover_parts.push_front(std::make_pair(__begin,
|
||||
__split_pos1));
|
||||
|
||||
__current.first = __split_pos2;
|
||||
//__current.second = __end; //already set anyway
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
__elements_done += __n;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
__total_elements_done += __n;
|
||||
_DifferenceType __total_elements_done = 0;
|
||||
#endif
|
||||
|
||||
// Prefer own stack, small pieces.
|
||||
if (__tl._M_leftover_parts.pop_front(__current))
|
||||
continue;
|
||||
for (;;)
|
||||
{
|
||||
// Invariant: __current must be a valid (maybe empty) range.
|
||||
_RAIter __begin = __current.first, __end = __current.second;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
# pragma omp atomic
|
||||
*__tl._M_elements_leftover -= __elements_done;
|
||||
if (__n > __base_case_n)
|
||||
{
|
||||
// Divide.
|
||||
_RAIter __pivot_pos = __begin + __rng(__n);
|
||||
|
||||
__elements_done = 0;
|
||||
// Swap __pivot_pos value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
__gnu_parallel::binder2nd
|
||||
<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, leave pivot unchanged in last place.
|
||||
_RAIter __split_pos1, __split_pos2;
|
||||
__split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
|
||||
__pred);
|
||||
|
||||
// Left side: < __pivot_pos; right side: >= __pivot_pos.
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
double __search_start = omp_get_wtime();
|
||||
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
|
||||
&& __split_pos1 < __end);
|
||||
#endif
|
||||
// Swap pivot back to middle.
|
||||
if (__split_pos1 != __pivot_pos)
|
||||
std::swap(*__split_pos1, *__pivot_pos);
|
||||
__pivot_pos = __split_pos1;
|
||||
|
||||
// In case all elements are equal, __split_pos1 == __begin.
|
||||
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|
||||
|| (__end - __split_pos1) < (__n >> 7))
|
||||
{
|
||||
// Very unequal split, one part smaller than one 128th
|
||||
// elements not strictly larger than the pivot.
|
||||
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
|
||||
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
|
||||
__pred(__gnu_parallel::__binder1st
|
||||
<_Compare, _ValueType, _ValueType, bool>
|
||||
(__comp, *__pivot_pos));
|
||||
|
||||
// Find other end of pivot-equal range.
|
||||
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
|
||||
__end, __pred);
|
||||
}
|
||||
else
|
||||
// Only skip the pivot.
|
||||
__split_pos2 = __split_pos1 + 1;
|
||||
|
||||
// Elements equal to pivot are done.
|
||||
__elements_done += (__split_pos2 - __split_pos1);
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
__total_elements_done += (__split_pos2 - __split_pos1);
|
||||
#endif
|
||||
// Always push larger part onto stack.
|
||||
if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
|
||||
{
|
||||
// Right side larger.
|
||||
if ((__split_pos2) != __end)
|
||||
__tl._M_leftover_parts.push_front
|
||||
(std::make_pair(__split_pos2, __end));
|
||||
|
||||
//__current.first = __begin; //already set anyway
|
||||
__current.second = __split_pos1;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Left side larger.
|
||||
if (__begin != __split_pos1)
|
||||
__tl._M_leftover_parts.push_front(std::make_pair
|
||||
(__begin, __split_pos1));
|
||||
|
||||
__current.first = __split_pos2;
|
||||
//__current.second = __end; //already set anyway
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
__elements_done += __n;
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
__total_elements_done += __n;
|
||||
#endif
|
||||
|
||||
// Look for new work.
|
||||
bool __successfully_stolen = false;
|
||||
while (__wait && *__tl._M_elements_leftover > 0
|
||||
&& !__successfully_stolen
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
// Possible dead-lock.
|
||||
&& (omp_get_wtime() < (__search_start + 1.0))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
_ThreadIndex __victim;
|
||||
__victim = __rng(__num_threads);
|
||||
// Prefer own stack, small pieces.
|
||||
if (__tl._M_leftover_parts.pop_front(__current))
|
||||
continue;
|
||||
|
||||
// Large pieces.
|
||||
__successfully_stolen = (__victim != __iam)
|
||||
&& __tls[__victim]->_M_leftover_parts.pop_back(__current);
|
||||
if (!__successfully_stolen)
|
||||
__yield();
|
||||
# pragma omp atomic
|
||||
*__tl._M_elements_leftover -= __elements_done;
|
||||
|
||||
__elements_done = 0;
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
double __search_start = omp_get_wtime();
|
||||
#endif
|
||||
|
||||
// Look for new work.
|
||||
bool __successfully_stolen = false;
|
||||
while (__wait && *__tl._M_elements_leftover > 0
|
||||
&& !__successfully_stolen
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
// Possible dead-lock.
|
||||
&& (omp_get_wtime() < (__search_start + 1.0))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
_ThreadIndex __victim;
|
||||
__victim = __rng(__num_threads);
|
||||
|
||||
// Large pieces.
|
||||
__successfully_stolen = (__victim != __iam)
|
||||
&& __tls[__victim]->_M_leftover_parts.pop_back(__current);
|
||||
if (!__successfully_stolen)
|
||||
__yield();
|
||||
#if !defined(__ICC) && !defined(__ECC)
|
||||
# pragma omp flush
|
||||
# pragma omp flush
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
if (omp_get_wtime() >= (__search_start + 1.0))
|
||||
{
|
||||
sleep(1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
|
||||
< (__search_start + 1.0));
|
||||
}
|
||||
if (omp_get_wtime() >= (__search_start + 1.0))
|
||||
{
|
||||
sleep(1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
|
||||
< (__search_start + 1.0));
|
||||
}
|
||||
#endif
|
||||
if (!__successfully_stolen)
|
||||
{
|
||||
if (!__successfully_stolen)
|
||||
{
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** @brief Top-level quicksort routine.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
/** @brief Top-level quicksort routine.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __comp Comparator.
|
||||
* @param __num_threads Number of threads that are allowed to work on
|
||||
* this part.
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef std::pair<_RAIter, _RAIter> _Piece;
|
||||
|
||||
typedef _QSBThreadLocal<_RAIter> _TLSType;
|
||||
typedef _QSBThreadLocal<_RAIter> _TLSType;
|
||||
|
||||
_DifferenceType __n = __end - __begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
if (__n <= 1)
|
||||
return;
|
||||
if (__n <= 1)
|
||||
return;
|
||||
|
||||
// At least one element per processor.
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
// At least one element per processor.
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
// Initialize thread local storage
|
||||
_TLSType** __tls = new _TLSType*[__num_threads];
|
||||
_DifferenceType __queue_size =
|
||||
__num_threads * (_ThreadIndex)(log2(__n) + 1);
|
||||
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
|
||||
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
|
||||
// Initialize thread local storage
|
||||
_TLSType** __tls = new _TLSType*[__num_threads];
|
||||
_DifferenceType __queue_size = (__num_threads
|
||||
* (_ThreadIndex)(__rd_log2(__n) + 1));
|
||||
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
|
||||
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
|
||||
|
||||
// There can never be more than ceil(log2(__n)) ranges on the stack,
|
||||
// because
|
||||
// 1. Only one processor pushes onto the stack
|
||||
// 2. The largest range has at most length __n
|
||||
// 3. Each range is larger than half of the range remaining
|
||||
volatile _DifferenceType _M_elements_leftover = __n;
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
__tls[__i]->_M_elements_leftover = &_M_elements_leftover;
|
||||
__tls[__i]->_M_num_threads = __num_threads;
|
||||
__tls[__i]->_M_global = std::make_pair(__begin, __end);
|
||||
// There can never be more than ceil(__rd_log2(__n)) ranges on the
|
||||
// stack, because
|
||||
// 1. Only one processor pushes onto the stack
|
||||
// 2. The largest range has at most length __n
|
||||
// 3. Each range is larger than half of the range remaining
|
||||
volatile _DifferenceType __elements_leftover = __n;
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
__tls[__i]->_M_elements_leftover = &__elements_leftover;
|
||||
__tls[__i]->_M_num_threads = __num_threads;
|
||||
__tls[__i]->_M_global = std::make_pair(__begin, __end);
|
||||
|
||||
// Just in case nothing is left to assign.
|
||||
__tls[__i]->_M_initial = std::make_pair(__end, __end);
|
||||
}
|
||||
// Just in case nothing is left to assign.
|
||||
__tls[__i]->_M_initial = std::make_pair(__end, __end);
|
||||
}
|
||||
|
||||
// Main recursion call.
|
||||
__qsb_conquer(
|
||||
__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
|
||||
// Main recursion call.
|
||||
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
|
||||
__num_threads, true);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
// All stacks must be empty.
|
||||
_Piece __dummy;
|
||||
for (int __i = 1; __i < __num_threads; ++__i)
|
||||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
|
||||
// All stacks must be empty.
|
||||
_Piece __dummy;
|
||||
for (int __i = 1; __i < __num_threads; ++__i)
|
||||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
|
||||
#endif
|
||||
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
delete __tls[__i];
|
||||
delete[] __tls;
|
||||
}
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
delete __tls[__i];
|
||||
delete[] __tls;
|
||||
}
|
||||
} // namespace __gnu_parallel
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */
|
||||
|
@ -68,5 +68,6 @@ namespace __gnu_parallel
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_CHECKERS_H */
|
||||
|
@ -1,6 +1,6 @@
|
||||
// -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
||||
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the terms
|
||||
@ -33,57 +33,56 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief function to split a sequence into parts of almost equal size.
|
||||
*
|
||||
* The resulting sequence __s of length __num_threads+1 contains the splitting
|
||||
* positions when splitting the range [0,__n) into parts of almost
|
||||
* equal size (plus minus 1). The first entry is 0, the last one
|
||||
* n. There may result empty parts.
|
||||
* @param __n Number of elements
|
||||
* @param __num_threads Number of parts
|
||||
* @param __s Splitters
|
||||
* @returns End of __splitter sequence, i.e. @__c __s+__num_threads+1 */
|
||||
template<typename _DifferenceType, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
|
||||
_OutputIterator __s)
|
||||
{
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __num_longer_chunks = __n % __num_threads;
|
||||
_DifferenceType __pos = 0;
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
*__s++ = __pos;
|
||||
__pos += (__i < __num_longer_chunks) ?
|
||||
(__chunk_length + 1) : __chunk_length;
|
||||
}
|
||||
*__s++ = __n;
|
||||
return __s;
|
||||
}
|
||||
/** @brief function to split a sequence into parts of almost equal size.
|
||||
*
|
||||
* The resulting sequence __s of length __num_threads+1 contains the
|
||||
* splitting positions when splitting the range [0,__n) into parts of
|
||||
* almost equal size (plus minus 1). The first entry is 0, the last
|
||||
* one n. There may result empty parts.
|
||||
* @param __n Number of elements
|
||||
* @param __num_threads Number of parts
|
||||
* @param __s Splitters
|
||||
* @returns End of __splitter sequence, i.e. @__c __s+__num_threads+1 */
|
||||
template<typename _DifferenceType, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
|
||||
_OutputIterator __s)
|
||||
{
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __num_longer_chunks = __n % __num_threads;
|
||||
_DifferenceType __pos = 0;
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
*__s++ = __pos;
|
||||
__pos += ((__i < __num_longer_chunks)
|
||||
? (__chunk_length + 1) : __chunk_length);
|
||||
}
|
||||
*__s++ = __n;
|
||||
return __s;
|
||||
}
|
||||
|
||||
|
||||
/** @brief function to split a sequence into parts of almost equal size.
|
||||
*
|
||||
* Returns the position of the splitting point between
|
||||
* thread number __thread_no (included) and
|
||||
* thread number __thread_no+1 (excluded).
|
||||
* @param __n Number of elements
|
||||
* @param __num_threads Number of parts
|
||||
* @returns splitting point */
|
||||
template<typename _DifferenceType>
|
||||
_DifferenceType
|
||||
equally_split_point(_DifferenceType __n,
|
||||
_ThreadIndex __num_threads,
|
||||
_ThreadIndex __thread_no)
|
||||
{
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __num_longer_chunks = __n % __num_threads;
|
||||
if (__thread_no < __num_longer_chunks)
|
||||
return __thread_no * (__chunk_length + 1);
|
||||
else
|
||||
return __num_longer_chunks * (__chunk_length + 1)
|
||||
/** @brief function to split a sequence into parts of almost equal size.
|
||||
*
|
||||
* Returns the position of the splitting point between
|
||||
* thread number __thread_no (included) and
|
||||
* thread number __thread_no+1 (excluded).
|
||||
* @param __n Number of elements
|
||||
* @param __num_threads Number of parts
|
||||
* @returns splitting point */
|
||||
template<typename _DifferenceType>
|
||||
_DifferenceType
|
||||
equally_split_point(_DifferenceType __n,
|
||||
_ThreadIndex __num_threads,
|
||||
_ThreadIndex __thread_no)
|
||||
{
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __num_longer_chunks = __n % __num_threads;
|
||||
if (__thread_no < __num_longer_chunks)
|
||||
return __thread_no * (__chunk_length + 1);
|
||||
else
|
||||
return __num_longer_chunks * (__chunk_length + 1)
|
||||
+ (__thread_no - __num_longer_chunks) * __chunk_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H */
|
||||
|
@ -42,360 +42,363 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/**
|
||||
* @brief Parallel std::find, switch for different algorithms.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Must have same
|
||||
* length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
inline std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector)
|
||||
{
|
||||
switch (_Settings::get().find_algorithm)
|
||||
{
|
||||
case GROWING_BLOCKS:
|
||||
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
|
||||
growing_blocks_tag());
|
||||
case CONSTANT_SIZE_BLOCKS:
|
||||
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
|
||||
constant_size_blocks_tag());
|
||||
case EQUAL_SPLIT:
|
||||
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
|
||||
equal_split_tag());
|
||||
default:
|
||||
_GLIBCXX_PARALLEL_ASSERT(false);
|
||||
return std::make_pair(__begin1, __begin2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @brief Parallel std::find, switch for different algorithms.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Must have same
|
||||
* length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
inline std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector)
|
||||
{
|
||||
switch (_Settings::get().find_algorithm)
|
||||
{
|
||||
case GROWING_BLOCKS:
|
||||
return __find_template(__begin1, __end1, __begin2, __pred,
|
||||
__selector, growing_blocks_tag());
|
||||
case CONSTANT_SIZE_BLOCKS:
|
||||
return __find_template(__begin1, __end1, __begin2, __pred,
|
||||
__selector, constant_size_blocks_tag());
|
||||
case EQUAL_SPLIT:
|
||||
return __find_template(__begin1, __end1, __begin2, __pred,
|
||||
__selector, equal_split_tag());
|
||||
default:
|
||||
_GLIBCXX_PARALLEL_ASSERT(false);
|
||||
return std::make_pair(__begin1, __begin2);
|
||||
}
|
||||
}
|
||||
|
||||
#if _GLIBCXX_FIND_EQUAL_SPLIT
|
||||
|
||||
/**
|
||||
* @brief Parallel std::find, equal splitting variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second __sequence
|
||||
* must have same length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2,
|
||||
_Pred __pred,
|
||||
_Selector __selector,
|
||||
equal_split_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
/**
|
||||
* @brief Parallel std::find, equal splitting variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second __sequence
|
||||
* must have same length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred,
|
||||
_Selector __selector, equal_split_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
_DifferenceType __result = __length;
|
||||
_DifferenceType* __borders;
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
_DifferenceType __result = __length;
|
||||
_DifferenceType* __borders;
|
||||
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__borders = new _DifferenceType[__num_threads + 1];
|
||||
equally_split(__length, __num_threads, __borders);
|
||||
} //single
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__borders = new _DifferenceType[__num_threads + 1];
|
||||
equally_split(__length, __num_threads, __borders);
|
||||
} //single
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DifferenceType __start = __borders[__iam],
|
||||
__stop = __borders[__iam + 1];
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DifferenceType __start = __borders[__iam],
|
||||
__stop = __borders[__iam + 1];
|
||||
|
||||
_RAIter1 __i1 = __begin1 + __start;
|
||||
_RAIter2 __i2 = __begin2 + __start;
|
||||
for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
|
||||
{
|
||||
#pragma omp flush(__result)
|
||||
// Result has been set to something lower.
|
||||
if (__result < __pos)
|
||||
break;
|
||||
_RAIter1 __i1 = __begin1 + __start;
|
||||
_RAIter2 __i2 = __begin2 + __start;
|
||||
for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
|
||||
{
|
||||
# pragma omp flush(__result)
|
||||
// Result has been set to something lower.
|
||||
if (__result < __pos)
|
||||
break;
|
||||
|
||||
if (__selector(__i1, __i2, __pred))
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__pos < __result)
|
||||
__result = __pos;
|
||||
omp_unset_lock(&__result_lock);
|
||||
break;
|
||||
}
|
||||
++__i1;
|
||||
++__i2;
|
||||
}
|
||||
if (__selector(__i1, __i2, __pred))
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__pos < __result)
|
||||
__result = __pos;
|
||||
omp_unset_lock(&__result_lock);
|
||||
break;
|
||||
}
|
||||
++__i1;
|
||||
++__i2;
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&__result_lock);
|
||||
delete[] __borders;
|
||||
omp_destroy_lock(&__result_lock);
|
||||
delete[] __borders;
|
||||
|
||||
return
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
|
||||
}
|
||||
return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
|
||||
__begin2 + __result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if _GLIBCXX_FIND_GROWING_BLOCKS
|
||||
|
||||
/**
|
||||
* @brief Parallel std::find, growing block size variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second __sequence
|
||||
* must have same length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_initial_block_size
|
||||
* @see __gnu_parallel::_Settings::find_maximum_block_size
|
||||
* @see __gnu_parallel::_Settings::find_increasing_factor
|
||||
*
|
||||
* There are two main differences between the growing blocks and
|
||||
* the constant-size blocks variants.
|
||||
* 1. For GB, the block size grows; for CSB, the block size is fixed.
|
||||
* 2. For GB, the blocks are allocated dynamically;
|
||||
* for CSB, the blocks are allocated in a predetermined manner,
|
||||
* namely spacial round-robin.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
|
||||
growing_blocks_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
/**
|
||||
* @brief Parallel std::find, growing block size variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second __sequence
|
||||
* must have same length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_initial_block_size
|
||||
* @see __gnu_parallel::_Settings::find_maximum_block_size
|
||||
* @see __gnu_parallel::_Settings::find_increasing_factor
|
||||
*
|
||||
* There are two main differences between the growing blocks and
|
||||
* the constant-size blocks variants.
|
||||
* 1. For GB, the block size grows; for CSB, the block size is fixed.
|
||||
* 2. For GB, the blocks are allocated dynamically;
|
||||
* for CSB, the blocks are allocated in a predetermined manner,
|
||||
* namely spacial round-robin.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
|
||||
growing_blocks_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
|
||||
_DifferenceType __sequential_search_size =
|
||||
std::min<_DifferenceType>(__length, __s.find_sequential_search_size);
|
||||
_DifferenceType
|
||||
__sequential_search_size = std::min<_DifferenceType>
|
||||
(__length, __s.find_sequential_search_size);
|
||||
|
||||
// Try it sequentially first.
|
||||
std::pair<_RAIter1, _RAIter2> __find_seq_result =
|
||||
__selector._M_sequential_algorithm(
|
||||
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
|
||||
// Try it sequentially first.
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_seq_result = __selector._M_sequential_algorithm
|
||||
(__begin1, __begin1 + __sequential_search_size,
|
||||
__begin2, __pred);
|
||||
|
||||
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
|
||||
return __find_seq_result;
|
||||
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
|
||||
return __find_seq_result;
|
||||
|
||||
// Index of beginning of next free block (after sequential find).
|
||||
_DifferenceType __next_block_start = __sequential_search_size;
|
||||
_DifferenceType __result = __length;
|
||||
// Index of beginning of next free block (after sequential find).
|
||||
_DifferenceType __next_block_start = __sequential_search_size;
|
||||
_DifferenceType __result = __length;
|
||||
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel shared(__result) num_threads(__num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel shared(__result) num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
__num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
// Not within first __k elements -> start parallel.
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
// Not within first __k elements -> start parallel.
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
_DifferenceType __block_size = __s.find_initial_block_size;
|
||||
_DifferenceType __start =
|
||||
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
|
||||
_DifferenceType __block_size = __s.find_initial_block_size;
|
||||
_DifferenceType __start = __fetch_and_add<_DifferenceType>
|
||||
(&__next_block_start, __block_size);
|
||||
|
||||
// Get new block, update pointer to next block.
|
||||
_DifferenceType __stop =
|
||||
std::min<_DifferenceType>(__length, __start + __block_size);
|
||||
// Get new block, update pointer to next block.
|
||||
_DifferenceType __stop =
|
||||
std::min<_DifferenceType>(__length, __start + __block_size);
|
||||
|
||||
std::pair<_RAIter1, _RAIter2> __local_result;
|
||||
std::pair<_RAIter1, _RAIter2> __local_result;
|
||||
|
||||
while (__start < __length)
|
||||
{
|
||||
while (__start < __length)
|
||||
{
|
||||
# pragma omp flush(__result)
|
||||
// Get new value of result.
|
||||
if (__result < __start)
|
||||
{
|
||||
// No chance to find first element.
|
||||
break;
|
||||
}
|
||||
// Get new value of result.
|
||||
if (__result < __start)
|
||||
{
|
||||
// No chance to find first element.
|
||||
break;
|
||||
}
|
||||
|
||||
__local_result = __selector._M_sequential_algorithm(
|
||||
__begin1 + __start, __begin1 + __stop,
|
||||
__begin2 + __start, __pred);
|
||||
if (__local_result.first != (__begin1 + __stop))
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if ((__local_result.first - __begin1) < __result)
|
||||
{
|
||||
__result = __local_result.first - __begin1;
|
||||
__local_result = __selector._M_sequential_algorithm
|
||||
(__begin1 + __start, __begin1 + __stop,
|
||||
__begin2 + __start, __pred);
|
||||
|
||||
// Result cannot be in future blocks, stop algorithm.
|
||||
__fetch_and_add<_DifferenceType>(
|
||||
&__next_block_start, __length);
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
if (__local_result.first != (__begin1 + __stop))
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if ((__local_result.first - __begin1) < __result)
|
||||
{
|
||||
__result = __local_result.first - __begin1;
|
||||
|
||||
__block_size = std::min<_DifferenceType>(
|
||||
__block_size * __s.find_increasing_factor,
|
||||
__s.find_maximum_block_size);
|
||||
// Result cannot be in future blocks, stop algorithm.
|
||||
__fetch_and_add<_DifferenceType>(&__next_block_start,
|
||||
__length);
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
|
||||
// Get new block, update pointer to next block.
|
||||
__start =
|
||||
__fetch_and_add<_DifferenceType>(
|
||||
&__next_block_start, __block_size);
|
||||
__stop = ((__length < (__start + __block_size))
|
||||
? __length : (__start + __block_size));
|
||||
}
|
||||
__block_size = std::min<_DifferenceType>
|
||||
(__block_size * __s.find_increasing_factor,
|
||||
__s.find_maximum_block_size);
|
||||
|
||||
// Get new block, update pointer to next block.
|
||||
__start = __fetch_and_add<_DifferenceType>(&__next_block_start,
|
||||
__block_size);
|
||||
__stop = (__length < (__start + __block_size)
|
||||
? __length : (__start + __block_size));
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&__result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
// Return iterator on found element.
|
||||
return
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
|
||||
}
|
||||
// Return iterator on found element.
|
||||
return
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
|
||||
__begin2 + __result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
|
||||
|
||||
/**
|
||||
* @brief Parallel std::find, constant block size variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second __sequence
|
||||
* must have same length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_block_size
|
||||
* There are two main differences between the growing blocks and the
|
||||
* constant-size blocks variants.
|
||||
* 1. For GB, the block size grows; for CSB, the block size is fixed.
|
||||
* 2. For GB, the blocks are allocated dynamically; for CSB, the
|
||||
* blocks are allocated in a predetermined manner, namely spacial
|
||||
* round-robin.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
|
||||
constant_size_blocks_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
/**
|
||||
* @brief Parallel std::find, constant block size variant.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence. Second __sequence
|
||||
* must have same length as first sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
|
||||
* @return Place of finding in both sequences.
|
||||
* @see __gnu_parallel::_Settings::find_sequential_search_size
|
||||
* @see __gnu_parallel::_Settings::find_block_size
|
||||
* There are two main differences between the growing blocks and the
|
||||
* constant-size blocks variants.
|
||||
* 1. For GB, the block size grows; for CSB, the block size is fixed.
|
||||
* 2. For GB, the blocks are allocated dynamically; for CSB, the
|
||||
* blocks are allocated in a predetermined manner, namely spacial
|
||||
* round-robin.
|
||||
*/
|
||||
template<typename _RAIter1,
|
||||
typename _RAIter2,
|
||||
typename _Pred,
|
||||
typename _Selector>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
|
||||
constant_size_blocks_tag)
|
||||
{
|
||||
_GLIBCXX_CALL(__end1 - __begin1)
|
||||
typedef std::iterator_traits<_RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
_DifferenceType __length = __end1 - __begin1;
|
||||
|
||||
_DifferenceType __sequential_search_size = std::min<_DifferenceType>(
|
||||
__length, __s.find_sequential_search_size);
|
||||
_DifferenceType __sequential_search_size = std::min<_DifferenceType>
|
||||
(__length, __s.find_sequential_search_size);
|
||||
|
||||
// Try it sequentially first.
|
||||
std::pair<_RAIter1, _RAIter2> __find_seq_result =
|
||||
__selector._M_sequential_algorithm(
|
||||
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
|
||||
// Try it sequentially first.
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
__find_seq_result = __selector._M_sequential_algorithm
|
||||
(__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
|
||||
|
||||
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
|
||||
return __find_seq_result;
|
||||
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
|
||||
return __find_seq_result;
|
||||
|
||||
_DifferenceType __result = __length;
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
_DifferenceType __result = __length;
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
// Not within first __sequential_search_size elements -> start parallel.
|
||||
// Not within first __sequential_search_size elements -> start parallel.
|
||||
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel shared(__result) num_threads(__num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
# pragma omp parallel shared(__result) num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
__num_threads = omp_get_num_threads();
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DifferenceType __block_size = __s.find_initial_block_size;
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DifferenceType __block_size = __s.find_initial_block_size;
|
||||
|
||||
// First element of thread's current iteration.
|
||||
_DifferenceType __iteration_start = __sequential_search_size;
|
||||
// First element of thread's current iteration.
|
||||
_DifferenceType __iteration_start = __sequential_search_size;
|
||||
|
||||
// Where to work (initialization).
|
||||
_DifferenceType __start = __iteration_start + __iam * __block_size;
|
||||
_DifferenceType __stop =
|
||||
std::min<_DifferenceType>(__length, __start + __block_size);
|
||||
// Where to work (initialization).
|
||||
_DifferenceType __start = __iteration_start + __iam * __block_size;
|
||||
_DifferenceType __stop = std::min<_DifferenceType>(__length,
|
||||
__start
|
||||
+ __block_size);
|
||||
|
||||
std::pair<_RAIter1, _RAIter2> __local_result;
|
||||
std::pair<_RAIter1, _RAIter2> __local_result;
|
||||
|
||||
while (__start < __length)
|
||||
{
|
||||
// Get new value of result.
|
||||
while (__start < __length)
|
||||
{
|
||||
// Get new value of result.
|
||||
# pragma omp flush(__result)
|
||||
// No chance to find first element.
|
||||
if (__result < __start)
|
||||
break;
|
||||
__local_result = __selector._M_sequential_algorithm(
|
||||
__begin1 + __start, __begin1 + __stop,
|
||||
__begin2 + __start, __pred);
|
||||
if (__local_result.first != (__begin1 + __stop))
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if ((__local_result.first - __begin1) < __result)
|
||||
__result = __local_result.first - __begin1;
|
||||
omp_unset_lock(&__result_lock);
|
||||
// Will not find better value in its interval.
|
||||
break;
|
||||
}
|
||||
// No chance to find first element.
|
||||
if (__result < __start)
|
||||
break;
|
||||
|
||||
__iteration_start += __num_threads * __block_size;
|
||||
__local_result = __selector._M_sequential_algorithm
|
||||
(__begin1 + __start, __begin1 + __stop,
|
||||
__begin2 + __start, __pred);
|
||||
|
||||
// Where to work.
|
||||
__start = __iteration_start + __iam * __block_size;
|
||||
__stop = std::min<_DifferenceType>(
|
||||
__length, __start + __block_size);
|
||||
}
|
||||
if (__local_result.first != (__begin1 + __stop))
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if ((__local_result.first - __begin1) < __result)
|
||||
__result = __local_result.first - __begin1;
|
||||
omp_unset_lock(&__result_lock);
|
||||
// Will not find better value in its interval.
|
||||
break;
|
||||
}
|
||||
|
||||
__iteration_start += __num_threads * __block_size;
|
||||
|
||||
// Where to work.
|
||||
__start = __iteration_start + __iam * __block_size;
|
||||
__stop = std::min<_DifferenceType>(__length,
|
||||
__start + __block_size);
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&__result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
// Return iterator on found element.
|
||||
return
|
||||
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
|
||||
}
|
||||
// Return iterator on found element.
|
||||
return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
|
||||
__begin2 + __result);
|
||||
}
|
||||
#endif
|
||||
} // end namespace
|
||||
|
||||
|
@ -103,12 +103,12 @@ namespace __gnu_parallel
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{
|
||||
// Passed end iterator is one short.
|
||||
_RAIter1 __spot = adjacent_find(__begin1, __end1 + 1,
|
||||
__pred, sequential_tag());
|
||||
__pred, sequential_tag());
|
||||
if (__spot == (__end1 + 1))
|
||||
__spot = __end1;
|
||||
return std::make_pair(__spot, __begin2);
|
||||
@ -141,56 +141,57 @@ namespace __gnu_parallel
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag());
|
||||
}
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{ return mismatch(__begin1, __end1, __begin2,
|
||||
__pred, sequential_tag()); }
|
||||
};
|
||||
|
||||
|
||||
/** @brief Test predicate on several elements. */
|
||||
template<typename _FIterator>
|
||||
struct __find_first_of_selector : public __generic_find_selector
|
||||
{
|
||||
_FIterator _M_begin;
|
||||
_FIterator _M_end;
|
||||
struct __find_first_of_selector : public __generic_find_selector
|
||||
{
|
||||
_FIterator _M_begin;
|
||||
_FIterator _M_end;
|
||||
|
||||
explicit __find_first_of_selector(_FIterator __begin, _FIterator __end)
|
||||
: _M_begin(__begin), _M_end(__end) { }
|
||||
explicit __find_first_of_selector(_FIterator __begin,
|
||||
_FIterator __end)
|
||||
: _M_begin(__begin), _M_end(__end) { }
|
||||
|
||||
/** @brief Test on one position.
|
||||
* @param __i1 _Iterator on first sequence.
|
||||
* @param __i2 _Iterator on second sequence (unused).
|
||||
* @param __pred Find predicate. */
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
bool
|
||||
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
|
||||
{
|
||||
for (_FIterator __pos_in_candidates = _M_begin;
|
||||
__pos_in_candidates != _M_end; ++__pos_in_candidates)
|
||||
if (__pred(*__i1, *__pos_in_candidates))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
/** @brief Test on one position.
|
||||
* @param __i1 _Iterator on first sequence.
|
||||
* @param __i2 _Iterator on second sequence (unused).
|
||||
* @param __pred Find predicate. */
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
bool
|
||||
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
|
||||
{
|
||||
for (_FIterator __pos_in_candidates = _M_begin;
|
||||
__pos_in_candidates != _M_end; ++__pos_in_candidates)
|
||||
if (__pred(*__i1, *__pos_in_candidates))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** @brief Corresponding sequential algorithm on a sequence.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __pred Find predicate. */
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{
|
||||
return std::make_pair(
|
||||
find_first_of(__begin1, __end1, _M_begin, _M_end, __pred,
|
||||
sequential_tag()), __begin2);
|
||||
}
|
||||
};
|
||||
/** @brief Corresponding sequential algorithm on a sequence.
|
||||
* @param __begin1 Begin iterator of first sequence.
|
||||
* @param __end1 End iterator of first sequence.
|
||||
* @param __begin2 Begin iterator of second sequence.
|
||||
* @param __pred Find predicate. */
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _Pred>
|
||||
std::pair<_RAIter1, _RAIter2>
|
||||
_M_sequential_algorithm(_RAIter1 __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2 __begin2, _Pred __pred)
|
||||
{
|
||||
return std::make_pair(find_first_of(__begin1, __end1,
|
||||
_M_begin, _M_end, __pred,
|
||||
sequential_tag()), __begin2);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_FIND_SELECTORS_H */
|
||||
|
@ -69,31 +69,21 @@ namespace __gnu_parallel
|
||||
_Parallelism __parallelism_tag)
|
||||
{
|
||||
if (__parallelism_tag == parallel_unbalanced)
|
||||
return __for_each_template_random_access_ed(__begin, __end, __user_op,
|
||||
__functionality, __reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
return __for_each_template_random_access_ed
|
||||
(__begin, __end, __user_op, __functionality, __reduction,
|
||||
__reduction_start, __output, __bound);
|
||||
else if (__parallelism_tag == parallel_omp_loop)
|
||||
return __for_each_template_random_access_omp_loop(
|
||||
__begin, __end, __user_op,
|
||||
__functionality,
|
||||
__reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
return __for_each_template_random_access_omp_loop
|
||||
(__begin, __end, __user_op, __functionality, __reduction,
|
||||
__reduction_start, __output, __bound);
|
||||
else if (__parallelism_tag == parallel_omp_loop_static)
|
||||
return __for_each_template_random_access_omp_loop(
|
||||
__begin, __end, __user_op,
|
||||
__functionality,
|
||||
__reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
return __for_each_template_random_access_omp_loop
|
||||
(__begin, __end, __user_op, __functionality, __reduction,
|
||||
__reduction_start, __output, __bound);
|
||||
else //e. g. parallel_balanced
|
||||
return __for_each_template_random_access_workstealing(__begin, __end,
|
||||
__user_op,
|
||||
__functionality,
|
||||
__reduction,
|
||||
__reduction_start,
|
||||
__output, __bound);
|
||||
return __for_each_template_random_access_workstealing
|
||||
(__begin, __end, __user_op, __functionality, __reduction,
|
||||
__reduction_start, __output, __bound);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,11 +48,11 @@ namespace __gnu_parallel
|
||||
template<typename _IIter>
|
||||
void
|
||||
__shrink_and_double(std::vector<_IIter>& __os_starts,
|
||||
size_t& __count_to_two, size_t& __range_length,
|
||||
const bool __make_twice)
|
||||
size_t& __count_to_two, size_t& __range_length,
|
||||
const bool __make_twice)
|
||||
{
|
||||
++__count_to_two;
|
||||
if (not __make_twice or __count_to_two < 2)
|
||||
if (!__make_twice || __count_to_two < 2)
|
||||
__shrink(__os_starts, __count_to_two, __range_length);
|
||||
else
|
||||
{
|
||||
@ -68,7 +68,7 @@ namespace __gnu_parallel
|
||||
template<typename _IIter>
|
||||
void
|
||||
__shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
|
||||
size_t& __range_length)
|
||||
size_t& __range_length)
|
||||
{
|
||||
for (typename std::vector<_IIter>::size_type __i = 0;
|
||||
__i <= (__os_starts.size() / 2); ++__i)
|
||||
@ -112,8 +112,8 @@ namespace __gnu_parallel
|
||||
|
||||
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
|
||||
|
||||
__os_starts[0]= __begin;
|
||||
_IIter __prev = __begin, __it = __begin;
|
||||
__os_starts[0] = __begin;
|
||||
_IIter __prev = __begin, __it = __begin;
|
||||
size_t __dist_limit = 0, __dist = 0;
|
||||
size_t __cur = 1, __next = 1;
|
||||
size_t __range_length = 1;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -54,11 +54,10 @@ namespace __gnu_parallel
|
||||
typename _OutputIterator, typename _DifferenceTp,
|
||||
typename _Compare>
|
||||
_OutputIterator
|
||||
__merge_advance_usual(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
_RAIter2 __end2, _OutputIterator __target,
|
||||
_DifferenceTp __max_length, _Compare __comp)
|
||||
__merge_advance_usual(_RAIter1& __begin1, _RAIter1 __end1,
|
||||
_RAIter2& __begin2, _RAIter2 __end2,
|
||||
_OutputIterator __target,
|
||||
_DifferenceTp __max_length, _Compare __comp)
|
||||
{
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
|
||||
@ -103,12 +102,10 @@ namespace __gnu_parallel
|
||||
typename _OutputIterator, typename _DifferenceTp,
|
||||
typename _Compare>
|
||||
_OutputIterator
|
||||
__merge_advance_movc(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
_RAIter2 __end2,
|
||||
_OutputIterator __target,
|
||||
_DifferenceTp __max_length, _Compare __comp)
|
||||
__merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1,
|
||||
_RAIter2& __begin2, _RAIter2 __end2,
|
||||
_OutputIterator __target,
|
||||
_DifferenceTp __max_length, _Compare __comp)
|
||||
{
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
typedef typename std::iterator_traits<_RAIter1>::value_type
|
||||
@ -172,14 +169,14 @@ namespace __gnu_parallel
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
|
||||
_RAIter2& __begin2, _RAIter2 __end2,
|
||||
_OutputIterator __target, _DifferenceTp __max_length,
|
||||
_Compare __comp)
|
||||
_RAIter2& __begin2, _RAIter2 __end2,
|
||||
_OutputIterator __target, _DifferenceTp __max_length,
|
||||
_Compare __comp)
|
||||
{
|
||||
_GLIBCXX_CALL(__max_length)
|
||||
|
||||
return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target,
|
||||
__max_length, __comp);
|
||||
return __merge_advance_movc(__begin1, __end1, __begin2, __end2,
|
||||
__target, __max_length, __comp);
|
||||
}
|
||||
|
||||
/** @brief Merge routine fallback to sequential in case the
|
||||
@ -195,17 +192,15 @@ namespace __gnu_parallel
|
||||
template<typename _RAIter1, typename _RAIter2,
|
||||
typename _RAIter3, typename _Compare>
|
||||
inline _RAIter3
|
||||
__parallel_merge_advance(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
// different iterators, parallel implementation
|
||||
// not available
|
||||
_RAIter2 __end2,
|
||||
_RAIter3 __target, typename
|
||||
std::iterator_traits<_RAIter1>::
|
||||
difference_type __max_length, _Compare __comp)
|
||||
__parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
|
||||
_RAIter2& __begin2,
|
||||
// different iterators, parallel implementation
|
||||
// not available
|
||||
_RAIter2 __end2, _RAIter3 __target, typename
|
||||
std::iterator_traits<_RAIter1>::
|
||||
difference_type __max_length, _Compare __comp)
|
||||
{ return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
|
||||
__max_length, __comp); }
|
||||
__max_length, __comp); }
|
||||
|
||||
/** @brief Parallel merge routine being able to merge only the @c
|
||||
* __max_length smallest elements.
|
||||
@ -225,13 +220,11 @@ namespace __gnu_parallel
|
||||
template<typename _RAIter1, typename _RAIter3,
|
||||
typename _Compare>
|
||||
inline _RAIter3
|
||||
__parallel_merge_advance(_RAIter1& __begin1,
|
||||
_RAIter1 __end1,
|
||||
_RAIter1& __begin2,
|
||||
_RAIter1 __end2,
|
||||
_RAIter3 __target, typename
|
||||
std::iterator_traits<_RAIter1>::
|
||||
difference_type __max_length, _Compare __comp)
|
||||
__parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
|
||||
_RAIter1& __begin2, _RAIter1 __end2,
|
||||
_RAIter3 __target, typename
|
||||
std::iterator_traits<_RAIter1>::
|
||||
difference_type __max_length, _Compare __comp)
|
||||
{
|
||||
typedef typename
|
||||
std::iterator_traits<_RAIter1>::value_type _ValueType;
|
||||
@ -242,17 +235,14 @@ namespace __gnu_parallel
|
||||
typedef typename std::pair<_RAIter1, _RAIter1>
|
||||
_IteratorPair;
|
||||
|
||||
_IteratorPair
|
||||
seqs[2] = { std::make_pair(__begin1, __end1),
|
||||
std::make_pair(__begin2, __end2) };
|
||||
_RAIter3
|
||||
__target_end = parallel_multiway_merge
|
||||
< /* __stable = */ true, /* __sentinels = */ false>(
|
||||
seqs, seqs + 2, __target,
|
||||
multiway_merge_exact_splitting
|
||||
< /* __stable = */ true, _IteratorPair*,
|
||||
_Compare, _DifferenceType1>,
|
||||
__max_length, __comp, omp_get_max_threads());
|
||||
_IteratorPair __seqs[2] = { std::make_pair(__begin1, __end1),
|
||||
std::make_pair(__begin2, __end2) };
|
||||
_RAIter3 __target_end = parallel_multiway_merge
|
||||
< /* __stable = */ true, /* __sentinels = */ false>
|
||||
(__seqs, __seqs + 2, __target, multiway_merge_exact_splitting
|
||||
< /* __stable = */ true, _IteratorPair*,
|
||||
_Compare, _DifferenceType1>, __max_length, __comp,
|
||||
omp_get_max_threads());
|
||||
|
||||
return __target_end;
|
||||
}
|
||||
|
@ -53,8 +53,8 @@ namespace __gnu_parallel
|
||||
/** @brief Compare a pair of types lexicographically, ascending. */
|
||||
template<typename _T1, typename _T2, typename _Compare>
|
||||
class _Lexicographic
|
||||
: public std::binary_function<
|
||||
std::pair<_T1, _T2>, std::pair<_T1, _T2>, bool>
|
||||
: public std::binary_function<std::pair<_T1, _T2>,
|
||||
std::pair<_T1, _T2>, bool>
|
||||
{
|
||||
private:
|
||||
_Compare& _M_comp;
|
||||
@ -142,19 +142,19 @@ namespace __gnu_parallel
|
||||
|
||||
// Number of sequences, number of elements in total (possibly
|
||||
// including padding).
|
||||
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __N = 0,
|
||||
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __nn = 0,
|
||||
__nmax, __n, __r;
|
||||
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
{
|
||||
__N += std::distance(__begin_seqs[__i].first,
|
||||
__nn += std::distance(__begin_seqs[__i].first,
|
||||
__begin_seqs[__i].second);
|
||||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
std::distance(__begin_seqs[__i].first,
|
||||
__begin_seqs[__i].second) > 0);
|
||||
}
|
||||
|
||||
if (__rank == __N)
|
||||
if (__rank == __nn)
|
||||
{
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
__begin_offsets[__i] = __begin_seqs[__i].second; // Very end.
|
||||
@ -163,9 +163,9 @@ namespace __gnu_parallel
|
||||
}
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(__m != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__N != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__nn != 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__rank >= 0);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__rank < __N);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__rank < __nn);
|
||||
|
||||
_DifferenceType* __ns = new _DifferenceType[__m];
|
||||
_DifferenceType* __a = new _DifferenceType[__m];
|
||||
@ -401,14 +401,14 @@ namespace __gnu_parallel
|
||||
// Number of sequences, number of elements in total (possibly
|
||||
// including padding).
|
||||
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs);
|
||||
_DifferenceType __N = 0;
|
||||
_DifferenceType __nn = 0;
|
||||
_DifferenceType __nmax, __n, __r;
|
||||
|
||||
for (int __i = 0; __i < __m; __i++)
|
||||
__N += std::distance(__begin_seqs[__i].first,
|
||||
__begin_seqs[__i].second);
|
||||
__nn += std::distance(__begin_seqs[__i].first,
|
||||
__begin_seqs[__i].second);
|
||||
|
||||
if (__m == 0 || __N == 0 || __rank < 0 || __rank >= __N)
|
||||
if (__m == 0 || __nn == 0 || __rank < 0 || __rank >= __nn)
|
||||
{
|
||||
// result undefined if there is no data or __rank is outside bounds
|
||||
throw std::exception();
|
||||
@ -433,7 +433,7 @@ namespace __gnu_parallel
|
||||
|
||||
// Pad all lists to this length, at least as long as any ns[__i],
|
||||
// equality iff __nmax = 2^__k - 1
|
||||
__l = pow2(__r) - 1;
|
||||
__l = __round_up_to_pow2(__r) - 1;
|
||||
|
||||
for (int __i = 0; __i < __m; ++__i)
|
||||
{
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -125,8 +125,7 @@ namespace __gnu_parallel
|
||||
/** @brief Split by exact splitting. */
|
||||
template<typename _RAIter, typename _Compare,
|
||||
typename _SortingPlacesIterator>
|
||||
struct _SplitConsistently<true, _RAIter,
|
||||
_Compare, _SortingPlacesIterator>
|
||||
struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
|
||||
{
|
||||
void
|
||||
operator()(const _ThreadIndex __iam,
|
||||
@ -140,19 +139,19 @@ namespace __gnu_parallel
|
||||
|
||||
std::vector<std::pair<_SortingPlacesIterator,
|
||||
_SortingPlacesIterator> >
|
||||
seqs(__sd->_M_num_threads);
|
||||
__seqs(__sd->_M_num_threads);
|
||||
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
|
||||
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
|
||||
__sd->_M_temporary[__s]
|
||||
+ (__sd->_M_starts[__s + 1]
|
||||
- __sd->_M_starts[__s]));
|
||||
__seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
|
||||
__sd->_M_temporary[__s]
|
||||
+ (__sd->_M_starts[__s + 1]
|
||||
- __sd->_M_starts[__s]));
|
||||
|
||||
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
|
||||
std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
|
||||
|
||||
// if not last thread
|
||||
if (__iam < __sd->_M_num_threads - 1)
|
||||
multiseq_partition(seqs.begin(), seqs.end(),
|
||||
__sd->_M_starts[__iam + 1], _M_offsets.begin(),
|
||||
multiseq_partition(__seqs.begin(), __seqs.end(),
|
||||
__sd->_M_starts[__iam + 1], __offsets.begin(),
|
||||
__comp);
|
||||
|
||||
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
|
||||
@ -160,7 +159,7 @@ namespace __gnu_parallel
|
||||
// for each sequence
|
||||
if (__iam < (__sd->_M_num_threads - 1))
|
||||
__sd->_M_pieces[__iam][__seq]._M_end
|
||||
= _M_offsets[__seq] - seqs[__seq].first;
|
||||
= __offsets[__seq] - __seqs[__seq].first;
|
||||
else
|
||||
// very end of this sequence
|
||||
__sd->_M_pieces[__iam][__seq]._M_end =
|
||||
@ -185,8 +184,7 @@ namespace __gnu_parallel
|
||||
/** @brief Split by sampling. */
|
||||
template<typename _RAIter, typename _Compare,
|
||||
typename _SortingPlacesIterator>
|
||||
struct _SplitConsistently<false, _RAIter, _Compare,
|
||||
_SortingPlacesIterator>
|
||||
struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
|
||||
{
|
||||
void
|
||||
operator()(const _ThreadIndex __iam,
|
||||
@ -282,10 +280,8 @@ namespace __gnu_parallel
|
||||
const _RAIter& __target,
|
||||
_Compare& __comp,
|
||||
_DiffType __length_am) const
|
||||
{
|
||||
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
|
||||
__comp, sequential_tag());
|
||||
}
|
||||
{ stable_multiway_merge(__seqs_begin, __seqs_end, __target,
|
||||
__length_am, __comp, sequential_tag()); }
|
||||
};
|
||||
|
||||
template<typename Seq_RAIter, typename _RAIter,
|
||||
@ -298,10 +294,8 @@ namespace __gnu_parallel
|
||||
const _RAIter& __target,
|
||||
_Compare& __comp,
|
||||
_DiffType __length_am) const
|
||||
{
|
||||
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
|
||||
sequential_tag());
|
||||
}
|
||||
{ multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
|
||||
__comp, sequential_tag()); }
|
||||
};
|
||||
|
||||
/** @brief PMWMS code executed by each thread.
|
||||
@ -321,8 +315,8 @@ namespace __gnu_parallel
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// Length of this thread's chunk, before merging.
|
||||
_DifferenceType __length_local
|
||||
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
|
||||
_DifferenceType __length_local =
|
||||
__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
|
||||
|
||||
// Sort in temporary storage, leave space for sentinel.
|
||||
|
||||
@ -350,8 +344,7 @@ namespace __gnu_parallel
|
||||
|
||||
_DifferenceType __num_samples =
|
||||
_Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
|
||||
_SplitConsistently
|
||||
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
|
||||
_SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
|
||||
(__iam, __sd, __comp, __num_samples);
|
||||
|
||||
// Offset from __target __begin, __length after merging.
|
||||
@ -364,26 +357,24 @@ namespace __gnu_parallel
|
||||
}
|
||||
|
||||
typedef std::vector<
|
||||
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
|
||||
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
|
||||
_SeqVector;
|
||||
_SeqVector seqs(__sd->_M_num_threads);
|
||||
_SeqVector __seqs(__sd->_M_num_threads);
|
||||
|
||||
for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
|
||||
{
|
||||
seqs[__s] =
|
||||
std::make_pair
|
||||
(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
|
||||
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
|
||||
__seqs[__s] =
|
||||
std::make_pair(__sd->_M_temporary[__s]
|
||||
+ __sd->_M_pieces[__iam][__s]._M_begin,
|
||||
__sd->_M_temporary[__s]
|
||||
+ __sd->_M_pieces[__iam][__s]._M_end);
|
||||
}
|
||||
|
||||
__possibly_stable_multiway_merge<
|
||||
__stable,
|
||||
typename _SeqVector::iterator,
|
||||
_RAIter,
|
||||
_Compare, _DifferenceType>()
|
||||
(seqs.begin(), seqs.end(),
|
||||
__sd->_M_source + __offset, __comp,
|
||||
__length_am);
|
||||
__stable, typename _SeqVector::iterator,
|
||||
_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
|
||||
__sd->_M_source + __offset, __comp,
|
||||
__length_am);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
@ -421,7 +412,7 @@ namespace __gnu_parallel
|
||||
|
||||
// shared variables
|
||||
_PMWMSSortingData<_RAIter> __sd;
|
||||
_DifferenceType* _M_starts;
|
||||
_DifferenceType* __starts;
|
||||
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
@ -450,30 +441,29 @@ namespace __gnu_parallel
|
||||
= new std::vector<_Piece<_DifferenceType> >[__num_threads];
|
||||
for (int __s = 0; __s < __num_threads; ++__s)
|
||||
__sd._M_pieces[__s].resize(__num_threads);
|
||||
_M_starts = __sd._M_starts
|
||||
= new _DifferenceType[__num_threads + 1];
|
||||
__starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
|
||||
|
||||
_DifferenceType __chunk_length = __n / __num_threads;
|
||||
_DifferenceType __split = __n % __num_threads;
|
||||
_DifferenceType __pos = 0;
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
_M_starts[__i] = __pos;
|
||||
__pos += (__i < __split)
|
||||
? (__chunk_length + 1) : __chunk_length;
|
||||
__starts[__i] = __pos;
|
||||
__pos += ((__i < __split)
|
||||
? (__chunk_length + 1) : __chunk_length);
|
||||
}
|
||||
_M_starts[__num_threads] = __pos;
|
||||
__starts[__num_threads] = __pos;
|
||||
} //single
|
||||
|
||||
// Now sort in parallel.
|
||||
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
|
||||
} //parallel
|
||||
|
||||
delete[] _M_starts;
|
||||
delete[] __starts;
|
||||
delete[] __sd._M_temporary;
|
||||
|
||||
if (!__exact)
|
||||
::operator delete(__sd._M_samples);
|
||||
::operator delete(__sd._M_samples);
|
||||
|
||||
delete[] __sd._M_offsets;
|
||||
delete[] __sd._M_pieces;
|
||||
|
@ -69,7 +69,7 @@ namespace __parallel
|
||||
__accumulate_switch(_IIter __begin, _IIter __end,
|
||||
_Tp __init, _IteratorTag)
|
||||
{ return accumulate(__begin, __end, __init,
|
||||
__gnu_parallel::sequential_tag()); }
|
||||
__gnu_parallel::sequential_tag()); }
|
||||
|
||||
template<typename _IIter, typename _Tp, typename _BinaryOperation,
|
||||
typename _IteratorTag>
|
||||
|
@ -74,8 +74,8 @@ namespace __gnu_parallel
|
||||
_DifferenceType;
|
||||
|
||||
_DifferenceType __length = __end - __begin;
|
||||
_ThreadIndex __num_threads =
|
||||
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
|
||||
_ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
|
||||
(__get_max_threads(), __length);
|
||||
|
||||
_Result *__thread_results;
|
||||
|
||||
@ -94,8 +94,8 @@ namespace __gnu_parallel
|
||||
|
||||
#pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
|
||||
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
|
||||
__thread_results[__iam] =
|
||||
__r(__thread_results[__iam], __f(__o, __begin+__pos));
|
||||
__thread_results[__iam] = __r(__thread_results[__iam],
|
||||
__f(__o, __begin+__pos));
|
||||
} //parallel
|
||||
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
|
@ -74,8 +74,8 @@ namespace __gnu_parallel
|
||||
_DifferenceType;
|
||||
|
||||
_DifferenceType __length = __end - __begin;
|
||||
_ThreadIndex __num_threads =
|
||||
std::min<_DifferenceType>(__get_max_threads(), __length);
|
||||
_ThreadIndex __num_threads = std::min<_DifferenceType>
|
||||
(__get_max_threads(), __length);
|
||||
|
||||
_Result *__thread_results;
|
||||
|
||||
|
@ -75,25 +75,24 @@ namespace __gnu_parallel
|
||||
_Result *__thread_results;
|
||||
bool* __constructed;
|
||||
|
||||
_ThreadIndex __num_threads =
|
||||
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
|
||||
_ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
|
||||
(__get_max_threads(), __length);
|
||||
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__thread_results =
|
||||
static_cast<_Result*>(::operator new(__num_threads
|
||||
* sizeof(_Result)));
|
||||
__thread_results = static_cast<_Result*>
|
||||
(::operator new(__num_threads * sizeof(_Result)));
|
||||
__constructed = new bool[__num_threads];
|
||||
}
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// Neutral element.
|
||||
_Result* __reduct =
|
||||
static_cast<_Result*>(::operator new(sizeof(_Result)));
|
||||
_Result* __reduct = static_cast<_Result*>
|
||||
(::operator new(sizeof(_Result)));
|
||||
|
||||
_DifferenceType
|
||||
__start = equally_split_point(__length, __num_threads, __iam),
|
||||
|
@ -149,9 +149,10 @@ namespace __gnu_parallel
|
||||
if (__iam == 0)
|
||||
{
|
||||
*__result = *__begin;
|
||||
__parallel_partial_sum_basecase(
|
||||
__begin + 1, __begin + __borders[1], __result + 1,
|
||||
__bin_op, *__begin);
|
||||
__parallel_partial_sum_basecase(__begin + 1,
|
||||
__begin + __borders[1],
|
||||
__result + 1,
|
||||
__bin_op, *__begin);
|
||||
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
|
||||
}
|
||||
else
|
||||
@ -168,7 +169,7 @@ namespace __gnu_parallel
|
||||
|
||||
# pragma omp single
|
||||
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
|
||||
__sums + 1, __bin_op, __sums[0]);
|
||||
__sums + 1, __bin_op, __sums[0]);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
|
@ -44,387 +44,391 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Parallel implementation of std::partition.
|
||||
* @param __begin Begin iterator of input sequence to split.
|
||||
* @param __end End iterator of input sequence to split.
|
||||
* @param __pred Partition predicate, possibly including some kind of pivot.
|
||||
* @param __num_threads Maximum number of threads to use for this task.
|
||||
* @return Number of elements not fulfilling the predicate. */
|
||||
template<typename _RAIter, typename _Predicate>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__parallel_partition(_RAIter __begin, _RAIter __end,
|
||||
_Predicate __pred, _ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
/** @brief Parallel implementation of std::partition.
|
||||
* @param __begin Begin iterator of input sequence to split.
|
||||
* @param __end End iterator of input sequence to split.
|
||||
* @param __pred Partition predicate, possibly including some kind
|
||||
* of pivot.
|
||||
* @param __num_threads Maximum number of threads to use for this task.
|
||||
* @return Number of elements not fulfilling the predicate. */
|
||||
template<typename _RAIter, typename _Predicate>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__parallel_partition(_RAIter __begin, _RAIter __end,
|
||||
_Predicate __pred, _ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_DifferenceType __n = __end - __begin;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_GLIBCXX_CALL(__n)
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
// Shared.
|
||||
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew;
|
||||
// Shared.
|
||||
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right;
|
||||
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew;
|
||||
|
||||
bool* __reserved_left = NULL, * __reserved_right = NULL;
|
||||
bool* __reserved_left = NULL, * __reserved_right = NULL;
|
||||
|
||||
_DifferenceType __chunk_size;
|
||||
_DifferenceType __chunk_size;
|
||||
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
//at least two chunks per thread
|
||||
if(__right - __left + 1 >= 2 * __num_threads * __chunk_size)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__reserved_left = new bool[__num_threads];
|
||||
__reserved_right = new bool[__num_threads];
|
||||
//at least two chunks per thread
|
||||
if (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__reserved_left = new bool[__num_threads];
|
||||
__reserved_right = new bool[__num_threads];
|
||||
|
||||
if (__s.partition_chunk_share > 0.0)
|
||||
__chunk_size = std::max<_DifferenceType>(
|
||||
__s.partition_chunk_size,
|
||||
(double)__n * __s.partition_chunk_share /
|
||||
(double)__num_threads);
|
||||
else
|
||||
__chunk_size = __s.partition_chunk_size;
|
||||
}
|
||||
if (__s.partition_chunk_share > 0.0)
|
||||
__chunk_size = std::max<_DifferenceType>
|
||||
(__s.partition_chunk_size, (double)__n
|
||||
* __s.partition_chunk_share / (double)__num_threads);
|
||||
else
|
||||
__chunk_size = __s.partition_chunk_size;
|
||||
}
|
||||
|
||||
while (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
_DifferenceType __num_chunks
|
||||
= (__right - __left + 1) / __chunk_size;
|
||||
while (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
_DifferenceType __num_chunks = ((__right - __left + 1)
|
||||
/ __chunk_size);
|
||||
|
||||
for (int __r = 0; __r < __num_threads; ++__r)
|
||||
{
|
||||
__reserved_left[__r] = false;
|
||||
__reserved_right[__r] = false;
|
||||
}
|
||||
__leftover_left = 0;
|
||||
__leftover_right = 0;
|
||||
} //implicit barrier
|
||||
for (int __r = 0; __r < __num_threads; ++__r)
|
||||
{
|
||||
__reserved_left[__r] = false;
|
||||
__reserved_right[__r] = false;
|
||||
}
|
||||
__leftover_left = 0;
|
||||
__leftover_right = 0;
|
||||
} //implicit barrier
|
||||
|
||||
// Private.
|
||||
_DifferenceType __thread_left, __thread_left_border,
|
||||
__thread_right, __thread_right_border;
|
||||
__thread_left = __left + 1;
|
||||
// Private.
|
||||
_DifferenceType __thread_left, __thread_left_border,
|
||||
__thread_right, __thread_right_border;
|
||||
__thread_left = __left + 1;
|
||||
|
||||
// Just to satisfy the condition below.
|
||||
__thread_left_border = __thread_left - 1;
|
||||
__thread_right = __n - 1;
|
||||
__thread_right_border = __thread_right + 1;
|
||||
// Just to satisfy the condition below.
|
||||
__thread_left_border = __thread_left - 1;
|
||||
__thread_right = __n - 1;
|
||||
__thread_right_border = __thread_right + 1;
|
||||
|
||||
bool __iam_finished = false;
|
||||
while (!__iam_finished)
|
||||
{
|
||||
if (__thread_left > __thread_left_border)
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__left + (__chunk_size - 1) > __right)
|
||||
__iam_finished = true;
|
||||
else
|
||||
{
|
||||
__thread_left = __left;
|
||||
__thread_left_border = __left + (__chunk_size - 1);
|
||||
__left += __chunk_size;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
bool __iam_finished = false;
|
||||
while (!__iam_finished)
|
||||
{
|
||||
if (__thread_left > __thread_left_border)
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__left + (__chunk_size - 1) > __right)
|
||||
__iam_finished = true;
|
||||
else
|
||||
{
|
||||
__thread_left = __left;
|
||||
__thread_left_border = __left + (__chunk_size - 1);
|
||||
__left += __chunk_size;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
|
||||
if (__thread_right < __thread_right_border)
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__left > __right - (__chunk_size - 1))
|
||||
__iam_finished = true;
|
||||
else
|
||||
{
|
||||
__thread_right = __right;
|
||||
__thread_right_border = __right - (__chunk_size - 1);
|
||||
__right -= __chunk_size;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
if (__thread_right < __thread_right_border)
|
||||
{
|
||||
omp_set_lock(&__result_lock);
|
||||
if (__left > __right - (__chunk_size - 1))
|
||||
__iam_finished = true;
|
||||
else
|
||||
{
|
||||
__thread_right = __right;
|
||||
__thread_right_border = __right - (__chunk_size - 1);
|
||||
__right -= __chunk_size;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
}
|
||||
|
||||
if (__iam_finished)
|
||||
break;
|
||||
if (__iam_finished)
|
||||
break;
|
||||
|
||||
// Swap as usual.
|
||||
while (__thread_left < __thread_right)
|
||||
{
|
||||
while (__pred(__begin[__thread_left])
|
||||
&& __thread_left <= __thread_left_border)
|
||||
++__thread_left;
|
||||
while (!__pred(__begin[__thread_right])
|
||||
&& __thread_right >= __thread_right_border)
|
||||
--__thread_right;
|
||||
// Swap as usual.
|
||||
while (__thread_left < __thread_right)
|
||||
{
|
||||
while (__pred(__begin[__thread_left])
|
||||
&& __thread_left <= __thread_left_border)
|
||||
++__thread_left;
|
||||
while (!__pred(__begin[__thread_right])
|
||||
&& __thread_right >= __thread_right_border)
|
||||
--__thread_right;
|
||||
|
||||
if (__thread_left > __thread_left_border
|
||||
|| __thread_right < __thread_right_border)
|
||||
// Fetch new chunk(__s).
|
||||
break;
|
||||
if (__thread_left > __thread_left_border
|
||||
|| __thread_right < __thread_right_border)
|
||||
// Fetch new chunk(__s).
|
||||
break;
|
||||
|
||||
std::swap(__begin[__thread_left], __begin[__thread_right]);
|
||||
++__thread_left;
|
||||
--__thread_right;
|
||||
}
|
||||
}
|
||||
std::swap(__begin[__thread_left],
|
||||
__begin[__thread_right]);
|
||||
++__thread_left;
|
||||
--__thread_right;
|
||||
}
|
||||
}
|
||||
|
||||
// Now swap the leftover chunks to the right places.
|
||||
if (__thread_left <= __thread_left_border)
|
||||
# pragma omp atomic
|
||||
++__leftover_left;
|
||||
if (__thread_right >= __thread_right_border)
|
||||
# pragma omp atomic
|
||||
++__leftover_right;
|
||||
// Now swap the leftover chunks to the right places.
|
||||
if (__thread_left <= __thread_left_border)
|
||||
# pragma omp atomic
|
||||
++__leftover_left;
|
||||
if (__thread_right >= __thread_right_border)
|
||||
# pragma omp atomic
|
||||
++__leftover_right;
|
||||
|
||||
# pragma omp barrier
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
__leftnew = __left - __leftover_left * __chunk_size;
|
||||
__rightnew = __right + __leftover_right * __chunk_size;
|
||||
}
|
||||
# pragma omp single
|
||||
{
|
||||
__leftnew = __left - __leftover_left * __chunk_size;
|
||||
__rightnew = __right + __leftover_right * __chunk_size;
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
# pragma omp barrier
|
||||
|
||||
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
|
||||
if (__thread_left <= __thread_left_border
|
||||
&& __thread_left_border >= __leftnew)
|
||||
{
|
||||
// Chunk already in place, reserve spot.
|
||||
__reserved_left
|
||||
[(__left - (__thread_left_border + 1)) / __chunk_size]
|
||||
= true;
|
||||
}
|
||||
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
|
||||
if (__thread_left <= __thread_left_border
|
||||
&& __thread_left_border >= __leftnew)
|
||||
{
|
||||
// Chunk already in place, reserve spot.
|
||||
__reserved_left[(__left - (__thread_left_border + 1))
|
||||
/ __chunk_size] = true;
|
||||
}
|
||||
|
||||
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
|
||||
if (__thread_right >= __thread_right_border
|
||||
&& __thread_right_border <= __rightnew)
|
||||
{
|
||||
// Chunk already in place, reserve spot.
|
||||
__reserved_right[((__thread_right_border - 1) - __right)
|
||||
/ __chunk_size] = true;
|
||||
}
|
||||
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
|
||||
if (__thread_right >= __thread_right_border
|
||||
&& __thread_right_border <= __rightnew)
|
||||
{
|
||||
// Chunk already in place, reserve spot.
|
||||
__reserved_right[((__thread_right_border - 1) - __right)
|
||||
/ __chunk_size] = true;
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
# pragma omp barrier
|
||||
|
||||
if (__thread_left <= __thread_left_border
|
||||
&& __thread_left_border < __leftnew)
|
||||
{
|
||||
// Find spot and swap.
|
||||
_DifferenceType __swapstart = -1;
|
||||
omp_set_lock(&__result_lock);
|
||||
for (int __r = 0; __r < __leftover_left; ++__r)
|
||||
if (!__reserved_left[__r])
|
||||
{
|
||||
__reserved_left[__r] = true;
|
||||
__swapstart = __left - (__r + 1) * __chunk_size;
|
||||
break;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
if (__thread_left <= __thread_left_border
|
||||
&& __thread_left_border < __leftnew)
|
||||
{
|
||||
// Find spot and swap.
|
||||
_DifferenceType __swapstart = -1;
|
||||
omp_set_lock(&__result_lock);
|
||||
for (int __r = 0; __r < __leftover_left; ++__r)
|
||||
if (!__reserved_left[__r])
|
||||
{
|
||||
__reserved_left[__r] = true;
|
||||
__swapstart = __left - (__r + 1) * __chunk_size;
|
||||
break;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
|
||||
#endif
|
||||
|
||||
std::swap_ranges(__begin + __thread_left_border
|
||||
- (__chunk_size - 1),
|
||||
__begin + __thread_left_border + 1,
|
||||
__begin + __swapstart);
|
||||
}
|
||||
std::swap_ranges(__begin + __thread_left_border
|
||||
- (__chunk_size - 1),
|
||||
__begin + __thread_left_border + 1,
|
||||
__begin + __swapstart);
|
||||
}
|
||||
|
||||
if (__thread_right >= __thread_right_border
|
||||
&& __thread_right_border > __rightnew)
|
||||
{
|
||||
// Find spot and swap
|
||||
_DifferenceType __swapstart = -1;
|
||||
omp_set_lock(&__result_lock);
|
||||
for (int __r = 0; __r < __leftover_right; ++__r)
|
||||
if (!__reserved_right[__r])
|
||||
{
|
||||
__reserved_right[__r] = true;
|
||||
__swapstart = __right + __r * __chunk_size + 1;
|
||||
break;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
if (__thread_right >= __thread_right_border
|
||||
&& __thread_right_border > __rightnew)
|
||||
{
|
||||
// Find spot and swap
|
||||
_DifferenceType __swapstart = -1;
|
||||
omp_set_lock(&__result_lock);
|
||||
for (int __r = 0; __r < __leftover_right; ++__r)
|
||||
if (!__reserved_right[__r])
|
||||
{
|
||||
__reserved_right[__r] = true;
|
||||
__swapstart = __right + __r * __chunk_size + 1;
|
||||
break;
|
||||
}
|
||||
omp_unset_lock(&__result_lock);
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
|
||||
#endif
|
||||
|
||||
std::swap_ranges(
|
||||
__begin + __thread_right_border,
|
||||
__begin + __thread_right_border + __chunk_size,
|
||||
__begin + __swapstart);
|
||||
}
|
||||
std::swap_ranges(__begin + __thread_right_border,
|
||||
__begin + __thread_right_border
|
||||
+ __chunk_size, __begin + __swapstart);
|
||||
}
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
for (int __r = 0; __r < __leftover_left; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]);
|
||||
for (int __r = 0; __r < __leftover_right; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]);
|
||||
}
|
||||
{
|
||||
for (int __r = 0; __r < __leftover_left; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]);
|
||||
for (int __r = 0; __r < __leftover_right; ++__r)
|
||||
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]);
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
#endif
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
__left = __leftnew;
|
||||
__right = __rightnew;
|
||||
}
|
||||
# pragma omp flush(__left, __right)
|
||||
} // end "recursion" //parallel
|
||||
__left = __leftnew;
|
||||
__right = __rightnew;
|
||||
}
|
||||
|
||||
_DifferenceType __final_left = __left, __final_right = __right;
|
||||
# pragma omp flush(__left, __right)
|
||||
} // end "recursion" //parallel
|
||||
|
||||
while (__final_left < __final_right)
|
||||
{
|
||||
// Go right until key is geq than pivot.
|
||||
while (__pred(__begin[__final_left]) && __final_left < __final_right)
|
||||
++__final_left;
|
||||
_DifferenceType __final_left = __left, __final_right = __right;
|
||||
|
||||
// Go left until key is less than pivot.
|
||||
while (!__pred(__begin[__final_right]) && __final_left < __final_right)
|
||||
--__final_right;
|
||||
while (__final_left < __final_right)
|
||||
{
|
||||
// Go right until key is geq than pivot.
|
||||
while (__pred(__begin[__final_left])
|
||||
&& __final_left < __final_right)
|
||||
++__final_left;
|
||||
|
||||
if (__final_left == __final_right)
|
||||
break;
|
||||
std::swap(__begin[__final_left], __begin[__final_right]);
|
||||
++__final_left;
|
||||
--__final_right;
|
||||
}
|
||||
// Go left until key is less than pivot.
|
||||
while (!__pred(__begin[__final_right])
|
||||
&& __final_left < __final_right)
|
||||
--__final_right;
|
||||
|
||||
// All elements on the left side are < piv, all elements on the
|
||||
// right are >= piv
|
||||
delete[] __reserved_left;
|
||||
delete[] __reserved_right;
|
||||
if (__final_left == __final_right)
|
||||
break;
|
||||
std::swap(__begin[__final_left], __begin[__final_right]);
|
||||
++__final_left;
|
||||
--__final_right;
|
||||
}
|
||||
|
||||
omp_destroy_lock(&__result_lock);
|
||||
// All elements on the left side are < piv, all elements on the
|
||||
// right are >= piv
|
||||
delete[] __reserved_left;
|
||||
delete[] __reserved_right;
|
||||
|
||||
// Element "between" __final_left and __final_right might not have
|
||||
// been regarded yet
|
||||
if (__final_left < __n && !__pred(__begin[__final_left]))
|
||||
// Really swapped.
|
||||
return __final_left;
|
||||
else
|
||||
return __final_left + 1;
|
||||
}
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
/**
|
||||
* @brief Parallel implementation of std::nth_element().
|
||||
// Element "between" __final_left and __final_right might not have
|
||||
// been regarded yet
|
||||
if (__final_left < __n && !__pred(__begin[__final_left]))
|
||||
// Really swapped.
|
||||
return __final_left;
|
||||
else
|
||||
return __final_left + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parallel implementation of std::nth_element().
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __nth _Iterator of element that must be in position afterwards.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_nth_element(_RAIter __begin, _RAIter __nth,
|
||||
_RAIter __end, _Compare __comp)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
_RAIter __split;
|
||||
_RandomNumber __rng;
|
||||
|
||||
_DifferenceType __minimum_length =
|
||||
std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n);
|
||||
|
||||
// Break if input range to small.
|
||||
while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length)
|
||||
{
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_RAIter __pivot_pos = __begin + __rng(__n);
|
||||
|
||||
// Swap __pivot_pos value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
// _Compare must have first_value_type, second_value_type,
|
||||
// result_type
|
||||
// _Compare ==
|
||||
// __gnu_parallel::_Lexicographic<S, int,
|
||||
// __gnu_parallel::_Less<S, S> >
|
||||
// __pivot_pos == std::pair<S, int>*
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, leave pivot unchanged in last place.
|
||||
_RAIter __split_pos1, __split_pos2;
|
||||
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1,
|
||||
__pred,
|
||||
__get_max_threads());
|
||||
|
||||
// Left side: < __pivot_pos; __right side: >= __pivot_pos
|
||||
|
||||
// Swap pivot back to middle.
|
||||
if (__split_pos1 != __pivot_pos)
|
||||
std::swap(*__split_pos1, *__pivot_pos);
|
||||
__pivot_pos = __split_pos1;
|
||||
|
||||
// In case all elements are equal, __split_pos1 == 0
|
||||
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|
||||
|| (__end - __split_pos1) < (__n >> 7))
|
||||
{
|
||||
// Very unequal split, one part smaller than one 128th
|
||||
// elements not strictly larger than the pivot.
|
||||
__gnu_parallel::__unary_negate<__gnu_parallel::
|
||||
__binder1st<_Compare, _ValueType,
|
||||
_ValueType, bool>, _ValueType>
|
||||
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
|
||||
_ValueType, bool>(__comp, *__pivot_pos));
|
||||
|
||||
// Find other end of pivot-equal range.
|
||||
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
|
||||
__end, __pred);
|
||||
}
|
||||
else
|
||||
// Only skip the pivot.
|
||||
__split_pos2 = __split_pos1 + 1;
|
||||
|
||||
// Compare iterators.
|
||||
if (__split_pos2 <= __nth)
|
||||
__begin = __split_pos2;
|
||||
else if (__nth < __split_pos1)
|
||||
__end = __split_pos1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// Only at most _Settings::partition_minimal_n __elements __left.
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
}
|
||||
|
||||
/** @brief Parallel implementation of std::partial_sort().
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __nth _Iterator of element that must be in position afterwards.
|
||||
* @param __middle Sort until this position.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator.
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_nth_element(_RAIter __begin, _RAIter __nth,
|
||||
_RAIter __end, _Compare __comp)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
_RAIter __split;
|
||||
_RandomNumber __rng;
|
||||
|
||||
_DifferenceType __minimum_length =
|
||||
std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n);
|
||||
|
||||
// Break if input range to small.
|
||||
while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length)
|
||||
{
|
||||
_DifferenceType __n = __end - __begin;
|
||||
|
||||
_RAIter __pivot_pos = __begin + __rng(__n);
|
||||
|
||||
// Swap __pivot_pos value to end.
|
||||
if (__pivot_pos != (__end - 1))
|
||||
std::swap(*__pivot_pos, *(__end - 1));
|
||||
__pivot_pos = __end - 1;
|
||||
|
||||
// _Compare must have first_value_type, second_value_type,
|
||||
// result_type
|
||||
// _Compare ==
|
||||
// __gnu_parallel::_Lexicographic<S, int, __gnu_parallel::_Less<S, S> >
|
||||
// __pivot_pos == std::pair<S, int>*
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, *__pivot_pos);
|
||||
|
||||
// Divide, leave pivot unchanged in last place.
|
||||
_RAIter __split_pos1, __split_pos2;
|
||||
__split_pos1 = __begin
|
||||
+ __parallel_partition(__begin, __end - 1, __pred,
|
||||
__get_max_threads());
|
||||
|
||||
// Left side: < __pivot_pos; __right side: >= __pivot_pos
|
||||
|
||||
// Swap pivot back to middle.
|
||||
if (__split_pos1 != __pivot_pos)
|
||||
std::swap(*__split_pos1, *__pivot_pos);
|
||||
__pivot_pos = __split_pos1;
|
||||
|
||||
// In case all elements are equal, __split_pos1 == 0
|
||||
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|
||||
|| (__end - __split_pos1) < (__n >> 7))
|
||||
{
|
||||
// Very unequal split, one part smaller than one 128th
|
||||
// elements not strictly larger than the pivot.
|
||||
__gnu_parallel::__unary_negate<__gnu_parallel::
|
||||
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
|
||||
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
|
||||
_ValueType, bool>(__comp, *__pivot_pos));
|
||||
|
||||
// Find other end of pivot-equal range.
|
||||
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
|
||||
__end, __pred);
|
||||
}
|
||||
else
|
||||
// Only skip the pivot.
|
||||
__split_pos2 = __split_pos1 + 1;
|
||||
|
||||
// Compare iterators.
|
||||
if (__split_pos2 <= __nth)
|
||||
__begin = __split_pos2;
|
||||
else if (__nth < __split_pos1)
|
||||
__end = __split_pos1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// Only at most _Settings::partition_minimal_n __elements __left.
|
||||
__gnu_sequential::sort(__begin, __end, __comp);
|
||||
}
|
||||
|
||||
/** @brief Parallel implementation of std::partial_sort().
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __middle Sort until this position.
|
||||
* @param __end End iterator of input sequence.
|
||||
* @param __comp Comparator. */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_partial_sort(_RAIter __begin,
|
||||
_RAIter __middle,
|
||||
_RAIter __end, _Compare __comp)
|
||||
{
|
||||
__parallel_nth_element(__begin, __middle, __end, __comp);
|
||||
std::sort(__begin, __middle, __comp);
|
||||
}
|
||||
* @param __comp Comparator. */
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_partial_sort(_RAIter __begin,
|
||||
_RAIter __middle,
|
||||
_RAIter __end, _Compare __comp)
|
||||
{
|
||||
__parallel_nth_element(__begin, __middle, __end, __comp);
|
||||
std::sort(__begin, __middle, __comp);
|
||||
}
|
||||
|
||||
} //namespace __gnu_parallel
|
||||
|
||||
|
@ -65,10 +65,10 @@ namespace __gnu_parallel
|
||||
public:
|
||||
/** @brief Constructor. Not to be called concurrent, of course.
|
||||
* @param _M_max_size Maximal number of elements to be contained. */
|
||||
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
|
||||
_RestrictedBoundedConcurrentQueue(_SequenceIndex __max_size)
|
||||
{
|
||||
this->_M_max_size = _M_max_size;
|
||||
_M_base = new _Tp[_M_max_size];
|
||||
_M_max_size = __max_size;
|
||||
_M_base = new _Tp[__max_size];
|
||||
_M_borders = __encode2(0, 0);
|
||||
#pragma omp flush
|
||||
}
|
||||
@ -105,12 +105,12 @@ namespace __gnu_parallel
|
||||
while (__former_front > __former_back)
|
||||
{
|
||||
// Chance.
|
||||
_CASable
|
||||
__former_borders = __encode2(__former_front, __former_back);
|
||||
_CASable
|
||||
__new_borders = __encode2(__former_front - 1, __former_back);
|
||||
if (__compare_and_swap(
|
||||
&_M_borders, __former_borders, __new_borders))
|
||||
_CASable __former_borders = __encode2(__former_front,
|
||||
__former_back);
|
||||
_CASable __new_borders = __encode2(__former_front - 1,
|
||||
__former_back);
|
||||
if (__compare_and_swap(&_M_borders, __former_borders,
|
||||
__new_borders))
|
||||
{
|
||||
__t = *(_M_base + (__former_front - 1) % _M_max_size);
|
||||
return true;
|
||||
@ -132,12 +132,12 @@ namespace __gnu_parallel
|
||||
while (__former_front > __former_back)
|
||||
{
|
||||
// Chance.
|
||||
_CASable
|
||||
__former_borders = __encode2(__former_front, __former_back);
|
||||
_CASable
|
||||
__new_borders = __encode2(__former_front, __former_back + 1);
|
||||
if (__compare_and_swap(
|
||||
&_M_borders, __former_borders, __new_borders))
|
||||
_CASable __former_borders = __encode2(__former_front,
|
||||
__former_back);
|
||||
_CASable __new_borders = __encode2(__former_front,
|
||||
__former_back + 1);
|
||||
if (__compare_and_swap(&_M_borders, __former_borders,
|
||||
__new_borders))
|
||||
{
|
||||
__t = *(_M_base + __former_back % _M_max_size);
|
||||
return true;
|
||||
|
@ -48,13 +48,12 @@ namespace __gnu_parallel
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
typename std::iterator_traits<_RAIter>::difference_type
|
||||
__parallel_sort_qs_divide(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Compare __comp, typename std::iterator_traits
|
||||
<_RAIter>::difference_type __pivot_rank,
|
||||
typename std::iterator_traits
|
||||
<_RAIter>::difference_type
|
||||
__num_samples, _ThreadIndex __num_threads)
|
||||
__parallel_sort_qs_divide(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, typename std::iterator_traits
|
||||
<_RAIter>::difference_type __pivot_rank,
|
||||
typename std::iterator_traits
|
||||
<_RAIter>::difference_type
|
||||
__num_samples, _ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
@ -64,25 +63,24 @@ namespace __gnu_parallel
|
||||
__num_samples = std::min(__num_samples, __n);
|
||||
|
||||
// Allocate uninitialized, to avoid default constructor.
|
||||
_ValueType* __samples =
|
||||
static_cast<_ValueType*>(::operator new(__num_samples
|
||||
* sizeof(_ValueType)));
|
||||
_ValueType* __samples = static_cast<_ValueType*>
|
||||
(::operator new(__num_samples * sizeof(_ValueType)));
|
||||
|
||||
for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
|
||||
{
|
||||
const unsigned long long __index
|
||||
= static_cast<unsigned long long>(__s) * __n / __num_samples;
|
||||
const unsigned long long __index = static_cast<unsigned long long>
|
||||
(__s) * __n / __num_samples;
|
||||
::new(&(__samples[__s])) _ValueType(__begin[__index]);
|
||||
}
|
||||
|
||||
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
|
||||
|
||||
_ValueType& pivot = __samples[__pivot_rank * __num_samples / __n];
|
||||
_ValueType& __pivot = __samples[__pivot_rank * __num_samples / __n];
|
||||
|
||||
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
|
||||
__pred(__comp, pivot);
|
||||
_DifferenceType __split =
|
||||
__parallel_partition(__begin, __end, __pred, __num_threads);
|
||||
__pred(__comp, __pivot);
|
||||
_DifferenceType __split = __parallel_partition(__begin, __end,
|
||||
__pred, __num_threads);
|
||||
|
||||
::operator delete(__samples);
|
||||
|
||||
@ -98,10 +96,9 @@ namespace __gnu_parallel
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_sort_qs_conquer(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
__parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
@ -127,24 +124,22 @@ namespace __gnu_parallel
|
||||
|
||||
__pivot_rank = __n * __num_threads_left / __num_threads;
|
||||
|
||||
_DifferenceType __split =
|
||||
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
|
||||
_Settings::get().sort_qs_num_samples_preset,
|
||||
__num_threads);
|
||||
_DifferenceType __split = __parallel_sort_qs_divide
|
||||
(__begin, __end, __comp, __pivot_rank,
|
||||
_Settings::get().sort_qs_num_samples_preset, __num_threads);
|
||||
|
||||
#pragma omp parallel sections num_threads(2)
|
||||
{
|
||||
#pragma omp section
|
||||
__parallel_sort_qs_conquer(__begin, __begin + __split,
|
||||
__comp, __num_threads_left);
|
||||
__comp, __num_threads_left);
|
||||
#pragma omp section
|
||||
__parallel_sort_qs_conquer(__begin + __split, __end,
|
||||
__comp, __num_threads - __num_threads_left);
|
||||
__comp, __num_threads - __num_threads_left);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** @brief Unbalanced quicksort main call.
|
||||
* @param __begin Begin iterator of input sequence.
|
||||
* @param __end End iterator input sequence, ignored.
|
||||
@ -154,10 +149,9 @@ namespace __gnu_parallel
|
||||
*/
|
||||
template<typename _RAIter, typename _Compare>
|
||||
void
|
||||
__parallel_sort_qs(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
__parallel_sort_qs(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
_ThreadIndex __num_threads)
|
||||
{
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
|
@ -39,492 +39,484 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Type to hold the index of a bin.
|
||||
*
|
||||
* Since many variables of this type are allocated, it should be
|
||||
* chosen as small as possible.
|
||||
*/
|
||||
typedef unsigned short _BinIndex;
|
||||
/** @brief Type to hold the index of a bin.
|
||||
*
|
||||
* Since many variables of this type are allocated, it should be
|
||||
* chosen as small as possible.
|
||||
*/
|
||||
typedef unsigned short _BinIndex;
|
||||
|
||||
/** @brief Data known to every thread participating in
|
||||
__gnu_parallel::__parallel_random_shuffle(). */
|
||||
template<typename _RAIter>
|
||||
struct _DRandomShufflingGlobalData
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
/** @brief Data known to every thread participating in
|
||||
__gnu_parallel::__parallel_random_shuffle(). */
|
||||
template<typename _RAIter>
|
||||
struct _DRandomShufflingGlobalData
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
/** @brief Begin iterator of the __source. */
|
||||
_RAIter& _M_source;
|
||||
/** @brief Begin iterator of the __source. */
|
||||
_RAIter& _M_source;
|
||||
|
||||
/** @brief Temporary arrays for each thread. */
|
||||
_ValueType** _M_temporaries;
|
||||
/** @brief Temporary arrays for each thread. */
|
||||
_ValueType** _M_temporaries;
|
||||
|
||||
/** @brief Two-dimensional array to hold the thread-bin distribution.
|
||||
*
|
||||
* Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */
|
||||
_DifferenceType** _M_dist;
|
||||
/** @brief Two-dimensional array to hold the thread-bin distribution.
|
||||
*
|
||||
* Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */
|
||||
_DifferenceType** _M_dist;
|
||||
|
||||
/** @brief Start indexes of the threads' __chunks. */
|
||||
_DifferenceType* _M_starts;
|
||||
/** @brief Start indexes of the threads' __chunks. */
|
||||
_DifferenceType* _M_starts;
|
||||
|
||||
/** @brief Number of the thread that will further process the
|
||||
corresponding bin. */
|
||||
_ThreadIndex* _M_bin_proc;
|
||||
/** @brief Number of the thread that will further process the
|
||||
corresponding bin. */
|
||||
_ThreadIndex* _M_bin_proc;
|
||||
|
||||
/** @brief Number of bins to distribute to. */
|
||||
int _M_num_bins;
|
||||
/** @brief Number of bins to distribute to. */
|
||||
int _M_num_bins;
|
||||
|
||||
/** @brief Number of bits needed to address the bins. */
|
||||
int _M_num_bits;
|
||||
/** @brief Number of bits needed to address the bins. */
|
||||
int _M_num_bits;
|
||||
|
||||
/** @brief Constructor. */
|
||||
_DRandomShufflingGlobalData(_RAIter& __source)
|
||||
: _M_source(__source) { }
|
||||
};
|
||||
/** @brief Constructor. */
|
||||
_DRandomShufflingGlobalData(_RAIter& __source)
|
||||
: _M_source(__source) { }
|
||||
};
|
||||
|
||||
/** @brief Local data for a thread participating in
|
||||
__gnu_parallel::__parallel_random_shuffle().
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
struct _DRSSorterPU
|
||||
{
|
||||
/** @brief Number of threads participating in total. */
|
||||
int _M_num_threads;
|
||||
/** @brief Local data for a thread participating in
|
||||
__gnu_parallel::__parallel_random_shuffle().
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
struct _DRSSorterPU
|
||||
{
|
||||
/** @brief Number of threads participating in total. */
|
||||
int _M_num_threads;
|
||||
|
||||
/** @brief Begin index for bins taken care of by this thread. */
|
||||
_BinIndex _M_bins_begin;
|
||||
/** @brief Begin index for bins taken care of by this thread. */
|
||||
_BinIndex _M_bins_begin;
|
||||
|
||||
/** @brief End index for bins taken care of by this thread. */
|
||||
_BinIndex __bins_end;
|
||||
/** @brief End index for bins taken care of by this thread. */
|
||||
_BinIndex __bins_end;
|
||||
|
||||
/** @brief Random _M_seed for this thread. */
|
||||
uint32_t _M_seed;
|
||||
/** @brief Random _M_seed for this thread. */
|
||||
uint32_t _M_seed;
|
||||
|
||||
/** @brief Pointer to global data. */
|
||||
_DRandomShufflingGlobalData<_RAIter>* _M_sd;
|
||||
};
|
||||
/** @brief Pointer to global data. */
|
||||
_DRandomShufflingGlobalData<_RAIter>* _M_sd;
|
||||
};
|
||||
|
||||
/** @brief Generate a random number in @__c [0,2^logp).
|
||||
* @param logp Logarithm (basis 2) of the upper range __bound.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RandomNumberGenerator>
|
||||
inline int
|
||||
__random_number_pow2(int logp, _RandomNumberGenerator& __rng)
|
||||
{ return __rng.__genrand_bits(logp); }
|
||||
/** @brief Generate a random number in @__c [0,2^__logp).
|
||||
* @param __logp Logarithm (basis 2) of the upper range __bound.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RandomNumberGenerator>
|
||||
inline int
|
||||
__random_number_pow2(int __logp, _RandomNumberGenerator& __rng)
|
||||
{ return __rng.__genrand_bits(__logp); }
|
||||
|
||||
/** @brief Random shuffle code executed by each thread.
|
||||
* @param __pus Array of thread-local data records. */
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
void
|
||||
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
|
||||
_RandomNumberGenerator>* __pus)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
/** @brief Random shuffle code executed by each thread.
|
||||
* @param __pus Array of thread-local data records. */
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
void
|
||||
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
|
||||
_RandomNumberGenerator>* __pus)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DRSSorterPU<_RAIter, _RandomNumberGenerator>* d = &__pus[__iam];
|
||||
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam];
|
||||
_DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd;
|
||||
|
||||
// Indexing: _M_dist[bin][processor]
|
||||
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] -
|
||||
_M_sd->_M_starts[__iam];
|
||||
_BinIndex* __oracles = new _BinIndex[__length];
|
||||
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
|
||||
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
|
||||
_ValueType** _M_temporaries = new _ValueType*[d->_M_num_threads];
|
||||
// Indexing: _M_dist[bin][processor]
|
||||
_DifferenceType __length = (__sd->_M_starts[__iam + 1]
|
||||
- __sd->_M_starts[__iam]);
|
||||
_BinIndex* __oracles = new _BinIndex[__length];
|
||||
_DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1];
|
||||
_BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins];
|
||||
_ValueType** __temporaries = new _ValueType*[__d->_M_num_threads];
|
||||
|
||||
// Compute oracles and count appearances.
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
|
||||
_M_dist[__b] = 0;
|
||||
int _M_num_bits = _M_sd->_M_num_bits;
|
||||
// Compute oracles and count appearances.
|
||||
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
|
||||
__dist[__b] = 0;
|
||||
int __num_bits = __sd->_M_num_bits;
|
||||
|
||||
_RandomNumber __rng(d->_M_seed);
|
||||
_RandomNumber __rng(__d->_M_seed);
|
||||
|
||||
// First main loop.
|
||||
for (_DifferenceType __i = 0; __i < __length; ++__i)
|
||||
// First main loop.
|
||||
for (_DifferenceType __i = 0; __i < __length; ++__i)
|
||||
{
|
||||
_BinIndex __oracle = __random_number_pow2(__num_bits, __rng);
|
||||
__oracles[__i] = __oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
++(__dist[__oracle + 1]);
|
||||
}
|
||||
|
||||
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
|
||||
__sd->_M_dist[__b][__iam + 1] = __dist[__b];
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
_BinIndex __oracle = __random_number_pow2(_M_num_bits, __rng);
|
||||
__oracles[__i] = __oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
++(_M_dist[__oracle + 1]);
|
||||
// Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now
|
||||
// contains the total number of items in bin __s
|
||||
for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s)
|
||||
__gnu_sequential::partial_sum(__sd->_M_dist[__s + 1],
|
||||
__sd->_M_dist[__s + 1]
|
||||
+ __d->_M_num_threads + 1,
|
||||
__sd->_M_dist[__s + 1]);
|
||||
}
|
||||
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
|
||||
_M_sd->_M_dist[__b][__iam + 1] = _M_dist[__b];
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp barrier
|
||||
_SequenceIndex __offset = 0, __global_offset = 0;
|
||||
for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s)
|
||||
__global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads];
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
// Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains
|
||||
// the total number of items in bin __s
|
||||
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
|
||||
__gnu_sequential::partial_sum(
|
||||
_M_sd->_M_dist[__s + 1],
|
||||
_M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1,
|
||||
_M_sd->_M_dist[__s + 1]);
|
||||
# pragma omp barrier
|
||||
|
||||
for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s)
|
||||
{
|
||||
for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t)
|
||||
__sd->_M_dist[__s + 1][__t] += __offset;
|
||||
__offset = __sd->_M_dist[__s + 1][__d->_M_num_threads];
|
||||
}
|
||||
|
||||
__sd->_M_temporaries[__iam] = static_cast<_ValueType*>
|
||||
(::operator new(sizeof(_ValueType) * __offset));
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Draw local copies to avoid false sharing.
|
||||
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
|
||||
__dist[__b] = __sd->_M_dist[__b][__iam];
|
||||
for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b)
|
||||
__bin_proc[__b] = __sd->_M_bin_proc[__b];
|
||||
for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t)
|
||||
__temporaries[__t] = __sd->_M_temporaries[__t];
|
||||
|
||||
_RAIter __source = __sd->_M_source;
|
||||
_DifferenceType __start = __sd->_M_starts[__iam];
|
||||
|
||||
// Distribute according to oracles, second main loop.
|
||||
for (_DifferenceType __i = 0; __i < __length; ++__i)
|
||||
{
|
||||
_BinIndex __target_bin = __oracles[__i];
|
||||
_ThreadIndex __target_p = __bin_proc[__target_bin];
|
||||
|
||||
// Last column [__d->_M_num_threads] stays unchanged.
|
||||
::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++]))
|
||||
_ValueType(*(__source + __i + __start));
|
||||
}
|
||||
|
||||
delete[] __oracles;
|
||||
delete[] __dist;
|
||||
delete[] __bin_proc;
|
||||
delete[] __temporaries;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Shuffle bins internally.
|
||||
for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b)
|
||||
{
|
||||
_ValueType* __begin =
|
||||
(__sd->_M_temporaries[__iam]
|
||||
+ (__b == __d->_M_bins_begin
|
||||
? 0 : __sd->_M_dist[__b][__d->_M_num_threads])),
|
||||
* __end = (__sd->_M_temporaries[__iam]
|
||||
+ __sd->_M_dist[__b + 1][__d->_M_num_threads]);
|
||||
|
||||
__sequential_random_shuffle(__begin, __end, __rng);
|
||||
std::copy(__begin, __end, __sd->_M_source + __global_offset
|
||||
+ (__b == __d->_M_bins_begin
|
||||
? 0 : __sd->_M_dist[__b][__d->_M_num_threads]));
|
||||
}
|
||||
|
||||
::operator delete(__sd->_M_temporaries[__iam]);
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
/** @brief Round up to the next greater power of 2.
|
||||
* @param __x _Integer to round up */
|
||||
template<typename _Tp>
|
||||
_Tp
|
||||
__round_up_to_pow2(_Tp __x)
|
||||
{
|
||||
if (__x <= 1)
|
||||
return 1;
|
||||
else
|
||||
return (_Tp)1 << (__rd_log2(__x - 1) + 1);
|
||||
}
|
||||
|
||||
_SequenceIndex __offset = 0, __global_offset = 0;
|
||||
for (_BinIndex __s = 0; __s < d->_M_bins_begin; ++__s)
|
||||
__global_offset += _M_sd->_M_dist[__s + 1][d->_M_num_threads];
|
||||
/** @brief Main parallel random shuffle step.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __n Length of sequence.
|
||||
* @param __num_threads Number of threads to use.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
void
|
||||
__parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end,
|
||||
typename std::iterator_traits
|
||||
<_RAIter>::difference_type __n,
|
||||
_ThreadIndex __num_threads,
|
||||
_RandomNumberGenerator& __rng)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
# pragma omp barrier
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s)
|
||||
{
|
||||
for (int __t = 0; __t < d->_M_num_threads + 1; ++__t)
|
||||
_M_sd->_M_dist[__s + 1][__t] += __offset;
|
||||
__offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads];
|
||||
}
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>(
|
||||
::operator new(sizeof(_ValueType) * __offset));
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Draw local copies to avoid false sharing.
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
|
||||
_M_dist[__b] = _M_sd->_M_dist[__b][__iam];
|
||||
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins; ++__b)
|
||||
_M_bin_proc[__b] = _M_sd->_M_bin_proc[__b];
|
||||
for (_ThreadIndex __t = 0; __t < d->_M_num_threads; ++__t)
|
||||
_M_temporaries[__t] = _M_sd->_M_temporaries[__t];
|
||||
|
||||
_RAIter _M_source = _M_sd->_M_source;
|
||||
_DifferenceType __start = _M_sd->_M_starts[__iam];
|
||||
|
||||
// Distribute according to oracles, second main loop.
|
||||
for (_DifferenceType __i = 0; __i < __length; ++__i)
|
||||
{
|
||||
_BinIndex target_bin = __oracles[__i];
|
||||
_ThreadIndex target_p = _M_bin_proc[target_bin];
|
||||
|
||||
// Last column [d->_M_num_threads] stays unchanged.
|
||||
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++]))
|
||||
_ValueType(*(_M_source + __i + __start));
|
||||
}
|
||||
|
||||
delete[] __oracles;
|
||||
delete[] _M_dist;
|
||||
delete[] _M_bin_proc;
|
||||
delete[] _M_temporaries;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Shuffle bins internally.
|
||||
for (_BinIndex __b = d->_M_bins_begin; __b < d->__bins_end; ++__b)
|
||||
{
|
||||
_ValueType* __begin =
|
||||
_M_sd->_M_temporaries[__iam] +
|
||||
((__b == d->_M_bins_begin)
|
||||
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]),
|
||||
* __end =
|
||||
_M_sd->_M_temporaries[__iam] +
|
||||
_M_sd->_M_dist[__b + 1][d->_M_num_threads];
|
||||
__sequential_random_shuffle(__begin, __end, __rng);
|
||||
std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
|
||||
((__b == d->_M_bins_begin)
|
||||
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]));
|
||||
}
|
||||
|
||||
::operator delete(_M_sd->_M_temporaries[__iam]);
|
||||
}
|
||||
|
||||
/** @brief Round up to the next greater power of 2.
|
||||
* @param __x _Integer to round up */
|
||||
template<typename _Tp>
|
||||
_Tp
|
||||
__round_up_to_pow2(_Tp __x)
|
||||
{
|
||||
if (__x <= 1)
|
||||
return 1;
|
||||
else
|
||||
return (_Tp)1 << (__rd_log2(__x - 1) + 1);
|
||||
}
|
||||
|
||||
/** @brief Main parallel random shuffle step.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __n Length of sequence.
|
||||
* @param __num_threads Number of threads to use.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
void
|
||||
__parallel_random_shuffle_drs(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
typename std::iterator_traits
|
||||
<_RAIter>::difference_type __n,
|
||||
_ThreadIndex __num_threads,
|
||||
_RandomNumberGenerator& __rng)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL(__n)
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
if (__num_threads > __n)
|
||||
__num_threads = static_cast<_ThreadIndex>(__n);
|
||||
|
||||
_BinIndex _M_num_bins, __num_bins_cache;
|
||||
_BinIndex __num_bins, __num_bins_cache;
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
// Try the L1 cache first.
|
||||
// Try the L1 cache first.
|
||||
|
||||
// Must fit into L1.
|
||||
__num_bins_cache = std::max<_DifferenceType>(
|
||||
1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
// Must fit into L1.
|
||||
__num_bins_cache =
|
||||
std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb
|
||||
/ sizeof(_ValueType)));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
_M_num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
__num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin.
|
||||
_M_num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
|
||||
// 2 TLB entries needed per bin.
|
||||
__num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
|
||||
#endif
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
__num_bins = __round_up_to_pow2(__num_bins);
|
||||
|
||||
if (_M_num_bins < __num_bins_cache)
|
||||
{
|
||||
if (__num_bins < __num_bins_cache)
|
||||
{
|
||||
#endif
|
||||
// Now try the L2 cache
|
||||
// Must fit into L2
|
||||
__num_bins_cache = static_cast<_BinIndex>(std::max<_DifferenceType>(
|
||||
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
// Now try the L2 cache
|
||||
// Must fit into L2
|
||||
__num_bins_cache = static_cast<_BinIndex>
|
||||
(std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
|
||||
/ sizeof(_ValueType))));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2.
|
||||
_M_num_bins = static_cast<_BinIndex>(
|
||||
std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
// No more buckets than TLB entries, power of 2.
|
||||
__num_bins = static_cast<_BinIndex>
|
||||
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin.
|
||||
_M_num_bins = std::min(
|
||||
static_cast<_DifferenceType>(__s.TLB_size / 2), _M_num_bins);
|
||||
// 2 TLB entries needed per bin.
|
||||
__num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2),
|
||||
__num_bins);
|
||||
#endif
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
__num_bins = __round_up_to_pow2(__num_bins);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
__num_threads = std::min<_BinIndex>(__num_threads, _M_num_bins);
|
||||
__num_threads = std::min<_BinIndex>(__num_threads, __num_bins);
|
||||
|
||||
if (__num_threads <= 1)
|
||||
return __sequential_random_shuffle(__begin, __end, __rng);
|
||||
if (__num_threads <= 1)
|
||||
return __sequential_random_shuffle(__begin, __end, __rng);
|
||||
|
||||
_DRandomShufflingGlobalData<_RAIter> _M_sd(__begin);
|
||||
_DRSSorterPU<_RAIter, _RandomNumber >* __pus;
|
||||
_DifferenceType* _M_starts;
|
||||
_DRandomShufflingGlobalData<_RAIter> __sd(__begin);
|
||||
_DRSSorterPU<_RAIter, _RandomNumber >* __pus;
|
||||
_DifferenceType* __starts;
|
||||
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
_ThreadIndex __num_threads = omp_get_num_threads();
|
||||
_ThreadIndex __num_threads = omp_get_num_threads();
|
||||
# pragma omp single
|
||||
{
|
||||
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>
|
||||
[__num_threads];
|
||||
{
|
||||
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads];
|
||||
|
||||
__sd._M_temporaries = new _ValueType*[__num_threads];
|
||||
__sd._M_dist = new _DifferenceType*[__num_bins + 1];
|
||||
__sd._M_bin_proc = new _ThreadIndex[__num_bins];
|
||||
for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b)
|
||||
__sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
|
||||
for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b)
|
||||
{
|
||||
__sd._M_dist[0][0] = 0;
|
||||
__sd._M_dist[__b][0] = 0;
|
||||
}
|
||||
__starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
|
||||
int __bin_cursor = 0;
|
||||
__sd._M_num_bins = __num_bins;
|
||||
__sd._M_num_bits = __rd_log2(__num_bins);
|
||||
|
||||
_M_sd._M_temporaries = new _ValueType*[__num_threads];
|
||||
_M_sd._M_dist = new _DifferenceType*[_M_num_bins + 1];
|
||||
_M_sd._M_bin_proc = new _ThreadIndex[_M_num_bins];
|
||||
for (_BinIndex __b = 0; __b < _M_num_bins + 1; ++__b)
|
||||
_M_sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
|
||||
for (_BinIndex __b = 0; __b < (_M_num_bins + 1); ++__b)
|
||||
{
|
||||
_M_sd._M_dist[0][0] = 0;
|
||||
_M_sd._M_dist[__b][0] = 0;
|
||||
}
|
||||
_M_starts = _M_sd._M_starts
|
||||
= new _DifferenceType[__num_threads + 1];
|
||||
int bin_cursor = 0;
|
||||
_M_sd._M_num_bins = _M_num_bins;
|
||||
_M_sd._M_num_bits = __rd_log2(_M_num_bins);
|
||||
_DifferenceType __chunk_length = __n / __num_threads,
|
||||
__split = __n % __num_threads,
|
||||
__start = 0;
|
||||
_DifferenceType __bin_chunk_length = __num_bins / __num_threads,
|
||||
__bin_split = __num_bins % __num_threads;
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
__starts[__i] = __start;
|
||||
__start += (__i < __split
|
||||
? (__chunk_length + 1) : __chunk_length);
|
||||
int __j = __pus[__i]._M_bins_begin = __bin_cursor;
|
||||
|
||||
_DifferenceType __chunk_length = __n / __num_threads,
|
||||
__split = __n % __num_threads, __start = 0;
|
||||
_DifferenceType bin_chunk_length = _M_num_bins / __num_threads,
|
||||
bin_split = _M_num_bins % __num_threads;
|
||||
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
_M_starts[__i] = __start;
|
||||
__start += (__i < __split)
|
||||
? (__chunk_length + 1) : __chunk_length;
|
||||
int __j = __pus[__i]._M_bins_begin = bin_cursor;
|
||||
|
||||
// Range of bins for this processor.
|
||||
bin_cursor += (__i < bin_split) ?
|
||||
(bin_chunk_length + 1) : bin_chunk_length;
|
||||
__pus[__i].__bins_end = bin_cursor;
|
||||
for (; __j < bin_cursor; ++__j)
|
||||
_M_sd._M_bin_proc[__j] = __i;
|
||||
__pus[__i]._M_num_threads = __num_threads;
|
||||
__pus[__i]._M_seed =
|
||||
__rng(std::numeric_limits<uint32_t>::max());
|
||||
__pus[__i]._M_sd = &_M_sd;
|
||||
}
|
||||
_M_starts[__num_threads] = __start;
|
||||
} //single
|
||||
// Now shuffle in parallel.
|
||||
__parallel_random_shuffle_drs_pu(__pus);
|
||||
// Range of bins for this processor.
|
||||
__bin_cursor += (__i < __bin_split
|
||||
? (__bin_chunk_length + 1)
|
||||
: __bin_chunk_length);
|
||||
__pus[__i].__bins_end = __bin_cursor;
|
||||
for (; __j < __bin_cursor; ++__j)
|
||||
__sd._M_bin_proc[__j] = __i;
|
||||
__pus[__i]._M_num_threads = __num_threads;
|
||||
__pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max());
|
||||
__pus[__i]._M_sd = &__sd;
|
||||
}
|
||||
__starts[__num_threads] = __start;
|
||||
} //single
|
||||
// Now shuffle in parallel.
|
||||
__parallel_random_shuffle_drs_pu(__pus);
|
||||
} // parallel
|
||||
|
||||
delete[] _M_starts;
|
||||
delete[] _M_sd._M_bin_proc;
|
||||
for (int __s = 0; __s < (_M_num_bins + 1); ++__s)
|
||||
delete[] _M_sd._M_dist[__s];
|
||||
delete[] _M_sd._M_dist;
|
||||
delete[] _M_sd._M_temporaries;
|
||||
delete[] __starts;
|
||||
delete[] __sd._M_bin_proc;
|
||||
for (int __s = 0; __s < (__num_bins + 1); ++__s)
|
||||
delete[] __sd._M_dist[__s];
|
||||
delete[] __sd._M_dist;
|
||||
delete[] __sd._M_temporaries;
|
||||
|
||||
delete[] __pus;
|
||||
}
|
||||
delete[] __pus;
|
||||
}
|
||||
|
||||
/** @brief Sequential cache-efficient random shuffle.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
void
|
||||
__sequential_random_shuffle(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_RandomNumberGenerator& __rng)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
/** @brief Sequential cache-efficient random shuffle.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
void
|
||||
__sequential_random_shuffle(_RAIter __begin, _RAIter __end,
|
||||
_RandomNumberGenerator& __rng)
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_DifferenceType __n = __end - __begin;
|
||||
const _Settings& __s = _Settings::get();
|
||||
_DifferenceType __n = __end - __begin;
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
_BinIndex _M_num_bins, __num_bins_cache;
|
||||
_BinIndex __num_bins, __num_bins_cache;
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
// Try the L1 cache first, must fit into L1.
|
||||
__num_bins_cache =
|
||||
std::max<_DifferenceType>
|
||||
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
// Try the L1 cache first, must fit into L1.
|
||||
__num_bins_cache = std::max<_DifferenceType>
|
||||
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size
|
||||
_M_num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size
|
||||
__num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin
|
||||
_M_num_bins = std::min((_DifferenceType)__s.TLB_size / 2, _M_num_bins);
|
||||
// 2 TLB entries needed per bin
|
||||
__num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins);
|
||||
#endif
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
__num_bins = __round_up_to_pow2(__num_bins);
|
||||
|
||||
if (_M_num_bins < __num_bins_cache)
|
||||
{
|
||||
if (__num_bins < __num_bins_cache)
|
||||
{
|
||||
#endif
|
||||
// Now try the L2 cache, must fit into L2.
|
||||
__num_bins_cache =
|
||||
static_cast<_BinIndex>(std::max<_DifferenceType>(
|
||||
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
// Now try the L2 cache, must fit into L2.
|
||||
__num_bins_cache = static_cast<_BinIndex>
|
||||
(std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
|
||||
/ sizeof(_ValueType))));
|
||||
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
||||
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
_M_num_bins = static_cast<_BinIndex>
|
||||
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
||||
// No more buckets than TLB entries, power of 2
|
||||
// Power of 2 and at least one element per bin, at most the TLB size.
|
||||
__num_bins = static_cast<_BinIndex>
|
||||
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
||||
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
||||
// 2 TLB entries needed per bin
|
||||
_M_num_bins =
|
||||
std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
|
||||
// 2 TLB entries needed per bin
|
||||
__num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
|
||||
#endif
|
||||
_M_num_bins = __round_up_to_pow2(_M_num_bins);
|
||||
__num_bins = __round_up_to_pow2(__num_bins);
|
||||
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int _M_num_bits = __rd_log2(_M_num_bins);
|
||||
int __num_bits = __rd_log2(__num_bins);
|
||||
|
||||
if (_M_num_bins > 1)
|
||||
{
|
||||
_ValueType* __target = static_cast<_ValueType*>(
|
||||
::operator new(sizeof(_ValueType) * __n));
|
||||
_BinIndex* __oracles = new _BinIndex[__n];
|
||||
_DifferenceType* __dist0 = new _DifferenceType[_M_num_bins + 1],
|
||||
* __dist1 = new _DifferenceType[_M_num_bins + 1];
|
||||
if (__num_bins > 1)
|
||||
{
|
||||
_ValueType* __target =
|
||||
static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n));
|
||||
_BinIndex* __oracles = new _BinIndex[__n];
|
||||
_DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1],
|
||||
* __dist1 = new _DifferenceType[__num_bins + 1];
|
||||
|
||||
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
|
||||
__dist0[__b] = 0;
|
||||
for (int __b = 0; __b < __num_bins + 1; ++__b)
|
||||
__dist0[__b] = 0;
|
||||
|
||||
_RandomNumber bitrng(__rng(0xFFFFFFFF));
|
||||
_RandomNumber __bitrng(__rng(0xFFFFFFFF));
|
||||
|
||||
for (_DifferenceType __i = 0; __i < __n; ++__i)
|
||||
{
|
||||
_BinIndex __oracle = __random_number_pow2(_M_num_bits, bitrng);
|
||||
__oracles[__i] = __oracle;
|
||||
for (_DifferenceType __i = 0; __i < __n; ++__i)
|
||||
{
|
||||
_BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng);
|
||||
__oracles[__i] = __oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
++(__dist0[__oracle + 1]);
|
||||
}
|
||||
// To allow prefix (partial) sum.
|
||||
++(__dist0[__oracle + 1]);
|
||||
}
|
||||
|
||||
// Sum up bins.
|
||||
__gnu_sequential::
|
||||
partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
|
||||
// Sum up bins.
|
||||
__gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1,
|
||||
__dist0);
|
||||
|
||||
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
|
||||
__dist1[__b] = __dist0[__b];
|
||||
for (int __b = 0; __b < __num_bins + 1; ++__b)
|
||||
__dist1[__b] = __dist0[__b];
|
||||
|
||||
// Distribute according to oracles.
|
||||
for (_DifferenceType __i = 0; __i < __n; ++__i)
|
||||
::new(&(__target[(__dist0[__oracles[__i]])++]))
|
||||
_ValueType(*(__begin + __i));
|
||||
// Distribute according to oracles.
|
||||
for (_DifferenceType __i = 0; __i < __n; ++__i)
|
||||
::new(&(__target[(__dist0[__oracles[__i]])++]))
|
||||
_ValueType(*(__begin + __i));
|
||||
|
||||
for (int __b = 0; __b < _M_num_bins; ++__b)
|
||||
{
|
||||
__sequential_random_shuffle(__target + __dist1[__b],
|
||||
__target + __dist1[__b + 1],
|
||||
__rng);
|
||||
}
|
||||
for (int __b = 0; __b < __num_bins; ++__b)
|
||||
__sequential_random_shuffle(__target + __dist1[__b],
|
||||
__target + __dist1[__b + 1], __rng);
|
||||
|
||||
// Copy elements back.
|
||||
std::copy(__target, __target + __n, __begin);
|
||||
// Copy elements back.
|
||||
std::copy(__target, __target + __n, __begin);
|
||||
|
||||
delete[] __dist0;
|
||||
delete[] __dist1;
|
||||
delete[] __oracles;
|
||||
::operator delete(__target);
|
||||
}
|
||||
else
|
||||
__gnu_sequential::random_shuffle(__begin, __end, __rng);
|
||||
}
|
||||
|
||||
/** @brief Parallel random public call.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
inline void
|
||||
__parallel_random_shuffle(_RAIter __begin,
|
||||
_RAIter __end,
|
||||
_RandomNumberGenerator __rng = _RandomNumber())
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
__parallel_random_shuffle_drs(
|
||||
__begin, __end, __n, __get_max_threads(), __rng) ;
|
||||
}
|
||||
delete[] __dist0;
|
||||
delete[] __dist1;
|
||||
delete[] __oracles;
|
||||
::operator delete(__target);
|
||||
}
|
||||
else
|
||||
__gnu_sequential::random_shuffle(__begin, __end, __rng);
|
||||
}
|
||||
|
||||
/** @brief Parallel random public call.
|
||||
* @param __begin Begin iterator of sequence.
|
||||
* @param __end End iterator of sequence.
|
||||
* @param __rng Random number generator to use.
|
||||
*/
|
||||
template<typename _RAIter, typename _RandomNumberGenerator>
|
||||
inline void
|
||||
__parallel_random_shuffle(_RAIter __begin, _RAIter __end,
|
||||
_RandomNumberGenerator __rng = _RandomNumber())
|
||||
{
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
_DifferenceType __n = __end - __begin;
|
||||
__parallel_random_shuffle_drs(__begin, __end, __n,
|
||||
__get_max_threads(), __rng);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */
|
||||
|
@ -38,7 +38,6 @@
|
||||
#include <parallel/parallel.h>
|
||||
#include <parallel/equally_split.h>
|
||||
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/**
|
||||
@ -47,24 +46,24 @@ namespace __gnu_parallel
|
||||
* @param __length Length of sequence to search for.
|
||||
* @param __advances Returned __offsets.
|
||||
*/
|
||||
template<typename _RAIter, typename _DifferenceTp>
|
||||
void
|
||||
__calc_borders(_RAIter __elements, _DifferenceTp __length,
|
||||
_DifferenceTp* __off)
|
||||
{
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
template<typename _RAIter, typename _DifferenceTp>
|
||||
void
|
||||
__calc_borders(_RAIter __elements, _DifferenceTp __length,
|
||||
_DifferenceTp* __off)
|
||||
{
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
__off[0] = -1;
|
||||
if (__length > 1)
|
||||
__off[1] = 0;
|
||||
_DifferenceType __k = 0;
|
||||
for (_DifferenceType __j = 2; __j <= __length; __j++)
|
||||
{
|
||||
while ((__k >= 0) && !(__elements[__k] == __elements[__j-1]))
|
||||
__k = __off[__k];
|
||||
__off[__j] = ++__k;
|
||||
}
|
||||
}
|
||||
__off[0] = -1;
|
||||
if (__length > 1)
|
||||
__off[1] = 0;
|
||||
_DifferenceType __k = 0;
|
||||
for (_DifferenceType __j = 2; __j <= __length; __j++)
|
||||
{
|
||||
while ((__k >= 0) && !(__elements[__k] == __elements[__j-1]))
|
||||
__k = __off[__k];
|
||||
__off[__j] = ++__k;
|
||||
}
|
||||
}
|
||||
|
||||
// Generic parallel find algorithm (requires random access iterator).
|
||||
|
||||
@ -75,100 +74,99 @@ template<typename _RAIter, typename _DifferenceTp>
|
||||
* @param __end2 End iterator of second sequence.
|
||||
* @param __pred Find predicate.
|
||||
* @return Place of finding in first sequences. */
|
||||
template<typename __RAIter1,
|
||||
typename __RAIter2,
|
||||
typename _Pred>
|
||||
__RAIter1
|
||||
__search_template(__RAIter1 __begin1, __RAIter1 __end1,
|
||||
__RAIter2 __begin2, __RAIter2 __end2,
|
||||
_Pred __pred)
|
||||
{
|
||||
typedef std::iterator_traits<__RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
template<typename __RAIter1,
|
||||
typename __RAIter2,
|
||||
typename _Pred>
|
||||
__RAIter1
|
||||
__search_template(__RAIter1 __begin1, __RAIter1 __end1,
|
||||
__RAIter2 __begin2, __RAIter2 __end2,
|
||||
_Pred __pred)
|
||||
{
|
||||
typedef std::iterator_traits<__RAIter1> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2));
|
||||
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2));
|
||||
|
||||
_DifferenceType __pattern_length = __end2 - __begin2;
|
||||
_DifferenceType __pattern_length = __end2 - __begin2;
|
||||
|
||||
// Pattern too short.
|
||||
if(__pattern_length <= 0)
|
||||
return __end1;
|
||||
// Pattern too short.
|
||||
if(__pattern_length <= 0)
|
||||
return __end1;
|
||||
|
||||
// Last point to start search.
|
||||
_DifferenceType __input_length = (__end1 - __begin1) - __pattern_length;
|
||||
// Last point to start search.
|
||||
_DifferenceType __input_length = (__end1 - __begin1) - __pattern_length;
|
||||
|
||||
// Where is first occurrence of pattern? defaults to end.
|
||||
_DifferenceType __result = (__end1 - __begin1);
|
||||
_DifferenceType *__splitters;
|
||||
// Where is first occurrence of pattern? defaults to end.
|
||||
_DifferenceType __result = (__end1 - __begin1);
|
||||
_DifferenceType *__splitters;
|
||||
|
||||
// Pattern too long.
|
||||
if (__input_length < 0)
|
||||
return __end1;
|
||||
// Pattern too long.
|
||||
if (__input_length < 0)
|
||||
return __end1;
|
||||
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
omp_lock_t __result_lock;
|
||||
omp_init_lock(&__result_lock);
|
||||
|
||||
_ThreadIndex __num_threads =
|
||||
std::max<_DifferenceType>(1,
|
||||
std::min<_DifferenceType>(__input_length, __get_max_threads()));
|
||||
_ThreadIndex __num_threads = std::max<_DifferenceType>
|
||||
(1, std::min<_DifferenceType>(__input_length,
|
||||
__get_max_threads()));
|
||||
|
||||
_DifferenceType __advances[__pattern_length];
|
||||
__calc_borders(__begin2, __pattern_length, __advances);
|
||||
_DifferenceType __advances[__pattern_length];
|
||||
__calc_borders(__begin2, __pattern_length, __advances);
|
||||
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__splitters = new _DifferenceType[__num_threads + 1];
|
||||
equally_split(__input_length, __num_threads, __splitters);
|
||||
}
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__splitters = new _DifferenceType[__num_threads + 1];
|
||||
equally_split(__input_length, __num_threads, __splitters);
|
||||
}
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
_DifferenceType __start = __splitters[__iam],
|
||||
__stop = __splitters[__iam + 1];
|
||||
_DifferenceType __start = __splitters[__iam],
|
||||
__stop = __splitters[__iam + 1];
|
||||
|
||||
_DifferenceType __pos_in_pattern = 0;
|
||||
bool __found_pattern = false;
|
||||
_DifferenceType __pos_in_pattern = 0;
|
||||
bool __found_pattern = false;
|
||||
|
||||
while (__start <= __stop && !__found_pattern)
|
||||
{
|
||||
// Get new value of result.
|
||||
#pragma omp flush(__result)
|
||||
// No chance for this thread to find first occurrence.
|
||||
if (__result < __start)
|
||||
break;
|
||||
while (__pred(__begin1[__start + __pos_in_pattern],
|
||||
__begin2[__pos_in_pattern]))
|
||||
{
|
||||
++__pos_in_pattern;
|
||||
if (__pos_in_pattern == __pattern_length)
|
||||
{
|
||||
// Found new candidate for result.
|
||||
omp_set_lock(&__result_lock);
|
||||
__result = std::min(__result, __start);
|
||||
omp_unset_lock(&__result_lock);
|
||||
while (__start <= __stop && !__found_pattern)
|
||||
{
|
||||
// Get new value of result.
|
||||
#pragma omp flush(__result)
|
||||
// No chance for this thread to find first occurrence.
|
||||
if (__result < __start)
|
||||
break;
|
||||
while (__pred(__begin1[__start + __pos_in_pattern],
|
||||
__begin2[__pos_in_pattern]))
|
||||
{
|
||||
++__pos_in_pattern;
|
||||
if (__pos_in_pattern == __pattern_length)
|
||||
{
|
||||
// Found new candidate for result.
|
||||
omp_set_lock(&__result_lock);
|
||||
__result = std::min(__result, __start);
|
||||
omp_unset_lock(&__result_lock);
|
||||
|
||||
__found_pattern = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Make safe jump.
|
||||
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
|
||||
__pos_in_pattern =
|
||||
(__advances[__pos_in_pattern] < 0) ?
|
||||
0 : __advances[__pos_in_pattern];
|
||||
}
|
||||
__found_pattern = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Make safe jump.
|
||||
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
|
||||
__pos_in_pattern = (__advances[__pos_in_pattern] < 0
|
||||
? 0 : __advances[__pos_in_pattern]);
|
||||
}
|
||||
} //parallel
|
||||
|
||||
omp_destroy_lock(&__result_lock);
|
||||
omp_destroy_lock(&__result_lock);
|
||||
|
||||
delete[] __splitters;
|
||||
|
||||
// Return iterator on found element.
|
||||
return (__begin1 + __result);
|
||||
}
|
||||
delete[] __splitters;
|
||||
|
||||
// Return iterator on found element.
|
||||
return (__begin1 + __result);
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_SEARCH_H */
|
||||
|
@ -41,490 +41,489 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
copy_tail(std::pair<_IIter, _IIter> __b,
|
||||
std::pair<_IIter, _IIter> __e, _OutputIterator __r)
|
||||
{
|
||||
if (__b.first != __e.first)
|
||||
{
|
||||
do
|
||||
{
|
||||
*__r++ = *__b.first++;
|
||||
}
|
||||
while (__b.first != __e.first);
|
||||
}
|
||||
else
|
||||
{
|
||||
while (__b.second != __e.second)
|
||||
*__r++ = *__b.second++;
|
||||
}
|
||||
return __r;
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct symmetric_difference_func
|
||||
{
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
_Compare _M_comp;
|
||||
|
||||
template<typename _IIter, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
__copy_tail(std::pair<_IIter, _IIter> __b,
|
||||
std::pair<_IIter, _IIter> __e, _OutputIterator __r)
|
||||
{
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
if (__b.first != __e.first)
|
||||
{
|
||||
do
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__r;
|
||||
*__r++ = *__b.first++;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{
|
||||
*__r = *__c;
|
||||
++__c;
|
||||
++__r;
|
||||
}
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
return std::copy(__c, d, std::copy(__a, __b, __r));
|
||||
}
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
++__a;
|
||||
++__counter;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{
|
||||
++__c;
|
||||
++__counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
|
||||
return __counter + (__b - __a) + (d - __c);
|
||||
}
|
||||
|
||||
_OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return std::copy(__c, d, __out); }
|
||||
|
||||
_OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __difference_func
|
||||
{
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
__difference_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
_Compare _M_comp;
|
||||
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__r;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
return std::copy(__a, __b, __r);
|
||||
}
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
++__a;
|
||||
++__counter;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{ ++__a; ++__c; }
|
||||
}
|
||||
|
||||
return __counter + (__b - __a);
|
||||
}
|
||||
|
||||
inline _OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
|
||||
inline _OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __intersection_func
|
||||
{
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
__intersection_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
_Compare _M_comp;
|
||||
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__c;
|
||||
++__r;
|
||||
}
|
||||
}
|
||||
|
||||
while (__b.first != __e.first);
|
||||
}
|
||||
else
|
||||
{
|
||||
while (__b.second != __e.second)
|
||||
*__r++ = *__b.second++;
|
||||
}
|
||||
return __r;
|
||||
}
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __symmetric_difference_func
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
++__counter;
|
||||
}
|
||||
}
|
||||
__symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
return __counter;
|
||||
}
|
||||
_Compare _M_comp;
|
||||
|
||||
inline _OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (__a != __b && __c != __d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__r;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{
|
||||
*__r = *__c;
|
||||
++__c;
|
||||
++__r;
|
||||
}
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
return std::copy(__c, __d, std::copy(__a, __b, __r));
|
||||
}
|
||||
|
||||
inline _OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
};
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b, _IIter __c, _IIter d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
template<class _IIter, class _OutputIterator, class _Compare>
|
||||
struct __union_func
|
||||
{
|
||||
typedef typename std::iterator_traits<_IIter>::difference_type
|
||||
_DifferenceType;
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
++__a;
|
||||
++__counter;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{
|
||||
++__c;
|
||||
++__counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
|
||||
__union_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
return __counter + (__b - __a) + (d - __c);
|
||||
}
|
||||
|
||||
_Compare _M_comp;
|
||||
_OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return std::copy(__c, d, __out); }
|
||||
|
||||
_OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __difference_func
|
||||
{
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
__difference_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
_Compare _M_comp;
|
||||
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__r;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
}
|
||||
return std::copy(__a, __b, __r);
|
||||
}
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
++__a;
|
||||
++__counter;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{ ++__a; ++__c; }
|
||||
}
|
||||
|
||||
return __counter + (__b - __a);
|
||||
}
|
||||
|
||||
_OutputIterator
|
||||
__first_empty(_IIter, _IIter, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
|
||||
_OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
struct __intersection_func
|
||||
{
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
__intersection_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
_Compare _M_comp;
|
||||
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
|
||||
_OutputIterator __r) const
|
||||
{
|
||||
while (__a != __b && __c != __d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__c;
|
||||
++__r;
|
||||
}
|
||||
}
|
||||
|
||||
return __r;
|
||||
}
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (__a != __b && __c != __d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
++__counter;
|
||||
}
|
||||
}
|
||||
|
||||
return __counter;
|
||||
}
|
||||
|
||||
_OutputIterator
|
||||
__first_empty(_IIter, _IIter, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
|
||||
_OutputIterator
|
||||
__second_empty(_IIter, _IIter, _OutputIterator __out) const
|
||||
{ return __out; }
|
||||
};
|
||||
|
||||
template<class _IIter, class _OutputIterator, class _Compare>
|
||||
struct __union_func
|
||||
{
|
||||
typedef typename std::iterator_traits<_IIter>::difference_type
|
||||
_DifferenceType;
|
||||
|
||||
__union_func(_Compare __comp) : _M_comp(__comp) {}
|
||||
|
||||
_Compare _M_comp;
|
||||
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, const _IIter __b, _IIter __c,
|
||||
const _IIter __d, _OutputIterator __r) const
|
||||
{
|
||||
while (__a != __b && __c != __d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{
|
||||
*__r = *__c;
|
||||
++__c;
|
||||
}
|
||||
else
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
++__r;
|
||||
}
|
||||
return std::copy(__c, __d, std::copy(__a, __b, __r));
|
||||
}
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
|
||||
while (__a != __b && __c != __d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
++__counter;
|
||||
}
|
||||
|
||||
__counter += (__b - __a);
|
||||
__counter += (__d - __c);
|
||||
return __counter;
|
||||
}
|
||||
|
||||
_OutputIterator
|
||||
__first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
|
||||
{ return std::copy(__c, __d, __out); }
|
||||
|
||||
_OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename Operation>
|
||||
_OutputIterator
|
||||
_M_invoke(_IIter __a, const _IIter __b, _IIter __c,
|
||||
const _IIter d, _OutputIterator __r) const
|
||||
__parallel_set_operation(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, Operation __op)
|
||||
{
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
}
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{
|
||||
*__r = *__c;
|
||||
++__c;
|
||||
}
|
||||
else
|
||||
{
|
||||
*__r = *__a;
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
++__r;
|
||||
}
|
||||
return std::copy(__c, d, std::copy(__a, __b, __r));
|
||||
}
|
||||
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2))
|
||||
|
||||
_DifferenceType
|
||||
__count(_IIter __a, _IIter __b,
|
||||
_IIter __c, _IIter d) const
|
||||
{
|
||||
_DifferenceType __counter = 0;
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
while (__a != __b && __c != d)
|
||||
{
|
||||
if (_M_comp(*__a, *__c))
|
||||
{ ++__a; }
|
||||
else if (_M_comp(*__c, *__a))
|
||||
{ ++__c; }
|
||||
else
|
||||
{
|
||||
++__a;
|
||||
++__c;
|
||||
}
|
||||
++__counter;
|
||||
}
|
||||
if (__begin1 == __end1)
|
||||
return __op.__first_empty(__begin2, __end2, __result);
|
||||
|
||||
__counter += (__b - __a);
|
||||
__counter += (d - __c);
|
||||
return __counter;
|
||||
}
|
||||
if (__begin2 == __end2)
|
||||
return __op.__second_empty(__begin1, __end1, __result);
|
||||
|
||||
inline _OutputIterator
|
||||
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
|
||||
{ return std::copy(__c, d, __out); }
|
||||
const _DifferenceType __size = (__end1 - __begin1) + (__end2 - __begin2);
|
||||
|
||||
inline _OutputIterator
|
||||
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
|
||||
{ return std::copy(__a, __b, __out); }
|
||||
};
|
||||
const _IteratorPair __sequence[2] = { std::make_pair(__begin1, __end1),
|
||||
std::make_pair(__begin2, __end2) };
|
||||
_OutputIterator __return_value = __result;
|
||||
_DifferenceType *__borders;
|
||||
_IteratorPair *__block_begins;
|
||||
_DifferenceType* __lengths;
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename Operation>
|
||||
_OutputIterator
|
||||
__parallel_set_operation(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, Operation __op)
|
||||
{
|
||||
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2))
|
||||
_ThreadIndex __num_threads =
|
||||
std::min<_DifferenceType>(__get_max_threads(),
|
||||
std::min(__end1 - __begin1, __end2 - __begin2));
|
||||
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
|
||||
|
||||
if (__begin1 == __end1)
|
||||
return __op.__first_empty(__begin2, __end2, __result);
|
||||
|
||||
if (__begin2 == __end2)
|
||||
return __op.__second_empty(__begin1, __end1, __result);
|
||||
|
||||
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
|
||||
|
||||
const _IteratorPair __sequence[ 2 ] =
|
||||
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) };
|
||||
_OutputIterator return_value = __result;
|
||||
_DifferenceType *__borders;
|
||||
_IteratorPair *__block_begins;
|
||||
_DifferenceType* __lengths;
|
||||
|
||||
_ThreadIndex __num_threads =
|
||||
std::min<_DifferenceType>(__get_max_threads(),
|
||||
std::min(__end1 - __begin1, __end2 - __begin2));
|
||||
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
equally_split(size, __num_threads + 1, __borders);
|
||||
__block_begins = new _IteratorPair[__num_threads + 1];
|
||||
// Very __start.
|
||||
__block_begins[0] = std::make_pair(__begin1, __begin2);
|
||||
__lengths = new _DifferenceType[__num_threads];
|
||||
} //single
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
equally_split(__size, __num_threads + 1, __borders);
|
||||
__block_begins = new _IteratorPair[__num_threads + 1];
|
||||
// Very __start.
|
||||
__block_begins[0] = std::make_pair(__begin1, __begin2);
|
||||
__lengths = new _DifferenceType[__num_threads];
|
||||
} //single
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// _Result from multiseq_partition.
|
||||
_IIter __offset[2];
|
||||
const _DifferenceType __rank = __borders[__iam + 1];
|
||||
// _Result from multiseq_partition.
|
||||
_IIter __offset[2];
|
||||
const _DifferenceType __rank = __borders[__iam + 1];
|
||||
|
||||
multiseq_partition(__sequence, __sequence + 2,
|
||||
__rank, __offset, __op._M_comp);
|
||||
multiseq_partition(__sequence, __sequence + 2,
|
||||
__rank, __offset, __op._M_comp);
|
||||
|
||||
// allowed to read?
|
||||
// together
|
||||
// *(__offset[ 0 ] - 1) == *__offset[ 1 ]
|
||||
if (__offset[ 0 ] != __begin1 && __offset[ 1 ] != __end2
|
||||
&& !__op._M_comp(*(__offset[ 0 ] - 1), *__offset[ 1 ])
|
||||
&& !__op._M_comp(*__offset[ 1 ], *(__offset[ 0 ] - 1)))
|
||||
{
|
||||
// Avoid split between globally equal elements: move one to
|
||||
// front in first sequence.
|
||||
--__offset[ 0 ];
|
||||
}
|
||||
// allowed to read?
|
||||
// together
|
||||
// *(__offset[ 0 ] - 1) == *__offset[ 1 ]
|
||||
if (__offset[ 0 ] != __begin1 && __offset[1] != __end2
|
||||
&& !__op._M_comp(*(__offset[0] - 1), *__offset[1])
|
||||
&& !__op._M_comp(*__offset[1], *(__offset[0] - 1)))
|
||||
{
|
||||
// Avoid split between globally equal elements: move one to
|
||||
// front in first sequence.
|
||||
--__offset[0];
|
||||
}
|
||||
|
||||
_IteratorPair block_end = __block_begins[ __iam + 1 ] =
|
||||
_IteratorPair(__offset[ 0 ], __offset[ 1 ]);
|
||||
_IteratorPair __block_end = __block_begins[__iam + 1] =
|
||||
_IteratorPair(__offset[0], __offset[1]);
|
||||
|
||||
// Make sure all threads have their block_begin result written out.
|
||||
// Make sure all threads have their block_begin result written out.
|
||||
# pragma omp barrier
|
||||
|
||||
_IteratorPair __block_begin = __block_begins[ __iam ];
|
||||
_IteratorPair __block_begin = __block_begins[__iam];
|
||||
|
||||
// Begin working for the first block, while the others except
|
||||
// the last start to count.
|
||||
if (__iam == 0)
|
||||
{
|
||||
// The first thread can copy already.
|
||||
__lengths[ __iam ] =
|
||||
__op._M_invoke(__block_begin.first, block_end.first,
|
||||
__block_begin.second, block_end.second, __result)
|
||||
- __result;
|
||||
}
|
||||
else
|
||||
{
|
||||
__lengths[ __iam ] =
|
||||
__op.__count(__block_begin.first, block_end.first,
|
||||
__block_begin.second, block_end.second);
|
||||
}
|
||||
// Begin working for the first block, while the others except
|
||||
// the last start to count.
|
||||
if (__iam == 0)
|
||||
{
|
||||
// The first thread can copy already.
|
||||
__lengths[ __iam ] =
|
||||
__op._M_invoke(__block_begin.first, __block_end.first,
|
||||
__block_begin.second, __block_end.second,
|
||||
__result) - __result;
|
||||
}
|
||||
else
|
||||
{
|
||||
__lengths[ __iam ] =
|
||||
__op.__count(__block_begin.first, __block_end.first,
|
||||
__block_begin.second, __block_end.second);
|
||||
}
|
||||
|
||||
// Make sure everyone wrote their lengths.
|
||||
// Make sure everyone wrote their lengths.
|
||||
# pragma omp barrier
|
||||
|
||||
_OutputIterator __r = __result;
|
||||
_OutputIterator __r = __result;
|
||||
|
||||
if (__iam == 0)
|
||||
{
|
||||
// Do the last block.
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
__r += __lengths[__i];
|
||||
if (__iam == 0)
|
||||
{
|
||||
// Do the last block.
|
||||
for (int __i = 0; __i < __num_threads; ++__i)
|
||||
__r += __lengths[__i];
|
||||
|
||||
__block_begin = __block_begins[__num_threads];
|
||||
__block_begin = __block_begins[__num_threads];
|
||||
|
||||
// Return the result iterator of the last block.
|
||||
return_value = __op._M_invoke(
|
||||
__block_begin.first, __end1, __block_begin.second, __end2, __r);
|
||||
// Return the result iterator of the last block.
|
||||
__return_value =
|
||||
__op._M_invoke(__block_begin.first, __end1,
|
||||
__block_begin.second, __end2, __r);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int __i = 0; __i < __iam; ++__i)
|
||||
__r += __lengths[ __i ];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int __i = 0; __i < __iam; ++__i)
|
||||
__r += __lengths[ __i ];
|
||||
|
||||
// Reset begins for copy pass.
|
||||
__op._M_invoke(__block_begin.first, block_end.first,
|
||||
__block_begin.second, block_end.second, __r);
|
||||
}
|
||||
}
|
||||
return return_value;
|
||||
}
|
||||
// Reset begins for copy pass.
|
||||
__op._M_invoke(__block_begin.first, __block_end.first,
|
||||
__block_begin.second, __block_end.second, __r);
|
||||
}
|
||||
}
|
||||
return __return_value;
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_union(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
|
||||
__result,
|
||||
__union_func< _IIter, _OutputIterator,
|
||||
_Compare>(__comp));
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_union(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare _M_comp)
|
||||
{
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
|
||||
__result, __union_func< _IIter, _OutputIterator, _Compare>(_M_comp));
|
||||
}
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
|
||||
__result,
|
||||
__intersection_func<_IIter,
|
||||
_OutputIterator, _Compare>(__comp));
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_difference(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare _M_comp)
|
||||
{
|
||||
return __parallel_set_operation(
|
||||
__begin1, __end1, __begin2, __end2, __result,
|
||||
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp));
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_difference(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result, _Compare _M_comp)
|
||||
{
|
||||
return __parallel_set_operation(
|
||||
__begin1, __end1, __begin2, __end2, __result,
|
||||
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp));
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result,
|
||||
_Compare _M_comp)
|
||||
{
|
||||
return __parallel_set_operation(
|
||||
__begin1, __end1, __begin2, __end2, __result,
|
||||
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
|
||||
(_M_comp));
|
||||
}
|
||||
_OutputIterator __result, _Compare __comp)
|
||||
{
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
|
||||
__result,
|
||||
__difference_func<_IIter,
|
||||
_OutputIterator, _Compare>(__comp));
|
||||
}
|
||||
|
||||
template<typename _IIter,
|
||||
typename _OutputIterator,
|
||||
typename _Compare>
|
||||
inline _OutputIterator
|
||||
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
|
||||
_IIter __begin2, _IIter __end2,
|
||||
_OutputIterator __result,
|
||||
_Compare __comp)
|
||||
{
|
||||
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
|
||||
__result,
|
||||
__symmetric_difference_func<_IIter,
|
||||
_OutputIterator, _Compare>(__comp));
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */
|
||||
|
@ -54,12 +54,12 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
//prototype
|
||||
//prototype
|
||||
template<bool __stable, typename _RAIter,
|
||||
typename _Compare, typename _Parallelism>
|
||||
void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _Parallelism __parallelism);
|
||||
void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, _Parallelism __parallelism);
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort, splitting variant at run-time,
|
||||
@ -70,19 +70,19 @@ namespace __gnu_parallel
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
if(_Settings::get().sort_splitting == EXACT)
|
||||
parallel_sort_mwms<__stable, true>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
else
|
||||
parallel_sort_mwms<__stable, false>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
if(_Settings::get().sort_splitting == EXACT)
|
||||
parallel_sort_mwms<__stable, true>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
else
|
||||
parallel_sort_mwms<__stable, false>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort with exact splitting,
|
||||
@ -93,15 +93,16 @@ namespace __gnu_parallel
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_exact_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
multiway_mergesort_exact_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
parallel_sort_mwms<__stable, true>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort with splitting by sampling,
|
||||
@ -112,15 +113,16 @@ namespace __gnu_parallel
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, multiway_mergesort_sampling_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp,
|
||||
multiway_mergesort_sampling_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
parallel_sort_mwms<__stable, false>
|
||||
parallel_sort_mwms<__stable, false>
|
||||
(__begin, __end, __comp, __parallelism.__get_num_threads());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose quicksort for parallel sorting.
|
||||
@ -130,17 +132,17 @@ namespace __gnu_parallel
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, quicksort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, quicksort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
|
||||
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
|
||||
|
||||
__parallel_sort_qs(__begin, __end, __comp,
|
||||
__parallelism.__get_num_threads());
|
||||
}
|
||||
__parallel_sort_qs(__begin, __end, __comp,
|
||||
__parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose balanced quicksort for parallel sorting.
|
||||
@ -150,19 +152,18 @@ namespace __gnu_parallel
|
||||
* @param __stable Sort __stable.
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, balanced_quicksort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, balanced_quicksort_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
|
||||
|
||||
__parallel_sort_qsb(__begin, __end, __comp,
|
||||
__parallelism.__get_num_threads());
|
||||
}
|
||||
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
|
||||
|
||||
__parallel_sort_qsb(__begin, __end, __comp,
|
||||
__parallelism.__get_num_threads());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose multiway mergesort with exact splitting,
|
||||
@ -173,17 +174,16 @@ namespace __gnu_parallel
|
||||
* @callgraph
|
||||
*/
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, default_parallel_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
__parallel_sort<__stable>
|
||||
(__begin, __end, __comp,
|
||||
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
|
||||
}
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, default_parallel_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
__parallel_sort<__stable>
|
||||
(__begin, __end, __comp,
|
||||
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Choose a parallel sorting algorithm.
|
||||
@ -196,7 +196,7 @@ namespace __gnu_parallel
|
||||
template<bool __stable, typename _RAIter, typename _Compare>
|
||||
inline void
|
||||
__parallel_sort(_RAIter __begin, _RAIter __end,
|
||||
_Compare __comp, parallel_tag __parallelism)
|
||||
_Compare __comp, parallel_tag __parallelism)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
|
@ -51,20 +51,16 @@ namespace __gnu_parallel
|
||||
public:
|
||||
/** @brief Default constructor. Use default number of threads. */
|
||||
parallel_tag()
|
||||
{
|
||||
this->_M_num_threads = 0;
|
||||
}
|
||||
{ _M_num_threads = 0; }
|
||||
|
||||
/** @brief Default constructor. Recommend number of threads to use.
|
||||
* @param __num_threads Desired number of threads. */
|
||||
parallel_tag(_ThreadIndex __num_threads)
|
||||
{
|
||||
this->_M_num_threads = __num_threads;
|
||||
}
|
||||
{ _M_num_threads = __num_threads; }
|
||||
|
||||
/** @brief Find out desired number of threads.
|
||||
* @return Desired number of threads. */
|
||||
inline _ThreadIndex __get_num_threads()
|
||||
_ThreadIndex __get_num_threads()
|
||||
{
|
||||
if(_M_num_threads == 0)
|
||||
return omp_get_max_threads();
|
||||
@ -74,19 +70,17 @@ namespace __gnu_parallel
|
||||
|
||||
/** @brief Set the desired number of threads.
|
||||
* @param __num_threads Desired number of threads. */
|
||||
inline void set_num_threads(_ThreadIndex __num_threads)
|
||||
{
|
||||
this->_M_num_threads = __num_threads;
|
||||
}
|
||||
void set_num_threads(_ThreadIndex __num_threads)
|
||||
{ _M_num_threads = __num_threads; }
|
||||
};
|
||||
|
||||
/** @brief Recommends parallel execution using the
|
||||
default parallel algorithm. */
|
||||
struct default_parallel_tag : public parallel_tag
|
||||
{
|
||||
default_parallel_tag() { }
|
||||
default_parallel_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
default_parallel_tag() { }
|
||||
default_parallel_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Recommends parallel execution using dynamic
|
||||
@ -114,18 +108,18 @@ namespace __gnu_parallel
|
||||
* with exact splitting, at compile time. */
|
||||
struct exact_tag : public parallel_tag
|
||||
{
|
||||
exact_tag() { }
|
||||
exact_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
exact_tag() { }
|
||||
exact_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel merging
|
||||
* with exact splitting, at compile time. */
|
||||
struct sampling_tag : public parallel_tag
|
||||
{
|
||||
sampling_tag() { }
|
||||
sampling_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
sampling_tag() { }
|
||||
sampling_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
|
||||
@ -133,45 +127,45 @@ namespace __gnu_parallel
|
||||
* at compile time. */
|
||||
struct multiway_mergesort_tag : public parallel_tag
|
||||
{
|
||||
multiway_mergesort_tag() { }
|
||||
multiway_mergesort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
multiway_mergesort_tag() { }
|
||||
multiway_mergesort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using multiway mergesort
|
||||
* with exact splitting at compile time. */
|
||||
struct multiway_mergesort_exact_tag : public parallel_tag
|
||||
{
|
||||
multiway_mergesort_exact_tag() { }
|
||||
multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
multiway_mergesort_exact_tag() { }
|
||||
multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using multiway mergesort
|
||||
* with splitting by sampling at compile time. */
|
||||
struct multiway_mergesort_sampling_tag : public parallel_tag
|
||||
{
|
||||
multiway_mergesort_sampling_tag() { }
|
||||
multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
multiway_mergesort_sampling_tag() { }
|
||||
multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using unbalanced quicksort
|
||||
* at compile time. */
|
||||
struct quicksort_tag : public parallel_tag
|
||||
{
|
||||
quicksort_tag() { }
|
||||
quicksort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
quicksort_tag() { }
|
||||
quicksort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
/** @brief Forces parallel sorting using balanced quicksort
|
||||
* at compile time. */
|
||||
struct balanced_quicksort_tag : public parallel_tag
|
||||
{
|
||||
balanced_quicksort_tag() { }
|
||||
balanced_quicksort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
balanced_quicksort_tag() { }
|
||||
balanced_quicksort_tag(_ThreadIndex __num_threads)
|
||||
: parallel_tag(__num_threads) { }
|
||||
};
|
||||
|
||||
|
||||
|
@ -37,155 +37,160 @@
|
||||
|
||||
namespace __gnu_parallel
|
||||
{
|
||||
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate.
|
||||
* @param __first Begin iterator of input sequence.
|
||||
* @param __last End iterator of input sequence.
|
||||
* @param __result Begin iterator of result __sequence.
|
||||
* @param __binary_pred Equality predicate.
|
||||
* @return End iterator of result __sequence. */
|
||||
template<typename _IIter,
|
||||
class _OutputIterator,
|
||||
class _BinaryPredicate>
|
||||
_OutputIterator
|
||||
__parallel_unique_copy(_IIter __first, _IIter __last,
|
||||
_OutputIterator __result,
|
||||
_BinaryPredicate __binary_pred)
|
||||
{
|
||||
_GLIBCXX_CALL(__last - __first)
|
||||
|
||||
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate.
|
||||
* @param __first Begin iterator of input sequence.
|
||||
* @param __last End iterator of input sequence.
|
||||
* @param __result Begin iterator of result __sequence.
|
||||
* @param __binary_pred Equality predicate.
|
||||
* @return End iterator of result __sequence. */
|
||||
template<typename _IIter,
|
||||
class _OutputIterator,
|
||||
class _BinaryPredicate>
|
||||
_OutputIterator
|
||||
__parallel_unique_copy(_IIter __first, _IIter __last,
|
||||
_OutputIterator __result, _BinaryPredicate __binary_pred)
|
||||
{
|
||||
_GLIBCXX_CALL(__last - __first)
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
typedef std::iterator_traits<_IIter> _TraitsType;
|
||||
typedef typename _TraitsType::value_type _ValueType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
_DifferenceType __size = __last - __first;
|
||||
|
||||
_DifferenceType size = __last - __first;
|
||||
if (__size == 0)
|
||||
return __result;
|
||||
|
||||
if (size == 0)
|
||||
return __result;
|
||||
// Let the first thread process two parts.
|
||||
_DifferenceType *__counter;
|
||||
_DifferenceType *__borders;
|
||||
|
||||
// Let the first thread process two parts.
|
||||
_DifferenceType *__counter;
|
||||
_DifferenceType *__borders;
|
||||
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
// First part contains at least one element.
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
_ThreadIndex __num_threads = __get_max_threads();
|
||||
// First part contains at least one element.
|
||||
# pragma omp parallel num_threads(__num_threads)
|
||||
{
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
equally_split(__size, __num_threads + 1, __borders);
|
||||
__counter = new _DifferenceType[__num_threads + 1];
|
||||
}
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
_DifferenceType __begin, __end;
|
||||
|
||||
// Check for length without duplicates
|
||||
// Needed for position in output
|
||||
_DifferenceType __i = 0;
|
||||
_OutputIterator __out = __result;
|
||||
|
||||
if (__iam == 0)
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
__borders = new _DifferenceType[__num_threads + 2];
|
||||
equally_split(size, __num_threads + 1, __borders);
|
||||
__counter = new _DifferenceType[__num_threads + 1];
|
||||
__begin = __borders[0] + 1; // == 1
|
||||
__end = __borders[__iam + 1];
|
||||
|
||||
++__i;
|
||||
*__out++ = *__first;
|
||||
|
||||
for (_IIter __iter = __first + __begin; __iter < __first + __end;
|
||||
++__iter)
|
||||
{
|
||||
if (!__binary_pred(*__iter, *(__iter - 1)))
|
||||
{
|
||||
++__i;
|
||||
*__out++ = *__iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__begin = __borders[__iam]; //one part
|
||||
__end = __borders[__iam + 1];
|
||||
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
_DifferenceType __begin, __end;
|
||||
|
||||
// Check for length without duplicates
|
||||
// Needed for position in output
|
||||
_DifferenceType __i = 0;
|
||||
_OutputIterator __out = __result;
|
||||
|
||||
if (__iam == 0)
|
||||
{
|
||||
__begin = __borders[0] + 1; // == 1
|
||||
__end = __borders[__iam + 1];
|
||||
|
||||
++__i;
|
||||
*__out++ = *__first;
|
||||
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (!__binary_pred(*iter, *(iter-1)))
|
||||
{
|
||||
for (_IIter __iter = __first + __begin; __iter < __first + __end;
|
||||
++__iter)
|
||||
{
|
||||
if (!__binary_pred(*__iter, *(__iter - 1)))
|
||||
++__i;
|
||||
*__out++ = *iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__begin = __borders[__iam]; //one part
|
||||
__end = __borders[__iam + 1];
|
||||
}
|
||||
}
|
||||
__counter[__iam] = __i;
|
||||
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (!__binary_pred(*iter, *(iter - 1)))
|
||||
++__i;
|
||||
}
|
||||
}
|
||||
__counter[__iam] = __i;
|
||||
// Last part still untouched.
|
||||
_DifferenceType __begin_output;
|
||||
|
||||
// Last part still untouched.
|
||||
_DifferenceType __begin_output;
|
||||
# pragma omp barrier
|
||||
|
||||
# pragma omp barrier
|
||||
// Store result in output on calculated positions.
|
||||
__begin_output = 0;
|
||||
|
||||
// Store result in output on calculated positions.
|
||||
__begin_output = 0;
|
||||
if (__iam == 0)
|
||||
{
|
||||
for (int __t = 0; __t < __num_threads; ++__t)
|
||||
__begin_output += __counter[__t];
|
||||
|
||||
if (__iam == 0)
|
||||
{
|
||||
for (int __t = 0; __t < __num_threads; ++__t)
|
||||
__begin_output += __counter[__t];
|
||||
__i = 0;
|
||||
|
||||
__i = 0;
|
||||
_OutputIterator __iter_out = __result + __begin_output;
|
||||
|
||||
_OutputIterator __iter_out = __result + __begin_output;
|
||||
__begin = __borders[__num_threads];
|
||||
__end = __size;
|
||||
|
||||
__begin = __borders[__num_threads];
|
||||
__end = size;
|
||||
for (_IIter __iter = __first + __begin; __iter < __first + __end;
|
||||
++__iter)
|
||||
{
|
||||
if (__iter == __first
|
||||
|| !__binary_pred(*__iter, *(__iter - 1)))
|
||||
{
|
||||
++__i;
|
||||
*__iter_out++ = *__iter;
|
||||
}
|
||||
}
|
||||
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (iter == __first || !__binary_pred(*iter, *(iter - 1)))
|
||||
{
|
||||
++__i;
|
||||
*__iter_out++ = *iter;
|
||||
}
|
||||
}
|
||||
__counter[__num_threads] = __i;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int __t = 0; __t < __iam; __t++)
|
||||
__begin_output += __counter[__t];
|
||||
|
||||
__counter[__num_threads] = __i;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int __t = 0; __t < __iam; __t++)
|
||||
__begin_output += __counter[__t];
|
||||
_OutputIterator __iter_out = __result + __begin_output;
|
||||
for (_IIter __iter = __first + __begin; __iter < __first + __end;
|
||||
++__iter)
|
||||
{
|
||||
if (!__binary_pred(*__iter, *(__iter - 1)))
|
||||
*__iter_out++ = *__iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_OutputIterator __iter_out = __result + __begin_output;
|
||||
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
|
||||
{
|
||||
if (!__binary_pred(*iter, *(iter-1)))
|
||||
*__iter_out++ = *iter;
|
||||
}
|
||||
}
|
||||
_DifferenceType __end_output = 0;
|
||||
for (int __t = 0; __t < __num_threads + 1; __t++)
|
||||
__end_output += __counter[__t];
|
||||
|
||||
delete[] __borders;
|
||||
|
||||
return __result + __end_output;
|
||||
}
|
||||
|
||||
_DifferenceType __end_output = 0;
|
||||
for (int __t = 0; __t < __num_threads + 1; __t++)
|
||||
__end_output += __counter[__t];
|
||||
|
||||
delete[] __borders;
|
||||
|
||||
return __result + __end_output;
|
||||
}
|
||||
|
||||
/** @brief Parallel std::unique_copy(), without explicit equality predicate
|
||||
* @param __first Begin iterator of input sequence.
|
||||
* @param __last End iterator of input sequence.
|
||||
* @param __result Begin iterator of result __sequence.
|
||||
* @return End iterator of result __sequence. */
|
||||
template<typename _IIter, class _OutputIterator>
|
||||
inline _OutputIterator
|
||||
__parallel_unique_copy(_IIter __first, _IIter __last,
|
||||
_OutputIterator __result)
|
||||
{
|
||||
typedef typename std::iterator_traits<_IIter>::value_type
|
||||
_ValueType;
|
||||
return __parallel_unique_copy(__first, __last, __result,
|
||||
std::equal_to<_ValueType>());
|
||||
}
|
||||
/** @brief Parallel std::unique_copy(), without explicit equality predicate
|
||||
* @param __first Begin iterator of input sequence.
|
||||
* @param __last End iterator of input sequence.
|
||||
* @param __result Begin iterator of result __sequence.
|
||||
* @return End iterator of result __sequence. */
|
||||
template<typename _IIter, class _OutputIterator>
|
||||
inline _OutputIterator
|
||||
__parallel_unique_copy(_IIter __first, _IIter __last,
|
||||
_OutputIterator __result)
|
||||
{
|
||||
typedef typename std::iterator_traits<_IIter>::value_type
|
||||
_ValueType;
|
||||
return __parallel_unique_copy(__first, __last, __result,
|
||||
std::equal_to<_ValueType>());
|
||||
}
|
||||
|
||||
}//namespace __gnu_parallel
|
||||
|
||||
|
@ -49,261 +49,264 @@ namespace __gnu_parallel
|
||||
|
||||
#define _GLIBCXX_JOB_VOLATILE volatile
|
||||
|
||||
/** @brief One __job for a certain thread. */
|
||||
template<typename _DifferenceTp>
|
||||
struct _Job
|
||||
{
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
/** @brief One __job for a certain thread. */
|
||||
template<typename _DifferenceTp>
|
||||
struct _Job
|
||||
{
|
||||
typedef _DifferenceTp _DifferenceType;
|
||||
|
||||
/** @brief First element.
|
||||
*
|
||||
* Changed by owning and stealing thread. By stealing thread,
|
||||
* always incremented. */
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_first;
|
||||
/** @brief First element.
|
||||
*
|
||||
* Changed by owning and stealing thread. By stealing thread,
|
||||
* always incremented. */
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_first;
|
||||
|
||||
/** @brief Last element.
|
||||
*
|
||||
* Changed by owning thread only. */
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_last;
|
||||
/** @brief Last element.
|
||||
*
|
||||
* Changed by owning thread only. */
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_last;
|
||||
|
||||
/** @brief Number of elements, i.e. @__c _M_last-_M_first+1.
|
||||
*
|
||||
* Changed by owning thread only. */
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_load;
|
||||
};
|
||||
/** @brief Number of elements, i.e. @__c _M_last-_M_first+1.
|
||||
*
|
||||
* Changed by owning thread only. */
|
||||
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_load;
|
||||
};
|
||||
|
||||
/** @brief Work stealing algorithm for random access iterators.
|
||||
*
|
||||
* Uses O(1) additional memory. Synchronization at job lists is
|
||||
* done with atomic operations.
|
||||
* @param __begin Begin iterator of element sequence.
|
||||
* @param __end End iterator of element sequence.
|
||||
* @param __op User-supplied functor (comparator, predicate, adding
|
||||
* functor, ...).
|
||||
* @param __f Functor to "process" an element with __op (depends on
|
||||
* desired functionality, e. g. for std::for_each(), ...).
|
||||
* @param __r Functor to "add" a single __result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param __base Base value for reduction.
|
||||
* @param __output Pointer to position where final result is written to
|
||||
* @param __bound Maximum number of elements processed (e. g. for
|
||||
* std::count_n()).
|
||||
* @return User-supplied functor (that may contain a part of the result).
|
||||
*/
|
||||
template<typename _RAIter,
|
||||
typename _Op,
|
||||
typename _Fu,
|
||||
typename _Red,
|
||||
typename _Result>
|
||||
_Op
|
||||
__for_each_template_random_access_workstealing(
|
||||
_RAIter __begin, _RAIter __end, _Op __op, _Fu& __f, _Red __r,
|
||||
_Result __base, _Result& __output,
|
||||
typename std::iterator_traits<_RAIter>::difference_type __bound)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
/** @brief Work stealing algorithm for random access iterators.
|
||||
*
|
||||
* Uses O(1) additional memory. Synchronization at job lists is
|
||||
* done with atomic operations.
|
||||
* @param __begin Begin iterator of element sequence.
|
||||
* @param __end End iterator of element sequence.
|
||||
* @param __op User-supplied functor (comparator, predicate, adding
|
||||
* functor, ...).
|
||||
* @param __f Functor to "process" an element with __op (depends on
|
||||
* desired functionality, e. g. for std::for_each(), ...).
|
||||
* @param __r Functor to "add" a single __result to the already
|
||||
* processed elements (depends on functionality).
|
||||
* @param __base Base value for reduction.
|
||||
* @param __output Pointer to position where final result is written to
|
||||
* @param __bound Maximum number of elements processed (e. g. for
|
||||
* std::count_n()).
|
||||
* @return User-supplied functor (that may contain a part of the result).
|
||||
*/
|
||||
template<typename _RAIter,
|
||||
typename _Op,
|
||||
typename _Fu,
|
||||
typename _Red,
|
||||
typename _Result>
|
||||
_Op
|
||||
__for_each_template_random_access_workstealing(_RAIter __begin,
|
||||
_RAIter __end, _Op __op,
|
||||
_Fu& __f, _Red __r,
|
||||
_Result __base,
|
||||
_Result& __output,
|
||||
typename std::iterator_traits<_RAIter>::difference_type __bound)
|
||||
{
|
||||
_GLIBCXX_CALL(__end - __begin)
|
||||
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
const _Settings& __s = _Settings::get();
|
||||
typedef std::iterator_traits<_RAIter> _TraitsType;
|
||||
typedef typename _TraitsType::difference_type _DifferenceType;
|
||||
|
||||
_DifferenceType __chunk_size =
|
||||
static_cast<_DifferenceType>(__s.workstealing_chunk_size);
|
||||
const _Settings& __s = _Settings::get();
|
||||
|
||||
// How many jobs?
|
||||
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
|
||||
_DifferenceType __chunk_size =
|
||||
static_cast<_DifferenceType>(__s.workstealing_chunk_size);
|
||||
|
||||
// To avoid false sharing in a cache line.
|
||||
const int __stride =
|
||||
__s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
|
||||
// How many jobs?
|
||||
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
|
||||
|
||||
// Total number of threads currently working.
|
||||
_ThreadIndex __busy = 0;
|
||||
// To avoid false sharing in a cache line.
|
||||
const int __stride = (__s.cache_line_size * 10
|
||||
/ sizeof(_Job<_DifferenceType>) + 1);
|
||||
|
||||
_Job<_DifferenceType> *__job;
|
||||
// Total number of threads currently working.
|
||||
_ThreadIndex __busy = 0;
|
||||
|
||||
omp_lock_t __output_lock;
|
||||
omp_init_lock(&__output_lock);
|
||||
_Job<_DifferenceType> *__job;
|
||||
|
||||
// Write base value to output.
|
||||
__output = __base;
|
||||
omp_lock_t __output_lock;
|
||||
omp_init_lock(&__output_lock);
|
||||
|
||||
// No more threads than jobs, at least one thread.
|
||||
_ThreadIndex __num_threads =
|
||||
__gnu_parallel::max<_ThreadIndex>(1,
|
||||
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
|
||||
// Write base value to output.
|
||||
__output = __base;
|
||||
|
||||
# pragma omp parallel shared(__busy) num_threads(__num_threads)
|
||||
// No more threads than jobs, at least one thread.
|
||||
_ThreadIndex __num_threads = __gnu_parallel::max<_ThreadIndex>
|
||||
(1, __gnu_parallel::min<_DifferenceType>(__length,
|
||||
__get_max_threads()));
|
||||
|
||||
# pragma omp parallel shared(__busy) num_threads(__num_threads)
|
||||
{
|
||||
|
||||
# pragma omp single
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
{
|
||||
__num_threads = omp_get_num_threads();
|
||||
|
||||
// Create job description array.
|
||||
__job = new _Job<_DifferenceType>[__num_threads * __stride];
|
||||
}
|
||||
// Create job description array.
|
||||
__job = new _Job<_DifferenceType>[__num_threads * __stride];
|
||||
}
|
||||
|
||||
// Initialization phase.
|
||||
// Initialization phase.
|
||||
|
||||
// Flags for every thread if it is doing productive work.
|
||||
bool __iam_working = false;
|
||||
// Flags for every thread if it is doing productive work.
|
||||
bool __iam_working = false;
|
||||
|
||||
// Thread id.
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
// Thread id.
|
||||
_ThreadIndex __iam = omp_get_thread_num();
|
||||
|
||||
// This job.
|
||||
_Job<_DifferenceType>& __my_job = __job[__iam * __stride];
|
||||
// This job.
|
||||
_Job<_DifferenceType>& __my_job = __job[__iam * __stride];
|
||||
|
||||
// Random number (for work stealing).
|
||||
_ThreadIndex __victim;
|
||||
// Random number (for work stealing).
|
||||
_ThreadIndex __victim;
|
||||
|
||||
// Local value for reduction.
|
||||
_Result __result = _Result();
|
||||
// Local value for reduction.
|
||||
_Result __result = _Result();
|
||||
|
||||
// Number of elements to steal in one attempt.
|
||||
_DifferenceType __steal;
|
||||
// Number of elements to steal in one attempt.
|
||||
_DifferenceType __steal;
|
||||
|
||||
// Every thread has its own random number generator
|
||||
// (modulo __num_threads).
|
||||
_RandomNumber rand_gen(__iam, __num_threads);
|
||||
// Every thread has its own random number generator
|
||||
// (modulo __num_threads).
|
||||
_RandomNumber __rand_gen(__iam, __num_threads);
|
||||
|
||||
// This thread is currently working.
|
||||
// This thread is currently working.
|
||||
# pragma omp atomic
|
||||
++__busy;
|
||||
++__busy;
|
||||
|
||||
__iam_working = true;
|
||||
__iam_working = true;
|
||||
|
||||
// How many jobs per thread? last thread gets the rest.
|
||||
__my_job._M_first =
|
||||
static_cast<_DifferenceType>(__iam * (__length / __num_threads));
|
||||
// How many jobs per thread? last thread gets the rest.
|
||||
__my_job._M_first = static_cast<_DifferenceType>
|
||||
(__iam * (__length / __num_threads));
|
||||
|
||||
__my_job._M_last = (__iam == (__num_threads - 1)) ?
|
||||
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1);
|
||||
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
|
||||
__my_job._M_last = (__iam == (__num_threads - 1)
|
||||
? (__length - 1)
|
||||
: ((__iam + 1) * (__length / __num_threads) - 1));
|
||||
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
|
||||
|
||||
// Init result with _M_first value (to have a base value for reduction)
|
||||
if (__my_job._M_first <= __my_job._M_last)
|
||||
{
|
||||
// Cannot use volatile variable directly.
|
||||
_DifferenceType __my_first = __my_job._M_first;
|
||||
__result = __f(__op, __begin + __my_first);
|
||||
++__my_job._M_first;
|
||||
--__my_job._M_load;
|
||||
}
|
||||
// Init result with _M_first value (to have a base value for reduction)
|
||||
if (__my_job._M_first <= __my_job._M_last)
|
||||
{
|
||||
// Cannot use volatile variable directly.
|
||||
_DifferenceType __my_first = __my_job._M_first;
|
||||
__result = __f(__op, __begin + __my_first);
|
||||
++__my_job._M_first;
|
||||
--__my_job._M_load;
|
||||
}
|
||||
|
||||
_RAIter __current;
|
||||
_RAIter __current;
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
// Actual work phase
|
||||
// Work on own or stolen current start
|
||||
while (__busy > 0)
|
||||
{
|
||||
// Work until no productive thread left.
|
||||
// Actual work phase
|
||||
// Work on own or stolen current start
|
||||
while (__busy > 0)
|
||||
{
|
||||
// Work until no productive thread left.
|
||||
# pragma omp flush(__busy)
|
||||
|
||||
// Thread has own work to do
|
||||
while (__my_job._M_first <= __my_job._M_last)
|
||||
{
|
||||
// fetch-and-add call
|
||||
// Reserve current job block (size __chunk_size) in my queue.
|
||||
_DifferenceType __current_job =
|
||||
__fetch_and_add<_DifferenceType>(
|
||||
&(__my_job._M_first), __chunk_size);
|
||||
// Thread has own work to do
|
||||
while (__my_job._M_first <= __my_job._M_last)
|
||||
{
|
||||
// fetch-and-add call
|
||||
// Reserve current job block (size __chunk_size) in my queue.
|
||||
_DifferenceType __current_job =
|
||||
__fetch_and_add<_DifferenceType>(&(__my_job._M_first),
|
||||
__chunk_size);
|
||||
|
||||
// Update _M_load, to make the three values consistent,
|
||||
// _M_first might have been changed in the meantime
|
||||
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
|
||||
for (_DifferenceType __job_counter = 0;
|
||||
__job_counter < __chunk_size
|
||||
&& __current_job <= __my_job._M_last;
|
||||
++__job_counter)
|
||||
{
|
||||
// Yes: process it!
|
||||
__current = __begin + __current_job;
|
||||
++__current_job;
|
||||
// Update _M_load, to make the three values consistent,
|
||||
// _M_first might have been changed in the meantime
|
||||
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
|
||||
for (_DifferenceType __job_counter = 0;
|
||||
__job_counter < __chunk_size
|
||||
&& __current_job <= __my_job._M_last;
|
||||
++__job_counter)
|
||||
{
|
||||
// Yes: process it!
|
||||
__current = __begin + __current_job;
|
||||
++__current_job;
|
||||
|
||||
// Do actual work.
|
||||
__result = __r(__result, __f(__op, __current));
|
||||
}
|
||||
// Do actual work.
|
||||
__result = __r(__result, __f(__op, __current));
|
||||
}
|
||||
|
||||
# pragma omp flush(__busy)
|
||||
}
|
||||
}
|
||||
|
||||
// After reaching this point, a thread's __job list is empty.
|
||||
if (__iam_working)
|
||||
{
|
||||
// This thread no longer has work.
|
||||
// After reaching this point, a thread's __job list is empty.
|
||||
if (__iam_working)
|
||||
{
|
||||
// This thread no longer has work.
|
||||
# pragma omp atomic
|
||||
--__busy;
|
||||
--__busy;
|
||||
|
||||
__iam_working = false;
|
||||
}
|
||||
__iam_working = false;
|
||||
}
|
||||
|
||||
_DifferenceType __supposed_first, __supposed_last, __supposed_load;
|
||||
do
|
||||
{
|
||||
// Find random nonempty deque (not own), do consistency check.
|
||||
__yield();
|
||||
_DifferenceType __supposed_first, __supposed_last,
|
||||
__supposed_load;
|
||||
do
|
||||
{
|
||||
// Find random nonempty deque (not own), do consistency check.
|
||||
__yield();
|
||||
# pragma omp flush(__busy)
|
||||
__victim = rand_gen();
|
||||
__supposed_first = __job[__victim * __stride]._M_first;
|
||||
__supposed_last = __job[__victim * __stride]._M_last;
|
||||
__supposed_load = __job[__victim * __stride]._M_load;
|
||||
}
|
||||
while (__busy > 0
|
||||
&& ((__supposed_load <= 0)
|
||||
|| ((__supposed_first + __supposed_load - 1)
|
||||
!= __supposed_last)));
|
||||
__victim = __rand_gen();
|
||||
__supposed_first = __job[__victim * __stride]._M_first;
|
||||
__supposed_last = __job[__victim * __stride]._M_last;
|
||||
__supposed_load = __job[__victim * __stride]._M_load;
|
||||
}
|
||||
while (__busy > 0
|
||||
&& ((__supposed_load <= 0)
|
||||
|| ((__supposed_first + __supposed_load - 1)
|
||||
!= __supposed_last)));
|
||||
|
||||
if (__busy == 0)
|
||||
break;
|
||||
if (__busy == 0)
|
||||
break;
|
||||
|
||||
if (__supposed_load > 0)
|
||||
{
|
||||
// Has work and work to do.
|
||||
// Number of elements to steal (at least one).
|
||||
__steal = (__supposed_load < 2) ? 1 : __supposed_load / 2;
|
||||
if (__supposed_load > 0)
|
||||
{
|
||||
// Has work and work to do.
|
||||
// Number of elements to steal (at least one).
|
||||
__steal = (__supposed_load < 2) ? 1 : __supposed_load / 2;
|
||||
|
||||
// Push __victim's current start forward.
|
||||
_DifferenceType __stolen_first =
|
||||
__fetch_and_add<_DifferenceType>(
|
||||
&(__job[__victim * __stride]._M_first), __steal);
|
||||
_DifferenceType __stolen_try =
|
||||
__stolen_first + __steal - _DifferenceType(1);
|
||||
// Push __victim's current start forward.
|
||||
_DifferenceType __stolen_first =
|
||||
__fetch_and_add<_DifferenceType>
|
||||
(&(__job[__victim * __stride]._M_first), __steal);
|
||||
_DifferenceType __stolen_try = (__stolen_first + __steal
|
||||
- _DifferenceType(1));
|
||||
|
||||
__my_job._M_first = __stolen_first;
|
||||
__my_job._M_last =
|
||||
__gnu_parallel::min(__stolen_try, __supposed_last);
|
||||
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
|
||||
__my_job._M_first = __stolen_first;
|
||||
__my_job._M_last = __gnu_parallel::min(__stolen_try,
|
||||
__supposed_last);
|
||||
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
|
||||
|
||||
// Has potential work again.
|
||||
// Has potential work again.
|
||||
# pragma omp atomic
|
||||
++__busy;
|
||||
__iam_working = true;
|
||||
++__busy;
|
||||
__iam_working = true;
|
||||
|
||||
# pragma omp flush(__busy)
|
||||
}
|
||||
}
|
||||
# pragma omp flush(__busy)
|
||||
} // end while __busy > 0
|
||||
// Add accumulated result to output.
|
||||
omp_set_lock(&__output_lock);
|
||||
__output = __r(__output, __result);
|
||||
omp_unset_lock(&__output_lock);
|
||||
} // end while __busy > 0
|
||||
// Add accumulated result to output.
|
||||
omp_set_lock(&__output_lock);
|
||||
__output = __r(__output, __result);
|
||||
omp_unset_lock(&__output_lock);
|
||||
}
|
||||
|
||||
delete[] __job;
|
||||
delete[] __job;
|
||||
|
||||
// Points to last element processed (needed as return value for
|
||||
// some algorithms like transform)
|
||||
__f._M_finish_iterator = __begin + __length;
|
||||
// Points to last element processed (needed as return value for
|
||||
// some algorithms like transform)
|
||||
__f._M_finish_iterator = __begin + __length;
|
||||
|
||||
omp_destroy_lock(&__output_lock);
|
||||
omp_destroy_lock(&__output_lock);
|
||||
|
||||
return __op;
|
||||
}
|
||||
return __op;
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
#endif /* _GLIBCXX_PARALLEL_WORKSTEALING_H */
|
||||
|
Loading…
Reference in New Issue
Block a user