multiway_merge.h: Simple formatting and uglification fixes.

2009-11-05  Paolo Carlini  <paolo.carlini@oracle.com>

	* include/parallel/multiway_merge.h: Simple formatting and
	uglification fixes.
	* include/parallel/losertree.h: Likewise.
	* include/parallel/base.h: Likewise.
	* include/parallel/par_loop.h: Likewise.
	* include/parallel/omp_loop_static.h: Likewise.
	* include/parallel/multiway_mergesort.h: Likewise.
	* include/parallel/partial_sum.h: Likewise.
	* include/parallel/omp_loop.h: Likewise.

From-SVN: r153939
This commit is contained in:
Paolo Carlini 2009-11-05 14:06:13 +00:00 committed by Paolo Carlini
parent 94086ef69c
commit 338311e5d7
9 changed files with 1135 additions and 1186 deletions

View File

@ -1,3 +1,15 @@
2009-11-05 Paolo Carlini <paolo.carlini@oracle.com>
* include/parallel/multiway_merge.h: Simple formatting and
uglification fixes.
* include/parallel/losertree.h: Likewise.
* include/parallel/base.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
2009-11-04 Benjamin Kosnik <bkoz@redhat.com>
* testsuite/25_algorithms/fill/5.cc: Move...

View File

@ -93,13 +93,13 @@ namespace __gnu_parallel
__is_parallel(const _Parallelism __p) { return __p != sequential; }
/** @brief Calculates the rounded-down logarithm of @__c __n for base 2.
* @param __n Argument.
* @return Returns 0 for any argument <1.
*/
template<typename _Size>
inline _Size
__rd_log2(_Size __n)
/** @brief Calculates the rounded-down logarithm of @__c __n for base 2.
* @param __n Argument.
* @return Returns 0 for any argument <1.
*/
template<typename _Size>
inline _Size
__rd_log2(_Size __n)
{
_Size __k;
for (__k = 0; __n > 1; __n >>= 1)
@ -107,356 +107,352 @@ template<typename _Size>
return __k;
}
/** @brief Encode two integers into one gnu_parallel::_CASable.
* @param __a First integer, to be encoded in the most-significant @__c
* _CASable_bits/2 bits.
* @param __b Second integer, to be encoded in the least-significant
* @__c _CASable_bits/2 bits.
* @return value encoding @__c __a and @__c __b.
* @see decode2
*/
inline _CASable
__encode2(int __a, int __b) //must all be non-negative, actually
{
return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0);
}
/** @brief Decode two integers from one gnu_parallel::_CASable.
* @param __x __gnu_parallel::_CASable to decode integers from.
* @param __a First integer, to be decoded from the most-significant
* @__c _CASable_bits/2 bits of @__c __x.
* @param __b Second integer, to be encoded in the least-significant
* @__c _CASable_bits/2 bits of @__c __x.
* @see __encode2
*/
inline void
decode2(_CASable __x, int& __a, int& __b)
{
__a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask);
__b = (int)((__x >> 0 ) & _CASable_mask);
}
//needed for parallel "numeric", even if "algorithm" not included
/** @brief Equivalent to std::min. */
template<typename _Tp>
const _Tp&
min(const _Tp& __a, const _Tp& __b)
{ return (__a < __b) ? __a : __b; }
/** @brief Equivalent to std::max. */
template<typename _Tp>
const _Tp&
max(const _Tp& __a, const _Tp& __b)
{ return (__a > __b) ? __a : __b; }
/** @brief Constructs predicate for equality from strict weak
* ordering predicate
*/
template<typename _T1, typename _T2, typename _Compare>
class _EqualFromLess : public std::binary_function<_T1, _T2, bool>
/** @brief Encode two integers into one gnu_parallel::_CASable.
* @param __a First integer, to be encoded in the most-significant @__c
* _CASable_bits/2 bits.
* @param __b Second integer, to be encoded in the least-significant
* @__c _CASable_bits/2 bits.
* @return value encoding @__c __a and @__c __b.
* @see decode2
*/
inline _CASable
__encode2(int __a, int __b) //must all be non-negative, actually
{
private:
_Compare& _M_comp;
return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0);
}
public:
_EqualFromLess(_Compare& __comp) : _M_comp(__comp) { }
/** @brief Decode two integers from one gnu_parallel::_CASable.
* @param __x __gnu_parallel::_CASable to decode integers from.
* @param __a First integer, to be decoded from the most-significant
* @__c _CASable_bits/2 bits of @__c __x.
* @param __b Second integer, to be encoded in the least-significant
* @__c _CASable_bits/2 bits of @__c __x.
* @see __encode2
*/
inline void
decode2(_CASable __x, int& __a, int& __b)
{
__a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask);
__b = (int)((__x >> 0 ) & _CASable_mask);
}
bool operator()(const _T1& __a, const _T2& __b)
//needed for parallel "numeric", even if "algorithm" not included
/** @brief Equivalent to std::min. */
template<typename _Tp>
const _Tp&
min(const _Tp& __a, const _Tp& __b)
{ return (__a < __b) ? __a : __b; }
/** @brief Equivalent to std::max. */
template<typename _Tp>
const _Tp&
max(const _Tp& __a, const _Tp& __b)
{ return (__a > __b) ? __a : __b; }
/** @brief Constructs predicate for equality from strict weak
* ordering predicate
*/
template<typename _T1, typename _T2, typename _Compare>
class _EqualFromLess : public std::binary_function<_T1, _T2, bool>
{
return !_M_comp(__a, __b) && !_M_comp(__b, __a);
}
};
private:
_Compare& _M_comp;
public:
_EqualFromLess(_Compare& __comp) : _M_comp(__comp) { }
bool operator()(const _T1& __a, const _T2& __b)
{ return !_M_comp(__a, __b) && !_M_comp(__b, __a); }
};
/** @brief Similar to std::binder1st,
* but giving the argument types explicitly. */
template<typename _Predicate, typename argument_type>
class __unary_negate
: public std::unary_function<argument_type, bool>
{
protected:
_Predicate _M_pred;
/** @brief Similar to std::binder1st,
* but giving the argument types explicitly. */
template<typename _Predicate, typename argument_type>
class __unary_negate
: public std::unary_function<argument_type, bool>
{
protected:
_Predicate _M_pred;
public:
explicit
__unary_negate(const _Predicate& __x) : _M_pred(__x) { }
public:
explicit
__unary_negate(const _Predicate& __x) : _M_pred(__x) { }
bool
operator()(const argument_type& __x)
{ return !_M_pred(__x); }
};
bool
operator()(const argument_type& __x)
{ return !_M_pred(__x); }
};
/** @brief Similar to std::binder1st,
* but giving the argument types explicitly. */
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
class __binder1st
: public std::unary_function<_SecondArgumentType, _ResultType>
{
protected:
_Operation _M_op;
_FirstArgumentType _M_value;
/** @brief Similar to std::binder1st,
* but giving the argument types explicitly. */
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
class __binder1st
: public std::unary_function<_SecondArgumentType, _ResultType>
{
protected:
_Operation _M_op;
_FirstArgumentType _M_value;
public:
__binder1st(const _Operation& __x,
const _FirstArgumentType& __y)
: _M_op(__x), _M_value(__y) { }
public:
__binder1st(const _Operation& __x, const _FirstArgumentType& __y)
: _M_op(__x), _M_value(__y) { }
_ResultType
operator()(const _SecondArgumentType& __x)
{ return _M_op(_M_value, __x); }
_ResultType
operator()(const _SecondArgumentType& __x)
{ return _M_op(_M_value, __x); }
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 109. Missing binders for non-const sequence elements
_ResultType
operator()(_SecondArgumentType& __x) const
{ return _M_op(_M_value, __x); }
};
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 109. Missing binders for non-const sequence elements
_ResultType
operator()(_SecondArgumentType& __x) const
{ return _M_op(_M_value, __x); }
};
/**
* @brief Similar to std::binder2nd, but giving the argument types
* explicitly.
*/
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
class binder2nd
: public std::unary_function<_FirstArgumentType, _ResultType>
{
protected:
_Operation _M_op;
_SecondArgumentType _M_value;
/**
* @brief Similar to std::binder2nd, but giving the argument types
* explicitly.
*/
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
class binder2nd
: public std::unary_function<_FirstArgumentType, _ResultType>
{
protected:
_Operation _M_op;
_SecondArgumentType _M_value;
public:
binder2nd(const _Operation& __x,
const _SecondArgumentType& __y)
: _M_op(__x), _M_value(__y) { }
public:
binder2nd(const _Operation& __x, const _SecondArgumentType& __y)
: _M_op(__x), _M_value(__y) { }
_ResultType
operator()(const _FirstArgumentType& __x) const
{ return _M_op(__x, _M_value); }
_ResultType
operator()(const _FirstArgumentType& __x) const
{ return _M_op(__x, _M_value); }
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 109. Missing binders for non-const sequence elements
_ResultType
operator()(_FirstArgumentType& __x)
{ return _M_op(__x, _M_value); }
};
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 109. Missing binders for non-const sequence elements
_ResultType
operator()(_FirstArgumentType& __x)
{ return _M_op(__x, _M_value); }
};
/** @brief Similar to std::equal_to, but allows two different types. */
template<typename _T1, typename _T2>
struct _EqualTo : std::binary_function<_T1, _T2, bool>
{
bool operator()(const _T1& __t1, const _T2& __t2) const
{ return __t1 == __t2; }
};
/** @brief Similar to std::equal_to, but allows two different types. */
template<typename _T1, typename _T2>
struct _EqualTo : std::binary_function<_T1, _T2, bool>
{
bool operator()(const _T1& __t1, const _T2& __t2) const
{ return __t1 == __t2; }
};
/** @brief Similar to std::less, but allows two different types. */
template<typename _T1, typename _T2>
struct _Less : std::binary_function<_T1, _T2, bool>
{
bool
operator()(const _T1& __t1, const _T2& __t2) const
{ return __t1 < __t2; }
/** @brief Similar to std::less, but allows two different types. */
template<typename _T1, typename _T2>
struct _Less : std::binary_function<_T1, _T2, bool>
{
bool
operator()(const _T1& __t1, const _T2& __t2) const
{ return __t1 < __t2; }
bool
operator()(const _T2& __t2, const _T1& __t1) const
{ return __t2 < __t1; }
};
bool
operator()(const _T2& __t2, const _T1& __t1) const
{ return __t2 < __t1; }
};
// Partial specialization for one type. Same as std::less.
template<typename _Tp>
struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
{
bool
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x < __y; }
};
// Partial specialization for one type. Same as std::less.
template<typename _Tp>
struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
{
bool
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x < __y; }
};
/** @brief Similar to std::plus, but allows two different types. */
template<typename _Tp1, typename _Tp2>
struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
+ *static_cast<_Tp2*>(NULL)) __result;
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
{ return __x + __y; }
};
// Partial specialization for one type. Same as std::plus.
template<typename _Tp>
struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
+ *static_cast<_Tp*>(NULL)) __result;
__result
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x + __y; }
};
/** @brief Similar to std::multiplies, but allows two different types. */
template<typename _Tp1, typename _Tp2>
struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
* *static_cast<_Tp2*>(NULL)) __result;
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
{ return __x * __y; }
};
// Partial specialization for one type. Same as std::multiplies.
template<typename _Tp>
struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
* *static_cast<_Tp*>(NULL)) __result;
__result
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x * __y; }
};
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequence;
/** @brief _Iterator associated with __gnu_parallel::_PseudoSequence.
* If features the usual random-access iterator functionality.
* @param _Tp Sequence _M_value type.
* @param _DifferenceType Sequence difference type.
*/
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequenceIterator
{
public:
typedef _DifferenceTp _DifferenceType;
private:
const _Tp& _M_val;
_DifferenceType _M_pos;
public:
_PseudoSequenceIterator(const _Tp& _M_val, _DifferenceType _M_pos)
: _M_val(_M_val), _M_pos(_M_pos) { }
// Pre-increment operator.
_PseudoSequenceIterator&
operator++()
template<typename _Tp1, typename _Tp2>
struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
++_M_pos;
return *this;
}
typedef __typeof__(*static_cast<_Tp1*>(NULL)
+ *static_cast<_Tp2*>(NULL)) __result;
// Post-increment operator.
const _PseudoSequenceIterator
operator++(int)
{ return _PseudoSequenceIterator(_M_pos++); }
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
{ return __x + __y; }
};
const _Tp&
operator*() const
{ return _M_val; }
// Partial specialization for one type. Same as std::plus.
template<typename _Tp>
struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
+ *static_cast<_Tp*>(NULL)) __result;
const _Tp&
operator[](_DifferenceType) const
{ return _M_val; }
__result
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x + __y; }
};
bool
operator==(const _PseudoSequenceIterator& __i2)
{ return _M_pos == __i2._M_pos; }
_DifferenceType
operator!=(const _PseudoSequenceIterator& __i2)
{ return _M_pos != __i2._M_pos; }
/** @brief Similar to std::multiplies, but allows two different types. */
template<typename _Tp1, typename _Tp2>
struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
* *static_cast<_Tp2*>(NULL)) __result;
_DifferenceType
operator-(const _PseudoSequenceIterator& __i2)
{ return _M_pos - __i2._M_pos; }
};
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
{ return __x * __y; }
};
/** @brief Sequence that conceptually consists of multiple copies of
the same element.
* The copies are not stored explicitly, of course.
* @param _Tp Sequence _M_value type.
* @param _DifferenceType Sequence difference type.
*/
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequence
{
public:
typedef _DifferenceTp _DifferenceType;
// Partial specialization for one type. Same as std::multiplies.
template<typename _Tp>
struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
* *static_cast<_Tp*>(NULL)) __result;
// Better cast down to uint64_t, than up to _DifferenceTp.
typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator;
__result
operator()(const _Tp& __x, const _Tp& __y) const
{ return __x * __y; }
};
/** @brief Constructor.
* @param _M_val Element of the sequence.
* @param __count Number of (virtual) copies.
*/
_PseudoSequence(const _Tp& _M_val, _DifferenceType __count)
: _M_val(_M_val), __count(__count) { }
/** @brief Begin iterator. */
iterator
begin() const
{ return iterator(_M_val, 0); }
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequence;
/** @brief End iterator. */
iterator
end() const
{ return iterator(_M_val, __count); }
/** @brief _Iterator associated with __gnu_parallel::_PseudoSequence.
* If features the usual random-access iterator functionality.
* @param _Tp Sequence _M_value type.
* @param _DifferenceType Sequence difference type.
*/
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequenceIterator
{
public:
typedef _DifferenceTp _DifferenceType;
private:
const _Tp& _M_val;
_DifferenceType __count;
};
private:
const _Tp& _M_val;
_DifferenceType _M_pos;
/** @brief Functor that does nothing */
template<typename _ValueTp>
class _VoidFunctor
{
inline void
operator()(const _ValueTp& __v) const { }
};
public:
_PseudoSequenceIterator(const _Tp& __val, _DifferenceType __pos)
: _M_val(__val), _M_pos(__pos) { }
/** @brief Compute the median of three referenced elements,
according to @__c __comp.
* @param __a First iterator.
* @param __b Second iterator.
* @param __c Third iterator.
* @param __comp Comparator.
*/
template<typename _RAIter, typename _Compare>
_RAIter
__median_of_three_iterators(_RAIter __a, _RAIter __b,
_RAIter __c, _Compare& __comp)
{
if (__comp(*__a, *__b))
if (__comp(*__b, *__c))
return __b;
else
if (__comp(*__a, *__c))
return __c;
else
return __a;
else
// Pre-increment operator.
_PseudoSequenceIterator&
operator++()
{
// Just swap __a and __b.
if (__comp(*__a, *__c))
return __a;
else
if (__comp(*__b, *__c))
return __c;
else
return __b;
++_M_pos;
return *this;
}
}
// Post-increment operator.
const _PseudoSequenceIterator
operator++(int)
{ return _PseudoSequenceIterator(_M_pos++); }
const _Tp&
operator*() const
{ return _M_val; }
const _Tp&
operator[](_DifferenceType) const
{ return _M_val; }
bool
operator==(const _PseudoSequenceIterator& __i2)
{ return _M_pos == __i2._M_pos; }
_DifferenceType
operator!=(const _PseudoSequenceIterator& __i2)
{ return _M_pos != __i2._M_pos; }
_DifferenceType
operator-(const _PseudoSequenceIterator& __i2)
{ return _M_pos - __i2._M_pos; }
};
/** @brief Sequence that conceptually consists of multiple copies of
the same element.
* The copies are not stored explicitly, of course.
* @param _Tp Sequence _M_value type.
* @param _DifferenceType Sequence difference type.
*/
template<typename _Tp, typename _DifferenceTp>
class _PseudoSequence
{
public:
typedef _DifferenceTp _DifferenceType;
// Better cast down to uint64_t, than up to _DifferenceTp.
typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator;
/** @brief Constructor.
* @param _M_val Element of the sequence.
* @param __count Number of (virtual) copies.
*/
_PseudoSequence(const _Tp& __val, _DifferenceType __count)
: _M_val(__val), _M_count(__count) { }
/** @brief Begin iterator. */
iterator
begin() const
{ return iterator(_M_val, 0); }
/** @brief End iterator. */
iterator
end() const
{ return iterator(_M_val, _M_count); }
private:
const _Tp& _M_val;
_DifferenceType _M_count;
};
/** @brief Functor that does nothing */
template<typename _ValueTp>
class _VoidFunctor
{
inline void
operator()(const _ValueTp& __v) const { }
};
/** @brief Compute the median of three referenced elements,
according to @__c __comp.
* @param __a First iterator.
* @param __b Second iterator.
* @param __c Third iterator.
* @param __comp Comparator.
*/
template<typename _RAIter, typename _Compare>
_RAIter
__median_of_three_iterators(_RAIter __a, _RAIter __b,
_RAIter __c, _Compare& __comp)
{
if (__comp(*__a, *__b))
if (__comp(*__b, *__c))
return __b;
else
if (__comp(*__a, *__c))
return __c;
else
return __a;
else
{
// Just swap __a and __b.
if (__comp(*__a, *__c))
return __a;
else
if (__comp(*__b, *__c))
return __c;
else
return __b;
}
}
#define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert(_Condition)

View File

@ -163,7 +163,8 @@ template<typename _Tp, typename _Compare>
*/
template<bool __stable/* default == true */, typename _Tp,
typename _Compare>
class _LoserTree : public _LoserTreeBase<_Tp, _Compare>
class _LoserTree
: public _LoserTreeBase<_Tp, _Compare>
{
typedef _LoserTreeBase<_Tp, _Compare> _Base;
using _Base::_M_k;
@ -797,7 +798,7 @@ public:
* run empty. This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
class LoserTreePointerUnguardedBase
class _LoserTreePointerUnguardedBase
{
protected:
struct _Loser
@ -812,8 +813,8 @@ template<typename _Tp, typename _Compare>
public:
LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel,
_Compare __comp = std::less<_Tp>())
_LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel,
_Compare __comp = std::less<_Tp>())
: _M_comp(__comp)
{
_M_ik = __k;
@ -831,7 +832,7 @@ template<typename _Tp, typename _Compare>
}
}
~LoserTreePointerUnguardedBase()
~_LoserTreePointerUnguardedBase()
{ delete[] _M_losers; }
int
@ -861,16 +862,16 @@ template<typename _Tp, typename _Compare>
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreePointerUnguarded
: public LoserTreePointerUnguardedBase<_Tp, _Compare>
: public _LoserTreePointerUnguardedBase<_Tp, _Compare>
{
typedef LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
: _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
{ }
unsigned int
@ -945,16 +946,16 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
*/
template<typename _Tp, typename _Compare>
class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare>
: public LoserTreePointerUnguardedBase<_Tp, _Compare>
: public _LoserTreePointerUnguardedBase<_Tp, _Compare>
{
typedef LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
: _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
{ }
unsigned int

View File

@ -54,24 +54,6 @@
namespace __gnu_parallel
{
// Announce guarded and unguarded iterator.
template<typename _RAIter, typename _Compare>
class _GuardedIterator;
// Making the arguments const references seems to dangerous,
// the user-defined comparator might not be const.
template<typename _RAIter, typename _Compare>
inline bool
operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2);
template<typename _RAIter, typename _Compare>
inline bool
operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2);
/** @brief _Iterator wrapper supporting an implicit supremum at the end
* of the sequence, dominating all comparisons.
*
@ -96,12 +78,11 @@ template<typename _RAIter, typename _Compare>
public:
/** @brief Constructor. Sets iterator to beginning of sequence.
* @param __begin Begin iterator of sequence.
* @param _M_end End iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator provided for associated overloaded
* compare operators. */
_GuardedIterator(_RAIter __begin,
_RAIter _M_end, _Compare& __comp)
: _M_current(__begin), _M_end(_M_end), __comp(__comp)
_GuardedIterator(_RAIter __begin, _RAIter __end, _Compare& __comp)
: _M_current(__begin), _M_end(__end), __comp(__comp)
{ }
/** @brief Pre-increment operator.
@ -124,62 +105,37 @@ template<typename _RAIter, typename _Compare>
operator _RAIter()
{ return _M_current; }
/** @brief Compare two elements referenced by guarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c true if less. */
friend bool
operator< <_RAIter, _Compare>(
_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2);
operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2)
{
if (__bi1._M_current == __bi1._M_end) //__bi1 is sup
return __bi2._M_current == __bi2._M_end; //__bi2 is not sup
if (__bi2._M_current == __bi2._M_end) //__bi2 is sup
return true;
return (__bi1.__comp)(*__bi1, *__bi2); //normal compare
}
/** @brief Compare two elements referenced by guarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c True if less equal. */
friend bool
operator<= <_RAIter, _Compare>(
_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2);
operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2)
{
if (__bi2._M_current == __bi2._M_end) //__bi1 is sup
return __bi1._M_current != __bi1._M_end; //__bi2 is not sup
if (__bi1._M_current == __bi1._M_end) //__bi2 is sup
return false;
return !(__bi1.__comp)(*__bi2, *__bi1); //normal compare
}
};
/** @brief Compare two elements referenced by guarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c true if less. */
template<typename _RAIter, typename _Compare>
inline bool
operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2)
{
if (__bi1._M_current == __bi1._M_end) //__bi1 is sup
return __bi2._M_current == __bi2._M_end; //__bi2 is not sup
if (__bi2._M_current == __bi2._M_end) //__bi2 is sup
return true;
return (__bi1.__comp)(*__bi1, *__bi2); //normal compare
}
/** @brief Compare two elements referenced by guarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c True if less equal. */
template<typename _RAIter, typename _Compare>
inline bool
operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2)
{
if (__bi2._M_current == __bi2._M_end) //__bi1 is sup
return __bi1._M_current != __bi1._M_end; //__bi2 is not sup
if (__bi1._M_current == __bi1._M_end) //__bi2 is sup
return false;
return !(__bi1.__comp)(*__bi2, *__bi1); //normal compare
}
template<typename _RAIter, typename _Compare>
class _UnguardedIterator;
template<typename _RAIter, typename _Compare>
inline bool
operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2);
template<typename _RAIter, typename _Compare>
inline bool
operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2);
template<typename _RAIter, typename _Compare>
class _UnguardedIterator
{
@ -192,10 +148,10 @@ template<typename _RAIter, typename _Compare>
public:
/** @brief Constructor. Sets iterator to beginning of sequence.
* @param __begin Begin iterator of sequence.
* @param _M_end Unused, only for compatibility.
* @param __end Unused, only for compatibility.
* @param __comp Unused, only for compatibility. */
_UnguardedIterator(_RAIter __begin,
_RAIter _M_end, _Compare& __comp)
_RAIter /* __end */, _Compare& __comp)
: _M_current(__begin), __comp(__comp)
{ }
@ -219,43 +175,31 @@ template<typename _RAIter, typename _Compare>
operator _RAIter()
{ return _M_current; }
/** @brief Compare two elements referenced by unguarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c true if less. */
friend bool
operator< <_RAIter, _Compare>(
_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2);
operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2)
{
// Normal compare.
return (__bi1.__comp)(*__bi1, *__bi2);
}
/** @brief Compare two elements referenced by unguarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c True if less equal. */
friend bool
operator<= <_RAIter, _Compare>(
_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2);
operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2)
{
// Normal compare.
return !(__bi1.__comp)(*__bi2, *__bi1);
}
};
/** @brief Compare two elements referenced by unguarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c true if less. */
template<typename _RAIter, typename _Compare>
inline bool
operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2)
{
// Normal compare.
return (__bi1.__comp)(*__bi1, *__bi2);
}
/** @brief Compare two elements referenced by unguarded iterators.
* @param __bi1 First iterator.
* @param __bi2 Second iterator.
* @return @__c True if less equal. */
template<typename _RAIter, typename _Compare>
inline bool
operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
_UnguardedIterator<_RAIter, _Compare>& __bi2)
{
// Normal compare.
return !(__bi1.__comp)(*__bi2, *__bi1);
}
/** @brief Highly efficient 3-way merging procedure.
*
* Merging is done with the algorithm implementation described by Peter
@ -287,11 +231,10 @@ template<template<typename RAI, typename C> class iterator,
typename _DifferenceTp,
typename _Compare>
_RAIter3
multiway_merge_3_variant(
_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
multiway_merge_3_variant(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
_GLIBCXX_CALL(__length);
@ -612,15 +555,14 @@ template<typename _LT,
typename _RAIter3,
typename _DifferenceTp, typename _Compare>
_RAIter3
multiway_merge_loser_tree_unguarded(
_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length,
_Compare __comp)
multiway_merge_loser_tree_unguarded(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length,
_Compare __comp)
{
_GLIBCXX_CALL(__length)
typedef _DifferenceTp _DifferenceType;
@ -702,15 +644,14 @@ template<typename UnguardedLoserTree,
typename _DifferenceTp,
typename _Compare>
_RAIter3
multiway_merge_loser_tree_sentinel(
_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
multiway_merge_loser_tree_sentinel(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length,
_Compare __comp)
__sentinel,
_DifferenceTp __length,
_Compare __comp)
{
_GLIBCXX_CALL(__length)
@ -773,16 +714,16 @@ template<typename UnguardedLoserTree,
* @param _Tp type to give the loser tree traits for.
*/
template <typename _Tp>
struct _LoserTreeTraits
{
/**
* @brief True iff to use pointers instead of values in loser trees.
*
* The default behavior is to use pointers if the data type is four
* times as big as the pointer to it.
*/
static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*));
};
struct _LoserTreeTraits
{
/**
* @brief True iff to use pointers instead of values in loser trees.
*
* The default behavior is to use pointers if the data type is four
* times as big as the pointer to it.
*/
static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*));
};
/**
* @brief Switch for 3-way merging with __sentinels turned off.
@ -796,10 +737,11 @@ template<bool __sentinels /*default == false*/,
typename _Compare>
struct __multiway_merge_3_variant_sentinel_switch
{
_RAIter3 operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
_RAIter3
operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_3_variant<_GuardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
@ -815,13 +757,15 @@ template<typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
typename _Compare>
struct __multiway_merge_3_variant_sentinel_switch
<true, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>
struct __multiway_merge_3_variant_sentinel_switch<true, _RAIterIterator,
_RAIter3, _DifferenceTp,
_Compare>
{
_RAIter3 operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
_RAIter3
operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_3_variant<_UnguardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
@ -840,10 +784,11 @@ template<bool __sentinels /*default == false*/,
typename _Compare>
struct __multiway_merge_4_variant_sentinel_switch
{
_RAIter3 operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
_RAIter3
operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_4_variant<_GuardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
@ -859,13 +804,15 @@ template<typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
typename _Compare>
struct __multiway_merge_4_variant_sentinel_switch
<true, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>
struct __multiway_merge_4_variant_sentinel_switch<true, _RAIterIterator,
_RAIter3, _DifferenceTp,
_Compare>
{
_RAIter3 operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
_RAIter3
operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_4_variant<_UnguardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
@ -882,30 +829,30 @@ template<bool __sentinels,
typename _DifferenceTp,
typename _Compare>
struct __multiway_merge_k_variant_sentinel_switch
{
_RAIter3 operator()
(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length, _Compare __comp)
{
typedef typename std::iterator_traits<_RAIterIterator>
::value_type::first_type
_RAIter1;
typedef typename std::iterator_traits<_RAIter1>::value_type
_ValueType;
{
_RAIter3
operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length, _Compare __comp)
{
typedef typename std::iterator_traits<_RAIterIterator>
::value_type::first_type
_RAIter1;
typedef typename std::iterator_traits<_RAIter1>::value_type
_ValueType;
return multiway_merge_loser_tree_sentinel<
typename __gnu_cxx::__conditional_type<
_LoserTreeTraits<_ValueType>::_M_use_pointer,
_LoserTreePointerUnguarded<__stable, _ValueType, _Compare>,
_LoserTreeUnguarded<__stable, _ValueType, _Compare>
>::__type>(
__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
}
return multiway_merge_loser_tree_sentinel<
typename __gnu_cxx::__conditional_type<
_LoserTreeTraits<_ValueType>::_M_use_pointer,
_LoserTreePointerUnguarded<__stable, _ValueType, _Compare>,
_LoserTreeUnguarded<__stable, _ValueType, _Compare>
>::__type>
(__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
}
};
/**
@ -916,17 +863,18 @@ template<bool __stable,
typename _RAIter3,
typename _DifferenceTp,
typename _Compare>
struct __multiway_merge_k_variant_sentinel_switch
<false, __stable, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>
struct __multiway_merge_k_variant_sentinel_switch<false, __stable,
_RAIterIterator, _RAIter3,
_DifferenceTp, _Compare>
{
_RAIter3 operator()
(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_RAIter3
operator()(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length, _Compare __comp)
__sentinel,
_DifferenceTp __length, _Compare __comp)
{
typedef typename std::iterator_traits<_RAIterIterator>
::value_type::first_type
@ -963,14 +911,13 @@ template<bool __stable,
typename _DifferenceTp,
typename _Compare>
_RAIter3
__sequential_multiway_merge(
_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
__sequential_multiway_merge(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_RAIter3 __target,
const typename std::iterator_traits<typename std::iterator_traits<
_RAIterIterator>::value_type::first_type>::value_type&
__sentinel,
_DifferenceTp __length, _Compare __comp)
__sentinel,
_DifferenceTp __length, _Compare __comp)
{
_GLIBCXX_CALL(__length)
@ -1061,8 +1008,8 @@ template<bool __stable,
template<bool __stable, class _RAIter, class _StrictWeakOrdering>
struct _SamplingSorter
{
void operator()(_RAIter __first, _RAIter __last,
_StrictWeakOrdering __comp)
void
operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp)
{ __gnu_sequential::stable_sort(__first, __last, __comp); }
};
@ -1074,8 +1021,8 @@ template<bool __stable, class _RAIter, class _StrictWeakOrdering>
template<class _RAIter, class _StrictWeakOrdering>
struct _SamplingSorter<false, _RAIter, _StrictWeakOrdering>
{
void operator()(_RAIter __first, _RAIter __last,
_StrictWeakOrdering __comp)
void
operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp)
{ __gnu_sequential::sort(__first, __last, __comp); }
};
@ -1087,10 +1034,11 @@ template<bool __stable,
typename _Compare,
typename _DifferenceType>
void
multiway_merge_sampling_splitting
(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_DifferenceType __length, _DifferenceType __total_length, _Compare __comp,
multiway_merge_sampling_splitting(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_DifferenceType __length,
_DifferenceType __total_length,
_Compare __comp,
std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces)
{
typedef typename std::iterator_traits<_RAIterIterator>
@ -1168,11 +1116,11 @@ template<bool __stable,
typename _Compare,
typename _DifferenceType>
void
multiway_merge_exact_splitting
(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_DifferenceType __length, _DifferenceType __total_length,
_Compare __comp,
multiway_merge_exact_splitting(_RAIterIterator __seqs_begin,
_RAIterIterator __seqs_end,
_DifferenceType __length,
_DifferenceType __total_length,
_Compare __comp,
std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces)
{
typedef typename std::iterator_traits<_RAIterIterator>

View File

@ -41,451 +41,444 @@
namespace __gnu_parallel
{
/** @brief Subsequence description. */
template<typename _DifferenceTp>
struct _Piece
{
typedef _DifferenceTp _DifferenceType;
/** @brief Subsequence description. */
template<typename _DifferenceTp>
struct _Piece
{
typedef _DifferenceTp _DifferenceType;
/** @brief Begin of subsequence. */
_DifferenceType _M_begin;
/** @brief Begin of subsequence. */
_DifferenceType _M_begin;
/** @brief End of subsequence. */
_DifferenceType _M_end;
};
/** @brief End of subsequence. */
_DifferenceType _M_end;
};
/** @brief Data accessed by all threads.
*
* PMWMS = parallel multiway mergesort */
template<typename _RAIter>
struct _PMWMSSortingData
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Number of threads involved. */
_ThreadIndex _M_num_threads;
/** @brief Input __begin. */
_RAIter _M_source;
/** @brief Start indices, per thread. */
_DifferenceType* _M_starts;
/** @brief Storage in which to sort. */
_ValueType** _M_temporary;
/** @brief Samples. */
_ValueType* _M_samples;
/** @brief Offsets to add to the found positions. */
_DifferenceType* _M_offsets;
/** @brief Pieces of data to merge @__c [thread][__sequence] */
std::vector<_Piece<_DifferenceType> >* _M_pieces;
};
/**
* @brief Select _M_samples from a sequence.
* @param __sd Pointer to algorithm data. _Result will be placed in
* @__c __sd->_M_samples.
* @param __num_samples Number of _M_samples to select.
*/
template<typename _RAIter, typename _DifferenceTp>
void
__determine_samples(_PMWMSSortingData<_RAIter>* __sd,
_DifferenceTp __num_samples)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef _DifferenceTp _DifferenceType;
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType* __es = new _DifferenceType[__num_samples + 2];
equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
__num_samples + 1, __es);
for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
_ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
delete[] __es;
}
/** @brief Split consistently. */
template<bool __exact, typename _RAIter,
typename _Compare, typename _SortingPlacesIterator>
struct _SplitConsistently
{
};
/** @brief Split by exact splitting. */
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently
<true, _RAIter, _Compare, _SortingPlacesIterator>
{
void operator()(
const _ThreadIndex __iam,
_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp,
const typename
std::iterator_traits<_RAIter>::difference_type
__num_samples)
const
{
# pragma omp barrier
std::vector<std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
seqs(__sd->_M_num_threads);
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]));
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
// if not last thread
if (__iam < __sd->_M_num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
__sd->_M_starts[__iam + 1], _M_offsets.begin(),
__comp);
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
{
// for each sequence
if (__iam < (__sd->_M_num_threads - 1))
__sd->_M_pieces[__iam][__seq]._M_end
= _M_offsets[__seq] - seqs[__seq].first;
else
// very end of this sequence
__sd->_M_pieces[__iam][__seq]._M_end =
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
}
# pragma omp barrier
for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
{
// For each sequence.
if (__iam > 0)
__sd->_M_pieces[__iam][__seq]._M_begin =
__sd->_M_pieces[__iam - 1][__seq]._M_end;
else
// Absolute beginning.
__sd->_M_pieces[__iam][__seq]._M_begin = 0;
}
}
};
/** @brief Split by sampling. */
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently<false, _RAIter, _Compare,
_SortingPlacesIterator>
{
void operator()(
const _ThreadIndex __iam,
_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp,
const typename
std::iterator_traits<_RAIter>::difference_type
__num_samples)
const
/** @brief Data accessed by all threads.
*
* PMWMS = parallel multiway mergesort */
template<typename _RAIter>
struct _PMWMSSortingData
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
__determine_samples(__sd, __num_samples);
/** @brief Number of threads involved. */
_ThreadIndex _M_num_threads;
# pragma omp barrier
/** @brief Input __begin. */
_RAIter _M_source;
# pragma omp single
__gnu_sequential::sort(__sd->_M_samples,
__sd->_M_samples
+ (__num_samples * __sd->_M_num_threads),
__comp);
/** @brief Start indices, per thread. */
_DifferenceType* _M_starts;
# pragma omp barrier
/** @brief Storage in which to sort. */
_ValueType** _M_temporary;
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
{
// For each sequence.
if (__num_samples * __iam > 0)
__sd->_M_pieces[__iam][__s]._M_begin =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * __iam],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute beginning.
__sd->_M_pieces[__iam][__s]._M_begin = 0;
/** @brief Samples. */
_ValueType* _M_samples;
if ((__num_samples * (__iam + 1)) <
(__num_samples * __sd->_M_num_threads))
__sd->_M_pieces[__iam][__s]._M_end =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * (__iam + 1)],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute end.
__sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1]
- __sd->_M_starts[__s];
}
/** @brief Offsets to add to the found positions. */
_DifferenceType* _M_offsets;
/** @brief Pieces of data to merge @__c [thread][__sequence] */
std::vector<_Piece<_DifferenceType> >* _M_pieces;
};
/**
* @brief Select _M_samples from a sequence.
* @param __sd Pointer to algorithm data. _Result will be placed in
* @__c __sd->_M_samples.
* @param __num_samples Number of _M_samples to select.
*/
template<typename _RAIter, typename _DifferenceTp>
void
__determine_samples(_PMWMSSortingData<_RAIter>* __sd,
_DifferenceTp __num_samples)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef _DifferenceTp _DifferenceType;
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType* __es = new _DifferenceType[__num_samples + 2];
equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
__num_samples + 1, __es);
for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
_ValueType(__sd->_M_source[__sd->_M_starts[__iam]
+ __es[__i + 1]]);
delete[] __es;
}
/** @brief Split consistently. */
template<bool __exact, typename _RAIter,
typename _Compare, typename _SortingPlacesIterator>
struct _SplitConsistently
{ };
/** @brief Split by exact splitting. */
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently<true, _RAIter,
_Compare, _SortingPlacesIterator>
{
void
operator()(const _ThreadIndex __iam,
_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp,
const typename
std::iterator_traits<_RAIter>::difference_type
__num_samples) const
{
# pragma omp barrier
std::vector<std::pair<_SortingPlacesIterator,
_SortingPlacesIterator> >
seqs(__sd->_M_num_threads);
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]));
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
// if not last thread
if (__iam < __sd->_M_num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
__sd->_M_starts[__iam + 1], _M_offsets.begin(),
__comp);
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
{
// for each sequence
if (__iam < (__sd->_M_num_threads - 1))
__sd->_M_pieces[__iam][__seq]._M_end
= _M_offsets[__seq] - seqs[__seq].first;
else
// very end of this sequence
__sd->_M_pieces[__iam][__seq]._M_end =
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
}
# pragma omp barrier
for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
{
// For each sequence.
if (__iam > 0)
__sd->_M_pieces[__iam][__seq]._M_begin =
__sd->_M_pieces[__iam - 1][__seq]._M_end;
else
// Absolute beginning.
__sd->_M_pieces[__iam][__seq]._M_begin = 0;
}
}
};
/** @brief Split by sampling. */
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently<false, _RAIter, _Compare,
_SortingPlacesIterator>
{
void
operator()(const _ThreadIndex __iam,
_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp,
const typename
std::iterator_traits<_RAIter>::difference_type
__num_samples) const
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
__determine_samples(__sd, __num_samples);
# pragma omp barrier
# pragma omp single
__gnu_sequential::sort(__sd->_M_samples,
__sd->_M_samples
+ (__num_samples * __sd->_M_num_threads),
__comp);
# pragma omp barrier
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
{
// For each sequence.
if (__num_samples * __iam > 0)
__sd->_M_pieces[__iam][__s]._M_begin =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * __iam],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute beginning.
__sd->_M_pieces[__iam][__s]._M_begin = 0;
if ((__num_samples * (__iam + 1)) <
(__num_samples * __sd->_M_num_threads))
__sd->_M_pieces[__iam][__s]._M_end =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * (__iam + 1)],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute end.
__sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]);
}
}
};
template<bool __stable, typename _RAIter, typename _Compare>
struct __possibly_stable_sort
{
};
template<bool __stable, typename _RAIter, typename _Compare>
struct __possibly_stable_sort
{ };
template<typename _RAIter, typename _Compare>
struct __possibly_stable_sort<true, _RAIter, _Compare>
{
void operator()(const _RAIter& __begin,
const _RAIter& __end, _Compare& __comp) const
template<typename _RAIter, typename _Compare>
struct __possibly_stable_sort<true, _RAIter, _Compare>
{
__gnu_sequential::stable_sort(__begin, __end, __comp);
}
};
void operator()(const _RAIter& __begin,
const _RAIter& __end, _Compare& __comp) const
{ __gnu_sequential::stable_sort(__begin, __end, __comp); }
};
template<typename _RAIter, typename _Compare>
struct __possibly_stable_sort<false, _RAIter, _Compare>
{
void operator()(const _RAIter __begin,
const _RAIter __end, _Compare& __comp) const
template<typename _RAIter, typename _Compare>
struct __possibly_stable_sort<false, _RAIter, _Compare>
{
__gnu_sequential::sort(__begin, __end, __comp);
}
};
void operator()(const _RAIter __begin,
const _RAIter __end, _Compare& __comp) const
{ __gnu_sequential::sort(__begin, __end, __comp); }
};
template<bool __stable, typename Seq_RAIter,
typename _RAIter, typename _Compare,
typename DiffType>
struct __possibly_stable_multiway_merge
{
};
template<bool __stable, typename Seq_RAIter,
typename _RAIter, typename _Compare,
typename DiffType>
struct __possibly_stable_multiway_merge
{ };
template<typename Seq_RAIter, typename _RAIter,
typename _Compare, typename DiffType>
struct __possibly_stable_multiway_merge
<true, Seq_RAIter, _RAIter, _Compare,
DiffType>
{
void operator()(const Seq_RAIter& __seqs_begin,
template<typename Seq_RAIter, typename _RAIter,
typename _Compare, typename _DiffType>
struct __possibly_stable_multiway_merge<true, Seq_RAIter,
_RAIter, _Compare, _DiffType>
{
void operator()(const Seq_RAIter& __seqs_begin,
const Seq_RAIter& __seqs_end,
const _RAIter& __target,
_Compare& __comp,
_DiffType __length_am) const
{
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
__comp, sequential_tag());
}
};
template<typename Seq_RAIter, typename _RAIter,
typename _Compare, typename _DiffType>
struct __possibly_stable_multiway_merge<false, Seq_RAIter,
_RAIter, _Compare, _DiffType>
{
void operator()(const Seq_RAIter& __seqs_begin,
const Seq_RAIter& __seqs_end,
const _RAIter& __target,
_Compare& __comp,
DiffType __length_am) const
{
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
__comp, sequential_tag());
}
};
template<typename Seq_RAIter, typename _RAIter,
typename _Compare, typename DiffType>
struct __possibly_stable_multiway_merge
<false, Seq_RAIter, _RAIter, _Compare,
DiffType>
{
void operator()(const Seq_RAIter& __seqs_begin,
const Seq_RAIter& __seqs_end,
const _RAIter& __target,
_Compare& __comp,
DiffType __length_am) const
{
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
_DiffType __length_am) const
{
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
sequential_tag());
}
};
}
};
/** @brief PMWMS code executed by each thread.
* @param __sd Pointer to algorithm data.
* @param __comp Comparator.
*/
template<bool __stable, bool __exact, typename _RAIter,
typename _Compare>
void
parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief PMWMS code executed by each thread.
* @param __sd Pointer to algorithm data.
* @param __comp Comparator.
*/
template<bool __stable, bool __exact, typename _RAIter,
typename _Compare>
void
parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
_Compare& __comp)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_ThreadIndex __iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// Length of this thread's chunk, before merging.
_DifferenceType __length_local
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Length of this thread's chunk, before merging.
_DifferenceType __length_local
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Sort in temporary storage, leave space for sentinel.
// Sort in temporary storage, leave space for sentinel.
typedef _ValueType* _SortingPlacesIterator;
typedef _ValueType* _SortingPlacesIterator;
__sd->_M_temporary[__iam] =
static_cast<_ValueType*>(
::operator new(sizeof(_ValueType) * (__length_local + 1)));
__sd->_M_temporary[__iam] =
static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
* (__length_local + 1)));
// Copy there.
std::uninitialized_copy(
__sd->_M_source + __sd->_M_starts[__iam],
__sd->_M_source + __sd->_M_starts[__iam] + __length_local,
__sd->_M_temporary[__iam]);
// Copy there.
std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
__sd->_M_source + __sd->_M_starts[__iam]
+ __length_local,
__sd->_M_temporary[__iam]);
__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
(__sd->_M_temporary[__iam],
__sd->_M_temporary[__iam] + __length_local,
__sd->_M_temporary[__iam] + __length_local,
__comp);
// Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
// __sd->_M_temporary[__iam] + __length_local.
// Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
// __sd->_M_temporary[__iam] + __length_local.
// No barrier here: Synchronization is done by the splitting routine.
// No barrier here: Synchronization is done by the splitting routine.
_DifferenceType __num_samples =
_DifferenceType __num_samples =
_Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
_SplitConsistently
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
_SplitConsistently
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
(__iam, __sd, __comp, __num_samples);
// Offset from __target __begin, __length after merging.
_DifferenceType __offset = 0, __length_am = 0;
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
{
__length_am += __sd->_M_pieces[__iam][__s]._M_end
- __sd->_M_pieces[__iam][__s]._M_begin;
__offset += __sd->_M_pieces[__iam][__s]._M_begin;
}
// Offset from __target __begin, __length after merging.
_DifferenceType __offset = 0, __length_am = 0;
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
{
__length_am += (__sd->_M_pieces[__iam][__s]._M_end
- __sd->_M_pieces[__iam][__s]._M_begin);
__offset += __sd->_M_pieces[__iam][__s]._M_begin;
}
typedef std::vector<
typedef std::vector<
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
_SeqVector;
_SeqVector seqs(__sd->_M_num_threads);
_SeqVector seqs(__sd->_M_num_threads);
for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
{
seqs[__s] =
std::make_pair(
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
}
for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
{
seqs[__s] =
std::make_pair
(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
}
__possibly_stable_multiway_merge<
__possibly_stable_multiway_merge<
__stable,
typename _SeqVector::iterator,
_RAIter,
_Compare, _DifferenceType>()
(seqs.begin(), seqs.end(),
__sd->_M_source + __offset, __comp,
__sd->_M_source + __offset, __comp,
__length_am);
# pragma omp barrier
# pragma omp barrier
::operator delete(__sd->_M_temporary[__iam]);
}
::operator delete(__sd->_M_temporary[__iam]);
}
/** @brief PMWMS main call.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
*/
template<bool __stable, bool __exact, typename _RAIter,
/** @brief PMWMS main call.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
*/
template<bool __stable, bool __exact, typename _RAIter,
typename _Compare>
void
parallel_sort_mwms(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(__end - __begin)
void
parallel_sort_mwms(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
_DifferenceType __n = __end - __begin;
if (__n <= 1)
return;
if (__n <= 1)
return;
// at least one element per thread
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
// at least one element per thread
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
// shared variables
_PMWMSSortingData<_RAIter> __sd;
_DifferenceType* _M_starts;
// shared variables
_PMWMSSortingData<_RAIter> __sd;
_DifferenceType* _M_starts;
# pragma omp parallel num_threads(__num_threads)
# pragma omp parallel num_threads(__num_threads)
{
__num_threads = omp_get_num_threads(); //no more threads than requested
# pragma omp single
{
__sd._M_num_threads = __num_threads;
__sd._M_source = __begin;
{
__sd._M_num_threads = __num_threads;
__sd._M_source = __begin;
__sd._M_temporary = new _ValueType*[__num_threads];
__sd._M_temporary = new _ValueType*[__num_threads];
if (!__exact)
{
_DifferenceType __size =
(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
* __num_threads;
__sd._M_samples = static_cast<_ValueType*>
(::operator new(__size * sizeof(_ValueType)));
}
else
__sd._M_samples = NULL;
if (!__exact)
{
_DifferenceType __size =
(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
* __num_threads;
__sd._M_samples = static_cast<_ValueType*>(
::operator new(__size * sizeof(_ValueType)));
}
else
__sd._M_samples = NULL;
__sd._M_offsets = new _DifferenceType[__num_threads - 1];
__sd._M_pieces
= new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts
= new _DifferenceType[__num_threads + 1];
__sd._M_offsets = new _DifferenceType[__num_threads - 1];
__sd._M_pieces
= new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts
= new _DifferenceType[__num_threads + 1];
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads;
_DifferenceType __pos = 0;
for (int __i = 0; __i < __num_threads; ++__i)
{
_M_starts[__i] = __pos;
__pos += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
}
_M_starts[__num_threads] = __pos;
} //single
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads;
_DifferenceType __pos = 0;
for (int __i = 0; __i < __num_threads; ++__i)
{
_M_starts[__i] = __pos;
__pos += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
}
_M_starts[__num_threads] = __pos;
} //single
// Now sort in parallel.
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
} //parallel
delete[] _M_starts;
delete[] __sd._M_temporary;
delete[] _M_starts;
delete[] __sd._M_temporary;
if (!__exact)
if (!__exact)
::operator delete(__sd._M_samples);
delete[] __sd._M_offsets;
delete[] __sd._M_pieces;
}
delete[] __sd._M_offsets;
delete[] __sd._M_pieces;
}
} //namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */

View File

@ -41,74 +41,74 @@
namespace __gnu_parallel
{
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop.
*
* @param __begin Begin iterator of element sequence.
* @param __end End iterator of element sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, etc.).
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param __r Functor to "add" a single __result to the already
* processed elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_omp_loop(
_RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef typename
std::iterator_traits<_RAIter>::difference_type
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop.
*
* @param __begin Begin iterator of element sequence.
* @param __end End iterator of element sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, etc.).
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param __r Functor to "add" a single __result to the already
* processed elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_omp_loop(_RAIter __begin, _RAIter __end,
_Op __o, _Fu& __f, _Red __r,
_Result __base,
_Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef typename std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
_Result *__thread_results;
_Result *__thread_results;
# pragma omp parallel num_threads(__num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
{
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
_ThreadIndex __iam = omp_get_thread_num();
#pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] =
__r(__thread_results[__iam], __f(__o, __begin+__pos));
__r(__thread_results[__iam], __f(__o, __begin+__pos));
} //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
delete [] __thread_results;
delete [] __thread_results;
// Points to last element processed (needed as return value for
// some algorithms like transform).
__f._M_finish_iterator = __begin + __length;
// Points to last element processed (needed as return value for
// some algorithms like transform).
__f._M_finish_iterator = __begin + __length;
return __o;
}
return __o;
}
} // end namespace

View File

@ -40,7 +40,6 @@
namespace __gnu_parallel
{
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop with static scheduling.
*
@ -58,37 +57,38 @@ namespace __gnu_parallel
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_omp_loop_static(
_RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef typename
std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_omp_loop_static(_RAIter __begin,
_RAIter __end, _Op __o,
_Fu& __f, _Red __r,
_Result __base,
_Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef typename std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __length);
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __length);
_Result *__thread_results;
_Result *__thread_results;
# pragma omp parallel num_threads(__num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
{
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
_ThreadIndex __iam = omp_get_thread_num();
@ -98,17 +98,17 @@ template<typename _RAIter,
__f(__o, __begin+__pos));
} //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
delete [] __thread_results;
delete [] __thread_results;
// Points to last element processed (needed as return value for
// some algorithms like transform).
__f.finish_iterator = __begin + __length;
// Points to last element processed (needed as return value for
// some algorithms like transform).
__f.finish_iterator = __begin + __length;
return __o;
}
return __o;
}
} // end namespace

View File

@ -40,94 +40,93 @@
namespace __gnu_parallel
{
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using hand-crafted parallelization by equal splitting
* the work.
*
* @param __begin Begin iterator of element sequence.
* @param __end End iterator of element sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, ...)
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param __r Functor to "add" a single __result to the already
* processed elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_ed(_RAIter __begin, _RAIter __end,
_Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
const _DifferenceType __length = __end - __begin;
_Result *__thread_results;
bool* __constructed;
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using hand-crafted parallelization by equal splitting
* the work.
*
* @param __begin Begin iterator of element sequence.
* @param __end End iterator of element sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, ...)
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param __r Functor to "add" a single __result to the already
* processed elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_ed(
_RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
const _DifferenceType __length = __end - __begin;
_Result *__thread_results;
bool* __constructed;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
# pragma omp parallel num_threads(__num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
__thread_results =
static_cast<_Result*>(
::operator new(__num_threads * sizeof(_Result)));
__constructed = new bool[__num_threads];
}
{
__num_threads = omp_get_num_threads();
__thread_results =
static_cast<_Result*>(::operator new(__num_threads
* sizeof(_Result)));
__constructed = new bool[__num_threads];
}
_ThreadIndex __iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// Neutral element.
_Result* __reduct =
static_cast<_Result*>(::operator new(sizeof(_Result)));
// Neutral element.
_Result* __reduct =
static_cast<_Result*>(::operator new(sizeof(_Result)));
_DifferenceType
__start = equally_split_point(__length, __num_threads, __iam),
__stop = equally_split_point(__length, __num_threads, __iam + 1);
_DifferenceType
__start = equally_split_point(__length, __num_threads, __iam),
__stop = equally_split_point(__length, __num_threads, __iam + 1);
if (__start < __stop)
{
new(__reduct) _Result(__f(__o, __begin + __start));
++__start;
__constructed[__iam] = true;
}
else
__constructed[__iam] = false;
if (__start < __stop)
{
new(__reduct) _Result(__f(__o, __begin + __start));
++__start;
__constructed[__iam] = true;
}
else
__constructed[__iam] = false;
for (; __start < __stop; ++__start)
*__reduct = __r(*__reduct, __f(__o, __begin + __start));
for (; __start < __stop; ++__start)
*__reduct = __r(*__reduct, __f(__o, __begin + __start));
__thread_results[__iam] = *__reduct;
__thread_results[__iam] = *__reduct;
} //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
if (__constructed[__i])
__output = __r(__output, __thread_results[__i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
if (__constructed[__i])
__output = __r(__output, __thread_results[__i]);
// Points to last element processed (needed as return value for
// some algorithms like transform).
__f._M_finish_iterator = __begin + __length;
// Points to last element processed (needed as return value for
// some algorithms like transform).
__f._M_finish_iterator = __begin + __length;
delete[] __thread_results;
delete[] __constructed;
delete[] __thread_results;
delete[] __constructed;
return __o;
}
return __o;
}
} // end namespace

View File

@ -43,106 +43,107 @@ namespace __gnu_parallel
{
// Problem: there is no 0-element given.
/** @brief Base case prefix sum routine.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @param __value Start value. Must be passed since the neutral
* element is unknown in general.
* @return End iterator of output sequence. */
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_basecase(
_IIter __begin, _IIter __end, _OutputIterator __result,
_BinaryOperation __bin_op,
typename std::iterator_traits <_IIter>::value_type __value)
{
if (__begin == __end)
/** @brief Base case prefix sum routine.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @param __value Start value. Must be passed since the neutral
* element is unknown in general.
* @return End iterator of output sequence. */
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
_OutputIterator __result,
_BinaryOperation __bin_op,
typename std::iterator_traits <_IIter>::value_type __value)
{
if (__begin == __end)
return __result;
while (__begin != __end)
{
__value = __bin_op(__value, *__begin);
*__result = __value;
++__result;
++__begin;
}
return __result;
}
while (__begin != __end)
{
__value = __bin_op(__value, *__begin);
*__result = __value;
++__result;
++__begin;
}
return __result;
}
/** @brief Parallel partial sum implementation, two-phase approach,
no recursion.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
* @return End iterator of output sequence.
*/
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_linear(_IIter __begin, _IIter __end,
_OutputIterator __result,
_BinaryOperation __bin_op,
typename std::iterator_traits<_IIter>::difference_type __n)
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Parallel partial sum implementation, two-phase approach,
no recursion.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
* @return End iterator of output sequence.
*/
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_linear(
_IIter __begin, _IIter __end, _OutputIterator __result,
_BinaryOperation __bin_op,
typename std::iterator_traits<_IIter>::difference_type __n)
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
if (__begin == __end)
return __result;
if (__begin == __end)
return __result;
_ThreadIndex __num_threads =
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __n - 1);
if (__num_threads < 2)
{
*__result = *__begin;
return __parallel_partial_sum_basecase(
__begin + 1, __end, __result + 1, __bin_op, *__begin);
}
if (__num_threads < 2)
{
*__result = *__begin;
return __parallel_partial_sum_basecase(__begin + 1, __end,
__result + 1, __bin_op,
*__begin);
}
_DifferenceType* __borders;
_ValueType* __sums;
_DifferenceType* __borders;
_ValueType* __sums;
const _Settings& __s = _Settings::get();
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(__num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
{
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
__borders = new _DifferenceType[__num_threads + 2];
if (__s.partial_sum_dilation == 1.0f)
equally_split(__n, __num_threads + 1, __borders);
else
{
_DifferenceType __chunk_length =
((double)__n
/ ((double)__num_threads + __s.partial_sum_dilation)),
__borderstart = __n - __num_threads * __chunk_length;
__borders[0] = 0;
for (int __i = 1; __i < (__num_threads + 1); ++__i)
{
__borders[__i] = __borderstart;
__borderstart += __chunk_length;
}
__borders[__num_threads + 1] = __n;
}
if (__s.partial_sum_dilation == 1.0f)
equally_split(__n, __num_threads + 1, __borders);
else
{
_DifferenceType __chunk_length =
((double)__n
/ ((double)__num_threads + __s.partial_sum_dilation)),
__borderstart = __n - __num_threads * __chunk_length;
__borders[0] = 0;
for (int __i = 1; __i < (__num_threads + 1); ++__i)
{
__borders[__i] = __borderstart;
__borderstart += __chunk_length;
}
__borders[__num_threads + 1] = __n;
}
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
* __num_threads));
_OutputIterator __target_end;
} //single
_OutputIterator __target_end;
} //single
_ThreadIndex __iam = omp_get_thread_num();
if (__iam == 0)
@ -166,58 +167,57 @@ template<typename _IIter,
# pragma omp barrier
# pragma omp single
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
__sums + 1, __bin_op, __sums[0]);
# pragma omp barrier
// Still same team.
__parallel_partial_sum_basecase(
__begin + __borders[__iam + 1],
__begin + __borders[__iam + 2],
__result + __borders[__iam + 1],
__bin_op, __sums[__iam]);
// Still same team.
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
__begin + __borders[__iam + 2],
__result + __borders[__iam + 1],
__bin_op, __sums[__iam]);
} //parallel
::operator delete(__sums);
delete[] __borders;
::operator delete(__sums);
delete[] __borders;
return __result + __n;
}
return __result + __n;
}
/** @brief Parallel partial sum front-__end.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @return End iterator of output sequence. */
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op)
{
_GLIBCXX_CALL(__begin - __end)
/** @brief Parallel partial sum front-__end.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __result Begin iterator of output sequence.
* @param __bin_op Associative binary function.
* @return End iterator of output sequence. */
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op)
{
_GLIBCXX_CALL(__begin - __end)
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
_DifferenceType __n = __end - __begin;
switch (_Settings::get().partial_sum_algorithm)
{
case LINEAR:
// Need an initial offset.
return __parallel_partial_sum_linear(
__begin, __end, __result, __bin_op, __n);
default:
// Partial_sum algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0);
return __result + __n;
}
}
switch (_Settings::get().partial_sum_algorithm)
{
case LINEAR:
// Need an initial offset.
return __parallel_partial_sum_linear(__begin, __end, __result,
__bin_op, __n);
default:
// Partial_sum algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0);
return __result + __n;
}
}
}
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */