multiway_merge.h: Destruct only elements that were have been constructed before.
2007-11-28 Johannes Singler <singler@ira.uka.de> * include/parallel/multiway_merge.h: Destruct only elements that were have been constructed before. Code beautifying and formatting. * include/parallel/losertree.h: (Copy) construct all loser tree item keys, so they can be deconstructed all at once. * include/parallel/quicksort.h: Fix memory leak. * include/parallel/random_shuffle.h: Use copy constructor instead of assignment. Code beautifying and formatting. * include/parallel/unique_copy.h: Use assignment instead of copy constructor. * include/parallel/multiway_mergesort.h: Use copy constructor instead of assignment. Code beautifying and formatting. * include/parallel/random_shuffle.h: Use copy constructor instead of assignment. Code beautifying. From-SVN: r130490
This commit is contained in:
parent
87300e8c81
commit
1661473b7f
|
@ -1,3 +1,19 @@
|
|||
2007-11-28 Johannes Singler <singler@ira.uka.de>
|
||||
|
||||
* include/parallel/multiway_merge.h: Destruct only elements that
|
||||
were have been constructed before. Code beautifying and formatting.
|
||||
* include/parallel/losertree.h: (Copy) construct all loser tree
|
||||
item keys, so they can be deconstructed all at once.
|
||||
* include/parallel/quicksort.h: Fix memory leak.
|
||||
* include/parallel/random_shuffle.h: Use copy constructor instead
|
||||
of assignment. Code beautifying and formatting.
|
||||
* include/parallel/unique_copy.h: Use assignment instead of copy
|
||||
constructor.
|
||||
* include/parallel/multiway_mergesort.h: Use copy constructor
|
||||
instead of assignment. Code beautifying and formatting.
|
||||
* include/parallel/random_shuffle.h: Use copy constructor instead
|
||||
of assignment. Code beautifying.
|
||||
|
||||
2007-11-27 Kaz Kojima <kkojima@gcc.gnu.org>
|
||||
|
||||
* testsuite/tr1/5_numerical_facilities/special_functions/
|
||||
|
|
|
@ -230,6 +230,7 @@ template<typename T, typename Comparator = std::less<T> >
|
|||
unsigned int ik, k, offset;
|
||||
Loser* losers;
|
||||
Comparator comp;
|
||||
bool first_insert;
|
||||
|
||||
public:
|
||||
inline LoserTree(unsigned int _k, Comparator _comp = std::less<T>())
|
||||
|
@ -240,9 +241,12 @@ template<typename T, typename Comparator = std::less<T> >
|
|||
// Next greater power of 2.
|
||||
k = 1 << (log2(ik - 1) + 1);
|
||||
offset = k;
|
||||
losers = static_cast<Loser*>(::operator new(k * 2 * sizeof(Loser)));
|
||||
for (unsigned int i = ik - 1; i < k; i++)
|
||||
// Avoid default-constructing losers[].key
|
||||
losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
|
||||
for (unsigned int i = ik - 1; i < k; ++i)
|
||||
losers[i + k].sup = true;
|
||||
|
||||
first_insert = true;
|
||||
}
|
||||
|
||||
inline ~LoserTree()
|
||||
|
@ -257,9 +261,18 @@ template<typename T, typename Comparator = std::less<T> >
|
|||
{
|
||||
unsigned int pos = k + source;
|
||||
|
||||
if(first_insert)
|
||||
{
|
||||
// Construct all keys, so we can easily deconstruct them.
|
||||
for (unsigned int i = 0; i < (2 * k); ++i)
|
||||
new(&(losers[i].key)) T(key);
|
||||
first_insert = false;
|
||||
}
|
||||
else
|
||||
new(&(losers[pos].key)) T(key);
|
||||
|
||||
losers[pos].sup = sup;
|
||||
losers[pos].source = source;
|
||||
new(&(losers[pos].key)) T(key);
|
||||
}
|
||||
|
||||
unsigned int
|
||||
|
@ -282,7 +295,8 @@ template<typename T, typename Comparator = std::less<T> >
|
|||
return left;
|
||||
}
|
||||
else
|
||||
{ // Right one is less.
|
||||
{
|
||||
// Right one is less.
|
||||
losers[root] = losers[left];
|
||||
return right;
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#endif
|
||||
|
||||
/** @brief Length of a sequence described by a pair of iterators. */
|
||||
#define LENGTH(s) ((s).second - (s).first)
|
||||
#define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
|
||||
|
||||
// XXX need iterator typedefs
|
||||
namespace __gnu_parallel
|
||||
|
@ -204,7 +204,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
inline unguarded_iterator<RandomAccessIterator, Comparator>&
|
||||
operator++()
|
||||
{
|
||||
current++;
|
||||
++current;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -293,7 +293,7 @@ template<typename RandomAccessIteratorIterator, typename Comparator>
|
|||
// Last element in sequence.
|
||||
value_type min = *((*seqs_begin).second - 1);
|
||||
min_sequence = 0;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
|
||||
for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
|
||||
{
|
||||
if ((*s).first == (*s).second)
|
||||
{
|
||||
|
@ -314,7 +314,7 @@ template<typename RandomAccessIteratorIterator, typename Comparator>
|
|||
difference_type overhang_size = 0;
|
||||
|
||||
int s = 0;
|
||||
for (s = 0; s <= min_sequence; s++)
|
||||
for (s = 0; s <= min_sequence; ++s)
|
||||
{
|
||||
RandomAccessIterator1 split;
|
||||
if (stable)
|
||||
|
@ -327,7 +327,7 @@ template<typename RandomAccessIteratorIterator, typename Comparator>
|
|||
overhang_size += seqs_begin[s].second - split;
|
||||
}
|
||||
|
||||
for (; s < (seqs_end - seqs_begin); s++)
|
||||
for (; s < (seqs_end - seqs_begin); ++s)
|
||||
{
|
||||
RandomAccessIterator1 split = std::lower_bound(
|
||||
seqs_begin[s].first, seqs_begin[s].second, min, comp);
|
||||
|
@ -363,7 +363,7 @@ template<typename RandomAccessIteratorIterator, typename Comparator>
|
|||
|
||||
// Last element in sequence.
|
||||
value_type* max = NULL;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
{
|
||||
if ((*s).first == (*s).second)
|
||||
continue;
|
||||
|
@ -377,7 +377,7 @@ template<typename RandomAccessIteratorIterator, typename Comparator>
|
|||
}
|
||||
|
||||
difference_type overhang_size = 0;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
{
|
||||
RandomAccessIterator1 split =
|
||||
std::lower_bound((*s).first, (*s).second, *max, comp);
|
||||
|
@ -453,25 +453,25 @@ template<
|
|||
goto s210;
|
||||
}
|
||||
|
||||
#define Merge3Case(a,b,c,c0,c1) \
|
||||
s ## a ## b ## c : \
|
||||
*target = *seq ## a; \
|
||||
++target; \
|
||||
length--; \
|
||||
++seq ## a; \
|
||||
if (length == 0) goto finish; \
|
||||
if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
|
||||
if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
|
||||
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
|
||||
s ## a ## b ## c : \
|
||||
*target = *seq ## a; \
|
||||
++target; \
|
||||
--length; \
|
||||
++seq ## a; \
|
||||
if (length == 0) goto finish; \
|
||||
if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
|
||||
if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
|
||||
goto s ## b ## c ## a;
|
||||
|
||||
Merge3Case(0, 1, 2, <=, <=);
|
||||
Merge3Case(1, 2, 0, <=, < );
|
||||
Merge3Case(2, 0, 1, < , < );
|
||||
Merge3Case(1, 0, 2, < , <=);
|
||||
Merge3Case(0, 2, 1, <=, <=);
|
||||
Merge3Case(2, 1, 0, < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
|
||||
_GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );
|
||||
|
||||
#undef Merge3Case
|
||||
#undef _GLIBCXX_PARALLEL_MERGE_3_CASE
|
||||
|
||||
finish:
|
||||
;
|
||||
|
@ -513,7 +513,7 @@ template<
|
|||
|
||||
difference_type total_length = 0;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
total_length += LENGTH(*s);
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
|
||||
|
||||
if (overhang != -1)
|
||||
{
|
||||
|
@ -600,7 +600,7 @@ template<
|
|||
seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
|
||||
seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
|
||||
|
||||
#define Decision(a,b,c,d) { \
|
||||
#define _GLIBCXX_PARALLEL_DECISION(a,b,c,d) { \
|
||||
if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
|
||||
if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
|
||||
if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
|
||||
|
@ -609,65 +609,65 @@ template<
|
|||
if (seq0 <= seq1)
|
||||
{
|
||||
if (seq1 <= seq2)
|
||||
Decision(0,1,2,3)
|
||||
_GLIBCXX_PARALLEL_DECISION(0,1,2,3)
|
||||
else
|
||||
if (seq2 < seq0)
|
||||
Decision(2,0,1,3)
|
||||
_GLIBCXX_PARALLEL_DECISION(2,0,1,3)
|
||||
else
|
||||
Decision(0,2,1,3)
|
||||
_GLIBCXX_PARALLEL_DECISION(0,2,1,3)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (seq1 <= seq2)
|
||||
{
|
||||
if (seq0 <= seq2)
|
||||
Decision(1,0,2,3)
|
||||
_GLIBCXX_PARALLEL_DECISION(1,0,2,3)
|
||||
else
|
||||
Decision(1,2,0,3)
|
||||
_GLIBCXX_PARALLEL_DECISION(1,2,0,3)
|
||||
}
|
||||
else
|
||||
Decision(2,1,0,3)
|
||||
_GLIBCXX_PARALLEL_DECISION(2,1,0,3)
|
||||
}
|
||||
|
||||
#define Merge4Case(a,b,c,d,c0,c1,c2) \
|
||||
s ## a ## b ## c ## d: \
|
||||
if (length == 0) goto finish; \
|
||||
*target = *seq ## a; \
|
||||
++target; \
|
||||
length--; \
|
||||
++seq ## a; \
|
||||
if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
|
||||
if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
|
||||
if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
|
||||
#define _GLIBCXX_PARALLEL_MERGE_4_CASE(a,b,c,d,c0,c1,c2) \
|
||||
s ## a ## b ## c ## d: \
|
||||
if (length == 0) goto finish; \
|
||||
*target = *seq ## a; \
|
||||
++target; \
|
||||
--length; \
|
||||
++seq ## a; \
|
||||
if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
|
||||
if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
|
||||
if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
|
||||
goto s ## b ## c ## d ## a;
|
||||
|
||||
Merge4Case(0, 1, 2, 3, <=, <=, <=);
|
||||
Merge4Case(0, 1, 3, 2, <=, <=, <=);
|
||||
Merge4Case(0, 2, 1, 3, <=, <=, <=);
|
||||
Merge4Case(0, 2, 3, 1, <=, <=, <=);
|
||||
Merge4Case(0, 3, 1, 2, <=, <=, <=);
|
||||
Merge4Case(0, 3, 2, 1, <=, <=, <=);
|
||||
Merge4Case(1, 0, 2, 3, < , <=, <=);
|
||||
Merge4Case(1, 0, 3, 2, < , <=, <=);
|
||||
Merge4Case(1, 2, 0, 3, <=, < , <=);
|
||||
Merge4Case(1, 2, 3, 0, <=, <=, < );
|
||||
Merge4Case(1, 3, 0, 2, <=, < , <=);
|
||||
Merge4Case(1, 3, 2, 0, <=, <=, < );
|
||||
Merge4Case(2, 0, 1, 3, < , < , <=);
|
||||
Merge4Case(2, 0, 3, 1, < , <=, < );
|
||||
Merge4Case(2, 1, 0, 3, < , < , <=);
|
||||
Merge4Case(2, 1, 3, 0, < , <=, < );
|
||||
Merge4Case(2, 3, 0, 1, <=, < , < );
|
||||
Merge4Case(2, 3, 1, 0, <=, < , < );
|
||||
Merge4Case(3, 0, 1, 2, < , < , < );
|
||||
Merge4Case(3, 0, 2, 1, < , < , < );
|
||||
Merge4Case(3, 1, 0, 2, < , < , < );
|
||||
Merge4Case(3, 1, 2, 0, < , < , < );
|
||||
Merge4Case(3, 2, 0, 1, < , < , < );
|
||||
Merge4Case(3, 2, 1, 0, < , < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
|
||||
_GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );
|
||||
|
||||
#undef Merge4Case
|
||||
#undef Decision
|
||||
#undef _GLIBCXX_PARALLEL_MERGE_4_CASE
|
||||
#undef _GLIBCXX_PARALLEL_DECISION
|
||||
|
||||
finish:
|
||||
;
|
||||
|
@ -710,7 +710,7 @@ template<
|
|||
|
||||
difference_type total_length = 0;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
total_length += LENGTH(*s);
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
|
||||
|
||||
if (overhang != -1)
|
||||
{
|
||||
|
@ -785,48 +785,47 @@ template<
|
|||
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
||||
value_type;
|
||||
|
||||
// Num remaining pieces.
|
||||
int k = static_cast<int>(seqs_end - seqs_begin), nrp;
|
||||
int k = static_cast<int>(seqs_end - seqs_begin);
|
||||
int nrs; // Number of remaining sequences.
|
||||
|
||||
value_type* pl = static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * k));
|
||||
// Avoid default constructor.
|
||||
value_type* fe = static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * k)); // Front elements.
|
||||
int* source = new int[k];
|
||||
difference_type total_length = 0;
|
||||
|
||||
#define POS(i) seqs_begin[(i)].first
|
||||
#define STOPS(i) seqs_begin[(i)].second
|
||||
|
||||
// Write entries into queue.
|
||||
nrp = 0;
|
||||
for (int pi = 0; pi < k; pi++)
|
||||
nrs = 0;
|
||||
for (int pi = 0; pi < k; ++pi)
|
||||
{
|
||||
if (STOPS(pi) != POS(pi))
|
||||
if (seqs_begin[pi].first != seqs_begin[pi].second)
|
||||
{
|
||||
pl[nrp] = *(POS(pi));
|
||||
source[nrp] = pi;
|
||||
nrp++;
|
||||
total_length += LENGTH(seqs_begin[pi]);
|
||||
new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
|
||||
source[nrs] = pi;
|
||||
++nrs;
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
|
||||
}
|
||||
}
|
||||
|
||||
if (stable)
|
||||
{
|
||||
for (int k = 0; k < nrp - 1; k++)
|
||||
for (int pi = nrp - 1; pi > k; pi--)
|
||||
if (comp(pl[pi], pl[pi - 1]) ||
|
||||
(!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
|
||||
// Bubble sort fe and source by fe.
|
||||
for (int k = 0; k < nrs - 1; ++k)
|
||||
for (int pi = nrs - 1; pi > k; --pi)
|
||||
if (comp(fe[pi], fe[pi - 1]) ||
|
||||
(!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
|
||||
{
|
||||
std::swap(pl[pi - 1], pl[pi]);
|
||||
std::swap(fe[pi - 1], fe[pi]);
|
||||
std::swap(source[pi - 1], source[pi]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int k = 0; k < nrp - 1; k++)
|
||||
for (int pi = nrp - 1; pi > k; pi--)
|
||||
if (comp(pl[pi], pl[pi-1]))
|
||||
for (int k = 0; k < nrs - 1; ++k)
|
||||
for (int pi = nrs - 1; pi > k; --pi)
|
||||
if (comp(fe[pi], fe[pi-1]))
|
||||
{
|
||||
std::swap(pl[pi-1], pl[pi]);
|
||||
std::swap(fe[pi-1], fe[pi]);
|
||||
std::swap(source[pi-1], source[pi]);
|
||||
}
|
||||
}
|
||||
|
@ -835,106 +834,109 @@ template<
|
|||
if (stable)
|
||||
{
|
||||
int j;
|
||||
while (nrp > 0 && length > 0)
|
||||
while (nrs > 0 && length > 0)
|
||||
{
|
||||
if (source[0] < source[1])
|
||||
{
|
||||
// pl[0] <= pl[1]
|
||||
while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
|
||||
// fe[0] <= fe[1]
|
||||
while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
|
||||
{
|
||||
*target = pl[0];
|
||||
*target = fe[0];
|
||||
++target;
|
||||
++POS(source[0]);
|
||||
length--;
|
||||
if (POS(source[0]) == STOPS(source[0]))
|
||||
++(seqs_begin[source[0]].first);
|
||||
--length;
|
||||
if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
|
||||
{
|
||||
// Move everything to the left.
|
||||
for (int s = 0; s < nrp - 1; s++)
|
||||
for (int s = 0; s < nrs - 1; ++s)
|
||||
{
|
||||
pl[s] = pl[s + 1];
|
||||
fe[s] = fe[s + 1];
|
||||
source[s] = source[s + 1];
|
||||
}
|
||||
nrp--;
|
||||
fe[nrs - 1].~value_type(); //Destruct explicitly.
|
||||
--nrs;
|
||||
break;
|
||||
}
|
||||
else
|
||||
pl[0] = *(POS(source[0]));
|
||||
fe[0] = *(seqs_begin[source[0]].first);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// pl[0] < pl[1]
|
||||
while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
|
||||
// fe[0] < fe[1]
|
||||
while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
|
||||
{
|
||||
*target = pl[0];
|
||||
*target = fe[0];
|
||||
++target;
|
||||
++POS(source[0]);
|
||||
length--;
|
||||
if (POS(source[0]) == STOPS(source[0]))
|
||||
++(seqs_begin[source[0]].first);
|
||||
--length;
|
||||
if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
|
||||
{
|
||||
for (int s = 0; s < nrp - 1; s++)
|
||||
for (int s = 0; s < nrs - 1; ++s)
|
||||
{
|
||||
pl[s] = pl[s + 1];
|
||||
fe[s] = fe[s + 1];
|
||||
source[s] = source[s + 1];
|
||||
}
|
||||
nrp--;
|
||||
fe[nrs - 1].~value_type(); //Destruct explicitly.
|
||||
--nrs;
|
||||
break;
|
||||
}
|
||||
else
|
||||
pl[0] = *(POS(source[0]));
|
||||
fe[0] = *(seqs_begin[source[0]].first);
|
||||
}
|
||||
}
|
||||
|
||||
// Sink down.
|
||||
j = 1;
|
||||
while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
|
||||
(!comp(pl[j - 1], pl[j])
|
||||
while ((j < nrs) && (comp(fe[j], fe[j - 1]) ||
|
||||
(!comp(fe[j - 1], fe[j])
|
||||
&& (source[j] < source[j - 1]))))
|
||||
{
|
||||
std::swap(pl[j - 1], pl[j]);
|
||||
std::swap(fe[j - 1], fe[j]);
|
||||
std::swap(source[j - 1], source[j]);
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int j;
|
||||
while (nrp > 0 && length > 0)
|
||||
while (nrs > 0 && length > 0)
|
||||
{
|
||||
// pl[0] <= pl[1]
|
||||
while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
|
||||
// fe[0] <= fe[1]
|
||||
while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
|
||||
{
|
||||
*target = pl[0];
|
||||
*target = fe[0];
|
||||
++target;
|
||||
++POS(source[0]);
|
||||
length--;
|
||||
if (POS(source[0]) == STOPS(source[0]))
|
||||
++seqs_begin[source[0]].first;
|
||||
--length;
|
||||
if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
|
||||
{
|
||||
for (int s = 0; s < (nrp - 1); s++)
|
||||
for (int s = 0; s < (nrs - 1); ++s)
|
||||
{
|
||||
pl[s] = pl[s + 1];
|
||||
fe[s] = fe[s + 1];
|
||||
source[s] = source[s + 1];
|
||||
}
|
||||
nrp--;
|
||||
fe[nrs - 1].~value_type(); //Destruct explicitly.
|
||||
--nrs;
|
||||
break;
|
||||
}
|
||||
else
|
||||
pl[0] = *(POS(source[0]));
|
||||
fe[0] = *(seqs_begin[source[0]].first);
|
||||
}
|
||||
|
||||
// Sink down.
|
||||
j = 1;
|
||||
while ((j < nrp) && comp(pl[j], pl[j - 1]))
|
||||
while ((j < nrs) && comp(fe[j], fe[j - 1]))
|
||||
{
|
||||
std::swap(pl[j - 1], pl[j]);
|
||||
std::swap(fe[j - 1], fe[j]);
|
||||
std::swap(source[j - 1], source[j]);
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] pl;
|
||||
delete fe; //Destructors already called.
|
||||
delete[] source;
|
||||
|
||||
return target;
|
||||
|
@ -983,17 +985,17 @@ template<
|
|||
// Default value for potentially non-default-constructible types.
|
||||
value_type* arbitrary_element = NULL;
|
||||
|
||||
for (int t = 0; t < k; t++)
|
||||
for (int t = 0; t < k; ++t)
|
||||
{
|
||||
if(arbitrary_element == NULL && LENGTH(seqs_begin[t]) > 0)
|
||||
if(arbitrary_element == NULL && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
|
||||
arbitrary_element = &(*seqs_begin[t].first);
|
||||
total_length += LENGTH(seqs_begin[t]);
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
|
||||
}
|
||||
|
||||
if(total_length == 0)
|
||||
return target;
|
||||
|
||||
for (int t = 0; t < k; t++)
|
||||
for (int t = 0; t < k; ++t)
|
||||
{
|
||||
if (stable)
|
||||
{
|
||||
|
@ -1022,7 +1024,7 @@ template<
|
|||
|
||||
if (stable)
|
||||
{
|
||||
for (difference_type i = 0; i < total_length; i++)
|
||||
for (difference_type i = 0; i < total_length; ++i)
|
||||
{
|
||||
// Take out.
|
||||
source = lt.get_min_source();
|
||||
|
@ -1040,7 +1042,7 @@ template<
|
|||
}
|
||||
else
|
||||
{
|
||||
for (difference_type i = 0; i < total_length; i++)
|
||||
for (difference_type i = 0; i < total_length; ++i)
|
||||
{
|
||||
//take out
|
||||
source = lt.get_min_source();
|
||||
|
@ -1099,7 +1101,7 @@ template<
|
|||
|
||||
difference_type total_length = 0;
|
||||
|
||||
for (int t = 0; t < k; t++)
|
||||
for (int t = 0; t < k; ++t)
|
||||
{
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
|
||||
|
@ -1109,7 +1111,7 @@ template<
|
|||
else
|
||||
lt.insert_start(*seqs_begin[t].first, t, false);
|
||||
|
||||
total_length += LENGTH(seqs_begin[t]);
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
|
||||
}
|
||||
|
||||
if (stable)
|
||||
|
@ -1145,7 +1147,7 @@ template<
|
|||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
(seqs_begin[source].first != seqs_begin[source].second)
|
||||
|| (i == length - 1));
|
||||
i++;
|
||||
++i;
|
||||
#endif
|
||||
// Feed.
|
||||
// Replace from same source.
|
||||
|
@ -1177,7 +1179,7 @@ template<
|
|||
_GLIBCXX_PARALLEL_ASSERT(
|
||||
(seqs_begin[source].first != seqs_begin[source].second)
|
||||
|| (i >= length - 1));
|
||||
i++;
|
||||
++i;
|
||||
#endif
|
||||
// Feed.
|
||||
// Replace from same source.
|
||||
|
@ -1216,8 +1218,8 @@ template<
|
|||
comp, min_seq, stable);
|
||||
|
||||
difference_type total_length = 0;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
||||
total_length += LENGTH(*s);
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
|
||||
|
||||
if (overhang != -1)
|
||||
{
|
||||
|
@ -1279,12 +1281,12 @@ template<
|
|||
prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
|
||||
|
||||
difference_type total_length = 0;
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
{
|
||||
total_length += LENGTH(*s);
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
|
||||
|
||||
// Sentinel spot.
|
||||
(*s).second++;
|
||||
++((*s).second);
|
||||
}
|
||||
|
||||
difference_type unguarded_length =
|
||||
|
@ -1301,12 +1303,12 @@ template<
|
|||
|
||||
// Copy rest stable.
|
||||
for (RandomAccessIteratorIterator s = seqs_begin;
|
||||
s != seqs_end && overhang > 0; s++)
|
||||
s != seqs_end && overhang > 0; ++s)
|
||||
{
|
||||
// Restore.
|
||||
(*s).second--;
|
||||
--((*s).second);
|
||||
difference_type local_length =
|
||||
std::min<difference_type>(overhang, LENGTH(*s));
|
||||
std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
|
||||
target_end = std::copy((*s).first, (*s).first + local_length,
|
||||
target_end);
|
||||
(*s).first += local_length;
|
||||
|
@ -1324,7 +1326,7 @@ template<
|
|||
|
||||
/** @brief Sequential multi-way merging switch.
|
||||
*
|
||||
* The decision if based on the branching factor and runtime settings.
|
||||
* The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and runtime settings.
|
||||
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
||||
* @param seqs_end End iterator of iterator pair input sequence.
|
||||
* @param target Begin iterator out output sequence.
|
||||
|
@ -1356,7 +1358,7 @@ template<
|
|||
value_type;
|
||||
|
||||
#if _GLIBCXX_ASSERTIONS
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
||||
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
||||
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
|
||||
#endif
|
||||
|
||||
|
@ -1502,7 +1504,7 @@ template<
|
|||
|
||||
/** @brief Parallel multi-way merge routine.
|
||||
*
|
||||
* The decision if based on the branching factor and runtime settings.
|
||||
* The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and runtime settings.
|
||||
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
||||
* @param seqs_end End iterator of iterator pair input sequence.
|
||||
* @param target Begin iterator out output sequence.
|
||||
|
@ -1538,8 +1540,8 @@ template<
|
|||
|
||||
difference_type total_length = 0;
|
||||
for (RandomAccessIteratorIterator raii = seqs_begin;
|
||||
raii != seqs_end; raii++)
|
||||
total_length += LENGTH(*raii);
|
||||
raii != seqs_end; ++raii)
|
||||
total_length += _GLIBCXX_PARALLEL_LENGTH(*raii);
|
||||
|
||||
_GLIBCXX_CALL(total_length)
|
||||
|
||||
|
@ -1561,7 +1563,7 @@ template<
|
|||
// Thread t will have to merge pieces[iam][0..k - 1]
|
||||
pieces = new std::vector<
|
||||
std::pair<difference_type, difference_type> >[num_threads];
|
||||
for (int s = 0; s < num_threads; s++)
|
||||
for (int s = 0; s < num_threads; ++s)
|
||||
pieces[s].resize(k);
|
||||
|
||||
difference_type num_samples =
|
||||
|
@ -1572,16 +1574,16 @@ template<
|
|||
value_type* samples = static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * k * num_samples));
|
||||
// Sample.
|
||||
for (int s = 0; s < k; s++)
|
||||
for (int i = 0; (difference_type)i < num_samples; i++)
|
||||
for (int s = 0; s < k; ++s)
|
||||
for (difference_type i = 0; i < num_samples; ++i)
|
||||
{
|
||||
difference_type sample_index =
|
||||
static_cast<difference_type>(
|
||||
LENGTH(seqs_begin[s]) * (double(i + 1) /
|
||||
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) /
|
||||
(num_samples + 1)) * (double(length)
|
||||
/ total_length));
|
||||
samples[s * num_samples + i] =
|
||||
seqs_begin[s].first[sample_index];
|
||||
new(&(samples[s * num_samples + i])) value_type(
|
||||
seqs_begin[s].first[sample_index]);
|
||||
}
|
||||
|
||||
if (stable)
|
||||
|
@ -1591,9 +1593,9 @@ template<
|
|||
__gnu_sequential::sort(
|
||||
samples, samples + (num_samples * k), comp);
|
||||
|
||||
for (int slab = 0; slab < num_threads; slab++)
|
||||
for (int slab = 0; slab < num_threads; ++slab)
|
||||
// For each slab / processor.
|
||||
for (int seq = 0; seq < k; seq++)
|
||||
for (int seq = 0; seq < k; ++seq)
|
||||
{
|
||||
// For each sequence.
|
||||
if (slab > 0)
|
||||
|
@ -1618,7 +1620,7 @@ template<
|
|||
num_threads], comp)
|
||||
- seqs_begin[seq].first;
|
||||
else
|
||||
pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
|
||||
pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
|
||||
}
|
||||
delete[] samples;
|
||||
}
|
||||
|
@ -1637,7 +1639,7 @@ template<
|
|||
new difference_type[num_threads + 1];
|
||||
equally_split(length, num_threads, borders);
|
||||
|
||||
for (int s = 0; s < (num_threads - 1); s++)
|
||||
for (int s = 0; s < (num_threads - 1); ++s)
|
||||
{
|
||||
offsets[s].resize(k);
|
||||
multiseq_partition(
|
||||
|
@ -1655,10 +1657,10 @@ template<
|
|||
}
|
||||
|
||||
|
||||
for (int slab = 0; slab < num_threads; slab++)
|
||||
for (int slab = 0; slab < num_threads; ++slab)
|
||||
{
|
||||
// For each slab / processor.
|
||||
for (int seq = 0; seq < k; seq++)
|
||||
for (int seq = 0; seq < k; ++seq)
|
||||
{
|
||||
// For each sequence.
|
||||
if (slab == 0)
|
||||
|
@ -1676,7 +1678,7 @@ template<
|
|||
{
|
||||
// slab == num_threads - 1
|
||||
pieces[slab][seq].second =
|
||||
LENGTH(seqs_begin[seq]);
|
||||
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1688,7 +1690,7 @@ template<
|
|||
|
||||
difference_type target_position = 0;
|
||||
|
||||
for (int c = 0; c < k; c++)
|
||||
for (int c = 0; c < k; ++c)
|
||||
target_position += pieces[iam][c].first;
|
||||
|
||||
if (k > 2)
|
||||
|
@ -1698,12 +1700,12 @@ template<
|
|||
std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
|
||||
|
||||
difference_type local_length = 0;
|
||||
for (int s = 0; s < k; s++)
|
||||
for (int s = 0; s < k; ++s)
|
||||
{
|
||||
chunks[s] = std::make_pair(
|
||||
seqs_begin[s].first + pieces[iam][s].first,
|
||||
seqs_begin[s].first + pieces[iam][s].second);
|
||||
local_length += LENGTH(chunks[s]);
|
||||
local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
|
||||
}
|
||||
|
||||
multiway_merge(
|
||||
|
@ -1734,7 +1736,7 @@ template<
|
|||
#endif
|
||||
|
||||
// Update ends of sequences.
|
||||
for (int s = 0; s < k; s++)
|
||||
for (int s = 0; s < k; ++s)
|
||||
seqs_begin[s].first += pieces[num_threads - 1][s].second;
|
||||
|
||||
delete[] pieces;
|
||||
|
|
|
@ -124,6 +124,8 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
|
|||
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
|
||||
_DifferenceTp& num_samples)
|
||||
{
|
||||
typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef _DifferenceTp difference_type;
|
||||
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
|
@ -137,8 +139,8 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
|
|||
num_samples + 1, es);
|
||||
|
||||
for (difference_type i = 0; i < num_samples; i++)
|
||||
sd->samples[iam * num_samples + i] =
|
||||
sd->source[sd->starts[iam] + es[i + 1]];
|
||||
new(&(sd->samples[iam * num_samples + i])) value_type(
|
||||
sd->source[sd->starts[iam] + es[i + 1]]);
|
||||
|
||||
delete[] es;
|
||||
}
|
||||
|
@ -213,7 +215,8 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
if (num_samples * iam > 0)
|
||||
sd->pieces[iam][s].begin =
|
||||
std::lower_bound(sd->sorting_places[s],
|
||||
sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
|
||||
sd->sorting_places[s]
|
||||
+ (sd->starts[s + 1] - sd->starts[s]),
|
||||
sd->samples[num_samples * iam],
|
||||
comp)
|
||||
- sd->sorting_places[s];
|
||||
|
@ -224,8 +227,10 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
|
||||
sd->pieces[iam][s].end =
|
||||
std::lower_bound(sd->sorting_places[s],
|
||||
sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
|
||||
sd->samples[num_samples * (iam + 1)], comp)
|
||||
sd->sorting_places[s]
|
||||
+ (sd->starts[s + 1] - sd->starts[s]),
|
||||
sd->samples[num_samples * (iam + 1)],
|
||||
comp)
|
||||
- sd->sorting_places[s];
|
||||
else
|
||||
// Absolute end.
|
||||
|
@ -240,7 +245,8 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
seqs(sd->num_threads);
|
||||
for (int s = 0; s < sd->num_threads; s++)
|
||||
seqs[s] = std::make_pair(sd->sorting_places[s],
|
||||
sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
|
||||
sd->sorting_places[s]
|
||||
+ (sd->starts[s + 1] - sd->starts[s]));
|
||||
|
||||
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
|
||||
|
||||
|
@ -256,7 +262,8 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
|
||||
else
|
||||
// very end of this sequence
|
||||
sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
|
||||
sd->pieces[iam][seq].end =
|
||||
sd->starts[seq + 1] - sd->starts[seq];
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
@ -284,6 +291,7 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
// Merge to temporary storage, uninitialized creation not possible
|
||||
// since there is no multiway_merge calling the placement new
|
||||
// instead of the assignment operator.
|
||||
// XXX incorrect (de)construction
|
||||
sd->merging_places[iam] = sd->temporaries[iam] =
|
||||
static_cast<value_type*>(
|
||||
::operator new(sizeof(value_type) * length_am));
|
||||
|
@ -296,11 +304,13 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
|
||||
for (int s = 0; s < sd->num_threads; s++)
|
||||
{
|
||||
seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
|
||||
sd->sorting_places[s] + sd->pieces[iam][s].end);
|
||||
seqs[s] =
|
||||
std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
|
||||
sd->sorting_places[s] + sd->pieces[iam][s].end);
|
||||
}
|
||||
|
||||
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, sd->stable, false, sequential_tag());
|
||||
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
|
||||
length_am, sd->stable, false, sequential_tag());
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
|
@ -326,7 +336,8 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
inline void
|
||||
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
|
||||
Comparator comp,
|
||||
typename std::iterator_traits<RandomAccessIterator>::difference_type n,
|
||||
typename std::iterator_traits<RandomAccessIterator>
|
||||
::difference_type n,
|
||||
int num_threads,
|
||||
bool stable)
|
||||
{
|
||||
|
@ -368,7 +379,8 @@ template<typename RandomAccessIterator, typename Comparator>
|
|||
if (Settings::sort_splitting == Settings::SAMPLING)
|
||||
{
|
||||
unsigned int size =
|
||||
(Settings::sort_mwms_oversampling * num_threads - 1) * num_threads;
|
||||
(Settings::sort_mwms_oversampling * num_threads - 1)
|
||||
* num_threads;
|
||||
sd.samples = static_cast<value_type*>(
|
||||
::operator new(size * sizeof(value_type)));
|
||||
}
|
||||
|
|
|
@ -73,8 +73,8 @@ template<
|
|||
{
|
||||
value = bin_op(value, *begin);
|
||||
*result = value;
|
||||
result++;
|
||||
begin++;
|
||||
++result;
|
||||
++begin;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -103,6 +103,9 @@ template<
|
|||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
|
||||
if (begin == end)
|
||||
return result;
|
||||
|
||||
thread_index_t num_threads =
|
||||
std::min<difference_type>(get_max_threads(), n - 1);
|
||||
|
||||
|
@ -133,7 +136,7 @@ template<
|
|||
((double)num_threads + Settings::partial_sum_dilatation)),
|
||||
borderstart = n - num_threads * chunk_length;
|
||||
borders[0] = 0;
|
||||
for (int i = 1; i < (num_threads + 1); i++)
|
||||
for (int i = 1; i < (num_threads + 1); ++i)
|
||||
{
|
||||
borders[i] = borderstart;
|
||||
borderstart += chunk_length;
|
||||
|
@ -146,20 +149,21 @@ template<
|
|||
OutputIterator target_end;
|
||||
} //single
|
||||
|
||||
int iam = omp_get_thread_num();
|
||||
thread_index_t iam = omp_get_thread_num();
|
||||
if (iam == 0)
|
||||
{
|
||||
*result = *begin;
|
||||
parallel_partial_sum_basecase(begin + 1, begin + borders[1],
|
||||
result + 1, bin_op, *begin);
|
||||
sums[0] = *(result + borders[1] - 1);
|
||||
new(&(sums[iam])) value_type(*(result + borders[1] - 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
sums[iam] = std::accumulate(begin + borders[iam] + 1,
|
||||
begin + borders[iam + 1],
|
||||
*(begin + borders[iam]),
|
||||
bin_op, __gnu_parallel::sequential_tag());
|
||||
new(&(sums[iam])) value_type(
|
||||
std::accumulate(begin + borders[iam] + 1,
|
||||
begin + borders[iam + 1],
|
||||
*(begin + borders[iam]),
|
||||
bin_op, __gnu_parallel::sequential_tag()));
|
||||
}
|
||||
|
||||
# pragma omp barrier
|
||||
|
|
|
@ -74,13 +74,13 @@ namespace __gnu_parallel
|
|||
|
||||
// Allocate uninitialized, to avoid default constructor.
|
||||
value_type* samples = static_cast<value_type*>(
|
||||
operator new(num_samples * sizeof(value_type)));
|
||||
::operator new(num_samples * sizeof(value_type)));
|
||||
|
||||
for (difference_type s = 0; s < num_samples; s++)
|
||||
for (difference_type s = 0; s < num_samples; ++s)
|
||||
{
|
||||
const unsigned long long index = static_cast<unsigned long long>(s)
|
||||
* n / num_samples;
|
||||
new(samples + s) value_type(begin[index]);
|
||||
new(&(samples[s])) value_type(begin[index]);
|
||||
}
|
||||
|
||||
__gnu_sequential::sort(samples, samples + num_samples, comp);
|
||||
|
@ -91,6 +91,8 @@ namespace __gnu_parallel
|
|||
pred(comp, pivot);
|
||||
difference_type split = parallel_partition(begin, end, pred, num_threads);
|
||||
|
||||
delete[] samples;
|
||||
|
||||
return split;
|
||||
}
|
||||
|
||||
|
|
|
@ -144,23 +144,23 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
value_type** temporaries = new value_type*[d->num_threads];
|
||||
|
||||
// Compute oracles and count appearances.
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; b++)
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
||||
dist[b] = 0;
|
||||
int num_bits = sd->num_bits;
|
||||
|
||||
random_number rng(d->seed);
|
||||
|
||||
// First main loop.
|
||||
for (difference_type i = 0; i < length; i++)
|
||||
for (difference_type i = 0; i < length; ++i)
|
||||
{
|
||||
bin_index oracle = random_number_pow2(num_bits, rng);
|
||||
oracles[i] = oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
dist[oracle + 1]++;
|
||||
++(dist[oracle + 1]);
|
||||
}
|
||||
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; b++)
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
||||
sd->dist[b][iam + 1] = dist[b];
|
||||
|
||||
# pragma omp barrier
|
||||
|
@ -169,7 +169,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
{
|
||||
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
|
||||
// total number of items in bin s
|
||||
for (bin_index s = 0; s < sd->num_bins; s++)
|
||||
for (bin_index s = 0; s < sd->num_bins; ++s)
|
||||
__gnu_sequential::partial_sum(sd->dist[s + 1],
|
||||
sd->dist[s + 1] + d->num_threads + 1,
|
||||
sd->dist[s + 1]);
|
||||
|
@ -178,14 +178,14 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
# pragma omp barrier
|
||||
|
||||
sequence_index_t offset = 0, global_offset = 0;
|
||||
for (bin_index s = 0; s < d->bins_begin; s++)
|
||||
for (bin_index s = 0; s < d->bins_begin; ++s)
|
||||
global_offset += sd->dist[s + 1][d->num_threads];
|
||||
|
||||
# pragma omp barrier
|
||||
|
||||
for (bin_index s = d->bins_begin; s < d->bins_end; s++)
|
||||
for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
|
||||
{
|
||||
for (int t = 0; t < d->num_threads + 1; t++)
|
||||
for (int t = 0; t < d->num_threads + 1; ++t)
|
||||
sd->dist[s + 1][t] += offset;
|
||||
offset = sd->dist[s + 1][d->num_threads];
|
||||
}
|
||||
|
@ -196,24 +196,25 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
# pragma omp barrier
|
||||
|
||||
// Draw local copies to avoid false sharing.
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; b++)
|
||||
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
||||
dist[b] = sd->dist[b][iam];
|
||||
for (bin_index b = 0; b < sd->num_bins; b++)
|
||||
for (bin_index b = 0; b < sd->num_bins; ++b)
|
||||
bin_proc[b] = sd->bin_proc[b];
|
||||
for (thread_index_t t = 0; t < d->num_threads; t++)
|
||||
for (thread_index_t t = 0; t < d->num_threads; ++t)
|
||||
temporaries[t] = sd->temporaries[t];
|
||||
|
||||
RandomAccessIterator source = sd->source;
|
||||
difference_type start = sd->starts[iam];
|
||||
|
||||
// Distribute according to oracles, second main loop.
|
||||
for (difference_type i = 0; i < length; i++)
|
||||
for (difference_type i = 0; i < length; ++i)
|
||||
{
|
||||
bin_index target_bin = oracles[i];
|
||||
thread_index_t target_p = bin_proc[target_bin];
|
||||
|
||||
// Last column [d->num_threads] stays unchanged.
|
||||
temporaries[target_p][dist[target_bin + 1]++] = *(source + i + start);
|
||||
new(&(temporaries[target_p][dist[target_bin + 1]++])) value_type(
|
||||
*(source + i + start));
|
||||
}
|
||||
|
||||
delete[] oracles;
|
||||
|
@ -224,7 +225,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
# pragma omp barrier
|
||||
|
||||
// Shuffle bins internally.
|
||||
for (bin_index b = d->bins_begin; b < d->bins_end; b++)
|
||||
for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
|
||||
{
|
||||
value_type* begin =
|
||||
sd->temporaries[iam] +
|
||||
|
@ -338,9 +339,9 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
sd.temporaries = new value_type*[num_threads];
|
||||
sd.dist = new difference_type*[num_bins + 1];
|
||||
sd.bin_proc = new thread_index_t[num_bins];
|
||||
for (bin_index b = 0; b < num_bins + 1; b++)
|
||||
for (bin_index b = 0; b < num_bins + 1; ++b)
|
||||
sd.dist[b] = new difference_type[num_threads + 1];
|
||||
for (bin_index b = 0; b < (num_bins + 1); b++)
|
||||
for (bin_index b = 0; b < (num_bins + 1); ++b)
|
||||
{
|
||||
sd.dist[0][0] = 0;
|
||||
sd.dist[b][0] = 0;
|
||||
|
@ -354,7 +355,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
split = n % num_threads, start = 0;
|
||||
difference_type bin_chunk_length = num_bins / num_threads,
|
||||
bin_split = num_bins % num_threads;
|
||||
for (thread_index_t i = 0; i < num_threads; i++)
|
||||
for (thread_index_t i = 0; i < num_threads; ++i)
|
||||
{
|
||||
starts[i] = start;
|
||||
start += (i < split) ? (chunk_length + 1) : chunk_length;
|
||||
|
@ -364,7 +365,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
bin_cursor += (i < bin_split) ?
|
||||
(bin_chunk_length + 1) : bin_chunk_length;
|
||||
pus[i].bins_end = bin_cursor;
|
||||
for (; j < bin_cursor; j++)
|
||||
for (; j < bin_cursor; ++j)
|
||||
sd.bin_proc[j] = i;
|
||||
pus[i].num_threads = num_threads;
|
||||
pus[i].seed = rng(std::numeric_limits<uint32>::max());
|
||||
|
@ -378,7 +379,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
|
||||
delete[] starts;
|
||||
delete[] sd.bin_proc;
|
||||
for (int s = 0; s < (num_bins + 1); s++)
|
||||
for (int s = 0; s < (num_bins + 1); ++s)
|
||||
delete[] sd.dist[s];
|
||||
delete[] sd.dist;
|
||||
delete[] sd.temporaries;
|
||||
|
@ -455,31 +456,31 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
|||
difference_type* dist0 = new difference_type[num_bins + 1],
|
||||
* dist1 = new difference_type[num_bins + 1];
|
||||
|
||||
for (int b = 0; b < num_bins + 1; b++)
|
||||
for (int b = 0; b < num_bins + 1; ++b)
|
||||
dist0[b] = 0;
|
||||
|
||||
random_number bitrng(rng(0xFFFFFFFF));
|
||||
|
||||
for (difference_type i = 0; i < n; i++)
|
||||
for (difference_type i = 0; i < n; ++i)
|
||||
{
|
||||
bin_index oracle = random_number_pow2(num_bits, bitrng);
|
||||
oracles[i] = oracle;
|
||||
|
||||
// To allow prefix (partial) sum.
|
||||
dist0[oracle + 1]++;
|
||||
++(dist0[oracle + 1]);
|
||||
}
|
||||
|
||||
// Sum up bins.
|
||||
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
|
||||
|
||||
for (int b = 0; b < num_bins + 1; b++)
|
||||
for (int b = 0; b < num_bins + 1; ++b)
|
||||
dist1[b] = dist0[b];
|
||||
|
||||
// Distribute according to oracles.
|
||||
for (difference_type i = 0; i < n; i++)
|
||||
target[(dist0[oracles[i]])++] = *(begin + i);
|
||||
for (difference_type i = 0; i < n; ++i)
|
||||
new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
|
||||
|
||||
for (int b = 0; b < num_bins; b++)
|
||||
for (int b = 0; b < num_bins; ++b)
|
||||
{
|
||||
sequential_random_shuffle(target + dist1[b],
|
||||
target + dist1[b + 1],
|
||||
|
|
|
@ -100,16 +100,14 @@ template<
|
|||
end = borders[iam + 1];
|
||||
|
||||
i++;
|
||||
new (static_cast<void *>(&*out)) value_type(*first);
|
||||
out++;
|
||||
*out++ = *first;
|
||||
|
||||
for (InputIterator iter = first + begin; iter < first + end; ++iter)
|
||||
{
|
||||
if (!binary_pred(*iter, *(iter-1)))
|
||||
{
|
||||
i++;
|
||||
new (static_cast<void *>(&*out)) value_type(*iter);
|
||||
out++;
|
||||
*out++ = *iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -153,8 +151,7 @@ template<
|
|||
if (iter == first || !binary_pred(*iter, *(iter-1)))
|
||||
{
|
||||
i++;
|
||||
new (static_cast<void *>(&*iter_out)) value_type(*iter);
|
||||
iter_out++;
|
||||
*iter_out++ = *iter;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -170,8 +167,7 @@ template<
|
|||
{
|
||||
if (!binary_pred(*iter, *(iter-1)))
|
||||
{
|
||||
new (static_cast<void *> (&*iter_out)) value_type(*iter);
|
||||
iter_out++;
|
||||
*iter_out++ = *iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue