1904bef10a
* docs/html/parallel_mode.html: Added reference to MCSTL. More documentation on compile-time settings and tuning. * include/parallel/multiway_merge.h: Added reference to paper. * include/parallel/multiseq_selection.h: Added reference to paper. * include/parallel/workstealing.h: Added reference to paper. * include/parallel/balanced_quicksort.h: Added reference to paper. * include/parallel/tree.h: Added reference to paper. From-SVN: r129129
1648 lines
53 KiB
C++
1648 lines
53 KiB
C++
// -*- C++ -*-
|
|
|
|
// Copyright (C) 2007 Free Software Foundation, Inc.
|
|
//
|
|
// This file is part of the GNU ISO C++ Library. This library is free
|
|
// software; you can redistribute it and/or modify it under the terms
|
|
// of the GNU General Public License as published by the Free Software
|
|
// Foundation; either version 2, or (at your option) any later
|
|
// version.
|
|
|
|
// This library is distributed in the hope that it will be useful, but
|
|
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
// General Public License for more details.
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this library; see the file COPYING. If not, write to
|
|
// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
|
|
// MA 02111-1307, USA.
|
|
|
|
// As a special exception, you may use this file as part of a free
|
|
// software library without restriction. Specifically, if other files
|
|
// instantiate templates or use macros or inline functions from this
|
|
// file, or you compile this file and link it with other files to
|
|
// produce an executable, this file does not by itself cause the
|
|
// resulting executable to be covered by the GNU General Public
|
|
// License. This exception does not however invalidate any other
|
|
// reasons why the executable file might be covered by the GNU General
|
|
// Public License.
|
|
|
|
/** @file parallel/multiway_merge.h
|
|
* @brief Implementation of sequential and parallel multiway merge.
|
|
*
|
|
* Explanations on the high-speed merging routines in the appendix of
|
|
*
|
|
* P. Sanders.
|
|
* Fast priority queues for cached memory.
|
|
* ACM Journal of Experimental Algorithmics, 5, 2000.
|
|
*
|
|
* This file is a GNU parallel extension to the Standard C++ Library.
|
|
*/
|
|
|
|
// Written by Johannes Singler.
|
|
|
|
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
|
|
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
|
|
|
|
#include <vector>
|
|
|
|
#include <bits/stl_algo.h>
|
|
#include <parallel/features.h>
|
|
#include <parallel/parallel.h>
|
|
#include <parallel/merge.h>
|
|
#include <parallel/losertree.h>
|
|
#include <parallel/timing.h>
|
|
#if _GLIBCXX_ASSERTIONS
|
|
#include <parallel/checkers.h>
|
|
#endif
|
|
|
|
/** @brief Length of a sequence described by a pair of iterators. */
|
|
#define LENGTH(s) ((s).second - (s).first)
|
|
|
|
// XXX need iterator typedefs
|
|
namespace __gnu_parallel
|
|
{
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
class guarded_iterator;
|
|
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
guarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
guarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
|
|
/** @brief Iterator wrapper supporting an implicit supremum at the end
|
|
of the sequence, dominating all comparisons.
|
|
* Deriving from RandomAccessIterator is not possible since
|
|
* RandomAccessIterator need not be a class.
|
|
*/
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
class guarded_iterator
|
|
{
|
|
private:
|
|
/** @brief Current iterator position. */
|
|
RandomAccessIterator current;
|
|
|
|
/** @brief End iterator of the sequence. */
|
|
RandomAccessIterator end;
|
|
|
|
/** @brief Comparator. */
|
|
Comparator& comp;
|
|
|
|
public:
|
|
/** @brief Constructor. Sets iterator to beginning of sequence.
|
|
* @param begin Begin iterator of sequence.
|
|
* @param end End iterator of sequence.
|
|
* @param comp Comparator provided for associated overloaded
|
|
* compare operators. */
|
|
inline guarded_iterator(RandomAccessIterator begin,
|
|
RandomAccessIterator end, Comparator& comp)
|
|
: current(begin), end(end), comp(comp)
|
|
{ }
|
|
|
|
/** @brief Pre-increment operator.
|
|
* @return This. */
|
|
inline guarded_iterator<RandomAccessIterator, Comparator>&
|
|
operator++()
|
|
{
|
|
++current;
|
|
return *this;
|
|
}
|
|
|
|
/** @brief Dereference operator.
|
|
* @return Referenced element. */
|
|
inline typename std::iterator_traits<RandomAccessIterator>::value_type
|
|
operator*()
|
|
{ return *current; }
|
|
|
|
/** @brief Convert to wrapped iterator.
|
|
* @return Wrapped iterator. */
|
|
inline operator RandomAccessIterator()
|
|
{ return current; }
|
|
|
|
friend bool
|
|
operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
|
|
friend bool
|
|
operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
};
|
|
|
|
/** @brief Compare two elements referenced by guarded iterators.
|
|
* @param bi1 First iterator.
|
|
* @param bi2 Second iterator.
|
|
* @return @c True if less. */
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
guarded_iterator<RandomAccessIterator, Comparator>& bi2)
|
|
{
|
|
if (bi1.current == bi1.end) //bi1 is sup
|
|
return bi2.current == bi2.end; //bi2 is not sup
|
|
if (bi2.current == bi2.end) //bi2 is sup
|
|
return true;
|
|
return (bi1.comp)(*bi1, *bi2); //normal compare
|
|
}
|
|
|
|
/** @brief Compare two elements referenced by guarded iterators.
|
|
* @param bi1 First iterator.
|
|
* @param bi2 Second iterator.
|
|
* @return @c True if less equal. */
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
guarded_iterator<RandomAccessIterator, Comparator>& bi2)
|
|
{
|
|
if (bi2.current == bi2.end) //bi1 is sup
|
|
return bi1.current != bi1.end; //bi2 is not sup
|
|
if (bi1.current == bi1.end) //bi2 is sup
|
|
return false;
|
|
return !(bi1.comp)(*bi2, *bi1); //normal compare
|
|
}
|
|
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
class unguarded_iterator;
|
|
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
class unguarded_iterator
|
|
{
|
|
private:
|
|
/** @brief Current iterator position. */
|
|
RandomAccessIterator& current;
|
|
/** @brief Comparator. */
|
|
mutable Comparator& comp;
|
|
|
|
public:
|
|
/** @brief Constructor. Sets iterator to beginning of sequence.
|
|
* @param begin Begin iterator of sequence.
|
|
* @param end Unused, only for compatibility.
|
|
* @param comp Unused, only for compatibility. */
|
|
inline unguarded_iterator(RandomAccessIterator begin,
|
|
RandomAccessIterator end, Comparator& comp)
|
|
: current(begin), comp(comp)
|
|
{ }
|
|
|
|
/** @brief Pre-increment operator.
|
|
* @return This. */
|
|
inline unguarded_iterator<RandomAccessIterator, Comparator>&
|
|
operator++()
|
|
{
|
|
current++;
|
|
return *this;
|
|
}
|
|
|
|
/** @brief Dereference operator.
|
|
* @return Referenced element. */
|
|
inline typename std::iterator_traits<RandomAccessIterator>::value_type
|
|
operator*()
|
|
{ return *current; }
|
|
|
|
/** @brief Convert to wrapped iterator.
|
|
* @return Wrapped iterator. */
|
|
inline
|
|
operator RandomAccessIterator()
|
|
{ return current; }
|
|
|
|
friend bool
|
|
operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
|
|
friend bool
|
|
operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
|
|
};
|
|
|
|
/** @brief Compare two elements referenced by unguarded iterators.
|
|
* @param bi1 First iterator.
|
|
* @param bi2 Second iterator.
|
|
* @return @c True if less. */
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
|
|
{
|
|
// Normal compare.
|
|
return (bi1.comp)(*bi1, *bi2);
|
|
}
|
|
|
|
/** @brief Compare two elements referenced by unguarded iterators.
|
|
* @param bi1 First iterator.
|
|
* @param bi2 Second iterator.
|
|
* @return @c True if less equal. */
|
|
template<typename RandomAccessIterator, typename Comparator>
|
|
inline bool
|
|
operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
|
|
unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
|
|
{
|
|
// Normal compare.
|
|
return !(bi1.comp)(*bi2, *bi1);
|
|
}
|
|
|
|
/** Prepare a set of sequences to be merged without a (end) guard
|
|
* @param seqs_begin
|
|
* @param seqs_end
|
|
* @param comp
|
|
* @param min_sequence
|
|
* @param stable
|
|
* @pre (seqs_end - seqs_begin > 0) */
|
|
template<typename RandomAccessIteratorIterator, typename Comparator>
|
|
typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
|
|
prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
|
|
RandomAccessIteratorIterator seqs_end, Comparator comp,
|
|
int& min_sequence, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(seqs_end - seqs_begin)
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
|
|
difference_type;
|
|
|
|
if ((*seqs_begin).first == (*seqs_begin).second)
|
|
{
|
|
// Empty sequence found, it's the first one.
|
|
min_sequence = 0;
|
|
return -1;
|
|
}
|
|
|
|
// Last element in sequence.
|
|
value_type min = *((*seqs_begin).second - 1);
|
|
min_sequence = 0;
|
|
for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
|
|
{
|
|
if ((*s).first == (*s).second)
|
|
{
|
|
// Empty sequence found.
|
|
min_sequence = static_cast<int>(s - seqs_begin);
|
|
return -1;
|
|
}
|
|
|
|
// Last element in sequence.
|
|
const value_type& v = *((*s).second - 1);
|
|
if (comp(v, min)) //strictly smaller
|
|
{
|
|
min = v;
|
|
min_sequence = static_cast<int>(s - seqs_begin);
|
|
}
|
|
}
|
|
|
|
difference_type overhang_size = 0;
|
|
|
|
int s = 0;
|
|
for (s = 0; s <= min_sequence; s++)
|
|
{
|
|
RandomAccessIterator1 split;
|
|
if (stable)
|
|
split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
|
|
min, comp);
|
|
else
|
|
split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
|
|
min, comp);
|
|
|
|
overhang_size += seqs_begin[s].second - split;
|
|
}
|
|
|
|
for (; s < (seqs_end - seqs_begin); s++)
|
|
{
|
|
RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
|
|
overhang_size += seqs_begin[s].second - split;
|
|
}
|
|
|
|
// So many elements will be left over afterwards.
|
|
return overhang_size;
|
|
}
|
|
|
|
/** Prepare a set of sequences to be merged with a (end) guard (sentinel)
|
|
* @param seqs_begin
|
|
* @param seqs_end
|
|
* @param comp */
|
|
template<typename RandomAccessIteratorIterator, typename Comparator>
|
|
typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
|
|
prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
|
|
RandomAccessIteratorIterator seqs_end,
|
|
Comparator comp)
|
|
{
|
|
_GLIBCXX_CALL(seqs_end - seqs_begin)
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
|
|
difference_type;
|
|
|
|
// Last element in sequence.
|
|
value_type max;
|
|
bool max_found = false;
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
|
{
|
|
if ((*s).first == (*s).second)
|
|
continue;
|
|
|
|
// Last element in sequence.
|
|
value_type& v = *((*s).second - 1);
|
|
|
|
// Strictly greater.
|
|
if (!max_found || comp(max, v))
|
|
max = v;
|
|
max_found = true;
|
|
}
|
|
|
|
difference_type overhang_size = 0;
|
|
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
|
{
|
|
RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp);
|
|
overhang_size += (*s).second - split;
|
|
|
|
// Set sentinel.
|
|
*((*s).second) = max;
|
|
}
|
|
|
|
// So many elements will be left over afterwards.
|
|
return overhang_size;
|
|
}
|
|
|
|
/** @brief Highly efficient 3-way merging procedure.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Unused, stable anyway.
|
|
* @return End iterator of output sequence. */
|
|
template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length);
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
if (length == 0)
|
|
return target;
|
|
|
|
iterator<RandomAccessIterator1, Comparator>
|
|
seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
|
|
seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
|
|
seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
|
|
|
|
if (seq0 <= seq1)
|
|
{
|
|
if (seq1 <= seq2)
|
|
goto s012;
|
|
else
|
|
if (seq2 < seq0)
|
|
goto s201;
|
|
else
|
|
goto s021;
|
|
}
|
|
else
|
|
{
|
|
if (seq1 <= seq2)
|
|
{
|
|
if (seq0 <= seq2)
|
|
goto s102;
|
|
else
|
|
goto s120;
|
|
}
|
|
else
|
|
goto s210;
|
|
}
|
|
|
|
#define Merge3Case(a,b,c,c0,c1) \
|
|
s ## a ## b ## c : \
|
|
*target = *seq ## a; \
|
|
++target; \
|
|
length--; \
|
|
++seq ## a; \
|
|
if (length == 0) goto finish; \
|
|
if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
|
|
if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
|
|
goto s ## b ## c ## a;
|
|
|
|
Merge3Case(0, 1, 2, <=, <=);
|
|
Merge3Case(1, 2, 0, <=, < );
|
|
Merge3Case(2, 0, 1, < , < );
|
|
Merge3Case(1, 0, 2, < , <=);
|
|
Merge3Case(0, 2, 1, <=, <=);
|
|
Merge3Case(2, 1, 0, < , < );
|
|
|
|
#undef Merge3Case
|
|
|
|
finish:
|
|
;
|
|
|
|
seqs_begin[0].first = seq0;
|
|
seqs_begin[1].first = seq1;
|
|
seqs_begin[2].first = seq2;
|
|
|
|
return target;
|
|
}
|
|
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length);
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
int min_seq;
|
|
RandomAccessIterator3 target_end;
|
|
|
|
// Stable anyway.
|
|
difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
|
|
|
|
difference_type total_length = 0;
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
|
total_length += LENGTH(*s);
|
|
|
|
if (overhang != -1)
|
|
{
|
|
difference_type unguarded_length = std::min(length, total_length - overhang);
|
|
target_end = multiway_merge_3_variant<unguarded_iterator>
|
|
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
|
|
overhang = length - unguarded_length;
|
|
}
|
|
else
|
|
{
|
|
// Empty sequence found.
|
|
overhang = length;
|
|
target_end = target;
|
|
}
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
switch (min_seq)
|
|
{
|
|
case 0:
|
|
// Iterators will be advanced accordingly.
|
|
target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
|
|
seqs_begin[2].first, seqs_begin[2].second,
|
|
target_end, overhang, comp);
|
|
break;
|
|
case 1:
|
|
target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
|
|
seqs_begin[2].first, seqs_begin[2].second,
|
|
target_end, overhang, comp);
|
|
break;
|
|
case 2:
|
|
target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
|
|
seqs_begin[1].first, seqs_begin[1].second,
|
|
target_end, overhang, comp);
|
|
break;
|
|
default:
|
|
_GLIBCXX_PARALLEL_ASSERT(false);
|
|
}
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
return target_end;
|
|
}
|
|
|
|
/** @brief Highly efficient 4-way merging procedure.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Unused, stable anyway.
|
|
* @return End iterator of output sequence. */
|
|
template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length);
|
|
typedef _DifferenceTp difference_type;
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
iterator<RandomAccessIterator1, Comparator>
|
|
seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
|
|
seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
|
|
seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
|
|
seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
|
|
|
|
#define Decision(a,b,c,d) { \
|
|
if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
|
|
if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
|
|
if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
|
|
goto s ## a ## b ## c ## d; }
|
|
|
|
if (seq0 <= seq1)
|
|
{
|
|
if (seq1 <= seq2)
|
|
Decision(0,1,2,3)
|
|
else
|
|
if (seq2 < seq0)
|
|
Decision(2,0,1,3)
|
|
else
|
|
Decision(0,2,1,3)
|
|
}
|
|
else
|
|
{
|
|
if (seq1 <= seq2)
|
|
{
|
|
if (seq0 <= seq2)
|
|
Decision(1,0,2,3)
|
|
else
|
|
Decision(1,2,0,3)
|
|
}
|
|
else
|
|
Decision(2,1,0,3)
|
|
}
|
|
|
|
#define Merge4Case(a,b,c,d,c0,c1,c2) \
|
|
s ## a ## b ## c ## d: \
|
|
if (length == 0) goto finish; \
|
|
*target = *seq ## a; \
|
|
++target; \
|
|
length--; \
|
|
++seq ## a; \
|
|
if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
|
|
if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
|
|
if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
|
|
goto s ## b ## c ## d ## a;
|
|
|
|
Merge4Case(0, 1, 2, 3, <=, <=, <=);
|
|
Merge4Case(0, 1, 3, 2, <=, <=, <=);
|
|
Merge4Case(0, 2, 1, 3, <=, <=, <=);
|
|
Merge4Case(0, 2, 3, 1, <=, <=, <=);
|
|
Merge4Case(0, 3, 1, 2, <=, <=, <=);
|
|
Merge4Case(0, 3, 2, 1, <=, <=, <=);
|
|
Merge4Case(1, 0, 2, 3, < , <=, <=);
|
|
Merge4Case(1, 0, 3, 2, < , <=, <=);
|
|
Merge4Case(1, 2, 0, 3, <=, < , <=);
|
|
Merge4Case(1, 2, 3, 0, <=, <=, < );
|
|
Merge4Case(1, 3, 0, 2, <=, < , <=);
|
|
Merge4Case(1, 3, 2, 0, <=, <=, < );
|
|
Merge4Case(2, 0, 1, 3, < , < , <=);
|
|
Merge4Case(2, 0, 3, 1, < , <=, < );
|
|
Merge4Case(2, 1, 0, 3, < , < , <=);
|
|
Merge4Case(2, 1, 3, 0, < , <=, < );
|
|
Merge4Case(2, 3, 0, 1, <=, < , < );
|
|
Merge4Case(2, 3, 1, 0, <=, < , < );
|
|
Merge4Case(3, 0, 1, 2, < , < , < );
|
|
Merge4Case(3, 0, 2, 1, < , < , < );
|
|
Merge4Case(3, 1, 0, 2, < , < , < );
|
|
Merge4Case(3, 1, 2, 0, < , < , < );
|
|
Merge4Case(3, 2, 0, 1, < , < , < );
|
|
Merge4Case(3, 2, 1, 0, < , < , < );
|
|
|
|
#undef Merge4Case
|
|
#undef Decision
|
|
|
|
finish:
|
|
;
|
|
|
|
seqs_begin[0].first = seq0;
|
|
seqs_begin[1].first = seq1;
|
|
seqs_begin[2].first = seq2;
|
|
seqs_begin[3].first = seq3;
|
|
|
|
return target;
|
|
}
|
|
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length);
|
|
typedef _DifferenceTp difference_type;
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
int min_seq;
|
|
RandomAccessIterator3 target_end;
|
|
|
|
// Stable anyway.
|
|
difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
|
|
|
|
difference_type total_length = 0;
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
|
|
total_length += LENGTH(*s);
|
|
|
|
if (overhang != -1)
|
|
{
|
|
difference_type unguarded_length = std::min(length, total_length - overhang);
|
|
target_end = multiway_merge_4_variant<unguarded_iterator>
|
|
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
|
|
overhang = length - unguarded_length;
|
|
}
|
|
else
|
|
{
|
|
// Empty sequence found.
|
|
overhang = length;
|
|
target_end = target;
|
|
}
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
|
|
one_missing.erase(one_missing.begin() + min_seq); //remove
|
|
|
|
target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
|
|
|
|
// Insert back again.
|
|
one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
|
|
// Write back modified iterators.
|
|
copy(one_missing.begin(), one_missing.end(), seqs_begin);
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
return target_end;
|
|
}
|
|
|
|
/** @brief Basic multi-way merging procedure.
|
|
*
|
|
* The head elements are kept in a sorted array, new heads are
|
|
* inserted linearly.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @return End iterator of output sequence.
|
|
*/
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
// Num remaining pieces.
|
|
int k = static_cast<int>(seqs_end - seqs_begin), nrp;
|
|
|
|
value_type* pl = new value_type[k];
|
|
int* source = new int[k];
|
|
difference_type total_length = 0;
|
|
|
|
#define POS(i) seqs_begin[(i)].first
|
|
#define STOPS(i) seqs_begin[(i)].second
|
|
|
|
// Write entries into queue.
|
|
nrp = 0;
|
|
for (int pi = 0; pi < k; pi++)
|
|
{
|
|
if (STOPS(pi) != POS(pi))
|
|
{
|
|
pl[nrp] = *(POS(pi));
|
|
source[nrp] = pi;
|
|
nrp++;
|
|
total_length += LENGTH(seqs_begin[pi]);
|
|
}
|
|
}
|
|
|
|
if (stable)
|
|
{
|
|
for (int k = 0; k < nrp - 1; k++)
|
|
for (int pi = nrp - 1; pi > k; pi--)
|
|
if (comp(pl[pi], pl[pi - 1]) ||
|
|
(!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
|
|
{
|
|
std::swap(pl[pi - 1], pl[pi]);
|
|
std::swap(source[pi - 1], source[pi]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int k = 0; k < nrp - 1; k++)
|
|
for (int pi = nrp - 1; pi > k; pi--)
|
|
if (comp(pl[pi], pl[pi-1]))
|
|
{
|
|
std::swap(pl[pi-1], pl[pi]);
|
|
std::swap(source[pi-1], source[pi]);
|
|
}
|
|
}
|
|
|
|
// Iterate.
|
|
if (stable)
|
|
{
|
|
int j;
|
|
while (nrp > 0 && length > 0)
|
|
{
|
|
if (source[0] < source[1])
|
|
{
|
|
// pl[0] <= pl[1]
|
|
while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
|
|
{
|
|
*target = pl[0];
|
|
++target;
|
|
++POS(source[0]);
|
|
length--;
|
|
if (POS(source[0]) == STOPS(source[0]))
|
|
{
|
|
// Move everything to the left.
|
|
for (int s = 0; s < nrp - 1; s++)
|
|
{
|
|
pl[s] = pl[s + 1];
|
|
source[s] = source[s + 1];
|
|
}
|
|
nrp--;
|
|
break;
|
|
}
|
|
else
|
|
pl[0] = *(POS(source[0]));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// pl[0] < pl[1]
|
|
while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
|
|
{
|
|
*target = pl[0];
|
|
++target;
|
|
++POS(source[0]);
|
|
length--;
|
|
if (POS(source[0]) == STOPS(source[0]))
|
|
{
|
|
for (int s = 0; s < nrp - 1; s++)
|
|
{
|
|
pl[s] = pl[s + 1];
|
|
source[s] = source[s + 1];
|
|
}
|
|
nrp--;
|
|
break;
|
|
}
|
|
else
|
|
pl[0] = *(POS(source[0]));
|
|
}
|
|
}
|
|
|
|
// Sink down.
|
|
j = 1;
|
|
while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
|
|
(!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
|
|
{
|
|
std::swap(pl[j - 1], pl[j]);
|
|
std::swap(source[j - 1], source[j]);
|
|
j++;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int j;
|
|
while (nrp > 0 && length > 0)
|
|
{
|
|
// pl[0] <= pl[1]
|
|
while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
|
|
{
|
|
*target = pl[0];
|
|
++target;
|
|
++POS(source[0]);
|
|
length--;
|
|
if (POS(source[0]) == STOPS(source[0]))
|
|
{
|
|
for (int s = 0; s < (nrp - 1); s++)
|
|
{
|
|
pl[s] = pl[s + 1];
|
|
source[s] = source[s + 1];
|
|
}
|
|
nrp--;
|
|
break;
|
|
}
|
|
else
|
|
pl[0] = *(POS(source[0]));
|
|
}
|
|
|
|
// Sink down.
|
|
j = 1;
|
|
while ((j < nrp) && comp(pl[j], pl[j - 1]))
|
|
{
|
|
std::swap(pl[j - 1], pl[j]);
|
|
std::swap(source[j - 1], source[j]);
|
|
j++;
|
|
}
|
|
}
|
|
}
|
|
|
|
delete[] pl;
|
|
delete[] source;
|
|
|
|
return target;
|
|
}
|
|
|
|
/** @brief Multi-way merging procedure for a high branching factor,
|
|
* guarded case.
|
|
*
|
|
* The head elements are kept in a loser tree.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @return End iterator of output sequence.
|
|
*/
|
|
template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
int k = static_cast<int>(seqs_end - seqs_begin);
|
|
|
|
LT lt(k, comp);
|
|
|
|
difference_type total_length = 0;
|
|
|
|
for (int t = 0; t < k; t++)
|
|
{
|
|
if (stable)
|
|
{
|
|
if (seqs_begin[t].first == seqs_begin[t].second)
|
|
lt.insert_start_stable(value_type(), t, true);
|
|
else
|
|
lt.insert_start_stable(*seqs_begin[t].first, t, false);
|
|
}
|
|
else
|
|
{
|
|
if (seqs_begin[t].first == seqs_begin[t].second)
|
|
lt.insert_start(value_type(), t, true);
|
|
else
|
|
lt.insert_start(*seqs_begin[t].first, t, false);
|
|
}
|
|
|
|
total_length += LENGTH(seqs_begin[t]);
|
|
}
|
|
|
|
if (stable)
|
|
lt.init_stable();
|
|
else
|
|
lt.init();
|
|
|
|
total_length = std::min(total_length, length);
|
|
|
|
int source;
|
|
|
|
if (stable)
|
|
{
|
|
for (difference_type i = 0; i < total_length; i++)
|
|
{
|
|
// Take out.
|
|
source = lt.get_min_source();
|
|
|
|
*(target++) = *(seqs_begin[source].first++);
|
|
|
|
// Feed.
|
|
if (seqs_begin[source].first == seqs_begin[source].second)
|
|
lt.delete_min_insert_stable(value_type(), true);
|
|
else
|
|
// Replace from same source.
|
|
lt.delete_min_insert_stable(*seqs_begin[source].first, false);
|
|
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (difference_type i = 0; i < total_length; i++)
|
|
{
|
|
//take out
|
|
source = lt.get_min_source();
|
|
|
|
*(target++) = *(seqs_begin[source].first++);
|
|
|
|
// Feed.
|
|
if (seqs_begin[source].first == seqs_begin[source].second)
|
|
lt.delete_min_insert(value_type(), true);
|
|
else
|
|
// Replace from same source.
|
|
lt.delete_min_insert(*seqs_begin[source].first, false);
|
|
}
|
|
}
|
|
|
|
return target;
|
|
}
|
|
|
|
/** @brief Multi-way merging procedure for a high branching factor,
|
|
* unguarded case.
|
|
*
|
|
* The head elements are kept in a loser tree.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @return End iterator of output sequence.
|
|
* @pre No input will run out of elements during the merge.
|
|
*/
|
|
template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
typedef _DifferenceTp difference_type;
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
int k = seqs_end - seqs_begin;
|
|
|
|
LT lt(k, comp);
|
|
|
|
difference_type total_length = 0;
|
|
|
|
for (int t = 0; t < k; t++)
|
|
{
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
|
|
#endif
|
|
if (stable)
|
|
lt.insert_start_stable(*seqs_begin[t].first, t, false);
|
|
else
|
|
lt.insert_start(*seqs_begin[t].first, t, false);
|
|
|
|
total_length += LENGTH(seqs_begin[t]);
|
|
}
|
|
|
|
if (stable)
|
|
lt.init_stable();
|
|
else
|
|
lt.init();
|
|
|
|
// Do not go past end.
|
|
length = std::min(total_length, length);
|
|
|
|
int source;
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
difference_type i = 0;
|
|
#endif
|
|
|
|
if (stable)
|
|
{
|
|
RandomAccessIterator3 target_end = target + length;
|
|
while (target < target_end)
|
|
{
|
|
// Take out.
|
|
source = lt.get_min_source();
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
|
|
#endif
|
|
|
|
*(target++) = *(seqs_begin[source].first++);
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
|
|
i++;
|
|
#endif
|
|
// Feed.
|
|
// Replace from same source.
|
|
lt.delete_min_insert_stable(*seqs_begin[source].first, false);
|
|
|
|
}
|
|
}
|
|
else
|
|
{
|
|
RandomAccessIterator3 target_end = target + length;
|
|
while (target < target_end)
|
|
{
|
|
// Take out.
|
|
source = lt.get_min_source();
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
|
|
printf(" %i %i %i\n", length, i, source);
|
|
_GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
|
|
#endif
|
|
|
|
*(target++) = *(seqs_begin[source].first++);
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
|
|
printf(" %i %i %i\n", length, i, source);
|
|
_GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
|
|
i++;
|
|
#endif
|
|
// Feed.
|
|
// Replace from same source.
|
|
lt.delete_min_insert(*seqs_begin[source].first, false);
|
|
}
|
|
}
|
|
|
|
return target;
|
|
}
|
|
|
|
template<typename _ValueTp, class Comparator>
|
|
struct loser_tree_traits
|
|
{
|
|
typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT;
|
|
};
|
|
|
|
|
|
/*#define NO_POINTER(T) \
|
|
template<typename Comparator> \
|
|
struct loser_tree_traits<T, Comparator> \
|
|
{ \
|
|
typedef LoserTreePointer<T, Comparator> LT; \
|
|
};*/
|
|
//
|
|
// NO_POINTER(unsigned char)
|
|
// NO_POINTER(char)
|
|
// NO_POINTER(unsigned short)
|
|
// NO_POINTER(short)
|
|
// NO_POINTER(unsigned int)
|
|
// NO_POINTER(int)
|
|
// NO_POINTER(unsigned long)
|
|
// NO_POINTER(long)
|
|
// NO_POINTER(unsigned long long)
|
|
// NO_POINTER(long long)
|
|
//
|
|
// #undef NO_POINTER
|
|
|
|
template<typename _ValueTp, class Comparator>
|
|
struct loser_tree_traits_unguarded
|
|
{
|
|
typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
|
|
};
|
|
|
|
/*#define NO_POINTER_UNGUARDED(T) \
|
|
template<typename Comparator> \
|
|
struct loser_tree_traits_unguarded<T, Comparator> \
|
|
{ \
|
|
typedef LoserTreePointerUnguarded<T, Comparator> LT; \
|
|
};*/
|
|
//
|
|
// NO_POINTER_UNGUARDED(unsigned char)
|
|
// NO_POINTER_UNGUARDED(char)
|
|
// NO_POINTER_UNGUARDED(unsigned short)
|
|
// NO_POINTER_UNGUARDED(short)
|
|
// NO_POINTER_UNGUARDED(unsigned int)
|
|
// NO_POINTER_UNGUARDED(int)
|
|
// NO_POINTER_UNGUARDED(unsigned long)
|
|
// NO_POINTER_UNGUARDED(long)
|
|
// NO_POINTER_UNGUARDED(unsigned long long)
|
|
// NO_POINTER_UNGUARDED(long long)
|
|
//
|
|
// #undef NO_POINTER_UNGUARDED
|
|
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
int min_seq;
|
|
RandomAccessIterator3 target_end;
|
|
difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
|
|
comp, min_seq, stable);
|
|
|
|
difference_type total_length = 0;
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
|
total_length += LENGTH(*s);
|
|
|
|
if (overhang != -1)
|
|
{
|
|
difference_type unguarded_length = std::min(length, total_length - overhang);
|
|
target_end = multiway_merge_loser_tree_unguarded
|
|
<typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
|
|
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
|
|
overhang = length - unguarded_length;
|
|
}
|
|
else
|
|
{
|
|
// Empty sequence found.
|
|
overhang = length;
|
|
target_end = target;
|
|
}
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
target_end = multiway_merge_loser_tree
|
|
<typename loser_tree_traits<value_type, Comparator>::LT>
|
|
(seqs_begin, seqs_end, target_end, comp, overhang, stable);
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
return target_end;
|
|
}
|
|
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
|
|
RandomAccessIterator3 target_end;
|
|
difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
|
|
|
|
difference_type total_length = 0;
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
|
{
|
|
total_length += LENGTH(*s);
|
|
|
|
// Sentinel spot.
|
|
(*s).second++;
|
|
}
|
|
|
|
difference_type unguarded_length = std::min(length, total_length - overhang);
|
|
target_end = multiway_merge_loser_tree_unguarded
|
|
<typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
|
|
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
|
|
overhang = length - unguarded_length;
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
// Copy rest stable.
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
|
|
{
|
|
// Restore.
|
|
(*s).second--;
|
|
difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
|
|
target_end = std::copy((*s).first, (*s).first + local_length, target_end);
|
|
(*s).first += local_length;
|
|
overhang -= local_length;
|
|
}
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(overhang == 0);
|
|
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|
|
#endif
|
|
|
|
return target_end;
|
|
}
|
|
|
|
/** @brief Sequential multi-way merging switch.
|
|
*
|
|
* The decision if based on the branching factor and runtime settings.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @param sentinel The sequences have a sentinel element.
|
|
* @return End iterator of output sequence. */
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
|
|
#endif
|
|
|
|
RandomAccessIterator3 return_target = target;
|
|
int k = static_cast<int>(seqs_end - seqs_begin);
|
|
|
|
Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
|
|
|
|
if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
|
|
mwma = Settings::LOSER_TREE_COMBINED;
|
|
|
|
switch (k)
|
|
{
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
|
|
seqs_begin[0].first += length;
|
|
break;
|
|
case 2:
|
|
return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
|
|
break;
|
|
case 3:
|
|
switch (mwma)
|
|
{
|
|
case Settings::LOSER_TREE_COMBINED:
|
|
return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
case Settings::LOSER_TREE_SENTINEL:
|
|
return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
default:
|
|
return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
}
|
|
break;
|
|
case 4:
|
|
switch (mwma)
|
|
{
|
|
case Settings::LOSER_TREE_COMBINED:
|
|
return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
case Settings::LOSER_TREE_SENTINEL:
|
|
return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
default:
|
|
return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
{
|
|
switch (mwma)
|
|
{
|
|
case Settings::BUBBLE:
|
|
return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
#if _GLIBCXX_LOSER_TREE_EXPLICIT
|
|
case Settings::LOSER_TREE_EXPLICIT:
|
|
return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
#endif
|
|
#if _GLIBCXX_LOSER_TREE
|
|
case Settings::LOSER_TREE:
|
|
return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
#endif
|
|
#if _GLIBCXX_LOSER_TREE_COMBINED
|
|
case Settings::LOSER_TREE_COMBINED:
|
|
return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
#endif
|
|
#if _GLIBCXX_LOSER_TREE_SENTINEL
|
|
case Settings::LOSER_TREE_SENTINEL:
|
|
return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
|
|
break;
|
|
#endif
|
|
default:
|
|
// multiway_merge algorithm not implemented.
|
|
_GLIBCXX_PARALLEL_ASSERT(0);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
|
|
#endif
|
|
|
|
return return_target;
|
|
}
|
|
|
|
/** @brief Parallel multi-way merge routine.
|
|
*
|
|
* The decision if based on the branching factor and runtime settings.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @param sentinel Ignored.
|
|
* @return End iterator of output sequence.
|
|
*/
|
|
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
|
|
{
|
|
_GLIBCXX_CALL(length)
|
|
|
|
typedef _DifferenceTp difference_type;
|
|
typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
|
|
RandomAccessIterator1;
|
|
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
|
|
value_type;
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
|
|
#endif
|
|
|
|
// k sequences.
|
|
int k = static_cast<int>(seqs_end - seqs_begin);
|
|
|
|
difference_type total_length = 0;
|
|
for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
|
|
total_length += LENGTH(*raii);
|
|
|
|
_GLIBCXX_CALL(total_length)
|
|
|
|
if (total_length == 0 || k == 0)
|
|
return target;
|
|
|
|
thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
|
|
|
|
Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
|
|
|
|
for (int pr = 0; pr < num_threads; pr++)
|
|
t[pr].tic();
|
|
|
|
bool tight = (total_length == length);
|
|
|
|
// Thread t will have to merge pieces[iam][0..k - 1]
|
|
std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
|
|
for (int s = 0; s < num_threads; s++)
|
|
pieces[s].resize(k);
|
|
|
|
difference_type num_samples = Settings::merge_oversampling * num_threads;
|
|
|
|
if (Settings::multiway_merge_splitting == Settings::SAMPLING)
|
|
{
|
|
value_type* samples = new value_type[k * num_samples];
|
|
// Sample.
|
|
for (int s = 0; s < k; s++)
|
|
for (int i = 0; (difference_type)i < num_samples; i++)
|
|
{
|
|
difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
|
|
samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
|
|
}
|
|
|
|
if (stable)
|
|
__gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
|
|
else
|
|
__gnu_sequential::sort(samples, samples + (num_samples * k), comp);
|
|
|
|
for (int slab = 0; slab < num_threads; slab++)
|
|
// For each slab / processor.
|
|
for (int seq = 0; seq < k; seq++)
|
|
{
|
|
// For each sequence.
|
|
if (slab > 0)
|
|
pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
|
|
else
|
|
{
|
|
// Absolute beginning.
|
|
pieces[slab][seq].first = 0;
|
|
}
|
|
if ((slab + 1) < num_threads)
|
|
pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
|
|
else
|
|
pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending
|
|
}
|
|
delete[] samples;
|
|
}
|
|
else
|
|
{
|
|
// (Settings::multiway_merge_splitting == Settings::EXACT).
|
|
std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
|
|
std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
|
|
|
|
copy(seqs_begin, seqs_end, se.begin());
|
|
|
|
difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
|
|
equally_split(length, num_threads, borders);
|
|
|
|
for (int s = 0; s < (num_threads - 1); s++)
|
|
{
|
|
offsets[s].resize(k);
|
|
multiseq_partition(se.begin(), se.end(), borders[s + 1],
|
|
offsets[s].begin(), comp);
|
|
|
|
// Last one also needed and available.
|
|
if (!tight)
|
|
{
|
|
offsets[num_threads - 1].resize(k);
|
|
multiseq_partition(se.begin(), se.end(),
|
|
difference_type(length),
|
|
offsets[num_threads - 1].begin(), comp);
|
|
}
|
|
}
|
|
|
|
|
|
for (int slab = 0; slab < num_threads; slab++)
|
|
{
|
|
// For each slab / processor.
|
|
for (int seq = 0; seq < k; seq++)
|
|
{
|
|
// For each sequence.
|
|
if (slab == 0)
|
|
{
|
|
// Absolute beginning.
|
|
pieces[slab][seq].first = 0;
|
|
}
|
|
else
|
|
pieces[slab][seq].first = pieces[slab - 1][seq].second;
|
|
if (!tight || slab < (num_threads - 1))
|
|
pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
|
|
else
|
|
{
|
|
// slab == num_threads - 1
|
|
pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
|
|
}
|
|
}
|
|
}
|
|
delete[] offsets;
|
|
}
|
|
|
|
for (int pr = 0; pr < num_threads; pr++)
|
|
t[pr].tic();
|
|
|
|
# pragma omp parallel num_threads(num_threads)
|
|
{
|
|
thread_index_t iam = omp_get_thread_num();
|
|
|
|
t[iam].tic();
|
|
|
|
difference_type target_position = 0;
|
|
|
|
for (int c = 0; c < k; c++)
|
|
target_position += pieces[iam][c].first;
|
|
|
|
if (k > 2)
|
|
{
|
|
std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
|
|
|
|
difference_type local_length = 0;
|
|
for (int s = 0; s < k; s++)
|
|
{
|
|
chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
|
|
local_length += LENGTH(chunks[s]);
|
|
}
|
|
|
|
multiway_merge(chunks, chunks + k, target + target_position, comp,
|
|
std::min(local_length, length - target_position),
|
|
stable, false, sequential_tag());
|
|
|
|
delete[] chunks;
|
|
}
|
|
else if (k == 2)
|
|
{
|
|
RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
|
|
merge_advance(begin0,
|
|
seqs_begin[0].first + pieces[iam][0].second,
|
|
begin1,
|
|
seqs_begin[1].first + pieces[iam][1].second,
|
|
target + target_position,
|
|
(pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
|
|
comp);
|
|
}
|
|
|
|
t[iam].tic();
|
|
|
|
}
|
|
|
|
for (int pr = 0; pr < num_threads; pr++)
|
|
t[pr].tic();
|
|
|
|
#if _GLIBCXX_ASSERTIONS
|
|
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
|
|
#endif
|
|
|
|
// Update ends of sequences.
|
|
for (int s = 0; s < k; s++)
|
|
seqs_begin[s].first += pieces[num_threads - 1][s].second;
|
|
|
|
delete[] pieces;
|
|
|
|
for (int pr = 0; pr < num_threads; pr++)
|
|
t[pr].tic();
|
|
for (int pr = 0; pr < num_threads; pr++)
|
|
t[pr].print();
|
|
delete[] t;
|
|
|
|
return target + length;
|
|
}
|
|
|
|
/**
|
|
* @brief Multi-way merging front-end.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @return End iterator of output sequence.
|
|
*/
|
|
template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
|
|
RandomAccessIteratorPairIterator seqs_end,
|
|
RandomAccessIterator3 target, Comparator comp,
|
|
_DifferenceTp length, bool stable)
|
|
{
|
|
typedef _DifferenceTp difference_type;
|
|
_GLIBCXX_CALL(seqs_end - seqs_begin)
|
|
|
|
if (seqs_begin == seqs_end)
|
|
return target;
|
|
|
|
RandomAccessIterator3 target_end;
|
|
if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
|
|
target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
|
|
else
|
|
target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
|
|
|
|
return target_end;
|
|
}
|
|
|
|
/** @brief Multi-way merging front-end.
|
|
* @param seqs_begin Begin iterator of iterator pair input sequence.
|
|
* @param seqs_end End iterator of iterator pair input sequence.
|
|
* @param target Begin iterator out output sequence.
|
|
* @param comp Comparator.
|
|
* @param length Maximum length to merge.
|
|
* @param stable Stable merging incurs a performance penalty.
|
|
* @return End iterator of output sequence.
|
|
* @pre For each @c i, @c seqs_begin[i].second must be the end
|
|
* marker of the sequence, but also reference the one more sentinel
|
|
* element. */
|
|
template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
|
|
RandomAccessIterator3
|
|
multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
|
|
RandomAccessIteratorPairIterator seqs_end,
|
|
RandomAccessIterator3 target,
|
|
Comparator comp,
|
|
_DifferenceTp length,
|
|
bool stable)
|
|
{
|
|
typedef _DifferenceTp difference_type;
|
|
|
|
if (seqs_begin == seqs_end)
|
|
return target;
|
|
|
|
_GLIBCXX_CALL(seqs_end - seqs_begin)
|
|
|
|
if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
|
|
return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
|
|
else
|
|
return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
|
|
}
|
|
}
|
|
|
|
#endif
|