// -*- C++ -*- // Copyright (C) 2007 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/multiway_merge.h * @brief Implementation of sequential and parallel multiway merge. * * Explanations on the high-speed merging routines in the appendix of * * P. Sanders. * Fast priority queues for cached memory. * ACM Journal of Experimental Algorithmics, 5, 2000. * * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Johannes Singler. #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H #include #include #include #include #include #include #include #if _GLIBCXX_ASSERTIONS #include #endif /** @brief Length of a sequence described by a pair of iterators. */ #define LENGTH(s) ((s).second - (s).first) // XXX need iterator typedefs namespace __gnu_parallel { template class guarded_iterator; template inline bool operator<(guarded_iterator& bi1, guarded_iterator& bi2); template inline bool operator<=(guarded_iterator& bi1, guarded_iterator& bi2); /** @brief Iterator wrapper supporting an implicit supremum at the end of the sequence, dominating all comparisons. * Deriving from RandomAccessIterator is not possible since * RandomAccessIterator need not be a class. */ template class guarded_iterator { private: /** @brief Current iterator position. */ RandomAccessIterator current; /** @brief End iterator of the sequence. */ RandomAccessIterator end; /** @brief Comparator. */ Comparator& comp; public: /** @brief Constructor. Sets iterator to beginning of sequence. * @param begin Begin iterator of sequence. * @param end End iterator of sequence. * @param comp Comparator provided for associated overloaded * compare operators. */ inline guarded_iterator(RandomAccessIterator begin, RandomAccessIterator end, Comparator& comp) : current(begin), end(end), comp(comp) { } /** @brief Pre-increment operator. * @return This. */ inline guarded_iterator& operator++() { ++current; return *this; } /** @brief Dereference operator. * @return Referenced element. */ inline typename std::iterator_traits::value_type operator*() { return *current; } /** @brief Convert to wrapped iterator. * @return Wrapped iterator. */ inline operator RandomAccessIterator() { return current; } friend bool operator< (guarded_iterator& bi1, guarded_iterator& bi2); friend bool operator<= (guarded_iterator& bi1, guarded_iterator& bi2); }; /** @brief Compare two elements referenced by guarded iterators. * @param bi1 First iterator. * @param bi2 Second iterator. * @return @c True if less. */ template inline bool operator<(guarded_iterator& bi1, guarded_iterator& bi2) { if (bi1.current == bi1.end) //bi1 is sup return bi2.current == bi2.end; //bi2 is not sup if (bi2.current == bi2.end) //bi2 is sup return true; return (bi1.comp)(*bi1, *bi2); //normal compare } /** @brief Compare two elements referenced by guarded iterators. * @param bi1 First iterator. * @param bi2 Second iterator. * @return @c True if less equal. */ template inline bool operator<=(guarded_iterator& bi1, guarded_iterator& bi2) { if (bi2.current == bi2.end) //bi1 is sup return bi1.current != bi1.end; //bi2 is not sup if (bi1.current == bi1.end) //bi2 is sup return false; return !(bi1.comp)(*bi2, *bi1); //normal compare } template class unguarded_iterator; template inline bool operator<(unguarded_iterator& bi1, unguarded_iterator& bi2); template inline bool operator<=(unguarded_iterator& bi1, unguarded_iterator& bi2); template class unguarded_iterator { private: /** @brief Current iterator position. */ RandomAccessIterator& current; /** @brief Comparator. */ mutable Comparator& comp; public: /** @brief Constructor. Sets iterator to beginning of sequence. * @param begin Begin iterator of sequence. * @param end Unused, only for compatibility. * @param comp Unused, only for compatibility. */ inline unguarded_iterator(RandomAccessIterator begin, RandomAccessIterator end, Comparator& comp) : current(begin), comp(comp) { } /** @brief Pre-increment operator. * @return This. */ inline unguarded_iterator& operator++() { current++; return *this; } /** @brief Dereference operator. * @return Referenced element. */ inline typename std::iterator_traits::value_type operator*() { return *current; } /** @brief Convert to wrapped iterator. * @return Wrapped iterator. */ inline operator RandomAccessIterator() { return current; } friend bool operator< (unguarded_iterator& bi1, unguarded_iterator& bi2); friend bool operator<= (unguarded_iterator& bi1, unguarded_iterator& bi2); }; /** @brief Compare two elements referenced by unguarded iterators. * @param bi1 First iterator. * @param bi2 Second iterator. * @return @c True if less. */ template inline bool operator<(unguarded_iterator& bi1, unguarded_iterator& bi2) { // Normal compare. return (bi1.comp)(*bi1, *bi2); } /** @brief Compare two elements referenced by unguarded iterators. * @param bi1 First iterator. * @param bi2 Second iterator. * @return @c True if less equal. */ template inline bool operator<=(unguarded_iterator& bi1, unguarded_iterator& bi2) { // Normal compare. return !(bi1.comp)(*bi2, *bi1); } /** Prepare a set of sequences to be merged without a (end) guard * @param seqs_begin * @param seqs_end * @param comp * @param min_sequence * @param stable * @pre (seqs_end - seqs_begin > 0) */ template typename std::iterator_traits::value_type::first_type>::difference_type prepare_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, Comparator comp, int& min_sequence, bool stable) { _GLIBCXX_CALL(seqs_end - seqs_begin) typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; if ((*seqs_begin).first == (*seqs_begin).second) { // Empty sequence found, it's the first one. min_sequence = 0; return -1; } // Last element in sequence. value_type min = *((*seqs_begin).second - 1); min_sequence = 0; for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++) { if ((*s).first == (*s).second) { // Empty sequence found. min_sequence = static_cast(s - seqs_begin); return -1; } // Last element in sequence. const value_type& v = *((*s).second - 1); if (comp(v, min)) //strictly smaller { min = v; min_sequence = static_cast(s - seqs_begin); } } difference_type overhang_size = 0; int s = 0; for (s = 0; s <= min_sequence; s++) { RandomAccessIterator1 split; if (stable) split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp); else split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp); overhang_size += seqs_begin[s].second - split; } for (; s < (seqs_end - seqs_begin); s++) { RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp); overhang_size += seqs_begin[s].second - split; } // So many elements will be left over afterwards. return overhang_size; } /** Prepare a set of sequences to be merged with a (end) guard (sentinel) * @param seqs_begin * @param seqs_end * @param comp */ template typename std::iterator_traits::value_type::first_type>::difference_type prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, Comparator comp) { _GLIBCXX_CALL(seqs_end - seqs_begin) typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::difference_type difference_type; // Last element in sequence. value_type max; bool max_found = false; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) { if ((*s).first == (*s).second) continue; // Last element in sequence. value_type& v = *((*s).second - 1); // Strictly greater. if (!max_found || comp(max, v)) max = v; max_found = true; } difference_type overhang_size = 0; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) { RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp); overhang_size += (*s).second - split; // Set sentinel. *((*s).second) = max; } // So many elements will be left over afterwards. return overhang_size; } /** @brief Highly efficient 3-way merging procedure. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Unused, stable anyway. * @return End iterator of output sequence. */ template class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator> RandomAccessIterator3 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length); typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; if (length == 0) return target; iterator seq0(seqs_begin[0].first, seqs_begin[0].second, comp), seq1(seqs_begin[1].first, seqs_begin[1].second, comp), seq2(seqs_begin[2].first, seqs_begin[2].second, comp); if (seq0 <= seq1) { if (seq1 <= seq2) goto s012; else if (seq2 < seq0) goto s201; else goto s021; } else { if (seq1 <= seq2) { if (seq0 <= seq2) goto s102; else goto s120; } else goto s210; } #define Merge3Case(a,b,c,c0,c1) \ s ## a ## b ## c : \ *target = *seq ## a; \ ++target; \ length--; \ ++seq ## a; \ if (length == 0) goto finish; \ if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ goto s ## b ## c ## a; Merge3Case(0, 1, 2, <=, <=); Merge3Case(1, 2, 0, <=, < ); Merge3Case(2, 0, 1, < , < ); Merge3Case(1, 0, 2, < , <=); Merge3Case(0, 2, 1, <=, <=); Merge3Case(2, 1, 0, < , < ); #undef Merge3Case finish: ; seqs_begin[0].first = seq0; seqs_begin[1].first = seq1; seqs_begin[2].first = seq2; return target; } template RandomAccessIterator3 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length); typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; int min_seq; RandomAccessIterator3 target_end; // Stable anyway. difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); difference_type total_length = 0; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) total_length += LENGTH(*s); if (overhang != -1) { difference_type unguarded_length = std::min(length, total_length - overhang); target_end = multiway_merge_3_variant (seqs_begin, seqs_end, target, comp, unguarded_length, stable); overhang = length - unguarded_length; } else { // Empty sequence found. overhang = length; target_end = target; } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif switch (min_seq) { case 0: // Iterators will be advanced accordingly. target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second, seqs_begin[2].first, seqs_begin[2].second, target_end, overhang, comp); break; case 1: target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[2].first, seqs_begin[2].second, target_end, overhang, comp); break; case 2: target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target_end, overhang, comp); break; default: _GLIBCXX_PARALLEL_ASSERT(false); } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif return target_end; } /** @brief Highly efficient 4-way merging procedure. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Unused, stable anyway. * @return End iterator of output sequence. */ template class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator> RandomAccessIterator3 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length); typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; iterator seq0(seqs_begin[0].first, seqs_begin[0].second, comp), seq1(seqs_begin[1].first, seqs_begin[1].second, comp), seq2(seqs_begin[2].first, seqs_begin[2].second, comp), seq3(seqs_begin[3].first, seqs_begin[3].second, comp); #define Decision(a,b,c,d) { \ if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \ if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \ if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \ goto s ## a ## b ## c ## d; } if (seq0 <= seq1) { if (seq1 <= seq2) Decision(0,1,2,3) else if (seq2 < seq0) Decision(2,0,1,3) else Decision(0,2,1,3) } else { if (seq1 <= seq2) { if (seq0 <= seq2) Decision(1,0,2,3) else Decision(1,2,0,3) } else Decision(2,1,0,3) } #define Merge4Case(a,b,c,d,c0,c1,c2) \ s ## a ## b ## c ## d: \ if (length == 0) goto finish; \ *target = *seq ## a; \ ++target; \ length--; \ ++seq ## a; \ if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \ if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \ if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \ goto s ## b ## c ## d ## a; Merge4Case(0, 1, 2, 3, <=, <=, <=); Merge4Case(0, 1, 3, 2, <=, <=, <=); Merge4Case(0, 2, 1, 3, <=, <=, <=); Merge4Case(0, 2, 3, 1, <=, <=, <=); Merge4Case(0, 3, 1, 2, <=, <=, <=); Merge4Case(0, 3, 2, 1, <=, <=, <=); Merge4Case(1, 0, 2, 3, < , <=, <=); Merge4Case(1, 0, 3, 2, < , <=, <=); Merge4Case(1, 2, 0, 3, <=, < , <=); Merge4Case(1, 2, 3, 0, <=, <=, < ); Merge4Case(1, 3, 0, 2, <=, < , <=); Merge4Case(1, 3, 2, 0, <=, <=, < ); Merge4Case(2, 0, 1, 3, < , < , <=); Merge4Case(2, 0, 3, 1, < , <=, < ); Merge4Case(2, 1, 0, 3, < , < , <=); Merge4Case(2, 1, 3, 0, < , <=, < ); Merge4Case(2, 3, 0, 1, <=, < , < ); Merge4Case(2, 3, 1, 0, <=, < , < ); Merge4Case(3, 0, 1, 2, < , < , < ); Merge4Case(3, 0, 2, 1, < , < , < ); Merge4Case(3, 1, 0, 2, < , < , < ); Merge4Case(3, 1, 2, 0, < , < , < ); Merge4Case(3, 2, 0, 1, < , < , < ); Merge4Case(3, 2, 1, 0, < , < , < ); #undef Merge4Case #undef Decision finish: ; seqs_begin[0].first = seq0; seqs_begin[1].first = seq1; seqs_begin[2].first = seq2; seqs_begin[3].first = seq3; return target; } template RandomAccessIterator3 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length); typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; int min_seq; RandomAccessIterator3 target_end; // Stable anyway. difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true); difference_type total_length = 0; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) total_length += LENGTH(*s); if (overhang != -1) { difference_type unguarded_length = std::min(length, total_length - overhang); target_end = multiway_merge_4_variant (seqs_begin, seqs_end, target, comp, unguarded_length, stable); overhang = length - unguarded_length; } else { // Empty sequence found. overhang = length; target_end = target; } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif std::vector > one_missing(seqs_begin, seqs_end); one_missing.erase(one_missing.begin() + min_seq); //remove target_end = multiway_merge_3_variant(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable); // Insert back again. one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]); // Write back modified iterators. copy(one_missing.begin(), one_missing.end(), seqs_begin); #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif return target_end; } /** @brief Basic multi-way merging procedure. * * The head elements are kept in a sorted array, new heads are * inserted linearly. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @return End iterator of output sequence. */ template RandomAccessIterator3 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; // Num remaining pieces. int k = static_cast(seqs_end - seqs_begin), nrp; value_type* pl = new value_type[k]; int* source = new int[k]; difference_type total_length = 0; #define POS(i) seqs_begin[(i)].first #define STOPS(i) seqs_begin[(i)].second // Write entries into queue. nrp = 0; for (int pi = 0; pi < k; pi++) { if (STOPS(pi) != POS(pi)) { pl[nrp] = *(POS(pi)); source[nrp] = pi; nrp++; total_length += LENGTH(seqs_begin[pi]); } } if (stable) { for (int k = 0; k < nrp - 1; k++) for (int pi = nrp - 1; pi > k; pi--) if (comp(pl[pi], pl[pi - 1]) || (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1])) { std::swap(pl[pi - 1], pl[pi]); std::swap(source[pi - 1], source[pi]); } } else { for (int k = 0; k < nrp - 1; k++) for (int pi = nrp - 1; pi > k; pi--) if (comp(pl[pi], pl[pi-1])) { std::swap(pl[pi-1], pl[pi]); std::swap(source[pi-1], source[pi]); } } // Iterate. if (stable) { int j; while (nrp > 0 && length > 0) { if (source[0] < source[1]) { // pl[0] <= pl[1] while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0) { *target = pl[0]; ++target; ++POS(source[0]); length--; if (POS(source[0]) == STOPS(source[0])) { // Move everything to the left. for (int s = 0; s < nrp - 1; s++) { pl[s] = pl[s + 1]; source[s] = source[s + 1]; } nrp--; break; } else pl[0] = *(POS(source[0])); } } else { // pl[0] < pl[1] while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0) { *target = pl[0]; ++target; ++POS(source[0]); length--; if (POS(source[0]) == STOPS(source[0])) { for (int s = 0; s < nrp - 1; s++) { pl[s] = pl[s + 1]; source[s] = source[s + 1]; } nrp--; break; } else pl[0] = *(POS(source[0])); } } // Sink down. j = 1; while ((j < nrp) && (comp(pl[j], pl[j - 1]) || (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1])))) { std::swap(pl[j - 1], pl[j]); std::swap(source[j - 1], source[j]); j++; } } } else { int j; while (nrp > 0 && length > 0) { // pl[0] <= pl[1] while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0) { *target = pl[0]; ++target; ++POS(source[0]); length--; if (POS(source[0]) == STOPS(source[0])) { for (int s = 0; s < (nrp - 1); s++) { pl[s] = pl[s + 1]; source[s] = source[s + 1]; } nrp--; break; } else pl[0] = *(POS(source[0])); } // Sink down. j = 1; while ((j < nrp) && comp(pl[j], pl[j - 1])) { std::swap(pl[j - 1], pl[j]); std::swap(source[j - 1], source[j]); j++; } } } delete[] pl; delete[] source; return target; } /** @brief Multi-way merging procedure for a high branching factor, * guarded case. * * The head elements are kept in a loser tree. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @return End iterator of output sequence. */ template RandomAccessIterator3 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; int k = static_cast(seqs_end - seqs_begin); LT lt(k, comp); difference_type total_length = 0; for (int t = 0; t < k; t++) { if (stable) { if (seqs_begin[t].first == seqs_begin[t].second) lt.insert_start_stable(value_type(), t, true); else lt.insert_start_stable(*seqs_begin[t].first, t, false); } else { if (seqs_begin[t].first == seqs_begin[t].second) lt.insert_start(value_type(), t, true); else lt.insert_start(*seqs_begin[t].first, t, false); } total_length += LENGTH(seqs_begin[t]); } if (stable) lt.init_stable(); else lt.init(); total_length = std::min(total_length, length); int source; if (stable) { for (difference_type i = 0; i < total_length; i++) { // Take out. source = lt.get_min_source(); *(target++) = *(seqs_begin[source].first++); // Feed. if (seqs_begin[source].first == seqs_begin[source].second) lt.delete_min_insert_stable(value_type(), true); else // Replace from same source. lt.delete_min_insert_stable(*seqs_begin[source].first, false); } } else { for (difference_type i = 0; i < total_length; i++) { //take out source = lt.get_min_source(); *(target++) = *(seqs_begin[source].first++); // Feed. if (seqs_begin[source].first == seqs_begin[source].second) lt.delete_min_insert(value_type(), true); else // Replace from same source. lt.delete_min_insert(*seqs_begin[source].first, false); } } return target; } /** @brief Multi-way merging procedure for a high branching factor, * unguarded case. * * The head elements are kept in a loser tree. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @return End iterator of output sequence. * @pre No input will run out of elements during the merge. */ template RandomAccessIterator3 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; int k = seqs_end - seqs_begin; LT lt(k, comp); difference_type total_length = 0; for (int t = 0; t < k; t++) { #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second); #endif if (stable) lt.insert_start_stable(*seqs_begin[t].first, t, false); else lt.insert_start(*seqs_begin[t].first, t, false); total_length += LENGTH(seqs_begin[t]); } if (stable) lt.init_stable(); else lt.init(); // Do not go past end. length = std::min(total_length, length); int source; #if _GLIBCXX_ASSERTIONS difference_type i = 0; #endif if (stable) { RandomAccessIterator3 target_end = target + length; while (target < target_end) { // Take out. source = lt.get_min_source(); #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1))); #endif *(target++) = *(seqs_begin[source].first++); #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1)); i++; #endif // Feed. // Replace from same source. lt.delete_min_insert_stable(*seqs_begin[source].first, false); } } else { RandomAccessIterator3 target_end = target + length; while (target < target_end) { // Take out. source = lt.get_min_source(); #if _GLIBCXX_ASSERTIONS if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1))) printf(" %i %i %i\n", length, i, source); _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1))); #endif *(target++) = *(seqs_begin[source].first++); #if _GLIBCXX_ASSERTIONS if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1))) printf(" %i %i %i\n", length, i, source); _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)); i++; #endif // Feed. // Replace from same source. lt.delete_min_insert(*seqs_begin[source].first, false); } } return target; } template struct loser_tree_traits { typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT; }; /*#define NO_POINTER(T) \ template \ struct loser_tree_traits \ { \ typedef LoserTreePointer LT; \ };*/ // // NO_POINTER(unsigned char) // NO_POINTER(char) // NO_POINTER(unsigned short) // NO_POINTER(short) // NO_POINTER(unsigned int) // NO_POINTER(int) // NO_POINTER(unsigned long) // NO_POINTER(long) // NO_POINTER(unsigned long long) // NO_POINTER(long long) // // #undef NO_POINTER template struct loser_tree_traits_unguarded { typedef LoserTreeUnguarded<_ValueTp, Comparator> LT; }; /*#define NO_POINTER_UNGUARDED(T) \ template \ struct loser_tree_traits_unguarded \ { \ typedef LoserTreePointerUnguarded LT; \ };*/ // // NO_POINTER_UNGUARDED(unsigned char) // NO_POINTER_UNGUARDED(char) // NO_POINTER_UNGUARDED(unsigned short) // NO_POINTER_UNGUARDED(short) // NO_POINTER_UNGUARDED(unsigned int) // NO_POINTER_UNGUARDED(int) // NO_POINTER_UNGUARDED(unsigned long) // NO_POINTER_UNGUARDED(long) // NO_POINTER_UNGUARDED(unsigned long long) // NO_POINTER_UNGUARDED(long long) // // #undef NO_POINTER_UNGUARDED template RandomAccessIterator3 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; int min_seq; RandomAccessIterator3 target_end; difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, stable); difference_type total_length = 0; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) total_length += LENGTH(*s); if (overhang != -1) { difference_type unguarded_length = std::min(length, total_length - overhang); target_end = multiway_merge_loser_tree_unguarded ::LT> (seqs_begin, seqs_end, target, comp, unguarded_length, stable); overhang = length - unguarded_length; } else { // Empty sequence found. overhang = length; target_end = target; } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif target_end = multiway_merge_loser_tree ::LT> (seqs_begin, seqs_end, target_end, comp, overhang, stable); #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif return target_end; } template RandomAccessIterator3 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef std::iterator_traits traits_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; RandomAccessIterator3 target_end; difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp); difference_type total_length = 0; for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) { total_length += LENGTH(*s); // Sentinel spot. (*s).second++; } difference_type unguarded_length = std::min(length, total_length - overhang); target_end = multiway_merge_loser_tree_unguarded ::LT> (seqs_begin, seqs_end, target, comp, unguarded_length, stable); overhang = length - unguarded_length; #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif // Copy rest stable. for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++) { // Restore. (*s).second--; difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s)); target_end = std::copy((*s).first, (*s).first + local_length, target_end); (*s).first += local_length; overhang -= local_length; } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(overhang == 0); _GLIBCXX_PARALLEL_ASSERT(target_end == target + length); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp)); #endif return target_end; } /** @brief Sequential multi-way merging switch. * * The decision if based on the branching factor and runtime settings. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @param sentinel The sequences have a sentinel element. * @return End iterator of output sequence. */ template RandomAccessIterator3 multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; #if _GLIBCXX_ASSERTIONS for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp)); #endif RandomAccessIterator3 return_target = target; int k = static_cast(seqs_end - seqs_begin); Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm; if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL) mwma = Settings::LOSER_TREE_COMBINED; switch (k) { case 0: break; case 1: return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target); seqs_begin[0].first += length; break; case 2: return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp); break; case 3: switch (mwma) { case Settings::LOSER_TREE_COMBINED: return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable); break; case Settings::LOSER_TREE_SENTINEL: return_target = multiway_merge_3_variant(seqs_begin, seqs_end, target, comp, length, stable); break; default: return_target = multiway_merge_3_variant(seqs_begin, seqs_end, target, comp, length, stable); break; } break; case 4: switch (mwma) { case Settings::LOSER_TREE_COMBINED: return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable); break; case Settings::LOSER_TREE_SENTINEL: return_target = multiway_merge_4_variant(seqs_begin, seqs_end, target, comp, length, stable); break; default: return_target = multiway_merge_4_variant(seqs_begin, seqs_end, target, comp, length, stable); break; } break; default: { switch (mwma) { case Settings::BUBBLE: return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable); break; #if _GLIBCXX_LOSER_TREE_EXPLICIT case Settings::LOSER_TREE_EXPLICIT: return_target = multiway_merge_loser_tree >(seqs_begin, seqs_end, target, comp, length, stable); break; #endif #if _GLIBCXX_LOSER_TREE case Settings::LOSER_TREE: return_target = multiway_merge_loser_tree >(seqs_begin, seqs_end, target, comp, length, stable); break; #endif #if _GLIBCXX_LOSER_TREE_COMBINED case Settings::LOSER_TREE_COMBINED: return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable); break; #endif #if _GLIBCXX_LOSER_TREE_SENTINEL case Settings::LOSER_TREE_SENTINEL: return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable); break; #endif default: // multiway_merge algorithm not implemented. _GLIBCXX_PARALLEL_ASSERT(0); break; } } } #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); #endif return return_target; } /** @brief Parallel multi-way merge routine. * * The decision if based on the branching factor and runtime settings. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @param sentinel Ignored. * @return End iterator of output sequence. */ template RandomAccessIterator3 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel) { _GLIBCXX_CALL(length) typedef _DifferenceTp difference_type; typedef typename std::iterator_traits::value_type::first_type RandomAccessIterator1; typedef typename std::iterator_traits::value_type value_type; #if _GLIBCXX_ASSERTIONS for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++) _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp)); #endif // k sequences. int k = static_cast(seqs_end - seqs_begin); difference_type total_length = 0; for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++) total_length += LENGTH(*raii); _GLIBCXX_CALL(total_length) if (total_length == 0 || k == 0) return target; thread_index_t num_threads = static_cast(std::min(static_cast(get_max_threads()), total_length)); Timing* t = new Timing[num_threads]; for (int pr = 0; pr < num_threads; pr++) t[pr].tic(); bool tight = (total_length == length); // Thread t will have to merge pieces[iam][0..k - 1] std::vector >* pieces = new std::vector >[num_threads]; for (int s = 0; s < num_threads; s++) pieces[s].resize(k); difference_type num_samples = Settings::merge_oversampling * num_threads; if (Settings::multiway_merge_splitting == Settings::SAMPLING) { value_type* samples = new value_type[k * num_samples]; // Sample. for (int s = 0; s < k; s++) for (int i = 0; (difference_type)i < num_samples; i++) { difference_type sample_index = static_cast(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length)); samples[s * num_samples + i] = seqs_begin[s].first[sample_index]; } if (stable) __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp); else __gnu_sequential::sort(samples, samples + (num_samples * k), comp); for (int slab = 0; slab < num_threads; slab++) // For each slab / processor. for (int seq = 0; seq < k; seq++) { // For each sequence. if (slab > 0) pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first; else { // Absolute beginning. pieces[slab][seq].first = 0; } if ((slab + 1) < num_threads) pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first; else pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending } delete[] samples; } else { // (Settings::multiway_merge_splitting == Settings::EXACT). std::vector* offsets = new std::vector[num_threads]; std::vector > se(k); copy(seqs_begin, seqs_end, se.begin()); difference_type* borders = static_cast(__builtin_alloca(sizeof(difference_type) * (num_threads + 1))); equally_split(length, num_threads, borders); for (int s = 0; s < (num_threads - 1); s++) { offsets[s].resize(k); multiseq_partition(se.begin(), se.end(), borders[s + 1], offsets[s].begin(), comp); // Last one also needed and available. if (!tight) { offsets[num_threads - 1].resize(k); multiseq_partition(se.begin(), se.end(), difference_type(length), offsets[num_threads - 1].begin(), comp); } } for (int slab = 0; slab < num_threads; slab++) { // For each slab / processor. for (int seq = 0; seq < k; seq++) { // For each sequence. if (slab == 0) { // Absolute beginning. pieces[slab][seq].first = 0; } else pieces[slab][seq].first = pieces[slab - 1][seq].second; if (!tight || slab < (num_threads - 1)) pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first; else { // slab == num_threads - 1 pieces[slab][seq].second = LENGTH(seqs_begin[seq]); } } } delete[] offsets; } for (int pr = 0; pr < num_threads; pr++) t[pr].tic(); # pragma omp parallel num_threads(num_threads) { thread_index_t iam = omp_get_thread_num(); t[iam].tic(); difference_type target_position = 0; for (int c = 0; c < k; c++) target_position += pieces[iam][c].first; if (k > 2) { std::pair* chunks = new std::pair[k]; difference_type local_length = 0; for (int s = 0; s < k; s++) { chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second); local_length += LENGTH(chunks[s]); } multiway_merge(chunks, chunks + k, target + target_position, comp, std::min(local_length, length - target_position), stable, false, sequential_tag()); delete[] chunks; } else if (k == 2) { RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first; merge_advance(begin0, seqs_begin[0].first + pieces[iam][0].second, begin1, seqs_begin[1].first + pieces[iam][1].second, target + target_position, (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first), comp); } t[iam].tic(); } for (int pr = 0; pr < num_threads; pr++) t[pr].tic(); #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); #endif // Update ends of sequences. for (int s = 0; s < k; s++) seqs_begin[s].first += pieces[num_threads - 1][s].second; delete[] pieces; for (int pr = 0; pr < num_threads; pr++) t[pr].tic(); for (int pr = 0; pr < num_threads; pr++) t[pr].print(); delete[] t; return target + length; } /** * @brief Multi-way merging front-end. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @return End iterator of output sequence. */ template RandomAccessIterator3 multiway_merge(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { typedef _DifferenceTp difference_type; _GLIBCXX_CALL(seqs_end - seqs_begin) if (seqs_begin == seqs_end) return target; RandomAccessIterator3 target_end; if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false); else target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag()); return target_end; } /** @brief Multi-way merging front-end. * @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence. * @param comp Comparator. * @param length Maximum length to merge. * @param stable Stable merging incurs a performance penalty. * @return End iterator of output sequence. * @pre For each @c i, @c seqs_begin[i].second must be the end * marker of the sequence, but also reference the one more sentinel * element. */ template RandomAccessIterator3 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorPairIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) { typedef _DifferenceTp difference_type; if (seqs_begin == seqs_end) return target; _GLIBCXX_CALL(seqs_end - seqs_begin) if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits::difference_type)length, stable, true); else return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag()); } } #endif