multiway_merge.h: Removed Timing<inactive_tag>

2007-10-25  Johannes Singler  <singler@ira.uka.de>

      * include/parallel/multiway_merge.h: Removed Timing<inactive_tag>
      * include/parallel/random_shuffle.h: Same
      * include/parallel/set_operations.h: Same
      * include/parallel/tree.h: Same
      * include/parallel/multiway_mergesort.h: Same
      * include/parallel/timing.h: Removed completely

From-SVN: r129629
This commit is contained in:
Johannes Singler 2007-10-25 17:07:56 +00:00 committed by Johannes Singler
parent 8d358a4aae
commit 740936e08e
7 changed files with 9 additions and 355 deletions

View File

@ -1,3 +1,12 @@
2007-10-25 Johannes Singler <singler@ira.uka.de>
* include/parallel/multiway_merge.h: Removed Timing<inactive_tag>
* include/parallel/random_shuffle.h: Same
* include/parallel/set_operations.h: Same
* include/parallel/tree.h: Same
* include/parallel/multiway_mergesort.h: Same
* include/parallel/timing.h: Removed completely
2007-10-25 Paolo Carlini <pcarlini@suse.de>
* include/bits/stl_algo.h (__lg<>(_Size)): Slightly tweak.

View File

@ -52,7 +52,6 @@
#include <parallel/parallel.h>
#include <parallel/merge.h>
#include <parallel/losertree.h>
#include <parallel/timing.h>
#if _GLIBCXX_ASSERTIONS
#include <parallel/checkers.h>
#endif
@ -1354,11 +1353,6 @@ namespace __gnu_parallel
thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
bool tight = (total_length == length);
// Thread t will have to merge pieces[iam][0..k - 1]
@ -1456,15 +1450,10 @@ namespace __gnu_parallel
delete[] offsets;
}
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
# pragma omp parallel num_threads(num_threads)
{
thread_index_t iam = omp_get_thread_num();
t[iam].tic();
difference_type target_position = 0;
for (int c = 0; c < k; c++)
@ -1498,14 +1487,8 @@ namespace __gnu_parallel
(pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
comp);
}
t[iam].tic();
}
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
#endif
@ -1516,12 +1499,6 @@ namespace __gnu_parallel
delete[] pieces;
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
for (int pr = 0; pr < num_threads; pr++)
t[pr].print();
delete[] t;
return target + length;
}

View File

@ -44,7 +44,6 @@
#include <bits/stl_algo.h>
#include <parallel/parallel.h>
#include <parallel/multiway_merge.h>
#include <parallel/timing.h>
namespace __gnu_parallel
{
@ -160,9 +159,6 @@ namespace __gnu_parallel
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
Timing<sequential_tag> t;
t.tic();
PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
thread_index_t iam = d->iam;
@ -196,7 +192,6 @@ namespace __gnu_parallel
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local.
t.tic("local sort");
if (Settings::sort_splitting == Settings::SAMPLING)
{
@ -205,8 +200,6 @@ namespace __gnu_parallel
#pragma omp barrier
t.tic("sample/wait");
#pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * d->num_threads),
@ -241,8 +234,6 @@ namespace __gnu_parallel
{
#pragma omp barrier
t.tic("wait");
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
for (int s = 0; s < d->num_threads; s++)
seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
@ -276,8 +267,6 @@ namespace __gnu_parallel
}
}
t.tic("split");
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
for (int s = 0; s < d->num_threads; s++)
@ -308,8 +297,6 @@ namespace __gnu_parallel
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag());
t.tic("merge");
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp));
#endif
@ -323,10 +310,6 @@ namespace __gnu_parallel
#endif
delete[] sd->temporaries[iam];
t.tic("copy back");
t.print();
}
/** @brief PMWMS main call.

View File

@ -42,7 +42,6 @@
#include <bits/stl_numeric.h>
#include <parallel/parallel.h>
#include <parallel/random_number.h>
#include <parallel/timing.h>
namespace __gnu_parallel
{
@ -136,9 +135,6 @@ namespace __gnu_parallel
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
Timing<sequential_tag> t;
t.tic();
DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[omp_get_thread_num()];
DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
thread_index_t iam = d->iam;
@ -170,12 +166,8 @@ namespace __gnu_parallel
for (bin_index b = 0; b < sd->num_bins + 1; b++)
sd->dist[b][iam + 1] = dist[b];
t.tic();
#pragma omp barrier
t.tic();
#pragma omp single
{
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
@ -188,8 +180,6 @@ namespace __gnu_parallel
#pragma omp barrier
t.tic();
sequence_index_t offset = 0, global_offset = 0;
for (bin_index s = 0; s < d->bins_begin; s++)
global_offset += sd->dist[s + 1][d->num_threads];
@ -205,12 +195,8 @@ namespace __gnu_parallel
sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * offset));
t.tic();
#pragma omp barrier
t.tic();
// Draw local copies to avoid false sharing.
for (bin_index b = 0; b < sd->num_bins + 1; b++)
dist[b] = sd->dist[b][iam];
@ -237,12 +223,8 @@ namespace __gnu_parallel
delete[] bin_proc;
delete[] temporaries;
t.tic();
#pragma omp barrier
t.tic();
// Shuffle bins internally.
for (bin_index b = d->bins_begin; b < d->bins_end; b++)
{
@ -253,10 +235,6 @@ namespace __gnu_parallel
}
delete[] sd->temporaries[iam];
t.tic();
t.print();
}
/** @brief Round up to the next greater power of 2.
@ -453,9 +431,6 @@ namespace __gnu_parallel
for (int b = 0; b < num_bins + 1; b++)
dist0[b] = 0;
Timing<sequential_tag> t;
t.tic();
random_number bitrng(rng(0xFFFFFFFF));
for (difference_type i = 0; i < n; i++)
@ -467,16 +442,12 @@ namespace __gnu_parallel
dist0[oracle + 1]++;
}
t.tic();
// Sum up bins.
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
for (int b = 0; b < num_bins + 1; b++)
dist1[b] = dist0[b];
t.tic();
// Distribute according to oracles.
for (difference_type i = 0; i < n; i++)
target[(dist0[oracles[i]])++] = *(begin + i);
@ -485,9 +456,7 @@ namespace __gnu_parallel
{
sequential_random_shuffle(target + dist1[b], target + dist1[b + 1],
rng);
t.tic();
}
t.print();
delete[] dist0;
delete[] dist1;

View File

@ -381,10 +381,6 @@ namespace __gnu_parallel
#pragma omp parallel num_threads(num_threads)
{
Timing<sequential_tag> t;
t.tic();
// Result from multiseq_partition.
InputIterator offset[2];
const int iam = omp_get_thread_num();
@ -407,13 +403,9 @@ namespace __gnu_parallel
iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]);
t.tic();
// Make sure all threads have their block_begin result written out.
#pragma omp barrier
t.tic();
iterator_pair block_begin = block_begins[ iam ];
// Begin working for the first block, while the others except
@ -429,12 +421,9 @@ namespace __gnu_parallel
block_begin.second, block_end.second);
}
t.tic();
// Make sure everyone wrote their lengths.
#pragma omp barrier
t.tic();
OutputIterator r = result;
if (iam == 0)
@ -458,9 +447,6 @@ namespace __gnu_parallel
op.invoke(block_begin.first, block_end.first,
block_begin.second, block_end.second, r);
}
t.tic();
t.print();
}
return return_value;
}

View File

@ -1,217 +0,0 @@
// -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 2, or (at your option) any later
// version.
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this library; see the file COPYING. If not, write to
// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
// MA 02111-1307, USA.
// As a special exception, you may use this file as part of a free
// software library without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to
// produce an executable, this file does not by itself cause the
// resulting executable to be covered by the GNU General Public
// License. This exception does not however invalidate any other
// reasons why the executable file might be covered by the GNU General
// Public License.
/** @file parallel/timing.h
* @brief Provides a simple tool to do performance debugging, also in
* parallel code.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
// Written by Johannes Singler.
#ifndef _GLIBCXX_PARALLEL_TIMING_H
#define _GLIBCXX_PARALLEL_TIMING_H 1
#include <omp.h>
#include <cstdio>
#include <cstring>
#include <parallel/tags.h>
namespace __gnu_parallel
{
// XXX integrate with existing performance testing infrastructure.
/** @brief Type of a point in time, used for the Timing classes. */
typedef double point_in_time;
template<typename tag, typename must_be_int = int>
class Timing;
/** @brief A class that provides simple run time measurements, also
 *  for parallel code.
 *  @param tag If parallel_tag, then the measurements are actually done.
 *  Otherwise, no code at all is emitted by the compiler. */
template<typename must_be_int>
  class Timing<parallel_tag, must_be_int>
  {
  private:
    /** @brief Maximum number of tic() measurements that can be stored. */
    static const int max_points_in_time = 100;

    /** @brief Wall-clock stamps recorded by tic(), one per call. */
    point_in_time points_in_time[max_points_in_time];

    /** @brief Accumulated start()/stop() time, and the beginning of the
     *  currently running interval (-1.0 when none is running). */
    point_in_time active, last_start;

    /** @brief Number of tic() measurements recorded so far. */
    int pos;

    /** @brief Lazily allocated output buffer, owned by this object. */
    char* str;

    /** @brief Optional description for each recorded measurement. */
    const char* tags[max_points_in_time];

    // This class owns the raw buffer str; a compiler-generated copy
    // would lead to a double delete[].  Forbid copying (declared but
    // not defined, C++98 style).
    Timing(const Timing&);
    Timing& operator=(const Timing&);

  public:
    Timing()
    {
      str = NULL;
      pos = 0;
      active = 0.0;
      last_start = -1.0;
    }

    ~Timing()
    {
      delete[] str;
    }

    /** @brief Take a running time measurement.
     *  @param tag Optional description that will be output again with
     *  the timings.
     *  It should describe the operation before the tic(). To time a
     *  series of @c n operations, there should be @c n+1 calls to
     *  tic(), and one call to print(). */
    inline void
    tic(const char* tag = NULL)
    {
      // Silently drop further measurements once the fixed-size arrays
      // are full, instead of writing out of bounds.
      if (pos >= max_points_in_time)
        return;
      points_in_time[pos] = omp_get_wtime();
      tags[pos] = tag;
      pos++;
    }

    /** @brief Start the running time measurement.
     *
     *  Should be paired with stop(). */
    inline void
    start()
    {
      _GLIBCXX_PARALLEL_ASSERT(last_start == -1.0);
      last_start = omp_get_wtime();
    }

    /** @brief Stop the running time measurement.
     *
     *  Should be paired with start(). */
    inline void
    stop()
    {
      _GLIBCXX_PARALLEL_ASSERT(last_start != -1.0);
      active += (omp_get_wtime() - last_start);
      last_start = -1.0;
    }

    /** @brief Reset running time accumulation. */
    inline void
    reset()
    {
      active = 0.0;
      last_start = -1.0;
    }

    /** @brief Accumulate the time between all pairs of start() and
        stop() so far */
    inline point_in_time
    active_time()
    { return active; }

    /** @brief Total time between first and last tic(), in milliseconds.
     *  Returns 0 if no measurement has been taken yet. */
    inline point_in_time
    total_time()
    {
      if (pos == 0)
        return 0.0;
      return (points_in_time[pos - 1] - points_in_time[0]) * 1000.0;
    }

  private:
    /** @brief Construct string to print out, presenting the timings. */
    const char*
    c_str()
    {
      // Avoid stream library here, to avoid cyclic dependencies in
      // header files.
      char tmp[1000];

      // (Re)allocate the output buffer each time: the number of
      // measurements may have grown since a previous call, and the
      // extra constant covers the header line even when pos == 0.
      delete[] str;
      str = new char[pos * 200 + 64];
      sprintf(str, "t %2d T[ms]", omp_get_thread_num());
      strcat(str, "\n");
      for (int i = 0; i < pos; )
        {
          point_in_time last = points_in_time[i];
          i++;
          if (i == pos)
            break;
          if (tags[i] == NULL)
            sprintf(tmp, "%2d: ", i - 1);
          else
            // Bound the tag length so a long tag cannot overflow tmp.
            sprintf(tmp, "%20.100s: ", tags[i]);
          strcat(str, tmp);
          sprintf(tmp, "%7.2f ", (points_in_time[i] - last) * 1000.0);
          strcat(str, tmp);
          strcat(str, "\n");
        }
      return str;
    }

  public:
    /** @brief Print the running times between the tic()s. */
    void
    print()
    {
      printf("print\n");

      // Serialize output so per-thread reports are not interleaved.
      #pragma omp barrier
      #pragma omp master
      printf("\n\n");
      #pragma omp critical
      printf("%s\n", c_str());
    }
  };
/** @brief A class that provides simple run time measurements, also
 *  for parallel code.
 *  @param tag If parallel_tag, then the measurements are actually done,
 *  otherwise, no code at all is emitted by the compiler.
 *
 *  This is the disabled specialization: every member is an empty
 *  inline stub, so instrumented code compiles away to nothing. */
template<typename must_be_int>
  class Timing<sequential_tag, must_be_int>
  {
  private:
    /** @brief Shared empty result returned by c_str(). */
    static const char* empty_string;

  public:
    /** @brief No-op measurement point; the tag is ignored. */
    inline void
    tic(const char* /*tag*/ = NULL)
    { }

    /** @brief No-op counterpart of the parallel start(). */
    inline void
    start()
    { }

    /** @brief No-op counterpart of the parallel stop(). */
    inline void
    stop()
    { }

    /** @brief No-op counterpart of the parallel reset(). */
    inline void
    reset()
    { }

    /** @brief Dummy accumulated time; always -1.0. */
    inline point_in_time
    active_time()
    { return -1.0; }

    /** @brief Dummy total time; always -1.0. */
    inline point_in_time
    total_time()
    { return -1.0; }

    /** @brief Dummy report; always the empty string. */
    inline const char*
    c_str()
    { return empty_string; }

    /** @brief No-op counterpart of the parallel print(). */
    inline void
    print()
    { }
  };

// Out-of-line definition of the shared empty report string.
template<typename must_be_int>
  const char* Timing<sequential_tag, must_be_int>::empty_string = "";
}
#endif

View File

@ -57,13 +57,6 @@
#include <parallel/list_partition.h>
//#define _GLIBCXX_TIMING
#ifdef _GLIBCXX_TIMING
#define _timing_tag parallel_tag
#else
#define _timing_tag sequential_tag
#endif
namespace std
{
// XXX Declaration should go to stl_tree.h.
@ -1217,10 +1210,6 @@ namespace __gnu_parallel
void
_M_bulk_insertion_construction(const _InputIterator __first, const _InputIterator __last, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
Timing<_timing_tag> t;
t.tic();
thread_index_t num_threads = get_max_threads();
size_type n;
size_type beg_partition[num_threads+1];
@ -1228,8 +1217,6 @@ namespace __gnu_parallel
beg_partition[0] = 0;
bool is_sorted= is_sorted_distance_accessors(__first, __last, access, beg_partition,n, num_threads, std::__iterator_category(__first));
t.tic("is_sorted");
if (not is_sorted)
{
_M_not_sorted_bulk_insertion_construction(access, beg_partition, n, num_threads, is_construction, strictly_less_or_less_equal);
@ -1260,10 +1247,6 @@ namespace __gnu_parallel
_M_sorted_bulk_insertion(access, beg_partition, n, num_threads,
strictly_less_or_less_equal);
}
t.tic("main work");
t.print();
}
/** @brief Bulk construction and insertion helper method on an
@ -1349,31 +1332,19 @@ namespace __gnu_parallel
_M_not_sorted_bulk_insertion_construction(size_type* beg_partition, ElementsToSort* v, Comparator comp, const size_type n, thread_index_t num_threads, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
// The accessors have been calculated for the non sorted.
Timing<_timing_tag> t;
t.tic();
num_threads = static_cast<thread_index_t>(std::min<size_type>(num_threads, n));
std::stable_sort(v, v+n, comp);
t.tic("sort");
IteratorSortedElements sorted_access[num_threads+1];
range_accessors(IteratorSortedElements(v), IteratorSortedElements(v+n), sorted_access, beg_partition, n, num_threads, std::__iterator_category(v));
t.tic("range_accessors");
// Partial template specialization not available.
if (is_construction)
_M_sorted_bulk_construction(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
else
_M_sorted_bulk_insertion(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
delete v;
t.tic("actual construction or insertion");
t.print();
}
/** @brief Construct a tree sequentially using the parallel routine
@ -1753,17 +1724,11 @@ namespace __gnu_parallel
void
_M_sorted_bulk_construction(_Iterator* access, size_type* beg_partition, const size_type n, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
Timing<_timing_tag> t;
// Dealing with repetitions (EFFICIENCY ISSUE).
size_type rank_shift[num_threads+1];
t.tic();
_Rb_tree_node_ptr* r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, n, num_threads, strictly_less_or_less_equal);
t.tic("bulk allocation and initialization");
// Link the tree appropriately.
// Dealing with repetitions (EFFICIENCY ISSUE).
ranker_gaps rank(beg_partition, rank_shift, num_threads);
@ -1818,11 +1783,7 @@ namespace __gnu_parallel
base_type::_M_impl._M_header._M_parent = nodes_init.get_root();
nodes_init.get_root()->_M_parent= &base_type::_M_impl._M_header;
t.tic("linking nodes");
::operator delete(r);
t.tic("delete array of pointers");
t.print();
}
@ -1850,10 +1811,6 @@ namespace __gnu_parallel
_M_sorted_bulk_insertion(_Iterator* access, size_type* beg_partition, size_type k, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
_GLIBCXX_PARALLEL_ASSERT((size_type)num_threads <= k);
Timing<_timing_tag> t;
t.tic();
// num_thr-1 problems in the upper part of the tree
// num_thr problems to further parallelize
std::vector<size_type> existing(num_threads,0);
@ -1873,7 +1830,6 @@ namespace __gnu_parallel
// 1. Construct the nodes with their corresponding data
#if _GLIBCXX_TREE_INITIAL_SPLITTING
r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, k, num_threads, strictly_less_or_less_equal);
t.tic("bulk allocation and initialization");
#else
r = _M_sorted_no_gapped_bulk_allocation_and_initialization(access, beg_partition, k, num_threads, strictly_less_or_less_equal);
#endif
@ -1896,8 +1852,6 @@ namespace __gnu_parallel
repetitions (EFFICIENCY ISSUE) *****/
size_type last = beg_partition[num_threads] - (rank_shift[num_threads] - rank_shift[num_threads - 1]);
t.tic("last element to be inserted");
//2. Split the tree according to access in num_threads parts
//Initialize upper concat_problems
//Allocate them dynamically because they are afterwards so erased
@ -1960,8 +1914,6 @@ namespace __gnu_parallel
size_type last = k;
#endif
t.tic("sorted_no_gapped...");
// 3. Split the range according to tree and create
// 3. insertion/concatenation problems to be solved in parallel
#if _GLIBCXX_TREE_DYNAMIC_BALANCING
@ -2018,8 +1970,6 @@ namespace __gnu_parallel
} while (change);
}
t.tic("merging");
// Update root and sizes.
base_type::_M_root() = root_problem->t;
root_problem->t->_M_parent = &(base_type::_M_impl._M_header);
@ -2069,9 +2019,6 @@ namespace __gnu_parallel
// Delete array of pointers
::operator delete(r);
t.tic();
t.print();
}