multiway_merge.h: Removed Timing<inactive_tag>

2007-10-25  Johannes Singler  <singler@ira.uka.de>

      * include/parallel/multiway_merge.h: Removed Timing<inactive_tag>
      * include/parallel/random_shuffle.h: Same
      * include/parallel/set_operations.h: Same
      * include/parallel/tree.h: Same
      * include/parallel/multiway_mergesort.h: Same
      * include/parallel/timing.h: Removed completely

From-SVN: r129629
This commit is contained in:
Johannes Singler 2007-10-25 17:07:56 +00:00 committed by Johannes Singler
parent 8d358a4aae
commit 740936e08e
7 changed files with 9 additions and 355 deletions

View File

@ -1,3 +1,12 @@
2007-10-25 Johannes Singler <singler@ira.uka.de>
* include/parallel/multiway_merge.h: Removed Timing<inactive_tag>
* include/parallel/random_shuffle.h: Same
* include/parallel/set_operations.h: Same
* include/parallel/tree.h: Same
* include/parallel/multiway_mergesort.h: Same
* include/parallel/timing.h: Removed completely
2007-10-25 Paolo Carlini <pcarlini@suse.de>
* include/bits/stl_algo.h (__lg<>(_Size)): Slightly tweak.

View File

@ -52,7 +52,6 @@
#include <parallel/parallel.h>
#include <parallel/merge.h>
#include <parallel/losertree.h>
#include <parallel/timing.h>
#if _GLIBCXX_ASSERTIONS
#include <parallel/checkers.h>
#endif
@ -1354,11 +1353,6 @@ namespace __gnu_parallel
thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
bool tight = (total_length == length);
// Thread t will have to merge pieces[iam][0..k - 1]
@ -1456,15 +1450,10 @@ namespace __gnu_parallel
delete[] offsets;
}
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
# pragma omp parallel num_threads(num_threads)
{
thread_index_t iam = omp_get_thread_num();
t[iam].tic();
difference_type target_position = 0;
for (int c = 0; c < k; c++)
@ -1498,14 +1487,8 @@ namespace __gnu_parallel
(pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
comp);
}
t[iam].tic();
}
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
#endif
@ -1516,12 +1499,6 @@ namespace __gnu_parallel
delete[] pieces;
for (int pr = 0; pr < num_threads; pr++)
t[pr].tic();
for (int pr = 0; pr < num_threads; pr++)
t[pr].print();
delete[] t;
return target + length;
}

View File

@ -44,7 +44,6 @@
#include <bits/stl_algo.h>
#include <parallel/parallel.h>
#include <parallel/multiway_merge.h>
#include <parallel/timing.h>
namespace __gnu_parallel
{
@ -160,9 +159,6 @@ namespace __gnu_parallel
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
Timing<sequential_tag> t;
t.tic();
PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
thread_index_t iam = d->iam;
@ -196,7 +192,6 @@ namespace __gnu_parallel
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local.
t.tic("local sort");
if (Settings::sort_splitting == Settings::SAMPLING)
{
@ -205,8 +200,6 @@ namespace __gnu_parallel
#pragma omp barrier
t.tic("sample/wait");
#pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * d->num_threads),
@ -241,8 +234,6 @@ namespace __gnu_parallel
{
#pragma omp barrier
t.tic("wait");
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
for (int s = 0; s < d->num_threads; s++)
seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
@ -276,8 +267,6 @@ namespace __gnu_parallel
}
}
t.tic("split");
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
for (int s = 0; s < d->num_threads; s++)
@ -308,8 +297,6 @@ namespace __gnu_parallel
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag());
t.tic("merge");
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp));
#endif
@ -323,10 +310,6 @@ namespace __gnu_parallel
#endif
delete[] sd->temporaries[iam];
t.tic("copy back");
t.print();
}
/** @brief PMWMS main call.

View File

@ -42,7 +42,6 @@
#include <bits/stl_numeric.h>
#include <parallel/parallel.h>
#include <parallel/random_number.h>
#include <parallel/timing.h>
namespace __gnu_parallel
{
@ -136,9 +135,6 @@ namespace __gnu_parallel
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
Timing<sequential_tag> t;
t.tic();
DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[omp_get_thread_num()];
DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
thread_index_t iam = d->iam;
@ -170,12 +166,8 @@ namespace __gnu_parallel
for (bin_index b = 0; b < sd->num_bins + 1; b++)
sd->dist[b][iam + 1] = dist[b];
t.tic();
#pragma omp barrier
t.tic();
#pragma omp single
{
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
@ -188,8 +180,6 @@ namespace __gnu_parallel
#pragma omp barrier
t.tic();
sequence_index_t offset = 0, global_offset = 0;
for (bin_index s = 0; s < d->bins_begin; s++)
global_offset += sd->dist[s + 1][d->num_threads];
@ -205,12 +195,8 @@ namespace __gnu_parallel
sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * offset));
t.tic();
#pragma omp barrier
t.tic();
// Draw local copies to avoid false sharing.
for (bin_index b = 0; b < sd->num_bins + 1; b++)
dist[b] = sd->dist[b][iam];
@ -237,12 +223,8 @@ namespace __gnu_parallel
delete[] bin_proc;
delete[] temporaries;
t.tic();
#pragma omp barrier
t.tic();
// Shuffle bins internally.
for (bin_index b = d->bins_begin; b < d->bins_end; b++)
{
@ -253,10 +235,6 @@ namespace __gnu_parallel
}
delete[] sd->temporaries[iam];
t.tic();
t.print();
}
/** @brief Round up to the next greater power of 2.
@ -453,9 +431,6 @@ namespace __gnu_parallel
for (int b = 0; b < num_bins + 1; b++)
dist0[b] = 0;
Timing<sequential_tag> t;
t.tic();
random_number bitrng(rng(0xFFFFFFFF));
for (difference_type i = 0; i < n; i++)
@ -467,16 +442,12 @@ namespace __gnu_parallel
dist0[oracle + 1]++;
}
t.tic();
// Sum up bins.
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
for (int b = 0; b < num_bins + 1; b++)
dist1[b] = dist0[b];
t.tic();
// Distribute according to oracles.
for (difference_type i = 0; i < n; i++)
target[(dist0[oracles[i]])++] = *(begin + i);
@ -485,9 +456,7 @@ namespace __gnu_parallel
{
sequential_random_shuffle(target + dist1[b], target + dist1[b + 1],
rng);
t.tic();
}
t.print();
delete[] dist0;
delete[] dist1;

View File

@ -381,10 +381,6 @@ namespace __gnu_parallel
#pragma omp parallel num_threads(num_threads)
{
Timing<sequential_tag> t;
t.tic();
// Result from multiseq_partition.
InputIterator offset[2];
const int iam = omp_get_thread_num();
@ -407,13 +403,9 @@ namespace __gnu_parallel
iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]);
t.tic();
// Make sure all threads have their block_begin result written out.
#pragma omp barrier
t.tic();
iterator_pair block_begin = block_begins[ iam ];
// Begin working for the first block, while the others except
@ -429,12 +421,9 @@ namespace __gnu_parallel
block_begin.second, block_end.second);
}
t.tic();
// Make sure everyone wrote their lengths.
#pragma omp barrier
t.tic();
OutputIterator r = result;
if (iam == 0)
@ -458,9 +447,6 @@ namespace __gnu_parallel
op.invoke(block_begin.first, block_end.first,
block_begin.second, block_end.second, r);
}
t.tic();
t.print();
}
return return_value;
}

View File

@ -1,217 +0,0 @@
// -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 2, or (at your option) any later
// version.
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this library; see the file COPYING. If not, write to
// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
// MA 02111-1307, USA.
// As a special exception, you may use this file as part of a free
// software library without restriction. Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to
// produce an executable, this file does not by itself cause the
// resulting executable to be covered by the GNU General Public
// License. This exception does not however invalidate any other
// reasons why the executable file might be covered by the GNU General
// Public License.
/** @file parallel/timing.h
* @brief Provides a simple tool to do performance debugging, also in
* parallel code.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
// Written by Johannes Singler.
#ifndef _GLIBCXX_PARALLEL_TIMING_H
#define _GLIBCXX_PARALLEL_TIMING_H 1
#include <omp.h>
#include <cstdio>
#include <cstring>
#include <parallel/tags.h>
namespace __gnu_parallel
{
// XXX integrate with existing performance testing infrastructure.
/** @brief Type of a point in time, used for the Timing classes. */
typedef double point_in_time;
template<typename tag, typename must_be_int = int>
class Timing;
/** @brief A class that provides simple run time measurements, also
 *  for parallel code.
 *  @param tag If parallel_tag, then the measurements are actually done.
 *  Otherwise, no code at all is emitted by the compiler. */
template<typename must_be_int>
  class Timing<parallel_tag, must_be_int>
  {
  private:
    /** @brief Maximum number of tic() measurements that can be stored. */
    static const int max_points_in_time = 100;

    /** @brief Wall-clock stamps recorded by tic(), one per call. */
    point_in_time points_in_time[max_points_in_time];

    /** @brief Accumulated start()/stop() time, and the beginning of the
     *  currently running interval (-1.0 when none is running). */
    point_in_time active, last_start;

    /** @brief Number of tic() measurements recorded so far. */
    int pos;

    /** @brief Lazily allocated output buffer, owned by this object. */
    char* str;

    /** @brief Optional description for each recorded measurement. */
    const char* tags[max_points_in_time];

    // This class owns the raw buffer str; a compiler-generated copy
    // would lead to a double delete[].  Forbid copying (declared but
    // not defined, C++98 style).
    Timing(const Timing&);
    Timing& operator=(const Timing&);

  public:
    Timing()
    {
      str = NULL;
      pos = 0;
      active = 0.0;
      last_start = -1.0;
    }

    ~Timing()
    {
      delete[] str;
    }

    /** @brief Take a running time measurement.
     *  @param tag Optional description that will be output again with
     *  the timings.
     *  It should describe the operation before the tic(). To time a
     *  series of @c n operations, there should be @c n+1 calls to
     *  tic(), and one call to print(). */
    inline void
    tic(const char* tag = NULL)
    {
      // Silently drop further measurements once the fixed-size arrays
      // are full, instead of writing out of bounds.
      if (pos >= max_points_in_time)
        return;
      points_in_time[pos] = omp_get_wtime();
      tags[pos] = tag;
      pos++;
    }

    /** @brief Start the running time measurement.
     *
     *  Should be paired with stop(). */
    inline void
    start()
    {
      _GLIBCXX_PARALLEL_ASSERT(last_start == -1.0);
      last_start = omp_get_wtime();
    }

    /** @brief Stop the running time measurement.
     *
     *  Should be paired with start(). */
    inline void
    stop()
    {
      _GLIBCXX_PARALLEL_ASSERT(last_start != -1.0);
      active += (omp_get_wtime() - last_start);
      last_start = -1.0;
    }

    /** @brief Reset running time accumulation. */
    inline void
    reset()
    {
      active = 0.0;
      last_start = -1.0;
    }

    /** @brief Accumulate the time between all pairs of start() and
        stop() so far */
    inline point_in_time
    active_time()
    { return active; }

    /** @brief Total time between first and last tic(), in milliseconds.
     *  Returns 0 if no measurement has been taken yet. */
    inline point_in_time
    total_time()
    {
      if (pos == 0)
        return 0.0;
      return (points_in_time[pos - 1] - points_in_time[0]) * 1000.0;
    }

  private:
    /** @brief Construct string to print out, presenting the timings. */
    const char*
    c_str()
    {
      // Avoid stream library here, to avoid cyclic dependencies in
      // header files.
      char tmp[1000];

      // (Re)allocate the output buffer each time: the number of
      // measurements may have grown since a previous call, and the
      // extra constant covers the header line even when pos == 0.
      delete[] str;
      str = new char[pos * 200 + 64];
      sprintf(str, "t %2d T[ms]", omp_get_thread_num());
      strcat(str, "\n");
      for (int i = 0; i < pos; )
        {
          point_in_time last = points_in_time[i];
          i++;
          if (i == pos)
            break;
          if (tags[i] == NULL)
            sprintf(tmp, "%2d: ", i - 1);
          else
            // Bound the tag length so a long tag cannot overflow tmp.
            sprintf(tmp, "%20.100s: ", tags[i]);
          strcat(str, tmp);
          sprintf(tmp, "%7.2f ", (points_in_time[i] - last) * 1000.0);
          strcat(str, tmp);
          strcat(str, "\n");
        }
      return str;
    }

  public:
    /** @brief Print the running times between the tic()s. */
    void
    print()
    {
      printf("print\n");

      // Serialize output so per-thread reports are not interleaved.
      #pragma omp barrier
      #pragma omp master
      printf("\n\n");
      #pragma omp critical
      printf("%s\n", c_str());
    }
  };
/** @brief A class that provides simple run time measurements, also
 *  for parallel code.
 *  @param tag If parallel_tag, then the measurements are actually done,
 *  otherwise, no code at all is emitted by the compiler.
 *
 *  This is the disabled specialization: every member is an empty
 *  inline stub, so instrumented code compiles away to nothing. */
template<typename must_be_int>
  class Timing<sequential_tag, must_be_int>
  {
  private:
    /** @brief Shared empty result returned by c_str(). */
    static const char* empty_string;

  public:
    /** @brief No-op measurement point; the tag is ignored. */
    inline void
    tic(const char* /*tag*/ = NULL)
    { }

    /** @brief No-op counterpart of the parallel start(). */
    inline void
    start()
    { }

    /** @brief No-op counterpart of the parallel stop(). */
    inline void
    stop()
    { }

    /** @brief No-op counterpart of the parallel reset(). */
    inline void
    reset()
    { }

    /** @brief Dummy accumulated time; always -1.0. */
    inline point_in_time
    active_time()
    { return -1.0; }

    /** @brief Dummy total time; always -1.0. */
    inline point_in_time
    total_time()
    { return -1.0; }

    /** @brief Dummy report; always the empty string. */
    inline const char*
    c_str()
    { return empty_string; }

    /** @brief No-op counterpart of the parallel print(). */
    inline void
    print()
    { }
  };

// Out-of-line definition of the shared empty report string.
template<typename must_be_int>
  const char* Timing<sequential_tag, must_be_int>::empty_string = "";
}
#endif

View File

@ -57,13 +57,6 @@
#include <parallel/list_partition.h>
//#define _GLIBCXX_TIMING
#ifdef _GLIBCXX_TIMING
#define _timing_tag parallel_tag
#else
#define _timing_tag sequential_tag
#endif
namespace std
{
// XXX Declaration should go to stl_tree.h.
@ -1217,10 +1210,6 @@ namespace __gnu_parallel
void
_M_bulk_insertion_construction(const _InputIterator __first, const _InputIterator __last, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
Timing<_timing_tag> t;
t.tic();
thread_index_t num_threads = get_max_threads();
size_type n;
size_type beg_partition[num_threads+1];
@ -1228,8 +1217,6 @@ namespace __gnu_parallel
beg_partition[0] = 0;
bool is_sorted= is_sorted_distance_accessors(__first, __last, access, beg_partition,n, num_threads, std::__iterator_category(__first));
t.tic("is_sorted");
if (not is_sorted)
{
_M_not_sorted_bulk_insertion_construction(access, beg_partition, n, num_threads, is_construction, strictly_less_or_less_equal);
@ -1260,10 +1247,6 @@ namespace __gnu_parallel
_M_sorted_bulk_insertion(access, beg_partition, n, num_threads,
strictly_less_or_less_equal);
}
t.tic("main work");
t.print();
}
/** @brief Bulk construction and insertion helper method on an
@ -1349,31 +1332,19 @@ namespace __gnu_parallel
_M_not_sorted_bulk_insertion_construction(size_type* beg_partition, ElementsToSort* v, Comparator comp, const size_type n, thread_index_t num_threads, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
// The accessors have been calculated for the non sorted.
Timing<_timing_tag> t;
t.tic();
num_threads = static_cast<thread_index_t>(std::min<size_type>(num_threads, n));
std::stable_sort(v, v+n, comp);
t.tic("sort");
IteratorSortedElements sorted_access[num_threads+1];
range_accessors(IteratorSortedElements(v), IteratorSortedElements(v+n), sorted_access, beg_partition, n, num_threads, std::__iterator_category(v));
t.tic("range_accessors");
// Partial template specialization not available.
if (is_construction)
_M_sorted_bulk_construction(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
else
_M_sorted_bulk_insertion(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
delete v;
t.tic("actual construction or insertion");
t.print();
}
/** @brief Construct a tree sequentially using the parallel routine
@ -1753,17 +1724,11 @@ namespace __gnu_parallel
void
_M_sorted_bulk_construction(_Iterator* access, size_type* beg_partition, const size_type n, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
Timing<_timing_tag> t;
// Dealing with repetitions (EFFICIENCY ISSUE).
size_type rank_shift[num_threads+1];
t.tic();
_Rb_tree_node_ptr* r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, n, num_threads, strictly_less_or_less_equal);
t.tic("bulk allocation and initialization");
// Link the tree appropriately.
// Dealing with repetitions (EFFICIENCY ISSUE).
ranker_gaps rank(beg_partition, rank_shift, num_threads);
@ -1818,11 +1783,7 @@ namespace __gnu_parallel
base_type::_M_impl._M_header._M_parent = nodes_init.get_root();
nodes_init.get_root()->_M_parent= &base_type::_M_impl._M_header;
t.tic("linking nodes");
::operator delete(r);
t.tic("delete array of pointers");
t.print();
}
@ -1850,10 +1811,6 @@ namespace __gnu_parallel
_M_sorted_bulk_insertion(_Iterator* access, size_type* beg_partition, size_type k, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
_GLIBCXX_PARALLEL_ASSERT((size_type)num_threads <= k);
Timing<_timing_tag> t;
t.tic();
// num_thr-1 problems in the upper part of the tree
// num_thr problems to further parallelize
std::vector<size_type> existing(num_threads,0);
@ -1873,7 +1830,6 @@ namespace __gnu_parallel
// 1. Construct the nodes with their corresponding data
#if _GLIBCXX_TREE_INITIAL_SPLITTING
r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, k, num_threads, strictly_less_or_less_equal);
t.tic("bulk allocation and initialization");
#else
r = _M_sorted_no_gapped_bulk_allocation_and_initialization(access, beg_partition, k, num_threads, strictly_less_or_less_equal);
#endif
@ -1896,8 +1852,6 @@ namespace __gnu_parallel
repetitions (EFFICIENCY ISSUE) *****/
size_type last = beg_partition[num_threads] - (rank_shift[num_threads] - rank_shift[num_threads - 1]);
t.tic("last element to be inserted");
//2. Split the tree according to access in num_threads parts
//Initialize upper concat_problems
//Allocate them dynamically because they are afterwards so erased
@ -1960,8 +1914,6 @@ namespace __gnu_parallel
size_type last = k;
#endif
t.tic("sorted_no_gapped...");
// 3. Split the range according to tree and create
// 3. insertion/concatenation problems to be solved in parallel
#if _GLIBCXX_TREE_DYNAMIC_BALANCING
@ -2018,8 +1970,6 @@ namespace __gnu_parallel
} while (change);
}
t.tic("merging");
// Update root and sizes.
base_type::_M_root() = root_problem->t;
root_problem->t->_M_parent = &(base_type::_M_impl._M_header);
@ -2069,9 +2019,6 @@ namespace __gnu_parallel
// Delete array of pointers
::operator delete(r);
t.tic();
t.print();
}