736cc331c6
* include/parallel/partial_sum.h (__parallel_partial_sum_linear): Correctly calculate part lengths for partial_sum_dilation!=1.0f. From-SVN: r160428
229 lines
7.3 KiB
C++
229 lines
7.3 KiB
C++
// -*- C++ -*-
|
|
|
|
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
|
|
//
|
|
// This file is part of the GNU ISO C++ Library. This library is free
|
|
// software; you can redistribute it and/or modify it under the terms
|
|
// of the GNU General Public License as published by the Free Software
|
|
// Foundation; either version 3, or (at your option) any later
|
|
// version.
|
|
|
|
// This library is distributed in the hope that it will be useful, but
|
|
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
// General Public License for more details.
|
|
|
|
// Under Section 7 of GPL version 3, you are granted additional
|
|
// permissions described in the GCC Runtime Library Exception, version
|
|
// 3.1, as published by the Free Software Foundation.
|
|
|
|
// You should have received a copy of the GNU General Public License and
|
|
// a copy of the GCC Runtime Library Exception along with this program;
|
|
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
// <http://www.gnu.org/licenses/>.
|
|
|
|
/** @file parallel/partial_sum.h
|
|
* @brief Parallel implementation of std::partial_sum(), i.e. prefix
|
|
* sums.
|
|
* This file is a GNU parallel extension to the Standard C++ Library.
|
|
*/
|
|
|
|
// Written by Johannes Singler.
|
|
|
|
#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
|
|
#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
|
|
|
|
#include <omp.h>
|
|
#include <new>
|
|
#include <bits/stl_algobase.h>
|
|
#include <parallel/parallel.h>
|
|
#include <parallel/numericfwd.h>
|
|
|
|
namespace __gnu_parallel
|
|
{
|
|
// Problem: there is no 0-element given.
|
|
|
|
/** @brief Base case prefix sum routine.
|
|
* @param __begin Begin iterator of input sequence.
|
|
* @param __end End iterator of input sequence.
|
|
* @param __result Begin iterator of output sequence.
|
|
* @param __bin_op Associative binary function.
|
|
* @param __value Start value. Must be passed since the neutral
|
|
* element is unknown in general.
|
|
* @return End iterator of output sequence. */
|
|
template<typename _IIter,
|
|
typename _OutputIterator,
|
|
typename _BinaryOperation>
|
|
_OutputIterator
|
|
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
|
|
_OutputIterator __result,
|
|
_BinaryOperation __bin_op,
|
|
typename std::iterator_traits <_IIter>::value_type __value)
|
|
{
|
|
if (__begin == __end)
|
|
return __result;
|
|
|
|
while (__begin != __end)
|
|
{
|
|
__value = __bin_op(__value, *__begin);
|
|
*__result = __value;
|
|
++__result;
|
|
++__begin;
|
|
}
|
|
return __result;
|
|
}
|
|
|
|
/** @brief Parallel partial sum implementation, two-phase approach,
|
|
no recursion.
|
|
* @param __begin Begin iterator of input sequence.
|
|
* @param __end End iterator of input sequence.
|
|
* @param __result Begin iterator of output sequence.
|
|
* @param __bin_op Associative binary function.
|
|
* @param __n Length of sequence.
|
|
* @param __num_threads Number of threads to use.
|
|
* @return End iterator of output sequence.
|
|
*/
|
|
template<typename _IIter,
|
|
typename _OutputIterator,
|
|
typename _BinaryOperation>
|
|
_OutputIterator
|
|
__parallel_partial_sum_linear(_IIter __begin, _IIter __end,
|
|
_OutputIterator __result,
|
|
_BinaryOperation __bin_op,
|
|
typename std::iterator_traits<_IIter>::difference_type __n)
|
|
{
|
|
typedef std::iterator_traits<_IIter> _TraitsType;
|
|
typedef typename _TraitsType::value_type _ValueType;
|
|
typedef typename _TraitsType::difference_type _DifferenceType;
|
|
|
|
if (__begin == __end)
|
|
return __result;
|
|
|
|
_ThreadIndex __num_threads =
|
|
std::min<_DifferenceType>(__get_max_threads(), __n - 1);
|
|
|
|
if (__num_threads < 2)
|
|
{
|
|
*__result = *__begin;
|
|
return __parallel_partial_sum_basecase(__begin + 1, __end,
|
|
__result + 1, __bin_op,
|
|
*__begin);
|
|
}
|
|
|
|
_DifferenceType* __borders;
|
|
_ValueType* __sums;
|
|
|
|
const _Settings& __s = _Settings::get();
|
|
|
|
# pragma omp parallel num_threads(__num_threads)
|
|
{
|
|
# pragma omp single
|
|
{
|
|
__num_threads = omp_get_num_threads();
|
|
|
|
__borders = new _DifferenceType[__num_threads + 2];
|
|
|
|
if (__s.partial_sum_dilation == 1.0f)
|
|
equally_split(__n, __num_threads + 1, __borders);
|
|
else
|
|
{
|
|
_DifferenceType __first_part_length =
|
|
std::max<_DifferenceType>(1,
|
|
__n / (1.0f + __s.partial_sum_dilation * __num_threads));
|
|
_DifferenceType __chunk_length =
|
|
(__n - __first_part_length) / __num_threads;
|
|
_DifferenceType __borderstart =
|
|
__n - __num_threads * __chunk_length;
|
|
__borders[0] = 0;
|
|
for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
|
|
{
|
|
__borders[__i] = __borderstart;
|
|
__borderstart += __chunk_length;
|
|
}
|
|
__borders[__num_threads + 1] = __n;
|
|
}
|
|
|
|
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
|
|
* __num_threads));
|
|
_OutputIterator __target_end;
|
|
} //single
|
|
|
|
_ThreadIndex __iam = omp_get_thread_num();
|
|
if (__iam == 0)
|
|
{
|
|
*__result = *__begin;
|
|
__parallel_partial_sum_basecase(__begin + 1,
|
|
__begin + __borders[1],
|
|
__result + 1,
|
|
__bin_op, *__begin);
|
|
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
|
|
}
|
|
else
|
|
{
|
|
::new(&(__sums[__iam]))
|
|
_ValueType(__gnu_parallel::accumulate(
|
|
__begin + __borders[__iam] + 1,
|
|
__begin + __borders[__iam + 1],
|
|
*(__begin + __borders[__iam]),
|
|
__bin_op,
|
|
__gnu_parallel::sequential_tag()));
|
|
}
|
|
|
|
# pragma omp barrier
|
|
|
|
# pragma omp single
|
|
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
|
|
__sums + 1, __bin_op, __sums[0]);
|
|
|
|
# pragma omp barrier
|
|
|
|
// Still same team.
|
|
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
|
|
__begin + __borders[__iam + 2],
|
|
__result + __borders[__iam + 1],
|
|
__bin_op, __sums[__iam]);
|
|
} //parallel
|
|
|
|
::operator delete(__sums);
|
|
delete[] __borders;
|
|
|
|
return __result + __n;
|
|
}
|
|
|
|
/** @brief Parallel partial sum front-__end.
|
|
* @param __begin Begin iterator of input sequence.
|
|
* @param __end End iterator of input sequence.
|
|
* @param __result Begin iterator of output sequence.
|
|
* @param __bin_op Associative binary function.
|
|
* @return End iterator of output sequence. */
|
|
template<typename _IIter,
|
|
typename _OutputIterator,
|
|
typename _BinaryOperation>
|
|
_OutputIterator
|
|
__parallel_partial_sum(_IIter __begin, _IIter __end,
|
|
_OutputIterator __result, _BinaryOperation __bin_op)
|
|
{
|
|
_GLIBCXX_CALL(__begin - __end)
|
|
|
|
typedef std::iterator_traits<_IIter> _TraitsType;
|
|
typedef typename _TraitsType::value_type _ValueType;
|
|
typedef typename _TraitsType::difference_type _DifferenceType;
|
|
|
|
_DifferenceType __n = __end - __begin;
|
|
|
|
switch (_Settings::get().partial_sum_algorithm)
|
|
{
|
|
case LINEAR:
|
|
// Need an initial offset.
|
|
return __parallel_partial_sum_linear(__begin, __end, __result,
|
|
__bin_op, __n);
|
|
default:
|
|
// Partial_sum algorithm not implemented.
|
|
_GLIBCXX_PARALLEL_ASSERT(0);
|
|
return __result + __n;
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */
|