partition.h (__parallel_partition): Improve scalability by...

2010-04-22  Johannes Singler  <singler@kit.edu>

        * include/parallel/partition.h (__parallel_partition):
        Improve scalability by:
        - introducing new variables __leftold, __rightold, __dist, thus
        - getting rid of the omp lock by using atomic operations
        - getting rid of two omp barriers

From-SVN: r158636
This commit is contained in:
Johannes Singler 2010-04-22 10:14:07 +00:00 committed by Johannes Singler
parent 6a0447ba7f
commit ed27799786
2 changed files with 79 additions and 75 deletions

View File

@ -1,3 +1,11 @@
2010-04-22 Johannes Singler <singler@kit.edu>
* include/parallel/partition.h (__parallel_partition):
Improve scalability by:
-introducing new variables __leftold, __rightold, __dist, thus
-getting rid of omp lock by using atomic operations
-getting rid of two omp barriers
2010-04-22 Jonathan Wakely <jwakely.gcc@gmail.com> 2010-04-22 Jonathan Wakely <jwakely.gcc@gmail.com>
* doc/xml/faq.xml: Link to manual. * doc/xml/faq.xml: Link to manual.

View File

@ -66,27 +66,26 @@ namespace __gnu_parallel
const _Settings& __s = _Settings::get(); const _Settings& __s = _Settings::get();
// Shared. // shared
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1; _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1,
_GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right; __dist = __n,
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew; __leftover_left, __leftover_right,
__leftnew, __rightnew;
bool* __reserved_left = NULL, * __reserved_right = NULL; // just 0 or 1, but int to allow atomic operations
int* __reserved_left = NULL, * __reserved_right = NULL;
_DifferenceType __chunk_size = __s.partition_chunk_size; _DifferenceType __chunk_size = __s.partition_chunk_size;
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
//at least two chunks per thread //at least two chunks per thread
if (__right - __left + 1 >= 2 * __num_threads * __chunk_size) if (__dist >= 2 * __num_threads * __chunk_size)
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__reserved_left = new bool[__num_threads]; __reserved_left = new int[__num_threads];
__reserved_right = new bool[__num_threads]; __reserved_right = new int[__num_threads];
if (__s.partition_chunk_share > 0.0) if (__s.partition_chunk_share > 0.0)
__chunk_size = std::max<_DifferenceType> __chunk_size = std::max<_DifferenceType>
@ -96,17 +95,16 @@ namespace __gnu_parallel
__chunk_size = __s.partition_chunk_size; __chunk_size = __s.partition_chunk_size;
} }
while (__right - __left + 1 >= 2 * __num_threads * __chunk_size) while (__dist >= 2 * __num_threads * __chunk_size)
{ {
# pragma omp single # pragma omp single
{ {
_DifferenceType __num_chunks = ((__right - __left + 1) _DifferenceType __num_chunks = __dist / __chunk_size;
/ __chunk_size);
for (_ThreadIndex __r = 0; __r < __num_threads; ++__r) for (_ThreadIndex __r = 0; __r < __num_threads; ++__r)
{ {
__reserved_left[__r] = false; __reserved_left [__r] = 0; // false
__reserved_right[__r] = false; __reserved_right[__r] = 0; // false
} }
__leftover_left = 0; __leftover_left = 0;
__leftover_right = 0; __leftover_right = 0;
@ -115,11 +113,13 @@ namespace __gnu_parallel
// Private. // Private.
_DifferenceType __thread_left, __thread_left_border, _DifferenceType __thread_left, __thread_left_border,
__thread_right, __thread_right_border; __thread_right, __thread_right_border;
__thread_left = __left + 1;
__thread_left = __left + 1;
// Just to satisfy the condition below. // Just to satisfy the condition below.
__thread_left_border = __thread_left - 1; __thread_left_border = __thread_left - 1;
__thread_right = __n - 1; __thread_right = __n - 1;
// Just to satisfy the condition below.
__thread_right_border = __thread_right + 1; __thread_right_border = __thread_right + 1;
bool __iam_finished = false; bool __iam_finished = false;
@ -127,35 +127,42 @@ namespace __gnu_parallel
{ {
if (__thread_left > __thread_left_border) if (__thread_left > __thread_left_border)
{ {
omp_set_lock(&__result_lock); _DifferenceType __former_dist =
if (__left + (__chunk_size - 1) > __right) __fetch_and_add(&__dist, -__chunk_size);
if (__former_dist < __chunk_size)
{
__fetch_and_add(&__dist, __chunk_size);
__iam_finished = true; __iam_finished = true;
break;
}
else else
{ {
__thread_left = __left; __thread_left =
__thread_left_border = __left + (__chunk_size - 1); __fetch_and_add(&__left, __chunk_size);
__left += __chunk_size; __thread_left_border =
__thread_left + (__chunk_size - 1);
} }
omp_unset_lock(&__result_lock);
} }
if (__thread_right < __thread_right_border) if (__thread_right < __thread_right_border)
{ {
omp_set_lock(&__result_lock); _DifferenceType __former_dist =
if (__left > __right - (__chunk_size - 1)) __fetch_and_add(&__dist, -__chunk_size);
if (__former_dist < __chunk_size)
{
__fetch_and_add(&__dist, __chunk_size);
__iam_finished = true; __iam_finished = true;
break;
}
else else
{ {
__thread_right = __right; __thread_right =
__thread_right_border = __right - (__chunk_size - 1); __fetch_and_add(&__right, -__chunk_size);
__right -= __chunk_size; __thread_right_border =
__thread_right - (__chunk_size - 1);
} }
omp_unset_lock(&__result_lock);
} }
if (__iam_finished)
break;
// Swap as usual. // Swap as usual.
while (__thread_left < __thread_right) while (__thread_left < __thread_right)
{ {
@ -188,13 +195,11 @@ namespace __gnu_parallel
# pragma omp barrier # pragma omp barrier
# pragma omp single _DifferenceType
{ __leftold = __left,
__leftnew = __left - __leftover_left * __chunk_size; __leftnew = __left - __leftover_left * __chunk_size,
__rightold = __right,
__rightnew = __right + __leftover_right * __chunk_size; __rightnew = __right + __leftover_right * __chunk_size;
}
# pragma omp barrier
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
if (__thread_left <= __thread_left_border if (__thread_left <= __thread_left_border
@ -202,7 +207,7 @@ namespace __gnu_parallel
{ {
// Chunk already in place, reserve spot. // Chunk already in place, reserve spot.
__reserved_left[(__left - (__thread_left_border + 1)) __reserved_left[(__left - (__thread_left_border + 1))
/ __chunk_size] = true; / __chunk_size] = 1;
} }
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
@ -211,7 +216,7 @@ namespace __gnu_parallel
{ {
// Chunk already in place, reserve spot. // Chunk already in place, reserve spot.
__reserved_right[((__thread_right_border - 1) - __right) __reserved_right[((__thread_right_border - 1) - __right)
/ __chunk_size] = true; / __chunk_size] = 1;
} }
# pragma omp barrier # pragma omp barrier
@ -221,15 +226,13 @@ namespace __gnu_parallel
{ {
// Find spot and swap. // Find spot and swap.
_DifferenceType __swapstart = -1; _DifferenceType __swapstart = -1;
omp_set_lock(&__result_lock); for (int __r = 0; __r < __leftover_left; ++__r)
for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) if (__reserved_left[__r] == 0
if (!__reserved_left[__r]) && __compare_and_swap(&(__reserved_left[__r]), 0, 1))
{ {
__reserved_left[__r] = true; __swapstart = __leftold - (__r + 1) * __chunk_size;
__swapstart = __left - (__r + 1) * __chunk_size;
break; break;
} }
omp_unset_lock(&__result_lock);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
@ -246,15 +249,13 @@ namespace __gnu_parallel
{ {
// Find spot and swap // Find spot and swap
_DifferenceType __swapstart = -1; _DifferenceType __swapstart = -1;
omp_set_lock(&__result_lock); for (int __r = 0; __r < __leftover_right; ++__r)
for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) if (__reserved_right[__r] == 0
if (!__reserved_right[__r]) && __compare_and_swap(&(__reserved_right[__r]), 0, 1))
{ {
__reserved_right[__r] = true; __swapstart = __rightold + __r * __chunk_size + 1;
__swapstart = __right + __r * __chunk_size + 1;
break; break;
} }
omp_unset_lock(&__result_lock);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
@ -270,18 +271,15 @@ namespace __gnu_parallel
# pragma omp single # pragma omp single
{ {
for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) for (_DifferenceType __r = 0; __r < __leftover_left; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]); _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r] == 1);
for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) for (_DifferenceType __r = 0; __r < __leftover_right; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]); _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r] == 1);
} }
# pragma omp barrier
#endif #endif
# pragma omp barrier
__left = __leftnew; __left = __leftnew;
__right = __rightnew; __right = __rightnew;
__dist = __right - __left + 1;
} }
# pragma omp flush(__left, __right) # pragma omp flush(__left, __right)
@ -313,8 +311,6 @@ namespace __gnu_parallel
delete[] __reserved_left; delete[] __reserved_left;
delete[] __reserved_right; delete[] __reserved_right;
omp_destroy_lock(&__result_lock);
// Element "between" __final_left and __final_right might not have // Element "between" __final_left and __final_right might not have
// been regarded yet // been regarded yet
if (__final_left < __n && !__pred(__begin[__final_left])) if (__final_left < __n && !__pred(__begin[__final_left]))