/* Copyright (C) 2005-2021 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
|
|
|
|
/* This file handles the LOOP (FOR/DO) construct. */
|
|
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "libgomp.h"
|
|
|
|
|
|
ialias (GOMP_loop_runtime_next)
|
|
ialias_redirect (GOMP_taskgroup_reduction_register)
|
|
|
|
/* Initialize the given work share construct from the given arguments. */
|
|
|
|
static inline void
|
|
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
|
|
enum gomp_schedule_type sched, long chunk_size)
|
|
{
|
|
ws->sched = sched;
|
|
ws->chunk_size = chunk_size;
|
|
/* Canonicalize loops that have zero iterations to ->next == ->end. */
|
|
ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
|
|
? start : end;
|
|
ws->incr = incr;
|
|
ws->next = start;
|
|
if (sched == GFS_DYNAMIC)
|
|
{
|
|
ws->chunk_size *= incr;
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
{
|
|
/* For dynamic scheduling prepare things to make each iteration
|
|
faster. */
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
struct gomp_team *team = thr->ts.team;
|
|
long nthreads = team ? team->nthreads : 1;
|
|
|
|
if (__builtin_expect (incr > 0, 1))
|
|
{
|
|
/* Cheap overflow protection. */
|
|
if (__builtin_expect ((nthreads | ws->chunk_size)
|
|
>= 1UL << (sizeof (long)
|
|
* __CHAR_BIT__ / 2 - 1), 0))
|
|
ws->mode = 0;
|
|
else
|
|
ws->mode = ws->end < (LONG_MAX
|
|
- (nthreads + 1) * ws->chunk_size);
|
|
}
|
|
/* Cheap overflow protection. */
|
|
else if (__builtin_expect ((nthreads | -ws->chunk_size)
|
|
>= 1UL << (sizeof (long)
|
|
* __CHAR_BIT__ / 2 - 1), 0))
|
|
ws->mode = 0;
|
|
else
|
|
ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/* The *_start routines are called when first encountering a loop construct
|
|
that is not bound directly to a parallel construct. The first thread
|
|
that arrives will create the work-share construct; subsequent threads
|
|
will see the construct exists and allocate work from it.
|
|
|
|
START, END, INCR are the bounds of the loop; due to the restrictions of
|
|
OpenMP, these values must be the same in every thread. This is not
|
|
verified (nor is it entirely verifiable, since START is not necessarily
|
|
retained intact in the work-share data structure). CHUNK_SIZE is the
|
|
scheduling parameter; again this must be identical in all threads.
|
|
|
|
Returns true if there's any work for this thread to perform. If so,
|
|
*ISTART and *IEND are filled with the bounds of the iteration block
|
|
allocated to this thread. Returns false if all work was assigned to
|
|
other threads prior to this thread's arrival. */
|
|
|
|
static bool
|
|
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
|
|
long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
thr->ts.static_trip = 0;
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
GFS_STATIC, chunk_size);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
return !gomp_iter_static_next (istart, iend);
|
|
}
|
|
|
|
/* The current dynamic implementation is always monotonic. The
|
|
entrypoints without nonmonotonic in them have to be always monotonic,
|
|
but the nonmonotonic ones could be changed to use work-stealing for
|
|
improved scalability. */
|
|
|
|
static bool
|
|
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
|
|
long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
GFS_DYNAMIC, chunk_size);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
ret = gomp_iter_dynamic_next (istart, iend);
|
|
#else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_dynamic_next_locked (istart, iend);
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Similarly as for dynamic, though the question is how can the chunk sizes
|
|
be decreased without a central locking or atomics. */
|
|
|
|
static bool
|
|
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
|
|
long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
GFS_GUIDED, chunk_size);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
ret = gomp_iter_guided_next (istart, iend);
|
|
#else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_guided_next_locked (istart, iend);
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_runtime_start (long start, long end, long incr,
|
|
long *istart, long *iend)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
switch (icv->run_sched_var & ~GFS_MONOTONIC)
|
|
{
|
|
case GFS_STATIC:
|
|
return gomp_loop_static_start (start, end, incr,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_DYNAMIC:
|
|
return gomp_loop_dynamic_start (start, end, incr,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_GUIDED:
|
|
return gomp_loop_guided_start (start, end, incr,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_AUTO:
|
|
/* For now map to schedule(static), later on we could play with feedback
|
|
driven choice. */
|
|
return gomp_loop_static_start (start, end, incr, 0, istart, iend);
|
|
default:
|
|
abort ();
|
|
}
|
|
}
|
|
|
|
static long
|
|
gomp_adjust_sched (long sched, long *chunk_size)
|
|
{
|
|
sched &= ~GFS_MONOTONIC;
|
|
switch (sched)
|
|
{
|
|
case GFS_STATIC:
|
|
case GFS_DYNAMIC:
|
|
case GFS_GUIDED:
|
|
return sched;
|
|
/* GFS_RUNTIME is used for runtime schedule without monotonic
|
|
or nonmonotonic modifiers on the clause.
|
|
GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
|
|
modifier. */
|
|
case GFS_RUNTIME:
|
|
/* GFS_AUTO is used for runtime schedule with nonmonotonic
|
|
modifier. */
|
|
case GFS_AUTO:
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
sched = icv->run_sched_var & ~GFS_MONOTONIC;
|
|
switch (sched)
|
|
{
|
|
case GFS_STATIC:
|
|
case GFS_DYNAMIC:
|
|
case GFS_GUIDED:
|
|
*chunk_size = icv->run_sched_chunk_size;
|
|
break;
|
|
case GFS_AUTO:
|
|
sched = GFS_STATIC;
|
|
*chunk_size = 0;
|
|
break;
|
|
default:
|
|
abort ();
|
|
}
|
|
return sched;
|
|
}
|
|
default:
|
|
abort ();
|
|
}
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_start (long start, long end, long incr, long sched,
|
|
long chunk_size, long *istart, long *iend,
|
|
uintptr_t *reductions, void **mem)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
thr->ts.static_trip = 0;
|
|
if (reductions)
|
|
gomp_workshare_taskgroup_start ();
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
sched = gomp_adjust_sched (sched, &chunk_size);
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
sched, chunk_size);
|
|
if (reductions)
|
|
{
|
|
GOMP_taskgroup_reduction_register (reductions);
|
|
thr->task->taskgroup->workshare = true;
|
|
thr->ts.work_share->task_reductions = reductions;
|
|
}
|
|
if (mem)
|
|
{
|
|
uintptr_t size = (uintptr_t) *mem;
|
|
#define INLINE_ORDERED_TEAM_IDS_OFF \
|
|
((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
|
|
+ __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
|
|
if (size > (sizeof (struct gomp_work_share)
|
|
- INLINE_ORDERED_TEAM_IDS_OFF))
|
|
*mem
|
|
= (void *) (thr->ts.work_share->ordered_team_ids
|
|
= gomp_malloc_cleared (size));
|
|
else
|
|
*mem = memset (((char *) thr->ts.work_share)
|
|
+ INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
|
|
}
|
|
gomp_work_share_init_done ();
|
|
}
|
|
else
|
|
{
|
|
if (reductions)
|
|
{
|
|
uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
|
|
gomp_workshare_task_reduction_register (reductions,
|
|
first_reductions);
|
|
}
|
|
if (mem)
|
|
{
|
|
if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
|
|
& (__alignof__ (long long) - 1)) == 0)
|
|
*mem = (void *) thr->ts.work_share->ordered_team_ids;
|
|
else
|
|
{
|
|
uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
|
|
p += __alignof__ (long long) - 1;
|
|
p &= ~(__alignof__ (long long) - 1);
|
|
*mem = (void *) p;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!istart)
|
|
return true;
|
|
return ialias_call (GOMP_loop_runtime_next) (istart, iend);
|
|
}
|
|
|
|
/* The *_ordered_*_start routines are similar. The only difference is that
|
|
this work-share construct is initialized to expect an ORDERED section. */
|
|
|
|
static bool
|
|
gomp_loop_ordered_static_start (long start, long end, long incr,
|
|
long chunk_size, long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
thr->ts.static_trip = 0;
|
|
if (gomp_work_share_start (1))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
GFS_STATIC, chunk_size);
|
|
gomp_ordered_static_init ();
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
return !gomp_iter_static_next (istart, iend);
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
|
|
long chunk_size, long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
if (gomp_work_share_start (1))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
GFS_DYNAMIC, chunk_size);
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
|
|
ret = gomp_iter_dynamic_next_locked (istart, iend);
|
|
if (ret)
|
|
gomp_ordered_first ();
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_ordered_guided_start (long start, long end, long incr,
|
|
long chunk_size, long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
if (gomp_work_share_start (1))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
GFS_GUIDED, chunk_size);
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
|
|
ret = gomp_iter_guided_next_locked (istart, iend);
|
|
if (ret)
|
|
gomp_ordered_first ();
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
|
|
long *istart, long *iend)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
switch (icv->run_sched_var & ~GFS_MONOTONIC)
|
|
{
|
|
case GFS_STATIC:
|
|
return gomp_loop_ordered_static_start (start, end, incr,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_DYNAMIC:
|
|
return gomp_loop_ordered_dynamic_start (start, end, incr,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_GUIDED:
|
|
return gomp_loop_ordered_guided_start (start, end, incr,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_AUTO:
|
|
/* For now map to schedule(static), later on we could play with feedback
|
|
driven choice. */
|
|
return gomp_loop_ordered_static_start (start, end, incr,
|
|
0, istart, iend);
|
|
default:
|
|
abort ();
|
|
}
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_ordered_start (long start, long end, long incr, long sched,
|
|
long chunk_size, long *istart, long *iend,
|
|
uintptr_t *reductions, void **mem)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
size_t ordered = 1;
|
|
bool ret;
|
|
|
|
thr->ts.static_trip = 0;
|
|
if (reductions)
|
|
gomp_workshare_taskgroup_start ();
|
|
if (mem)
|
|
ordered += (uintptr_t) *mem;
|
|
if (gomp_work_share_start (ordered))
|
|
{
|
|
sched = gomp_adjust_sched (sched, &chunk_size);
|
|
gomp_loop_init (thr->ts.work_share, start, end, incr,
|
|
sched, chunk_size);
|
|
if (reductions)
|
|
{
|
|
GOMP_taskgroup_reduction_register (reductions);
|
|
thr->task->taskgroup->workshare = true;
|
|
thr->ts.work_share->task_reductions = reductions;
|
|
}
|
|
if (sched == GFS_STATIC)
|
|
gomp_ordered_static_init ();
|
|
else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
else
|
|
{
|
|
if (reductions)
|
|
{
|
|
uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
|
|
gomp_workshare_task_reduction_register (reductions,
|
|
first_reductions);
|
|
}
|
|
sched = thr->ts.work_share->sched;
|
|
if (sched != GFS_STATIC)
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
}
|
|
|
|
if (mem)
|
|
{
|
|
uintptr_t p
|
|
= (uintptr_t) (thr->ts.work_share->ordered_team_ids
|
|
+ (thr->ts.team ? thr->ts.team->nthreads : 1));
|
|
p += __alignof__ (long long) - 1;
|
|
p &= ~(__alignof__ (long long) - 1);
|
|
*mem = (void *) p;
|
|
}
|
|
|
|
switch (sched)
|
|
{
|
|
case GFS_STATIC:
|
|
case GFS_AUTO:
|
|
return !gomp_iter_static_next (istart, iend);
|
|
case GFS_DYNAMIC:
|
|
ret = gomp_iter_dynamic_next_locked (istart, iend);
|
|
break;
|
|
case GFS_GUIDED:
|
|
ret = gomp_iter_guided_next_locked (istart, iend);
|
|
break;
|
|
default:
|
|
abort ();
|
|
}
|
|
|
|
if (ret)
|
|
gomp_ordered_first ();
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
return ret;
|
|
}
|
|
|
|
/* The *_doacross_*_start routines are similar. The only difference is that
|
|
this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
|
|
section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
|
|
and other COUNTS array elements tell the library number of iterations
|
|
in the ordered inner loops. */
|
|
|
|
static bool
|
|
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
|
|
long chunk_size, long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
thr->ts.static_trip = 0;
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
|
|
GFS_STATIC, chunk_size);
|
|
gomp_doacross_init (ncounts, counts, chunk_size, 0);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
return !gomp_iter_static_next (istart, iend);
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
|
|
long chunk_size, long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
|
|
GFS_DYNAMIC, chunk_size);
|
|
gomp_doacross_init (ncounts, counts, chunk_size, 0);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
ret = gomp_iter_dynamic_next (istart, iend);
|
|
#else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_dynamic_next_locked (istart, iend);
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
|
|
long chunk_size, long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
|
|
GFS_GUIDED, chunk_size);
|
|
gomp_doacross_init (ncounts, counts, chunk_size, 0);
|
|
gomp_work_share_init_done ();
|
|
}
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
ret = gomp_iter_guided_next (istart, iend);
|
|
#else
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_guided_next_locked (istart, iend);
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
|
|
long *istart, long *iend)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
switch (icv->run_sched_var & ~GFS_MONOTONIC)
|
|
{
|
|
case GFS_STATIC:
|
|
return gomp_loop_doacross_static_start (ncounts, counts,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_DYNAMIC:
|
|
return gomp_loop_doacross_dynamic_start (ncounts, counts,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_GUIDED:
|
|
return gomp_loop_doacross_guided_start (ncounts, counts,
|
|
icv->run_sched_chunk_size,
|
|
istart, iend);
|
|
case GFS_AUTO:
|
|
/* For now map to schedule(static), later on we could play with feedback
|
|
driven choice. */
|
|
return gomp_loop_doacross_static_start (ncounts, counts,
|
|
0, istart, iend);
|
|
default:
|
|
abort ();
|
|
}
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
|
|
long chunk_size, long *istart, long *iend,
|
|
uintptr_t *reductions, void **mem)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
thr->ts.static_trip = 0;
|
|
if (reductions)
|
|
gomp_workshare_taskgroup_start ();
|
|
if (gomp_work_share_start (0))
|
|
{
|
|
size_t extra = 0;
|
|
if (mem)
|
|
extra = (uintptr_t) *mem;
|
|
sched = gomp_adjust_sched (sched, &chunk_size);
|
|
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
|
|
sched, chunk_size);
|
|
gomp_doacross_init (ncounts, counts, chunk_size, extra);
|
|
if (reductions)
|
|
{
|
|
GOMP_taskgroup_reduction_register (reductions);
|
|
thr->task->taskgroup->workshare = true;
|
|
thr->ts.work_share->task_reductions = reductions;
|
|
}
|
|
gomp_work_share_init_done ();
|
|
}
|
|
else
|
|
{
|
|
if (reductions)
|
|
{
|
|
uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
|
|
gomp_workshare_task_reduction_register (reductions,
|
|
first_reductions);
|
|
}
|
|
sched = thr->ts.work_share->sched;
|
|
}
|
|
|
|
if (mem)
|
|
*mem = thr->ts.work_share->doacross->extra;
|
|
|
|
return ialias_call (GOMP_loop_runtime_next) (istart, iend);
|
|
}
|
|
|
|
/* Entry points named *_next are used when a thread has finished the
   iteration block currently assigned to it.  If the work-share construct
   is bound directly to a parallel construct, the iteration bounds may
   have been set up before the parallel, in which case this may be the
   thread's first iteration.

   The result is true when work remains, with *ISTART and *IEND filled
   with a new iteration block, and false once all work has been
   assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  /* A zero status from the static iterator is reported as "work found".  */
  int status = gomp_iter_static_next (istart, iend);

  return status == 0;
}
|
|
|
|
static bool
|
|
gomp_loop_dynamic_next (long *istart, long *iend)
|
|
{
|
|
bool ret;
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
ret = gomp_iter_dynamic_next (istart, iend);
|
|
#else
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_dynamic_next_locked (istart, iend);
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_guided_next (long *istart, long *iend)
|
|
{
|
|
bool ret;
|
|
|
|
#ifdef HAVE_SYNC_BUILTINS
|
|
ret = gomp_iter_guided_next (istart, iend);
|
|
#else
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_guided_next_locked (istart, iend);
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_runtime_next (long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
switch (thr->ts.work_share->sched)
|
|
{
|
|
case GFS_STATIC:
|
|
case GFS_AUTO:
|
|
return gomp_loop_static_next (istart, iend);
|
|
case GFS_DYNAMIC:
|
|
return gomp_loop_dynamic_next (istart, iend);
|
|
case GFS_GUIDED:
|
|
return gomp_loop_guided_next (istart, iend);
|
|
default:
|
|
abort ();
|
|
}
|
|
}
|
|
|
|
/* The *_ordered_*_next routines are called when the thread completes
|
|
processing of the iteration block currently assigned to it.
|
|
|
|
Returns true if there is work remaining to be performed; *ISTART and
|
|
*IEND are filled with a new iteration block. Returns false if all work
|
|
has been assigned. */
|
|
|
|
static bool
|
|
gomp_loop_ordered_static_next (long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
int test;
|
|
|
|
gomp_ordered_sync ();
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
test = gomp_iter_static_next (istart, iend);
|
|
if (test >= 0)
|
|
gomp_ordered_static_next ();
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
|
|
return test == 0;
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
gomp_ordered_sync ();
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_dynamic_next_locked (istart, iend);
|
|
if (ret)
|
|
gomp_ordered_next ();
|
|
else
|
|
gomp_ordered_last ();
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static bool
|
|
gomp_loop_ordered_guided_next (long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
bool ret;
|
|
|
|
gomp_ordered_sync ();
|
|
gomp_mutex_lock (&thr->ts.work_share->lock);
|
|
ret = gomp_iter_guided_next_locked (istart, iend);
|
|
if (ret)
|
|
gomp_ordered_next ();
|
|
else
|
|
gomp_ordered_last ();
|
|
gomp_mutex_unlock (&thr->ts.work_share->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool
|
|
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
|
|
{
|
|
struct gomp_thread *thr = gomp_thread ();
|
|
|
|
switch (thr->ts.work_share->sched)
|
|
{
|
|
case GFS_STATIC:
|
|
case GFS_AUTO:
|
|
return gomp_loop_ordered_static_next (istart, iend);
|
|
case GFS_DYNAMIC:
|
|
return gomp_loop_ordered_dynamic_next (istart, iend);
|
|
case GFS_GUIDED:
|
|
return gomp_loop_ordered_guided_next (istart, iend);
|
|
default:
|
|
abort ();
|
|
}
|
|
}
|
|
|
|
/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
|
|
to avoid one synchronization once we get into the loop. */
|
|
|
|
static void
|
|
gomp_parallel_loop_start (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, enum gomp_schedule_type sched,
|
|
long chunk_size, unsigned int flags)
|
|
{
|
|
struct gomp_team *team;
|
|
|
|
num_threads = gomp_resolve_num_threads (num_threads, 0);
|
|
team = gomp_new_team (num_threads);
|
|
gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
|
|
gomp_team_start (fn, data, num_threads, flags, team, NULL);
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, long chunk_size)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_STATIC, chunk_size, 0);
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, long chunk_size)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_DYNAMIC, chunk_size, 0);
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, long chunk_size)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_GUIDED, chunk_size, 0);
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
icv->run_sched_var & ~GFS_MONOTONIC,
|
|
icv->run_sched_chunk_size, 0);
|
|
}
|
|
|
|
/* GOMP_parallel_end is called by the combined GOMP_parallel_loop_*
   routines that follow.  NOTE(review): ialias_redirect is not defined in
   this file (presumably libgomp.h) -- confirm its expansion there.  */
ialias_redirect (GOMP_parallel_end)
|
|
|
|
void
|
|
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, long chunk_size, unsigned flags)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_STATIC, chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, long chunk_size, unsigned flags)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_DYNAMIC, chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, long chunk_size, unsigned flags)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_GUIDED, chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start, long end,
|
|
long incr, unsigned flags)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
icv->run_sched_var & ~GFS_MONOTONIC,
|
|
icv->run_sched_chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
#ifdef HAVE_ATTRIBUTE_ALIAS
|
|
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
|
|
__attribute__((alias ("GOMP_parallel_loop_dynamic")));
|
|
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
|
|
__attribute__((alias ("GOMP_parallel_loop_guided")));
|
|
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
|
|
__attribute__((alias ("GOMP_parallel_loop_runtime")));
|
|
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
|
|
__attribute__((alias ("GOMP_parallel_loop_runtime")));
|
|
#else
|
|
void
|
|
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start,
|
|
long end, long incr, long chunk_size,
|
|
unsigned flags)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_DYNAMIC, chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start,
|
|
long end, long incr, long chunk_size,
|
|
unsigned flags)
|
|
{
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
GFS_GUIDED, chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start,
|
|
long end, long incr, unsigned flags)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
icv->run_sched_var & ~GFS_MONOTONIC,
|
|
icv->run_sched_chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
|
|
void
|
|
GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
|
|
unsigned num_threads, long start,
|
|
long end, long incr,
|
|
unsigned flags)
|
|
{
|
|
struct gomp_task_icv *icv = gomp_icv (false);
|
|
gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
|
|
icv->run_sched_var & ~GFS_MONOTONIC,
|
|
icv->run_sched_chunk_size, flags);
|
|
fn (data);
|
|
GOMP_parallel_end ();
|
|
}
|
|
#endif
|
|
|
|
/* The GOMP_loop_end* routines are called once a thread has been told
   that all loop iterations are complete.  GOMP_loop_end and
   GOMP_loop_end_cancel synchronize all threads; GOMP_loop_end_nowait
   does not.  */

void
GOMP_loop_end (void)
{
  /* All the work is done by the work-share layer.  */
  gomp_work_share_end ();
}
|
|
|
|
/* End the loop through gomp_work_share_end_cancel and return that
   function's result to the caller.  */

bool
GOMP_loop_end_cancel (void)
{
  const bool result = gomp_work_share_end_cancel ();

  return result;
}
|
|
|
|
/* End the loop through gomp_work_share_end_nowait, the variant used
   when the threads are not synchronized at the end of the loop.  */

void
GOMP_loop_end_nowait (void)
{
  /* All the work is done by the work-share layer.  */
  gomp_work_share_end_nowait ();
}
|
|
|
|
|
|
/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */
|
|
|
|
#ifdef HAVE_ATTRIBUTE_ALIAS
|
|
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
|
|
__attribute__((alias ("gomp_loop_static_start")));
|
|
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
|
|
__attribute__((alias ("gomp_loop_dynamic_start")));
|
|
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
|
|
__attribute__((alias ("gomp_loop_guided_start")));
|
|
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
|
|
__attribute__((alias ("gomp_loop_dynamic_start")));
|
|
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
|
|
__attribute__((alias ("gomp_loop_guided_start")));
|
|
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
|
|
__attribute__((alias ("GOMP_loop_runtime_start")));
|
|
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
|
|
__attribute__((alias ("GOMP_loop_runtime_start")));
|
|
|
|
extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
|
|
__attribute__((alias ("gomp_loop_ordered_static_start")));
|
|
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
|
|
__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
|
|
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
|
|
__attribute__((alias ("gomp_loop_ordered_guided_start")));
|
|
|
|
extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
|
|
__attribute__((alias ("gomp_loop_doacross_static_start")));
|
|
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
|
|
__attribute__((alias ("gomp_loop_doacross_dynamic_start")));
|
|
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
|
|
__attribute__((alias ("gomp_loop_doacross_guided_start")));
|
|
|
|
extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
|
|
__attribute__((alias ("gomp_loop_static_next")));
|
|
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
|
|
__attribute__((alias ("gomp_loop_dynamic_next")));
|
|
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
|
|
__attribute__((alias ("gomp_loop_guided_next")));
|
|
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
|
|
__attribute__((alias ("gomp_loop_dynamic_next")));
|
|
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
|
|
__attribute__((alias ("gomp_loop_guided_next")));
|
|
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
|
|
__attribute__((alias ("GOMP_loop_runtime_next")));
|
|
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
|
|
__attribute__((alias ("GOMP_loop_runtime_next")));
|
|
|
|
extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
|
|
__attribute__((alias ("gomp_loop_ordered_static_next")));
|
|
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
|
|
__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
|
|
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
|
|
__attribute__((alias ("gomp_loop_ordered_guided_next")));
|
|
#else
|
|
/* Externally visible wrapper: passes every argument unchanged to
   gomp_loop_static_start and returns that function's result.  */

bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool ret
    = gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes every argument unchanged to
   gomp_loop_dynamic_start and returns that function's result.  */

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  bool ret
    = gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes every argument unchanged to
   gomp_loop_guided_start and returns that function's result.  */

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool ret
    = gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);

  return ret;
}
|
|
|
|
/* The nonmonotonic dynamic entry point shares the implementation of the
   plain dynamic one: all arguments go unchanged to
   gomp_loop_dynamic_start, whose result is returned.  */

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  bool ret
    = gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);

  return ret;
}
|
|
|
|
/* The nonmonotonic guided entry point shares the implementation of the
   plain guided one: all arguments go unchanged to gomp_loop_guided_start,
   whose result is returned.  */

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart,
                                     long *iend)
{
  bool ret
    = gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);

  return ret;
}
|
|
|
|
/* The nonmonotonic runtime entry point defers to GOMP_loop_runtime_start
   with the same arguments and returns its result.  */

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  bool ret = GOMP_loop_runtime_start (start, end, incr, istart, iend);

  return ret;
}
|
|
|
|
/* The maybe-nonmonotonic runtime entry point defers to
   GOMP_loop_runtime_start with the same arguments and returns its
   result.  */

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end,
                                            long incr, long *istart,
                                            long *iend)
{
  bool ret = GOMP_loop_runtime_start (start, end, incr, istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes every argument unchanged to
   gomp_loop_ordered_static_start and returns that function's result.  */

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                             istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes every argument unchanged to
   gomp_loop_ordered_dynamic_start and returns that function's result.  */

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                              istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes every argument unchanged to
   gomp_loop_ordered_guided_start and returns that function's result.  */

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                             istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes NCOUNTS, COUNTS, CHUNK_SIZE and the
   two output pointers unchanged to gomp_loop_doacross_static_start and
   returns that function's result.  */

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  bool ret = gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                              istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes NCOUNTS, COUNTS, CHUNK_SIZE and the
   two output pointers unchanged to gomp_loop_doacross_dynamic_start and
   returns that function's result.  */

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  bool ret = gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                               istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes NCOUNTS, COUNTS, CHUNK_SIZE and the
   two output pointers unchanged to gomp_loop_doacross_guided_start and
   returns that function's result.  */

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  bool ret = gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                              istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes ISTART and IEND unchanged to
   gomp_loop_static_next and returns that function's result.  */

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  bool ret = gomp_loop_static_next (istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes ISTART and IEND unchanged to
   gomp_loop_dynamic_next and returns that function's result.  */

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  bool ret = gomp_loop_dynamic_next (istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes ISTART and IEND unchanged to
   gomp_loop_guided_next and returns that function's result.  */

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  bool ret = gomp_loop_guided_next (istart, iend);

  return ret;
}
|
|
|
|
/* The nonmonotonic dynamic "next" entry point shares the implementation
   of the plain dynamic one: it calls gomp_loop_dynamic_next with the same
   pointers and returns its result.  */

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  bool ret = gomp_loop_dynamic_next (istart, iend);

  return ret;
}
|
|
|
|
/* The nonmonotonic guided "next" entry point shares the implementation of
   the plain guided one: it calls gomp_loop_guided_next with the same
   pointers and returns its result.  */

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  bool ret = gomp_loop_guided_next (istart, iend);

  return ret;
}
|
|
|
|
/* The nonmonotonic runtime "next" entry point defers to
   GOMP_loop_runtime_next with the same pointers and returns its
   result.  */

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  bool ret = GOMP_loop_runtime_next (istart, iend);

  return ret;
}
|
|
|
|
/* The maybe-nonmonotonic runtime "next" entry point defers to
   GOMP_loop_runtime_next with the same pointers and returns its
   result.  */

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  bool ret = GOMP_loop_runtime_next (istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes ISTART and IEND unchanged to
   gomp_loop_ordered_static_next and returns that function's result.  */

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_static_next (istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes ISTART and IEND unchanged to
   gomp_loop_ordered_dynamic_next and returns that function's result.  */

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_dynamic_next (istart, iend);

  return ret;
}
|
|
|
|
/* Externally visible wrapper: passes ISTART and IEND unchanged to
   gomp_loop_ordered_guided_next and returns that function's result.  */

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_guided_next (istart, iend);

  return ret;
}
|
|
#endif
|