openmp: Retire nest-var ICV for OpenMP 5.1
This removes the nest-var ICV, expressing nesting in terms of the max-active-levels-var ICV instead. The max-active-levels-var ICV is now per data environment rather than per device. 2020-11-18 Kwok Cheung Yeung <kcy@codesourcery.com> libgomp/ * env.c (gomp_global_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_max_active_levels_var): Remove. (parse_boolean): Return true on success. (handle_omp_display_env): Express OMP_NESTED in terms of max_active_levels_var. Change format specifier for max_active_levels_var. (initialize_env): Set max_active_levels_var from OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and OMP_PROC_BIND. * icv.c (omp_set_nested): Express in terms of max_active_levels_var. (omp_get_nested): Likewise. (omp_set_max_active_levels): Use max_active_levels_var field instead of gomp_max_active_levels_var. (omp_get_max_active_levels): Likewise. * libgomp.h (struct gomp_task_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_supported_active_levels): Set to UCHAR_MAX. (gomp_max_active_levels_var): Delete. * libgomp.texi (omp_get_nested): Update documentation. (omp_set_nested): Likewise. (OMP_MAX_ACTIVE_LEVELS): Likewise. (OMP_NESTED): Likewise. (OMP_NUM_THREADS): Likewise. (OMP_PROC_BIND): Likewise. * parallel.c (gomp_resolve_num_threads): Replace reference to nest_var with max_active_levels_var. Use max_active_levels_var field instead of gomp_max_active_levels_var.
This commit is contained in:
parent
965e0cc371
commit
6fae7eda96
@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
|
||||
.run_sched_chunk_size = 1,
|
||||
.default_device_var = 0,
|
||||
.dyn_var = false,
|
||||
.nest_var = false,
|
||||
.max_active_levels_var = 1,
|
||||
.bind_var = omp_proc_bind_false,
|
||||
.target_data = NULL
|
||||
};
|
||||
|
||||
unsigned long gomp_max_active_levels_var = gomp_supported_active_levels;
|
||||
bool gomp_cancel_var = false;
|
||||
enum gomp_target_offload_t gomp_target_offload_var
|
||||
= GOMP_TARGET_OFFLOAD_DEFAULT;
|
||||
@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue)
|
||||
}
|
||||
|
||||
/* Parse a boolean value for environment variable NAME and store the
|
||||
result in VALUE. */
|
||||
result in VALUE. Return true if one was present and it was
|
||||
successfully parsed. */
|
||||
|
||||
static void
|
||||
static bool
|
||||
parse_boolean (const char *name, bool *value)
|
||||
{
|
||||
const char *env;
|
||||
|
||||
env = getenv (name);
|
||||
if (env == NULL)
|
||||
return;
|
||||
return false;
|
||||
|
||||
while (isspace ((unsigned char) *env))
|
||||
++env;
|
||||
@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value)
|
||||
while (isspace ((unsigned char) *env))
|
||||
++env;
|
||||
if (*env != '\0')
|
||||
gomp_error ("Invalid value for environment variable %s", name);
|
||||
{
|
||||
gomp_error ("Invalid value for environment variable %s", name);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Parse the OMP_WAIT_POLICY environment variable and return the value. */
|
||||
@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
|
||||
fprintf (stderr, " OMP_DYNAMIC = '%s'\n",
|
||||
gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
|
||||
fprintf (stderr, " OMP_NESTED = '%s'\n",
|
||||
gomp_global_icv.nest_var ? "TRUE" : "FALSE");
|
||||
gomp_global_icv.max_active_levels_var > 1 ? "TRUE" : "FALSE");
|
||||
|
||||
fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
|
||||
for (i = 1; i < gomp_nthreads_var_list_len; i++)
|
||||
@ -1344,8 +1348,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
|
||||
wait_policy > 0 ? "ACTIVE" : "PASSIVE");
|
||||
fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n",
|
||||
gomp_global_icv.thread_limit_var);
|
||||
fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n",
|
||||
gomp_max_active_levels_var);
|
||||
fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%u'\n",
|
||||
gomp_global_icv.max_active_levels_var);
|
||||
|
||||
fprintf (stderr, " OMP_CANCELLATION = '%s'\n",
|
||||
gomp_cancel_var ? "TRUE" : "FALSE");
|
||||
@ -1410,6 +1414,7 @@ static void __attribute__((constructor))
|
||||
initialize_env (void)
|
||||
{
|
||||
unsigned long thread_limit_var, stacksize = GOMP_DEFAULT_STACKSIZE;
|
||||
unsigned long max_active_levels_var;
|
||||
int wait_policy;
|
||||
|
||||
/* Do a compile time check that mkomp_h.pl did a good job. */
|
||||
@ -1417,16 +1422,11 @@ initialize_env (void)
|
||||
|
||||
parse_schedule ();
|
||||
parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
|
||||
parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
|
||||
parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
|
||||
parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
|
||||
parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
|
||||
parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var);
|
||||
parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
|
||||
parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
|
||||
true);
|
||||
if (gomp_max_active_levels_var > gomp_supported_active_levels)
|
||||
gomp_max_active_levels_var = gomp_supported_active_levels;
|
||||
gomp_def_allocator = parse_allocator ();
|
||||
if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
|
||||
{
|
||||
@ -1451,6 +1451,22 @@ initialize_env (void)
|
||||
&gomp_bind_var_list_len)
|
||||
&& gomp_global_icv.bind_var == omp_proc_bind_false)
|
||||
ignore = true;
|
||||
if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS",
|
||||
&max_active_levels_var, true))
|
||||
gomp_global_icv.max_active_levels_var
|
||||
= (max_active_levels_var > gomp_supported_active_levels)
|
||||
? gomp_supported_active_levels : max_active_levels_var;
|
||||
else
|
||||
{
|
||||
bool nested = true;
|
||||
|
||||
/* OMP_NESTED is deprecated in OpenMP 5.0. */
|
||||
if (parse_boolean ("OMP_NESTED", &nested))
|
||||
gomp_global_icv.max_active_levels_var
|
||||
= nested ? gomp_supported_active_levels : 1;
|
||||
else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1)
|
||||
gomp_global_icv.max_active_levels_var = gomp_supported_active_levels;
|
||||
}
|
||||
/* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always
|
||||
parsed if present in the environment. If OMP_PROC_BIND was set
|
||||
explicitly to false, don't populate places list though. If places
|
||||
|
@ -57,14 +57,18 @@ void
|
||||
omp_set_nested (int val)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
icv->nest_var = val;
|
||||
if (val)
|
||||
icv->max_active_levels_var = gomp_supported_active_levels;
|
||||
else if (icv->max_active_levels_var > 1)
|
||||
icv->max_active_levels_var = 1;
|
||||
}
|
||||
|
||||
int
|
||||
omp_get_nested (void)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (false);
|
||||
return icv->nest_var;
|
||||
return (icv->max_active_levels_var > 1
|
||||
&& icv->max_active_levels_var > omp_get_active_level ());
|
||||
}
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
@ -120,17 +124,20 @@ omp_set_max_active_levels (int max_levels)
|
||||
{
|
||||
if (max_levels >= 0)
|
||||
{
|
||||
struct gomp_task_icv *icv = gomp_icv (true);
|
||||
|
||||
if (max_levels <= gomp_supported_active_levels)
|
||||
gomp_max_active_levels_var = max_levels;
|
||||
icv->max_active_levels_var = max_levels;
|
||||
else
|
||||
gomp_max_active_levels_var = gomp_supported_active_levels;
|
||||
icv->max_active_levels_var = gomp_supported_active_levels;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
omp_get_max_active_levels (void)
|
||||
{
|
||||
return gomp_max_active_levels_var;
|
||||
struct gomp_task_icv *icv = gomp_icv (false);
|
||||
return icv->max_active_levels_var;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -428,7 +428,7 @@ struct gomp_task_icv
|
||||
int default_device_var;
|
||||
unsigned int thread_limit_var;
|
||||
bool dyn_var;
|
||||
bool nest_var;
|
||||
unsigned char max_active_levels_var;
|
||||
char bind_var;
|
||||
/* Internal ICV. */
|
||||
struct target_mem_desc *target_data;
|
||||
@ -441,13 +441,12 @@ enum gomp_target_offload_t
|
||||
GOMP_TARGET_OFFLOAD_DISABLED
|
||||
};
|
||||
|
||||
#define gomp_supported_active_levels INT_MAX
|
||||
#define gomp_supported_active_levels UCHAR_MAX
|
||||
|
||||
extern struct gomp_task_icv gomp_global_icv;
|
||||
#ifndef HAVE_SYNC_BUILTINS
|
||||
extern gomp_mutex_t gomp_managed_threads_lock;
|
||||
#endif
|
||||
extern unsigned long gomp_max_active_levels_var;
|
||||
extern bool gomp_cancel_var;
|
||||
extern enum gomp_target_offload_t gomp_target_offload_var;
|
||||
extern int gomp_max_task_priority_var;
|
||||
|
@ -487,10 +487,20 @@ This function returns @code{true} if nested parallel regions are
|
||||
enabled, @code{false} otherwise. Here, @code{true} and @code{false}
|
||||
represent their language-specific counterparts.
|
||||
|
||||
Nested parallel regions may be initialized at startup by the
|
||||
@env{OMP_NESTED} environment variable or at runtime using
|
||||
@code{omp_set_nested}. If undefined, nested parallel regions are
|
||||
disabled by default.
|
||||
The state of nested parallel regions at startup depends on several
|
||||
environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined
|
||||
and is set to greater than one, then nested parallel regions will be
|
||||
enabled. If not defined, then the value of the @env{OMP_NESTED}
|
||||
environment variable will be followed if defined. If neither is
|
||||
defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND}
|
||||
is defined with a list of more than one value, then nested parallel
|
||||
regions are enabled. If none of these are defined, then nested parallel
|
||||
regions are disabled by default.
|
||||
|
||||
Nested parallel regions can be enabled or disabled at runtime using
|
||||
@code{omp_set_nested}, or by setting the maximum number of nested
|
||||
regions with @code{omp_set_max_active_levels} to one to disable, or
|
||||
above one to enable.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@ -503,7 +513,8 @@ disabled by default.
|
||||
@end multitable
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{omp_set_nested}, @ref{OMP_NESTED}
|
||||
@ref{omp_set_max_active_levels}, @ref{omp_set_nested},
|
||||
@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11.
|
||||
@ -964,6 +975,10 @@ are allowed to create new teams. The function takes the language-specific
|
||||
equivalent of @code{true} and @code{false}, where @code{true} enables
|
||||
nested parallel regions and @code{false} disables them.
|
||||
|
||||
Enabling nested parallel regions will also set the maximum number of
|
||||
active nested regions to the maximum supported. Disabling nested parallel
|
||||
regions will set the maximum number of active nested regions to one.
|
||||
|
||||
@item @emph{C/C++}:
|
||||
@multitable @columnfractions .20 .80
|
||||
@item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);}
|
||||
@ -976,7 +991,8 @@ dynamic adjustment of team sizes and @code{false} disables it.
|
||||
@end multitable
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{OMP_NESTED}, @ref{omp_get_nested}
|
||||
@ref{omp_get_nested}, @ref{omp_set_max_active_levels},
|
||||
@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10.
|
||||
@ -1502,10 +1518,14 @@ disabled by default.
|
||||
@item @emph{Description}:
|
||||
Specifies the initial value for the maximum number of nested parallel
|
||||
regions. The value of this variable shall be a positive integer.
|
||||
If undefined, the number of active levels is unlimited.
|
||||
If undefined, then if @env{OMP_NESTED} is defined and set to true, or
|
||||
if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to
|
||||
a list with more than one item, the maximum number of nested parallel
|
||||
regions will be initialized to the largest number supported, otherwise
|
||||
it will be set to one.
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{omp_set_max_active_levels}
|
||||
@ref{omp_set_max_active_levels}, @ref{OMP_NESTED}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9
|
||||
@ -1541,11 +1561,16 @@ integer, and zero is allowed. If undefined, the default priority is
|
||||
@item @emph{Description}:
|
||||
Enable or disable nested parallel regions, i.e., whether team members
|
||||
are allowed to create new teams. The value of this environment variable
|
||||
shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel
|
||||
regions are disabled by default.
|
||||
shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the maximum
|
||||
number of active nested regions will by default be set to the
|
||||
maximum supported, otherwise it will be set to one. If
|
||||
@env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this
|
||||
setting. If both are undefined, nested parallel regions are enabled if
|
||||
@env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined to a list with
|
||||
more than one item, otherwise they are disabled by default.
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{omp_set_nested}
|
||||
@ref{omp_set_max_active_levels}, @ref{omp_set_nested}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6
|
||||
@ -1561,11 +1586,12 @@ regions are disabled by default.
|
||||
@item @emph{Description}:
|
||||
Specifies the default number of threads to use in parallel regions. The
|
||||
value of this variable shall be a comma-separated list of positive integers;
|
||||
the value specified the number of threads to use for the corresponding nested
|
||||
level. If undefined one thread per CPU is used.
|
||||
the value specifies the number of threads to use for the corresponding nested
|
||||
level. Specifying more than one item in the list will automatically enable
|
||||
nesting by default. If undefined one thread per CPU is used.
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{omp_set_num_threads}
|
||||
@ref{omp_set_num_threads}, @ref{OMP_NESTED}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2
|
||||
@ -1586,13 +1612,15 @@ the thread affinity policy for the corresponding nesting level. With
|
||||
@code{MASTER} the worker threads are in the same place partition as the
|
||||
master thread. With @code{CLOSE} those are kept close to the master thread
|
||||
in contiguous place partitions. And with @code{SPREAD} a sparse distribution
|
||||
across the place partitions is used.
|
||||
across the place partitions is used. Specifying more than one item in the
|
||||
list will automatically enable nesting by default.
|
||||
|
||||
When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when
|
||||
@env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise.
|
||||
|
||||
@item @emph{See also}:
|
||||
@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind}
|
||||
@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY},
|
||||
@ref{OMP_NESTED}, @ref{OMP_PLACES}
|
||||
|
||||
@item @emph{Reference}:
|
||||
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4
|
||||
|
@ -53,11 +53,11 @@ gomp_resolve_num_threads (unsigned specified, unsigned count)
|
||||
/* Accelerators with fixed thread counts require this to return 1 for
|
||||
nested parallel regions. */
|
||||
#if !defined(__AMDGCN__) && !defined(__nvptx__)
|
||||
&& !icv->nest_var
|
||||
&& icv->max_active_levels_var <= 1
|
||||
#endif
|
||||
)
|
||||
return 1;
|
||||
else if (thr->ts.active_level >= gomp_max_active_levels_var)
|
||||
else if (thr->ts.active_level >= icv->max_active_levels_var)
|
||||
return 1;
|
||||
|
||||
/* If NUM_THREADS not specified, use nthreads_var. */
|
||||
|
Loading…
Reference in New Issue
Block a user