openmp: Retire nest-var ICV for OpenMP 5.1

This removes the nest-var ICV, expressing nesting in terms of the
max-active-levels-var ICV instead.  The max-active-levels-var ICV
is now per data environment rather than per device.

2020-11-18  Kwok Cheung Yeung  <kcy@codesourcery.com>

	libgomp/
	* env.c (gomp_global_icv): Remove nest_var field.  Add
	max_active_levels_var field.
	(gomp_max_active_levels_var): Remove.
	(parse_boolean): Return true on success.
	(handle_omp_display_env): Express OMP_NESTED in terms of
	max_active_levels_var.  Change format specifier for
	max_active_levels_var.
	(initialize_env): Set max_active_levels_var from
	OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and
	OMP_PROC_BIND.
	* icv.c (omp_set_nested): Express in terms of
	max_active_levels_var.
	(omp_get_nested): Likewise.
	(omp_set_max_active_levels): Use max_active_levels_var field instead
	of gomp_max_active_levels_var.
	(omp_get_max_active_levels): Likewise.
	* libgomp.h (struct gomp_task_icv): Remove nest_var field.  Add
	max_active_levels_var field.
	(gomp_supported_active_levels): Set to UCHAR_MAX.
	(gomp_max_active_levels_var): Delete.
	* libgomp.texi (omp_get_nested): Update documentation.
	(omp_set_nested): Likewise.
	(OMP_MAX_ACTIVE_LEVELS): Likewise.
	(OMP_NESTED): Likewise.
	(OMP_NUM_THREADS): Likewise.
	(OMP_PROC_BIND): Likewise.
	* parallel.c (gomp_resolve_num_threads): Replace reference
	to nest_var with max_active_levels_var.  Use max_active_levels_var
	field instead of gomp_max_active_levels_var.
This commit is contained in:
Kwok Cheung Yeung 2020-11-18 11:24:36 -08:00
parent 965e0cc371
commit 6fae7eda96
5 changed files with 90 additions and 40 deletions

View File

@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = {
.run_sched_chunk_size = 1,
.default_device_var = 0,
.dyn_var = false,
.nest_var = false,
.max_active_levels_var = 1,
.bind_var = omp_proc_bind_false,
.target_data = NULL
};
unsigned long gomp_max_active_levels_var = gomp_supported_active_levels;
bool gomp_cancel_var = false;
enum gomp_target_offload_t gomp_target_offload_var
= GOMP_TARGET_OFFLOAD_DEFAULT;
@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue)
}
/* Parse a boolean value for environment variable NAME and store the
result in VALUE. */
result in VALUE. Return true if one was present and it was
successfully parsed. */
static void
static bool
parse_boolean (const char *name, bool *value)
{
const char *env;
env = getenv (name);
if (env == NULL)
return;
return false;
while (isspace ((unsigned char) *env))
++env;
@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value)
while (isspace ((unsigned char) *env))
++env;
if (*env != '\0')
gomp_error ("Invalid value for environment variable %s", name);
{
gomp_error ("Invalid value for environment variable %s", name);
return false;
}
return true;
}
/* Parse the OMP_WAIT_POLICY environment variable and return the value. */
@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
fprintf (stderr, " OMP_DYNAMIC = '%s'\n",
gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_NESTED = '%s'\n",
gomp_global_icv.nest_var ? "TRUE" : "FALSE");
gomp_global_icv.max_active_levels_var > 1 ? "TRUE" : "FALSE");
fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var);
for (i = 1; i < gomp_nthreads_var_list_len; i++)
@ -1344,8 +1348,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy)
wait_policy > 0 ? "ACTIVE" : "PASSIVE");
fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n",
gomp_global_icv.thread_limit_var);
fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n",
gomp_max_active_levels_var);
fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%u'\n",
gomp_global_icv.max_active_levels_var);
fprintf (stderr, " OMP_CANCELLATION = '%s'\n",
gomp_cancel_var ? "TRUE" : "FALSE");
@ -1410,6 +1414,7 @@ static void __attribute__((constructor))
initialize_env (void)
{
unsigned long thread_limit_var, stacksize = GOMP_DEFAULT_STACKSIZE;
unsigned long max_active_levels_var;
int wait_policy;
/* Do a compile time check that mkomp_h.pl did good job. */
@ -1417,16 +1422,11 @@ initialize_env (void)
parse_schedule ();
parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var);
parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var);
parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
true);
if (gomp_max_active_levels_var > gomp_supported_active_levels)
gomp_max_active_levels_var = gomp_supported_active_levels;
gomp_def_allocator = parse_allocator ();
if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
{
@ -1451,6 +1451,22 @@ initialize_env (void)
&gomp_bind_var_list_len)
&& gomp_global_icv.bind_var == omp_proc_bind_false)
ignore = true;
if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS",
&max_active_levels_var, true))
gomp_global_icv.max_active_levels_var
= (max_active_levels_var > gomp_supported_active_levels)
? gomp_supported_active_levels : max_active_levels_var;
else
{
bool nested = true;
/* OMP_NESTED is deprecated in OpenMP 5.0. */
if (parse_boolean ("OMP_NESTED", &nested))
gomp_global_icv.max_active_levels_var
= nested ? gomp_supported_active_levels : 1;
else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1)
gomp_global_icv.max_active_levels_var = gomp_supported_active_levels;
}
/* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always
parsed if present in the environment. If OMP_PROC_BIND was set
explicitly to false, don't populate places list though. If places

View File

@ -57,14 +57,18 @@ void
omp_set_nested (int val)
{
struct gomp_task_icv *icv = gomp_icv (true);
icv->nest_var = val;
if (val)
icv->max_active_levels_var = gomp_supported_active_levels;
else if (icv->max_active_levels_var > 1)
icv->max_active_levels_var = 1;
}
int
omp_get_nested (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
return icv->nest_var;
return (icv->max_active_levels_var > 1
&& icv->max_active_levels_var > omp_get_active_level ());
}
#pragma GCC diagnostic pop
@ -120,17 +124,20 @@ omp_set_max_active_levels (int max_levels)
{
if (max_levels >= 0)
{
struct gomp_task_icv *icv = gomp_icv (true);
if (max_levels <= gomp_supported_active_levels)
gomp_max_active_levels_var = max_levels;
icv->max_active_levels_var = max_levels;
else
gomp_max_active_levels_var = gomp_supported_active_levels;
icv->max_active_levels_var = gomp_supported_active_levels;
}
}
int
omp_get_max_active_levels (void)
{
return gomp_max_active_levels_var;
struct gomp_task_icv *icv = gomp_icv (false);
return icv->max_active_levels_var;
}
int

View File

@ -428,7 +428,7 @@ struct gomp_task_icv
int default_device_var;
unsigned int thread_limit_var;
bool dyn_var;
bool nest_var;
unsigned char max_active_levels_var;
char bind_var;
/* Internal ICV. */
struct target_mem_desc *target_data;
@ -441,13 +441,12 @@ enum gomp_target_offload_t
GOMP_TARGET_OFFLOAD_DISABLED
};
#define gomp_supported_active_levels INT_MAX
#define gomp_supported_active_levels UCHAR_MAX
extern struct gomp_task_icv gomp_global_icv;
#ifndef HAVE_SYNC_BUILTINS
extern gomp_mutex_t gomp_managed_threads_lock;
#endif
extern unsigned long gomp_max_active_levels_var;
extern bool gomp_cancel_var;
extern enum gomp_target_offload_t gomp_target_offload_var;
extern int gomp_max_task_priority_var;

View File

@ -487,10 +487,20 @@ This function returns @code{true} if nested parallel regions are
enabled, @code{false} otherwise. Here, @code{true} and @code{false}
represent their language-specific counterparts.
Nested parallel regions may be initialized at startup by the
@env{OMP_NESTED} environment variable or at runtime using
@code{omp_set_nested}. If undefined, nested parallel regions are
disabled by default.
The state of nested parallel regions at startup depends on several
environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined
and is set to greater than one, then nested parallel regions will be
enabled. If it is not defined, then the value of the @env{OMP_NESTED}
environment variable will be followed if defined. If neither is
defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND}
is defined with a list of more than one value, then nested parallel
regions are enabled. If none of these are defined, then nested parallel
regions are disabled by default.
Nested parallel regions can be enabled or disabled at runtime using
@code{omp_set_nested}, or by setting the maximum number of nested
regions with @code{omp_set_max_active_levels} to one to disable, or
above one to enable.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@ -503,7 +513,8 @@ disabled by default.
@end multitable
@item @emph{See also}:
@ref{omp_set_nested}, @ref{OMP_NESTED}
@ref{omp_set_max_active_levels}, @ref{omp_set_nested},
@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11.
@ -964,6 +975,10 @@ are allowed to create new teams. The function takes the language-specific
equivalent of @code{true} and @code{false}, where @code{true} enables
dynamic adjustment of team sizes and @code{false} disables it.
Enabling nested parallel regions will also set the maximum number of
active nested regions to the maximum supported. Disabling nested parallel
regions will set the maximum number of active nested regions to one.
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);}
@ -976,7 +991,8 @@ dynamic adjustment of team sizes and @code{false} disables it.
@end multitable
@item @emph{See also}:
@ref{OMP_NESTED}, @ref{omp_get_nested}
@ref{omp_get_nested}, @ref{omp_set_max_active_levels},
@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10.
@ -1502,10 +1518,14 @@ disabled by default.
@item @emph{Description}:
Specifies the initial value for the maximum number of nested parallel
regions. The value of this variable shall be a positive integer.
If undefined, the number of active levels is unlimited.
If undefined, then if @env{OMP_NESTED} is defined and set to true, or
if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to
a list with more than one item, the maximum number of nested parallel
regions will be initialized to the largest number supported, otherwise
it will be set to one.
@item @emph{See also}:
@ref{omp_set_max_active_levels}
@ref{omp_set_max_active_levels}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9
@ -1541,11 +1561,16 @@ integer, and zero is allowed. If undefined, the default priority is
@item @emph{Description}:
Enable or disable nested parallel regions, i.e., whether team members
are allowed to create new teams. The value of this environment variable
shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel
regions are disabled by default.
shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the maximum
number of active nested regions will by default be set to the maximum
supported, otherwise it will be set to one. If
@env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this
setting. If both are undefined, nested parallel regions are enabled if
@env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined to a list with
more than one item, otherwise they are disabled by default.
@item @emph{See also}:
@ref{omp_set_nested}
@ref{omp_set_max_active_levels}, @ref{omp_set_nested}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6
@ -1561,11 +1586,12 @@ regions are disabled by default.
@item @emph{Description}:
Specifies the default number of threads to use in parallel regions. The
value of this variable shall be a comma-separated list of positive integers;
the value specified the number of threads to use for the corresponding nested
level. If undefined one thread per CPU is used.
the value specifies the number of threads to use for the corresponding nested
level. Specifying more than one item in the list will automatically enable
nesting by default. If undefined, one thread per CPU is used.
@item @emph{See also}:
@ref{omp_set_num_threads}
@ref{omp_set_num_threads}, @ref{OMP_NESTED}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2
@ -1586,13 +1612,15 @@ the thread affinity policy for the corresponding nesting level. With
@code{MASTER} the worker threads are in the same place partition as the
master thread. With @code{CLOSE} those are kept close to the master thread
in contiguous place partitions. And with @code{SPREAD} a sparse distribution
across the place partitions is used.
across the place partitions is used. Specifying more than one item in the
list will automatically enable nesting by default.
When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when
@env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise.
@item @emph{See also}:
@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind}
@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY},
@ref{OMP_NESTED}, @ref{OMP_PLACES}
@item @emph{Reference}:
@uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4

View File

@ -53,11 +53,11 @@ gomp_resolve_num_threads (unsigned specified, unsigned count)
/* Accelerators with fixed thread counts require this to return 1 for
nested parallel regions. */
#if !defined(__AMDGCN__) && !defined(__nvptx__)
&& !icv->nest_var
&& icv->max_active_levels_var <= 1
#endif
)
return 1;
else if (thr->ts.active_level >= gomp_max_active_levels_var)
else if (thr->ts.active_level >= icv->max_active_levels_var)
return 1;
/* If NUM_THREADS not specified, use nthreads_var. */