From 6fae7eda968db658c280ad6f94fe6906a15af0c9 Mon Sep 17 00:00:00 2001 From: Kwok Cheung Yeung Date: Wed, 18 Nov 2020 11:24:36 -0800 Subject: [PATCH] openmp: Retire nest-var ICV for OpenMP 5.1 This removes the nest-var ICV, expressing nesting in terms of the max-active-levels-var ICV instead. The max-active-levels-var ICV is now per data environment rather than per device. 2020-11-18 Kwok Cheung Yeung libgomp/ * env.c (gomp_global_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_max_active_levels_var): Remove. (parse_boolean): Return true on success. (handle_omp_display_env): Express OMP_NESTED in terms of max_active_levels_var. Change format specifier for max_active_levels_var. (initialize_env): Set max_active_levels_var from OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and OMP_PROC_BIND. * icv.c (omp_set_nested): Express in terms of max_active_levels_var. (omp_get_nested): Likewise. (omp_set_max_active_levels): Use max_active_levels_var field instead of gomp_max_active_levels_var. (omp_get_max_active_levels): Likewise. * libgomp.h (struct gomp_task_icv): Remove nest_var field. Add max_active_levels_var field. (gomp_supported_active_levels): Set to UCHAR_MAX. (gomp_max_active_levels_var): Delete. * libgomp.texi (omp_get_nested): Update documentation. (omp_set_nested): Likewise. (OMP_MAX_ACTIVE_LEVELS): Likewise. (OMP_NESTED): Likewise. (OMP_NUM_THREADS): Likewise. (OMP_PROC_BIND): Likewise. * parallel.c (gomp_resolve_num_threads): Replace reference to nest_var with max_active_levels_var. Use max_active_levels_var field instead of gomp_max_active_levels_var. 
--- libgomp/env.c | 44 +++++++++++++++++++++----------- libgomp/icv.c | 17 +++++++++---- libgomp/libgomp.h | 5 ++-- libgomp/libgomp.texi | 60 ++++++++++++++++++++++++++++++++------------ libgomp/parallel.c | 4 +-- 5 files changed, 90 insertions(+), 40 deletions(-) diff --git a/libgomp/env.c b/libgomp/env.c index ab225255709..5a49ae611bd 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = { .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, - .nest_var = false, + .max_active_levels_var = 1, .bind_var = omp_proc_bind_false, .target_data = NULL }; -unsigned long gomp_max_active_levels_var = gomp_supported_active_levels; bool gomp_cancel_var = false; enum gomp_target_offload_t gomp_target_offload_var = GOMP_TARGET_OFFLOAD_DEFAULT; @@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue) } /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ + result in VALUE. Return true if one was present and it was + successfully parsed. */ -static void +static bool parse_boolean (const char *name, bool *value) { const char *env; env = getenv (name); if (env == NULL) - return; + return false; while (isspace ((unsigned char) *env)) ++env; @@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value) while (isspace ((unsigned char) *env)) ++env; if (*env != '\0') - gomp_error ("Invalid value for environment variable %s", name); + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + return true; } /* Parse the OMP_WAIT_POLICY environment variable and return the value. */ @@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", - gomp_global_icv.nest_var ? "TRUE" : "FALSE"); + gomp_global_icv.max_active_levels_var > 1 ? 
"TRUE" : "FALSE"); fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); for (i = 1; i < gomp_nthreads_var_list_len; i++) @@ -1344,8 +1348,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) wait_policy > 0 ? "ACTIVE" : "PASSIVE"); fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n", gomp_global_icv.thread_limit_var); - fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n", - gomp_max_active_levels_var); + fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%u'\n", + gomp_global_icv.max_active_levels_var); fprintf (stderr, " OMP_CANCELLATION = '%s'\n", gomp_cancel_var ? "TRUE" : "FALSE"); @@ -1410,6 +1414,7 @@ static void __attribute__((constructor)) initialize_env (void) { unsigned long thread_limit_var, stacksize = GOMP_DEFAULT_STACKSIZE; + unsigned long max_active_levels_var; int wait_policy; /* Do a compile time check that mkomp_h.pl did good job. */ @@ -1417,16 +1422,11 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); - parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); - parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, - true); - if (gomp_max_active_levels_var > gomp_supported_active_levels) - gomp_max_active_levels_var = gomp_supported_active_levels; gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { @@ -1451,6 +1451,22 @@ initialize_env (void) &gomp_bind_var_list_len) && gomp_global_icv.bind_var == omp_proc_bind_false) ignore = true; + if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", + &max_active_levels_var, true)) + 
gomp_global_icv.max_active_levels_var + = (max_active_levels_var > gomp_supported_active_levels) + ? gomp_supported_active_levels : max_active_levels_var; + else + { + bool nested = true; + + /* OMP_NESTED is deprecated in OpenMP 5.0. */ + if (parse_boolean ("OMP_NESTED", &nested)) + gomp_global_icv.max_active_levels_var + = nested ? gomp_supported_active_levels : 1; + else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1) + gomp_global_icv.max_active_levels_var = gomp_supported_active_levels; + } /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always parsed if present in the environment. If OMP_PROC_BIND was set explicitly to false, don't populate places list though. If places diff --git a/libgomp/icv.c b/libgomp/icv.c index 8df15e385e7..c0c0305102f 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -57,14 +57,18 @@ void omp_set_nested (int val) { struct gomp_task_icv *icv = gomp_icv (true); - icv->nest_var = val; + if (val) + icv->max_active_levels_var = gomp_supported_active_levels; + else if (icv->max_active_levels_var > 1) + icv->max_active_levels_var = 1; } int omp_get_nested (void) { struct gomp_task_icv *icv = gomp_icv (false); - return icv->nest_var; + return (icv->max_active_levels_var > 1 + && icv->max_active_levels_var > omp_get_active_level ()); } #pragma GCC diagnostic pop @@ -120,17 +124,20 @@ omp_set_max_active_levels (int max_levels) { if (max_levels >= 0) { + struct gomp_task_icv *icv = gomp_icv (true); + if (max_levels <= gomp_supported_active_levels) - gomp_max_active_levels_var = max_levels; + icv->max_active_levels_var = max_levels; else - gomp_max_active_levels_var = gomp_supported_active_levels; + icv->max_active_levels_var = gomp_supported_active_levels; } } int omp_get_max_active_levels (void) { - return gomp_max_active_levels_var; + struct gomp_task_icv *icv = gomp_icv (false); + return icv->max_active_levels_var; } int diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 0cc3f4d406b..070d29c969e 100644 --- 
a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -428,7 +428,7 @@ struct gomp_task_icv int default_device_var; unsigned int thread_limit_var; bool dyn_var; - bool nest_var; + unsigned char max_active_levels_var; char bind_var; /* Internal ICV. */ struct target_mem_desc *target_data; @@ -441,13 +441,12 @@ enum gomp_target_offload_t GOMP_TARGET_OFFLOAD_DISABLED }; -#define gomp_supported_active_levels INT_MAX +#define gomp_supported_active_levels UCHAR_MAX extern struct gomp_task_icv gomp_global_icv; #ifndef HAVE_SYNC_BUILTINS extern gomp_mutex_t gomp_managed_threads_lock; #endif -extern unsigned long gomp_max_active_levels_var; extern bool gomp_cancel_var; extern enum gomp_target_offload_t gomp_target_offload_var; extern int gomp_max_task_priority_var; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 69370639c5b..473b191ae14 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -487,10 +487,20 @@ This function returns @code{true} if nested parallel regions are enabled, @code{false} otherwise. Here, @code{true} and @code{false} represent their language-specific counterparts. -Nested parallel regions may be initialized at startup by the -@env{OMP_NESTED} environment variable or at runtime using -@code{omp_set_nested}. If undefined, nested parallel regions are -disabled by default. +The state of nested parallel regions at startup depends on several +environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined +and is set to greater than one, then nested parallel regions will be +enabled. If not defined, then the value of the @env{OMP_NESTED} +environment variable will be followed if defined. If neither are +defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} +are defined with a list of more than one value, then nested parallel +regions are enabled. If none of these are defined, then nested parallel +regions are disabled by default. 
+ +Nested parallel regions can be enabled or disabled at runtime using +@code{omp_set_nested}, or by setting the maximum number of nested +regions with @code{omp_set_max_active_levels} to one to disable, or +above one to enable. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -503,7 +513,8 @@ disabled by default. @end multitable @item @emph{See also}: -@ref{omp_set_nested}, @ref{OMP_NESTED} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11. @@ -964,6 +975,10 @@ are allowed to create new teams. The function takes the language-specific equivalent of @code{true} and @code{false}, where @code{true} enables dynamic adjustment of team sizes and @code{false} disables it. +Enabling nested parallel regions will also set the maximum number of +active nested regions to the maximum supported. Disabling nested parallel +regions will set the maximum number of active nested regions to one. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);} @@ -976,7 +991,8 @@ dynamic adjustment of team sizes and @code{false} disables it. @end multitable @item @emph{See also}: -@ref{OMP_NESTED}, @ref{omp_get_nested} +@ref{omp_get_nested}, @ref{omp_set_max_active_levels}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10. @@ -1502,10 +1518,14 @@ disabled by default. @item @emph{Description}: Specifies the initial value for the maximum number of nested parallel regions. The value of this variable shall be a positive integer. -If undefined, the number of active levels is unlimited. 
+If undefined, then if @env{OMP_NESTED} is defined and set to true, or +if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to +a list with more than one item, the maximum number of nested parallel +regions will be initialized to the largest number supported, otherwise +it will be set to one. @item @emph{See also}: -@ref{omp_set_max_active_levels} +@ref{omp_set_max_active_levels}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9 @@ -1541,11 +1561,16 @@ integer, and zero is allowed. If undefined, the default priority is @item @emph{Description}: Enable or disable nested parallel regions, i.e., whether team members are allowed to create new teams. The value of this environment variable -shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel -regions are disabled by default. +shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the maximum +number of active nested regions will by default be set to the +maximum supported, otherwise it will be set to one. If +@env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this +setting. If both are undefined, nested parallel regions are enabled if +@env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined to a list with +more than one item, otherwise they are disabled by default. @item @emph{See also}: -@ref{omp_set_nested} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6 @@ -1561,11 +1586,12 @@ regions are disabled by default. @item @emph{Description}: Specifies the default number of threads to use in parallel regions. The value of this variable shall be a comma-separated list of positive integers; -the value specified the number of threads to use for the corresponding nested -level. If undefined one thread per CPU is used. +the value specifies the number of threads to use for the corresponding nested +level.
Specifying more than one item in the list will automatically enable +nesting by default. If undefined one thread per CPU is used. @item @emph{See also}: -@ref{omp_set_num_threads} +@ref{omp_set_num_threads}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2 @@ -1586,13 +1612,15 @@ the thread affinity policy for the corresponding nesting level. With @code{MASTER} the worker threads are in the same place partition as the master thread. With @code{CLOSE} those are kept close to the master thread in contiguous place partitions. And with @code{SPREAD} a sparse distribution -across the place partitions is used. +across the place partitions is used. Specifying more than one item in the +list will automatically enable nesting by default. When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when @env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise. @item @emph{See also}: -@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind} +@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY}, +@ref{OMP_NESTED}, @ref{OMP_PLACES} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4 diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 2fe4f573a32..ebce49223fe 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -53,11 +53,11 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) /* Accelerators with fixed thread counts require this to return 1 for nested parallel regions. */ #if !defined(__AMDGCN__) && !defined(__nvptx__) - && !icv->nest_var + && icv->max_active_levels_var <= 1 #endif ) return 1; - else if (thr->ts.active_level >= gomp_max_active_levels_var) + else if (thr->ts.active_level >= icv->max_active_levels_var) return 1; /* If NUM_THREADS not specified, use nthreads_var. */