From 1f62d6375bb12008bb152af13eab7eb83b458a1e Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 2 May 2018 17:53:29 +0000 Subject: [PATCH] [openacc] Add __builtin_goacc_parlevel_{id,size} 2018-05-02 Tom de Vries PR libgomp/82428 * builtins.def (DEF_GOACC_BUILTIN_ONLY): Define. * omp-builtins.def (BUILT_IN_GOACC_PARLEVEL_ID) (BUILT_IN_GOACC_PARLEVEL_SIZE): New builtin. * builtins.c (expand_builtin_goacc_parlevel_id_size): New function. (expand_builtin): Call expand_builtin_goacc_parlevel_id_size. * doc/extend.texi (Other Builtins): Add __builtin_goacc_parlevel_id and __builtin_goacc_parlevel_size. * f95-lang.c (DEF_GOACC_BUILTIN_ONLY): Define. * c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c: New test. * c-c++-common/goacc/builtin-goacc-parlevel-id-size.c: New test. * testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use __builtin_goacc_parlevel_{id,size}. * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/tile-1.c: Same. From-SVN: r259850 --- gcc/ChangeLog | 11 +++ gcc/builtins.c | 71 +++++++++++++++++ gcc/builtins.def | 4 + gcc/doc/extend.texi | 10 +++ gcc/fortran/ChangeLog | 5 ++ gcc/fortran/f95-lang.c | 4 + gcc/omp-builtins.def | 5 ++ gcc/testsuite/ChangeLog | 6 ++ .../goacc/builtin-goacc-parlevel-id-size-2.c | 37 +++++++++ .../goacc/builtin-goacc-parlevel-id-size.c | 79 +++++++++++++++++++ libgomp/ChangeLog | 29 +++++++ .../libgomp.oacc-c-c++-common/gang-static-2.c | 21 ++--- .../libgomp.oacc-c-c++-common/loop-auto-1.c | 18 ++--- .../loop-dim-default.c | 14 ++-- .../libgomp.oacc-c-c++-common/loop-g-1.c | 17 ++-- .../libgomp.oacc-c-c++-common/loop-g-2.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-gwv-1.c | 17 ++-- .../libgomp.oacc-c-c++-common/loop-red-g-1.c | 16 ++-- .../loop-red-gwv-1.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-red-v-1.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-red-v-2.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-red-w-1.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-red-w-2.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-red-wv-1.c | 12 +-- .../libgomp.oacc-c-c++-common/loop-v-1.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-w-1.c | 16 ++-- .../libgomp.oacc-c-c++-common/loop-wv-1.c | 16 ++-- .../libgomp.oacc-c-c++-common/parallel-dims.c | 19 +---- .../libgomp.oacc-c-c++-common/routine-g-1.c | 18 ++--- .../libgomp.oacc-c-c++-common/routine-gwv-1.c | 18 ++--- .../libgomp.oacc-c-c++-common/routine-v-1.c | 18 ++--- .../libgomp.oacc-c-c++-common/routine-w-1.c | 18 ++--- .../libgomp.oacc-c-c++-common/routine-wv-1.c | 18 ++--- .../libgomp.oacc-c-c++-common/routine-wv-2.c | 19 ++--- .../libgomp.oacc-c-c++-common/tile-1.c | 15 ++-- 35 files changed, 433 insertions(+), 230 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c create mode 100644 gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1c2a8283ed6..806f74cc3b7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2018-05-02 Tom de Vries + + PR libgomp/82428 + * builtins.def (DEF_GOACC_BUILTIN_ONLY): Define. + * omp-builtins.def (BUILT_IN_GOACC_PARLEVEL_ID) + (BUILT_IN_GOACC_PARLEVEL_SIZE): New builtin. + * builtins.c (expand_builtin_goacc_parlevel_id_size): New function. + (expand_builtin): Call expand_builtin_goacc_parlevel_id_size. + * doc/extend.texi (Other Builtins): Add __builtin_goacc_parlevel_id and + __builtin_goacc_parlevel_size. + 2018-05-02 Richard Biener PR tree-optimization/85597 diff --git a/gcc/builtins.c b/gcc/builtins.c index a71555e8198..300e13c2f47 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -71,6 +71,8 @@ along with GCC; see the file COPYING3. If not see #include "gimple-fold.h" #include "intl.h" #include "file-prefix-map.h" /* remap_macro_filename() */ +#include "gomp-constants.h" +#include "omp-general.h" struct target_builtins default_target_builtins; #if SWITCHABLE_TARGET @@ -6628,6 +6630,71 @@ expand_stack_save (void) return ret; } +/* Emit code to get the openacc gang, worker or vector id or size. */ + +static rtx +expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore) +{ + const char *name; + rtx fallback_retval; + rtx_insn *(*gen_fn) (rtx, rtx); + switch (DECL_FUNCTION_CODE (get_callee_fndecl (exp))) + { + case BUILT_IN_GOACC_PARLEVEL_ID: + name = "__builtin_goacc_parlevel_id"; + fallback_retval = const0_rtx; + gen_fn = targetm.gen_oacc_dim_pos; + break; + case BUILT_IN_GOACC_PARLEVEL_SIZE: + name = "__builtin_goacc_parlevel_size"; + fallback_retval = const1_rtx; + gen_fn = targetm.gen_oacc_dim_size; + break; + default: + gcc_unreachable (); + } + + if (oacc_get_fn_attrib (current_function_decl) == NULL_TREE) + { + error ("%qs only supported in OpenACC code", name); + return const0_rtx; + } + + tree arg = CALL_EXPR_ARG (exp, 0); + if (TREE_CODE (arg) != INTEGER_CST) + { + error ("non-constant argument 0 to %qs", name); + return const0_rtx; + } + + int dim = TREE_INT_CST_LOW (arg); + switch (dim) + { + case GOMP_DIM_GANG: + case GOMP_DIM_WORKER: + case GOMP_DIM_VECTOR: + break; + default: + error ("illegal argument 0 to %qs", name); + return const0_rtx; + } + + if (ignore) + return target; + + if (!targetm.have_oacc_dim_size ()) + { + emit_move_insn (target, fallback_retval); + return target; + } + + rtx reg = MEM_P (target) ? gen_reg_rtx (GET_MODE (target)) : target; + emit_insn (gen_fn (reg, GEN_INT (dim))); + if (reg != target) + emit_move_insn (target, reg); + + return target; +} /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient @@ -7758,6 +7825,10 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, folding. */ break; + case BUILT_IN_GOACC_PARLEVEL_ID: + case BUILT_IN_GOACC_PARLEVEL_SIZE: + return expand_builtin_goacc_parlevel_id_size (exp, target, ignore); + default: /* just do library call, if unknown builtin */ break; } diff --git a/gcc/builtins.def b/gcc/builtins.def index 17f825da367..449d08d682f 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -214,6 +214,10 @@ along with GCC; see the file COPYING3. If not see #define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ flag_openacc, true, true, ATTRS, false, true) +#undef DEF_GOACC_BUILTIN_ONLY +#define DEF_GOACC_BUILTIN_ONLY(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, BT_LAST, \ + false, false, true, ATTRS, false, flag_openacc) #undef DEF_GOMP_BUILTIN #define DEF_GOMP_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5571d05d93b..f751b082b0e 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -12437,6 +12437,16 @@ Aarch64. This function is mainly useful when writing inline assembly code. @end deftypefn +@deftypefn {Built-in Function} int __builtin_goacc_parlevel_id (int x) +Returns the openacc gang, worker or vector id depending on whether @var{x} is +0, 1 or 2. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_goacc_parlevel_size (int x) +Returns the openacc gang, worker or vector size depending on whether @var{x} is +0, 1 or 2. +@end deftypefn + @node Target Builtins @section Built-in Functions Specific to Particular Target Machines diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 46c234fdab9..787f0ae22ec 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,8 @@ +2018-05-02 Tom de Vries + + PR libgomp/82428 + * f95-lang.c (DEF_GOACC_BUILTIN_ONLY): Define. + 2018-04-24 Steven G. Kargl PR fortran/85520 diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c index 5fe34b243cb..0f39f0ca788 100644 --- a/gcc/fortran/f95-lang.c +++ b/gcc/fortran/f95-lang.c @@ -1202,6 +1202,10 @@ gfc_init_builtin_functions (void) #undef DEF_GOACC_BUILTIN_COMPILER #define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) \ gfc_define_builtin (name, builtin_types[type], code, name, attr); +#undef DEF_GOACC_BUILTIN_ONLY +#define DEF_GOACC_BUILTIN_ONLY(code, name, type, attr) \ + gfc_define_builtin ("__builtin_" name, builtin_types[type], code, NULL, \ + attr); #undef DEF_GOMP_BUILTIN #define DEF_GOMP_BUILTIN(code, name, type, attr) /* ignore */ #include "../omp-builtins.def" diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 8b5d663e52c..55e56336ae9 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -51,6 +51,11 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_ID, "goacc_parlevel_id", + BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_SIZE, "goacc_parlevel_size", + BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_THREAD_NUM, "omp_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_THREADS, "omp_get_num_threads", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 63750af3495..9c290eb1e68 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-05-02 Tom de Vries + + PR libgomp/82428 + * c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c: New test. + * c-c++-common/goacc/builtin-goacc-parlevel-id-size.c: New test. + 2018-05-02 David Pagan PR c/30552 diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c new file mode 100644 index 00000000000..16c7b34f66c --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ + +#include "../../../../include/gomp-constants.h" + +void +foo (void) +{ + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + /* { dg-error "'__builtin_goacc_parlevel_id' only supported in OpenACC code" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + /* { dg-error "'__builtin_goacc_parlevel_size' only supported in OpenACC code" "" { target *-*-* } .-1 } */ +} + +#pragma acc routine +void +foo2 (int arg) +{ + __builtin_goacc_parlevel_id (arg); + /* { dg-error "non-constant argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (arg); + /* { dg-error "non-constant argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_id (-1); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_id (-1); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (-1); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (3); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */ +} diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c new file mode 100644 index 00000000000..5cda8189fb8 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c @@ -0,0 +1,79 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ + +#include "../../../../include/gomp-constants.h" + +#pragma acc routine +int +foo (void) +{ + int res; + + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + return res; +} + +void +foo2 (void) +{ + int res; + +#pragma acc parallel + { + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + } +} + +void +foo3 (void) +{ + int res; + +#pragma acc kernels + { + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + } +} diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 958f92bc4c7..669f9f1e59f 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,32 @@ +2018-05-02 Tom de Vries + + PR libgomp/82428 + * testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use + __builtin_goacc_parlevel_{id,size}. + * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Same. + * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Same. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Same. + * testsuite/libgomp.oacc-c-c++-common/tile-1.c: Same. + 2018-05-02 Tom de Vries PR testsuite/85106 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c index 6de739ae99f..e273a797b8e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c @@ -1,25 +1,23 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include #include +#include #define N 100 #define GANG_ID(I) \ - (acc_on_device (acc_device_nvidia) \ - ? ({unsigned __r; \ - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \ - __r; }) : (I)) + (acc_on_device (acc_device_not_host) \ + ? __builtin_goacc_parlevel_id (GOMP_DIM_GANG) \ + : (I)) void test_static(int *a, int num_gangs, int sarg) { int i, j; - if (sarg == 0) + if (acc_on_device (acc_device_host)) + return; + + if (sarg == 0) sarg = 1; for (i = 0; i < N / sarg; i++) @@ -32,6 +30,9 @@ test_nonstatic(int *a, int gangs) { int i, j; + if (acc_on_device (acc_device_host)) + return; + for (i = 0; i < N; i+=gangs) for (j = 0; j < gangs; j++) assert (a[i+j] == i/gangs); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index 863b6b38c34..34bc57e51f5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -1,11 +1,8 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - /* { dg-additional-options "-fopenacc-dim=32" } */ #include #include +#include int check (const int *ary, int size, int gp, int wp, int vp) { @@ -79,15 +76,12 @@ static int __attribute__((noinline)) place () { int r = 0; - if (acc_on_device (acc_device_nvidia)) - { - int g = 0, w = 0, v = 0; + int g = 0, w = 0, v = 0; + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + r = (g << 16) | (w << 8) | v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); - r = (g << 16) | (w << 8) | v; - } return r; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c index e2b08c3e0bc..dd8107c1acc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c @@ -1,25 +1,23 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. */ -/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ /* { dg-additional-options "-fopenacc-dim=16:16" } */ #include #include #include #include +#include #pragma acc routine static int __attribute__ ((noinline)) coord () { int res = 0; - if (acc_on_device (acc_device_nvidia)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); res = (1 << 24) | (g << 16) | (w << 8) | v; } return res; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c index ae1d588db8f..98f02e9840a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,12 @@ int main () #pragma acc loop gang for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; - - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + int g, w, v; + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c index c06d861b07d..4152a4e6c82 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () #pragma acc loop gang (static:1) for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index 42b612a29d8..766e5782b46 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,14 @@ int main () #pragma acc loop gang worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; + + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c index 929e01c447a..7107502e070 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -18,13 +16,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 4ae4b7c1246..0bec6e19510 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -18,13 +16,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index 0556455d62f..da4921d15f9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -19,13 +17,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 16d8f9fbf2c..15e2bc2f83b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -21,13 +19,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index 19021d9d062..6bbd04fffea 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -18,13 +16,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index f0c9d8182e8..c63a5d4f808 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index 0fec2dcfd9c..6743afaca6a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -2,6 +2,8 @@ /* { dg-additional-options "-O2" } */ #include +#include +#include #define N (32*32*32+17) int main () @@ -17,13 +19,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c index 2974807925a..6010cd2498a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () #pragma acc loop vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index 33b6eae9e44..fa6fb9164e6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c index 578cfadd4d1..cd4cc994b82 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () #pragma acc loop worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 8308f7c541f..4a9854662cc 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -3,6 +3,7 @@ #include #include +#include /* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper not behaving as expected for -O0. */ @@ -12,11 +13,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () if (acc_on_device ((int) acc_device_host)) return 0; else if (acc_on_device ((int) acc_device_nvidia)) - { - unsigned int r; - asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r)); - return r; - } + return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); else __builtin_abort (); } @@ -27,11 +24,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () if (acc_on_device ((int) acc_device_host)) return 0; else if (acc_on_device ((int) acc_device_nvidia)) - { - unsigned int r; - asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r)); - return r; - } + return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); else __builtin_abort (); } @@ -42,11 +35,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () if (acc_on_device ((int) acc_device_host)) return 0; else if (acc_on_device ((int) acc_device_nvidia)) - { - unsigned int r; - asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r)); - return r; - } + return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); else __builtin_abort (); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c index b6ab7134c0b..a164f576bc3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N]) #pragma acc loop gang for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel num_gangs(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); gang (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c index ace2f499b1f..a97e046b687 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N]) #pragma acc loop gang worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); gang (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c index 2503e8d7d9d..b1e3e3a596a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) vector (int ary[N]) #pragma acc loop vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); vector (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index 80cd4620b09..81f1e0361c0 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N]) #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); worker (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c index 5e45fad176f..23dbc1ae401 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include +#include +#include #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N]) #pragma acc loop worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); worker (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c index b5cbc9014d6..886214843f1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c @@ -1,9 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include #include +#include #define NUM_WORKERS 16 #define NUM_VECTORS 32 @@ -11,15 +8,13 @@ #define HEIGHT 32 #define WORK_ID(I,N) \ - (acc_on_device (acc_device_nvidia) \ - ? ({unsigned __r; \ - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \ - __r; }) : (I % N)) + (acc_on_device (acc_device_not_host) \ + ? __builtin_goacc_parlevel_id (GOMP_DIM_WORKER) \ + : (I % N)) #define VEC_ID(I,N) \ - (acc_on_device (acc_device_nvidia) \ - ? ({unsigned __r; \ - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \ - __r; }) : (I % N)) + (acc_on_device (acc_device_not_host) \ + ? __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR) \ + : (I % N)) #pragma acc routine worker void __attribute__ ((noinline)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c index 8dcb956c59f..5130591dd81 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c @@ -1,11 +1,8 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - /* { dg-additional-options "-fopenacc-dim=32" } */ #include #include +#include static int check (const int *ary, int size, int gp, int wp, int vp) { @@ -79,13 +76,13 @@ static int __attribute__((noinline)) place () { int r = 0; - if (acc_on_device (acc_device_nvidia)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); r = (g << 16) | (w << 8) | v; } return r;